# Source code for chemspipy.search

# -*- coding: utf-8 -*-
"""
chemspipy.search
~~~~~~~~~~~~~~~~

A wrapper for asynchronous search requests.

:copyright: Copyright 2014 by Matt Swain.
:license: MIT, see LICENSE file for more details.
"""

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
import logging
import threading
import time

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

import six

from .errors import ChemSpiPyServerError, ChemSpiPyTimeoutError
from .utils import duration


# Module-level logger named after this module; used for the debug messages emitted while searching.
log = logging.getLogger(__name__)


class Results(object):
    """Container class to perform a search on a background thread and hold the results when ready.

    The search is started as soon as the instance is created. The ``exception``, ``message``, ``count`` and
    ``duration`` properties, as well as indexing, iteration and ``len()``, block until the background thread has
    finished. ``ready()``, ``success()``, ``status`` and ``repr()`` never block.
    """

    def __init__(self, cs, searchfunc, searchargs, raise_errors=False, max_requests=40):
        """Generally shouldn't be instantiated directly. See :meth:`~chemspipy.api.ChemSpider.search` instead.

        :param ChemSpider cs: ``ChemSpider`` session.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        :param bool raise_errors: If True, raise exceptions. If False, store on ``exception`` property.
        :param int max_requests: Maximum number of times to check if search results are ready.
        """
        log.debug('Results init')
        self._raise_errors = raise_errors
        self._max_requests = max_requests
        self._status = 'Created'
        self._exception = None
        self._message = None
        self._duration = None
        self._results = []
        # All state must be initialised before the thread starts, because _search writes to it immediately.
        self._searchthread = threading.Thread(name='SearchThread', target=self._search, args=(cs, searchfunc, searchargs))
        self._searchthread.start()

    def _search(self, cs, searchfunc, searchargs):
        """Perform the search and retrieve the results.

        Runs on the background thread. Calls ``searchfunc`` to obtain a transaction ID, then polls the status up to
        ``max_requests`` times, pausing 0.2 seconds between polls. Any exception is stored on ``_exception`` rather
        than propagated, so that it can be raised later in the calling thread.

        :param ChemSpider cs: ``ChemSpider`` session used to poll the status and fetch the results.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        """
        log.debug('Searching in background thread')
        try:
            rid = searchfunc(*searchargs)
            log.debug('Setting rid: %s', rid)
            for _ in six.moves.range(self._max_requests):
                log.debug('Checking status: %s', rid)
                status = cs.get_async_search_status_and_count(rid)
                self._status = status['status']
                self._message = status.get('message', '')
                self._duration = duration(status['elapsed'])
                log.debug(status)
                if status['status'] == 'ResultReady':
                    break
                elif status['status'] in {'Failed', 'Unknown', 'Suspended'}:
                    raise ChemSpiPyServerError('Search Failed: %s' % status.get('message', ''))
                elif status['status'] == 'TooManyRecords':
                    raise ChemSpiPyServerError('Too many results')
                # Still in progress: pause before the next poll. Terminal states above skip this delay.
                time.sleep(0.2)
            else:
                # The loop ran out of polls without ever reaching 'ResultReady'
                raise ChemSpiPyTimeoutError('Search took too long')
            log.debug('Search success!')
            if status['count'] > 0:
                self._results = cs.get_async_search_result(rid)
                log.debug('Results: %s', self._results)
            elif not self._message:
                self._message = 'No results found'
        except Exception as e:
            # Catch and store exception so we can raise it in the main thread
            log.debug('Search raised: %r', e)
            self._exception = e

    def ready(self):
        """Return True if the search finished.

        :rtype: bool
        """
        return not self._searchthread.is_alive()

    def success(self):
        """Return True if the search finished with no errors.

        :rtype: bool
        """
        return self.ready() and self._exception is None

    def wait(self):
        """Block until the search has completed and optionally raise any resulting exception.

        The stored exception is only raised if the instance was created with ``raise_errors=True``.
        """
        log.debug('Waiting for search to finish')
        self._searchthread.join()
        if self._exception is not None and self._raise_errors:
            raise self._exception

    @property
    def status(self):
        """Current status string returned by ChemSpider.

        This is the most recently polled value and does not block. It is 'Created' until the first poll returns.

        :returns: 'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady',
                  'Failed', 'TooManyRecords'
        :rtype: string
        """
        return self._status

    @property
    def exception(self):
        """Any Exception raised during the search. Blocks until the search is finished.

        The exception is returned, never raised, regardless of ``raise_errors``.
        """
        # Join the thread directly instead of calling wait(), which would raise when raise_errors=True.
        self._searchthread.join()
        return self._exception

    @property
    def message(self):
        """A contextual message about the search. Blocks until the search is finished.

        :rtype: string
        """
        self.wait()
        return self._message

    @property
    def count(self):
        """The number of search results. Blocks until the search is finished.

        :rtype: int
        """
        return len(self)

    @property
    def duration(self):
        """The time taken to perform the search. Blocks until the search is finished.

        :rtype: :py:class:`datetime.timedelta`
        """
        self.wait()
        return self._duration

    # @memoized_property
    # def sdf(self):
    #     """Get an SDF containing all the search results.
    #
    #     Warning: The SDF API endpoints don't seem to work properly.
    #
    #     :rtype: string
    #     :returns: SDF containing the search results.
    #     """
    #     self.wait()
    #     return self._cs.get_records_sdf(self._rid)

    def __getitem__(self, index):
        """Get a single result or a slice of results. Blocks until the search is finished.

        This means a Results instance can be treated like a normal Python list. For example::

            cs.search('glucose')[2]
            cs.search('glucose')[0:2]

        An IndexError will be raised if the index is greater than the total number of results.
        """
        self.wait()
        return self._results[index]

    def __len__(self):
        """Return the number of results. Blocks until the search is finished."""
        self.wait()
        return len(self._results)

    def __iter__(self):
        """Return an iterator over the results. Blocks until the search is finished."""
        self.wait()
        return iter(self._results)

    def __repr__(self):
        """Show the results if the search succeeded, otherwise the current status. Does not block."""
        if self.success():
            # Wrap in a 1-tuple so that a tuple of results is formatted as a single value
            return 'Results(%s)' % (self._results,)
        else:
            return 'Results(%s)' % (self.status,)


# TODO: Add a fetch method that gets the property values for every Compound in the list of results.
# Do this by running get_extended_mol_compound_info_list and then inserting the info into the Compounds.
# Do multiple requests in chunks of 250 Compounds if necessary.
# Compound will need a method to insert info from a JSON response.