Source code for chemspipy.search

# -*- coding: utf-8 -*-
"""
chemspipy.search
~~~~~~~~~~~~~~~~

A wrapper for asynchronous search requests.

:copyright: Copyright 2014 by Matt Swain.
:license: MIT, see LICENSE file for more details.
"""

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
import logging
import threading
import time

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

import six

from .errors import ChemSpiPyServerError, ChemSpiPyTimeoutError
from .utils import duration


# Module-level logger; the Results class below reports its progress through it at debug level.
log = logging.getLogger(__name__)


class Results(object):
    """Container class to perform a search on a background thread and hold the results when ready.

    The search is started from ``__init__`` on a thread named ``SearchThread``. Most accessors
    (``message``, ``duration``, ``count``, indexing, iteration, ``len``) block until that thread
    has finished, so an instance can be used much like a list of results.
    """

    def __init__(self, cs, searchfunc, searchargs, raise_errors=False, max_requests=40):
        """Generally shouldn't be instantiated directly. See :meth:`~chemspipy.api.ChemSpider.search` instead.

        :param ChemSpider cs: ``ChemSpider`` session.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        :param bool raise_errors: If True, raise exceptions. If False, store on ``exception`` property.
        :param int max_requests: Maximum number of times to check if search results are ready.
        """
        log.debug('Results init')
        self._raise_errors = raise_errors
        self._max_requests = max_requests
        # 'Created' is the local initial state, used until the first status response arrives.
        self._status = 'Created'
        self._exception = None
        self._message = None
        self._duration = None
        self._results = []
        self._searchthread = threading.Thread(
            name='SearchThread',
            target=self._search,
            args=(cs, searchfunc, searchargs),
        )
        self._searchthread.start()

    def _search(self, cs, searchfunc, searchargs):
        """Perform the search and retrieve the results.

        Runs on the background thread. Polls the search status at most ``max_requests`` times,
        pausing 0.2 seconds between polls. Any exception (including server failures and the
        timeout raised when the polls are exhausted) is stored on ``self._exception`` rather
        than propagated, so that it can be raised later from the calling thread.

        :param ChemSpider cs: ``ChemSpider`` session used to poll status and fetch results.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        """
        log.debug('Searching in background thread')
        try:
            rid = searchfunc(*searchargs)
            log.debug('Setting rid: %s', rid)
            for _ in six.moves.range(self._max_requests):
                log.debug('Checking status: %s', rid)
                status = cs.get_async_search_status_and_count(rid)
                self._status = status['status']
                self._message = status.get('message', '')
                self._duration = duration(status['elapsed'])
                log.debug(status)
                if status['status'] == 'ResultReady':
                    break
                elif status['status'] in {'Failed', 'Unknown', 'Suspended'}:
                    raise ChemSpiPyServerError('Search Failed: %s' % status.get('message', ''))
                elif status['status'] == 'TooManyRecords':
                    raise ChemSpiPyServerError('Too many results')
                # Only pause when another poll will follow; a terminal status is acted on immediately.
                time.sleep(0.2)
            else:
                # The loop ran out of polls without ever seeing 'ResultReady'.
                raise ChemSpiPyTimeoutError('Search took too long')
            log.debug('Search success!')
            if status['count'] > 0:
                self._results = cs.get_async_search_result(rid)
                log.debug('Results: %s', self._results)
            elif not self._message:
                self._message = 'No results found'
        except Exception as e:
            # Catch and store exception so we can raise it in the main thread
            self._exception = e

    def ready(self):
        """Return True if the search finished.

        :rtype: bool
        """
        return not self._searchthread.is_alive()

    def success(self):
        """Return True if the search finished with no errors.

        :rtype: bool
        """
        return self.ready() and self._exception is None

    def wait(self):
        """Block until the search has completed and optionally raise any resulting exception.

        The stored exception is re-raised only when the instance was created with
        ``raise_errors=True``.
        """
        log.debug('Waiting for search to finish')
        self._searchthread.join()
        if self._exception is not None and self._raise_errors:
            raise self._exception

    @property
    def status(self):
        """Current status string returned by ChemSpider.

        Does not block; the value is 'Created' until the first status response has been received.

        :returns: 'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady'
        :rtype: string
        """
        return self._status

    @property
    def exception(self):
        """Any Exception raised during the search. Blocks until the search is finished.

        The exception is returned, never raised, regardless of ``raise_errors``. ``None`` is
        returned if the search completed without error.
        """
        # Join the thread directly rather than calling wait(): with raise_errors=True, wait()
        # re-raises the stored exception, which would make it impossible to read it here.
        self._searchthread.join()
        return self._exception

    @property
    def message(self):
        """A contextual message about the search. Blocks until the search is finished.

        :rtype: string
        """
        self.wait()
        return self._message

    @property
    def count(self):
        """The number of search results. Blocks until the search is finished.

        :rtype: int
        """
        return len(self)

    @property
    def duration(self):
        """The time taken to perform the search. Blocks until the search is finished.

        :rtype: :py:class:`datetime.timedelta`
        """
        self.wait()
        return self._duration

    # @memoized_property
    # def sdf(self):
    #     """Get an SDF containing all the search results.
    #
    #     Warning: The SDF API endpoints don't seem to work properly.
    #
    #     :rtype: string
    #     :returns: SDF containing the search results.
    #     """
    #     self.wait()
    #     return self._cs.get_records_sdf(self._rid)

    def __getitem__(self, index):
        """Get a single result or a slice of results. Blocks until the search is finished.

        This means a Results instance can be treated like a normal Python list. For example::

            cs.search('glucose')[2]
            cs.search('glucose')[0:2]

        An IndexError will be raised if the index is greater than the total number of results.
        """
        self.wait()
        return self._results[index]

    def __len__(self):
        """Return the number of results. Blocks until the search is finished."""
        self.wait()
        return len(self._results)

    def __iter__(self):
        """Iterate over the results. Blocks until the search is finished."""
        self.wait()
        return iter(self._results)

    def __repr__(self):
        """Show the results if the search succeeded, otherwise the current status. Does not block."""
        if self.success():
            return 'Results(%s)' % self._results
        else:
            return 'Results(%s)' % self.status
# TODO: fetch method that gets the property values for every Compound in the list of results.
# Do this by running get_extended_mol_compound_info_list and then inserting info into Compounds
# Do multiple requests in chunks of 250 Compounds if necessary
# Compound will need a method to insert info from JSON response