Source code for chemspipy.search

# -*- coding: utf-8 -*-
"""
chemspipy.search
~~~~~~~~~~~~~~~~

A wrapper for asynchronous search requests.

:copyright: Copyright 2014 by Matt Swain.
:license: MIT, see LICENSE file for more details.
"""

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
import logging
import threading
import time

try:
    from lxml import etree
except ImportError:
    try:
        import xml.etree.cElementTree as etree
    except ImportError:
        import xml.etree.ElementTree as etree

import six

from .errors import ChemSpiPyServerError, ChemSpiPyTimeoutError


log = logging.getLogger(__name__)


class Results(object):
    """Container class to perform a search on a background thread and hold the results when ready.

    The search thread is started by the constructor. Most accessors block until that thread has finished;
    ``status``, ``ready()`` and ``success()`` return immediately.
    """

    def __init__(self, cs, searchfunc, searchargs, propagate=True, max_requests=40):
        """Generally shouldn't be instantiated directly. See :meth:`~chemspipy.api.ChemSpider.search` instead.

        :param ChemSpider cs: ``ChemSpider`` session.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        :param bool propagate: If True, raise exceptions. If False, store on ``exception`` property.
        :param int max_requests: Maximum number of times to check if search results are ready.
        """
        log.debug('Results init')
        self._propagate = propagate
        self._max_requests = max_requests
        self._status = 'Created'
        self._exception = None
        self._message = None
        self._duration = None
        self._results = []
        self._searchthread = threading.Thread(
            name='SearchThread',
            target=self._search,
            args=(cs, searchfunc, searchargs),
        )
        # The search begins immediately; callers synchronise through wait() or the blocking accessors.
        self._searchthread.start()

    def _search(self, cs, searchfunc, searchargs):
        """Perform the search and retrieve the results.

        Runs on the background thread. Any exception is stored on ``self._exception`` instead of being raised,
        so that it can be re-raised later in the calling thread by :meth:`wait`.

        :param ChemSpider cs: ``ChemSpider`` session used to poll the status and fetch the results.
        :param function searchfunc: Search function that returns a transaction ID.
        :param tuple searchargs: Arguments for the search function.
        """
        log.debug('Searching in background thread')
        try:
            rid = searchfunc(*searchargs)
            log.debug('Setting rid: %s', rid)
            for _ in six.moves.range(self._max_requests):
                log.debug('Checking status: %s', rid)
                status = cs.get_async_search_status_and_count(rid)
                self._status = status['status']
                log.debug(status)
                if status['status'] == 'ResultReady':
                    break
                elif status['status'] in {'Failed', 'Unknown', 'Suspended'}:
                    # TODO: Does status['message'] contain an error message to use in the ChemSpiPyServerError?
                    raise ChemSpiPyServerError('Search Failed')
                elif status['status'] == 'TooManyRecords':
                    raise ChemSpiPyServerError('Too many results')
                # Only pause when another poll is actually needed; a finished or failed search returns at once.
                time.sleep(0.2)
            else:
                # The loop ran out of polls without ever seeing 'ResultReady'.
                raise ChemSpiPyTimeoutError('Search took too long')
            log.debug('Search success!')
            self._message = status.get('message')
            self._duration = status['elapsed']
            if status['count'] > 0:
                self._results = cs.get_async_search_result(rid)
                log.debug('Results: %s', self._results)
            elif not self._message:
                self._message = 'No results found'
        except Exception as e:
            # Catch and store exception so we can raise it in the main thread
            self._exception = e

    def ready(self):
        """Return True if the search finished."""
        return not self._searchthread.is_alive()

    def success(self):
        """Return True if the search finished with no errors."""
        return self.ready() and self._exception is None

    def wait(self):
        """Block until the search has completed and optionally raise any resulting exception.

        :raises Exception: The exception stored by the search thread, if there is one and ``propagate`` is True.
        """
        log.debug('Waiting for search to finish')
        self._searchthread.join()
        if self._exception is not None and self._propagate:
            raise self._exception

    @property
    def status(self):
        """Current status string returned by ChemSpider.

        One of: 'Unknown', 'Created', 'Scheduled', 'Processing', 'Suspended', 'PartialResultReady', 'ResultReady',
        'Failed', 'TooManyRecords'
        """
        return self._status

    @property
    def exception(self):
        """Any Exception raised during the search. Blocks until the search is finished.

        The exception is always returned, never raised, regardless of the ``propagate`` setting.
        """
        # Join the thread directly instead of calling wait(): wait() re-raises the stored exception when
        # propagate=True, which would make the exception impossible to inspect through this property.
        self._searchthread.join()
        return self._exception

    @property
    def message(self):
        """A contextual message about the search. Blocks until the search is finished."""
        self.wait()
        return self._message

    @property
    def count(self):
        """The number of search results. Blocks until the search is finished."""
        return len(self)

    @property
    def duration(self):
        """The time taken to perform the search. Blocks until the search is finished."""
        self.wait()
        return self._duration

    def __getitem__(self, index):
        """Get a single result or a slice of results. Blocks until the search is finished.

        This means a Results instance can be treated like a normal Python list. For example::

            cs.search('glucose')[2]
            cs.search('glucose')[0:2]

        An IndexError will be raised if the index is greater than the total number of results.
        """
        self.wait()
        return self._results[index]

    def __len__(self):
        """Return the number of results. Blocks until the search is finished."""
        self.wait()
        return len(self._results)

    def __iter__(self):
        """Return an iterator over the results. Blocks until the search is finished."""
        self.wait()
        return iter(self._results)

    def __repr__(self):
        """Show the results if the search succeeded, otherwise the current status. Never blocks."""
        if self.success():
            return 'Results(%r)' % self._results
        else:
            return 'Results(%r)' % self.status