
# Copyright (C) 2015 Chintalagiri Shashank
#
# This file is part of Tendril.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
The WWW Utils Module (:mod:`tendril.utils.www`)
===============================================

This module provides utilities to deal with the internet. All application
code should access the internet through this module, since this is where
support for proxies and caching is implemented.

.. rubric:: Main Provided Elements

.. autosummary::

    urlopen
    get_soup
    get_soup_requests
    cached_fetcher
    get_session
    get_soap_client

This module uses the following configuration values from
:mod:`tendril.utils.config`:

.. rubric:: Network Proxy Settings

- :data:`tendril.utils.config.NETWORK_PROXY_TYPE`
- :data:`tendril.utils.config.NETWORK_PROXY_IP`
- :data:`tendril.utils.config.NETWORK_PROXY_PORT`
- :data:`tendril.utils.config.NETWORK_PROXY_USER`
- :data:`tendril.utils.config.NETWORK_PROXY_PASS`

.. rubric:: Caching

- :data:`tendril.utils.config.ENABLE_REDIRECT_CACHING`
  Whether or not redirect caching should be used.

- :data:`tendril.utils.config.MAX_AGE_DEFAULT`
  The default max age to use with all www caching methods which
  support cache expiry.

Redirect caching speeds up network accesses by remembering ``301`` and ``302``
redirects, so that subsequent accesses can go directly to the final URL
without having to follow the redirect again. This redirect cache is stored as
a pickled object in the ``INSTANCE_CACHE`` folder. The effect of this caching
is far more apparent when a replicator cache is also used.
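
For illustration, with redirect caching enabled, repeated access behaves
something like this (the URLs here are hypothetical):

.. code-block:: python

    # First access: the server responds 301, the redirect is followed and
    # remembered in the redirect cache.
    page = urlopen('http://www.example.com/old-location')

    # Later accesses: the cached final URL is used directly, skipping the
    # redirect round-trip.
    page = urlopen('http://www.example.com/old-location')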

This module also provides the :class:`WWWCachedFetcher` class,
an instance of which is available in :data:`cached_fetcher`, which
is subsequently used by :func:`get_soup` and any application code
that wants cached results.
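
For illustration, typical cached access through this module might look
something like this (the URL here is hypothetical):

.. code-block:: python

    from tendril.utils import www

    # Parsed, cached soup of a page, via the urllib based fetcher.
    soup = www.get_soup('http://www.example.com/catalog/page1')

    # Raw content through the cached fetcher, with a custom max age.
    content = www.cached_fetcher.fetch(
        'http://www.example.com/catalog/page1', max_age=3600
    )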

Overall, caching should look something like this:

- WWWCachedFetcher provides short-term (~5 days)
  caching, aggressively caching whatever goes through it. This
  caching is NOT HTTP/1.1 compliant. In case HTTP/1.1 compliant
  caching is desired, use the requests based implementation
  instead or use an external caching proxy such as http-replicator.

- CachingRedirectHandler is something of a special case, handling
  redirects which otherwise would be incredibly expensive.
  Unfortunately, this layer is also the dumbest cacher, and
  does not expire anything, ever. To 'invalidate' something in
  this cache, the entire cache needs to be nuked. It may be
  worthwhile to consider moving this to redis instead.

.. todo::
    Consider replacing the use of the urllib/urllib2 backend with
    :mod:`requests` and simplifying this module. Currently, the
    cache provided with the ``requests`` implementation here
    is the major bottleneck.


.. rubric:: Class Inheritance

.. inheritance-diagram::
   tendril.utils.www

"""

from __future__ import print_function

from .config import NETWORK_PROXY_TYPE
from .config import NETWORK_PROXY_IP
from .config import NETWORK_PROXY_PORT
from .config import NETWORK_PROXY_USER
from .config import NETWORK_PROXY_PASS

from .config import ENABLE_REDIRECT_CACHING
from .config import INSTANCE_CACHE

from bs4 import BeautifulSoup

from six.moves.urllib.request import HTTPRedirectHandler
from six.moves.urllib.request import ProxyHandler
from six.moves.urllib.request import HTTPHandler, HTTPSHandler
from six.moves.urllib.request import build_opener
from six.moves.urllib.error import HTTPError, URLError

import os
import six
import time

import atexit
import tempfile
import codecs
from hashlib import md5

# import warnings
import logging
import requests
try:
    import cPickle as pickle
except ImportError:
    import pickle
from cachecontrol import CacheControlAdapter
from cachecontrol.caches import FileCache
from cachecontrol.heuristics import ExpiresAfter

from suds.client import Client
from suds.transport.http import HttpAuthenticated
from suds.transport.http import HttpTransport

from fs.opener import fsopendir
from fs.utils import copyfile
from tendril.utils.fsutils import temp_fs
from tendril.utils.config import MAX_AGE_DEFAULT
from tendril.utils import log
logger = log.get_logger(__name__, log.WARNING)

logging.getLogger('cachecontrol.controller').setLevel(logging.INFO)
logging.getLogger('requests.packages.urllib3.connectionpool').\
    setLevel(logging.INFO)

logging.getLogger('suds.xsd.query').setLevel(logging.INFO)
logging.getLogger('suds.xsd.sxbasic').setLevel(logging.INFO)
logging.getLogger('suds.xsd.schema').setLevel(logging.INFO)
logging.getLogger('suds.xsd.sxbase').setLevel(logging.INFO)
logging.getLogger('suds.metrics').setLevel(logging.INFO)
logging.getLogger('suds.wsdl').setLevel(logging.INFO)
logging.getLogger('suds.client').setLevel(logging.INFO)
logging.getLogger('suds.resolver').setLevel(logging.INFO)
logging.getLogger('suds.umx.typed').setLevel(logging.INFO)
logging.getLogger('suds.mx.literal').setLevel(logging.INFO)
logging.getLogger('suds.mx.core').setLevel(logging.INFO)
logging.getLogger('suds.transport.http').setLevel(logging.INFO)

WWW_CACHE = os.path.join(INSTANCE_CACHE, 'soupcache')
REQUESTS_CACHE = os.path.join(INSTANCE_CACHE, 'requestscache')
SOAP_CACHE = os.path.join(INSTANCE_CACHE, 'soapcache')

_internet_connected = False


def _get_http_proxy_url():
    """
    Constructs the proxy URL for HTTP proxies from relevant
    :mod:`tendril.utils.config` Config options, and returns the URL
    string in the form:

        ``http://[NP_USER:NP_PASS@]NP_IP[:NP_PORT]``

    where NP_xxx is obtained from the :mod:`tendril.utils.config`
    ConfigOption NETWORK_PROXY_xxx.
    """
    if NETWORK_PROXY_USER is None:
        proxyurl_http = 'http://' + NETWORK_PROXY_IP
    else:
        proxyurl_http = 'http://{0}:{1}@{2}'.format(NETWORK_PROXY_USER,
                                                    NETWORK_PROXY_PASS,
                                                    NETWORK_PROXY_IP)
    if NETWORK_PROXY_PORT:
        proxyurl_http += ':' + NETWORK_PROXY_PORT
    return proxyurl_http


def strencode(string):
    """
    This function converts unicode strings to ASCII, using python's
    :func:`str.encode`, replacing any unicode characters present in the
    string. Unicode characters which Tendril expects to see in web content
    related to it are specifically replaced first with ASCII characters or
    character sequences which reasonably reproduce the original meanings.

    :param string: unicode string to be encoded.
    :return: ASCII version of the string.

    .. warning::
        This function is marked for deprecation by the general (but gradual)
        move towards ``unicode`` across tendril.

    """
    nstring = ''
    for char in string:
        if char == u'\u00b5':
            char = 'u'
        if char == u'\u00B1':
            char = '+/-'
        nstring += char
    return nstring.encode('ascii', 'replace')


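# For illustration, strencode(u'10\u00b5F, \u00b11%') would return the ASCII
# string '10uF, +/-1%' (as bytes on python 3), with any other non-ASCII
# characters replaced by '?' via str.encode('ascii', 'replace').

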
REDIR_CACHE_FILE = os.path.join(INSTANCE_CACHE, 'redirects.p')

try:
    with open(REDIR_CACHE_FILE, "rb") as rdcf:
        redirect_cache = pickle.load(rdcf)
    logger.info('Loaded Redirect Cache from file')
except IOError:
    redirect_cache = {}
    logger.info('Created new Redirect Cache')


def dump_redirect_cache():
    """
    Called during python interpreter shutdown, this function dumps the
    redirect cache to the file system.
    """
    if DUMP_REDIR_CACHE_ON_EXIT:
        with open(REDIR_CACHE_FILE, 'wb') as f:
            pickle.dump(redirect_cache, f, protocol=2)
        logger.info('Dumping Redirect Cache to file')


DUMP_REDIR_CACHE_ON_EXIT = True

if ENABLE_REDIRECT_CACHING is True:
    atexit.register(dump_redirect_cache)


class CachingRedirectHandler(HTTPRedirectHandler):
    """
    This handler modifies the behavior of
    :class:`urllib2.HTTPRedirectHandler`, so that the HTTP ``301`` or
    ``302`` status is included in the ``result``.

    When this handler is attached to a ``urllib2`` opener, if the opening of
    the URL resulted in a redirect via HTTP ``301`` or ``302``, this is
    reported along with the result. This information can be used by the
    opener to maintain a redirect cache.
    """

    def http_error_301(self, req, fp, code, msg, headers):
        """
        Wraps the :func:`urllib2.HTTPRedirectHandler.http_error_301`
        handler, setting the ``result.status`` to ``301`` in case a http
        ``301`` error is encountered.
        """
        result = HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        result.status = code
        return result

    def http_error_302(self, req, fp, code, msg, headers):
        """
        Wraps the :func:`urllib2.HTTPRedirectHandler.http_error_302`
        handler, setting the ``result.status`` to ``302`` in case a http
        ``302`` error is encountered.
        """
        result = HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        result.status = code
        return result


def get_actual_url(url):
    # warnings.warn("get_actual_url() is a part of Redirect caching and is "
    #               "deprecated.", DeprecationWarning)
    if not ENABLE_REDIRECT_CACHING:
        return url
    else:
        while url in redirect_cache.keys():
            url = redirect_cache[url]
        return url


def _test_opener(openr):
    """
    Tests an opener obtained using :func:`urllib2.build_opener` by
    attempting to open Google's homepage. This is used to test internet
    connectivity.
    """
    try:
        openr.open('http://www.google.com', timeout=5)
        return True
    except URLError:
        return False


def _create_opener():
    """
    Creates an opener for the internet.

    It also attaches the :class:`CachingRedirectHandler` to the opener and
    sets its User-agent to ``Mozilla/5.0``.

    If the Network Proxy settings are set and recognized, it creates the
    opener and attaches the proxy_handler to it. The opener is tested and
    returned if the test passes.

    If the test fails, an opener without the proxy settings is created and
    returned instead.
    """
    use_proxy = False
    proxy_handler = None

    if NETWORK_PROXY_TYPE == 'http':
        use_proxy = True
        proxyurl = _get_http_proxy_url()
        proxy_handler = ProxyHandler({'http': proxyurl,
                                      'https': proxyurl})
    if use_proxy:
        openr = build_opener(HTTPHandler(), HTTPSHandler(),
                             proxy_handler, CachingRedirectHandler)
    else:
        openr = build_opener(HTTPHandler(), HTTPSHandler(),
                             CachingRedirectHandler)
    openr.addheaders = [('User-agent', 'Mozilla/5.0')]
    global _internet_connected
    _internet_connected = _test_opener(openr)
    return openr


opener = _create_opener()


def urlopen(url):
    """
    Opens a url specified by the ``url`` parameter.

    This function handles redirect caching, if enabled.
    """
    # warnings.warn("urlopen() is a part of the urllib2 based www "
    #               "implementation and is deprecated.", DeprecationWarning)
    url = get_actual_url(url)
    try:
        page = opener.open(url)
        try:
            if ENABLE_REDIRECT_CACHING is True and page.status == 301:
                logger.debug('Detected New Permanent Redirect:\n' +
                             url + '\n' + page.url)
                redirect_cache[url] = page.url
        except AttributeError:
            pass
        return page
    except HTTPError as e:
        logger.error("HTTP Error : {0} {1}".format(e.code, url))
        raise
    except URLError as e:
        logger.error("URL Error : {0} {1}".format(e.errno, e.reason))
        raise


class CacheBase(object):
    def __init__(self, cache_dir=WWW_CACHE):
        """
        This class implements a simple filesystem cache which can be used
        to create and retrieve cached responses to various requests made
        to internet resources.

        The cache is stored in the folder defined by ``cache_dir``, with a
        filename constructed by the :func:`_get_filepath` function.

        If the cache's :func:`_accessor` function is called with the
        ``getcpath`` attribute set to True, only the path to a (valid)
        file in the cache filesystem is returned, and opening and reading
        the file is left to the caller. This hook is provided to help deal
        with file encoding on a somewhat case-by-case basis, until the
        overall encoding problems can be ironed out.
        """
        self.cache_fs = fsopendir(cache_dir)

    def _get_filepath(self, *args, **kwargs):
        """
        Given the parameters necessary to obtain the resource in normal
        circumstances, return a hash which is usable as the filename for
        the resource in the cache.

        The filename must be unique for every resource, and filename
        generation must be deterministic and repeatable.

        Must be implemented in every subclass.
        """
        raise NotImplementedError

    def _get_fresh_content(self, *args, **kwargs):
        """
        Given the parameters necessary to obtain the resource in normal
        circumstances, obtain the content of the resource from the source.

        Must be implemented in every subclass.
        """
        raise NotImplementedError

    @staticmethod
    def _serialize(response):
        """
        Given a response (as returned by :func:`_get_fresh_content`),
        convert it into a string which can be stored in a file. Use this
        function to serialize structured responses when needed.

        Unless overridden by the subclass, this function simply returns
        the response unaltered.

        The actions of this function should be reversed by
        :func:`_deserialize`.
        """
        return response

    @staticmethod
    def _deserialize(filecontent):
        """
        Given the contents of a cache file, reconstruct the original
        response in the original format (as returned by
        :func:`_get_fresh_content`). Use this function to deserialize
        cache files for structured responses when needed.

        Unless overridden by the subclass, this function simply returns
        the file content unaltered.

        The actions of this function should be reversed by
        :func:`_serialize`.
        """
        return filecontent

    def _cached_exists(self, filepath):
        return self.cache_fs.exists(filepath)

    def _is_cache_fresh(self, filepath, max_age):
        """
        Given the path to a file in the cache and the maximum age for the
        cache content to be considered fresh, returns (boolean) whether or
        not the cache contains a fresh copy of the response.

        :param filepath: Path to the filename in the cache corresponding
                         to the request, as returned by
                         :func:`_get_filepath`.
        :param max_age: Maximum age of fresh content, in seconds.
        """
        if self._cached_exists(filepath):
            tn = int(time.time())
            tc = int(time.mktime(
                self.cache_fs.getinfo(filepath)['modified_time'].timetuple()
            ))
            if tn - tc < max_age:
                return True
        return False

    def _accessor(self, max_age, getcpath=False, *args, **kwargs):
        """
        The primary accessor for the cache instance. Each subclass should
        provide a function which behaves similarly to that of the original,
        un-cached version of the resource getter. That function should
        adapt the parameters provided to it into the form needed for this
        one, and let this function maintain the cached responses and
        handle retrieval of the response.

        If the module's :data:`_internet_connected` is set to False, the
        cached value is returned regardless.
        """
        filepath = self._get_filepath(*args, **kwargs)

        send_cached = False
        if not _internet_connected and self._cached_exists(filepath):
            send_cached = True
        if self._is_cache_fresh(filepath, max_age):
            logger.debug("Cache HIT")
            send_cached = True

        if send_cached is True:
            if getcpath is False:
                try:
                    filecontent = self.cache_fs.open(filepath, 'rb').read()
                    return self._deserialize(filecontent)
                except UnicodeDecodeError:
                    # TODO This requires the cache_fs to be a local
                    # filesystem. This may not be very nice. A way
                    # to hook codecs up to pyfilesystems would be better.
                    with codecs.open(self.cache_fs.getsyspath(filepath),
                                     encoding='utf-8') as f:
                        filecontent = f.read()
                    return self._deserialize(filecontent)
            else:
                return self.cache_fs.getsyspath(filepath)

        logger.debug("Cache MISS")
        data = self._get_fresh_content(*args, **kwargs)
        try:
            sdata = self._serialize(data)
            fd, temppath = tempfile.mkstemp()
            fp = os.fdopen(fd, 'wb')
            fp.write(sdata)
            fp.close()
            logger.debug("Creating new cache entry")
            # This can be pretty expensive if the move is across a real
            # filesystem boundary. We should instead use a temporary file
            # in the cache_fs itself.
            try:
                copyfile(temp_fs, temp_fs.unsyspath(temppath),
                         self.cache_fs, filepath)
            except:
                logger.warning("Unable to write cache file "
                               "{0}".format(filepath))
        except:
            raise

        if getcpath is False:
            return data
        else:
            return self.cache_fs.getsyspath(filepath)


class WWWCachedFetcher(CacheBase):
    """
    Subclass of :class:`CacheBase` to handle caching of url ``fetch``
    responses.
    """

    def _get_filepath(self, url):
        """
        Return a filename constructed from the md5 sum of the url
        (encoded as ``utf-8`` if necessary).

        :param url: url of the resource to be cached
        :return: name of the cache file
        """
        # Use MD5 hash of the URL as the filename
        if six.PY3 or (six.PY2 and isinstance(url, unicode)):
            filepath = md5(url.encode('utf-8')).hexdigest()
        else:
            filepath = md5(url).hexdigest()
        return filepath

    def _get_fresh_content(self, url):
        """
        Retrieve a fresh copy of the resource from the source.

        :param url: url of the resource
        :return: contents of the resource
        """
        logger.debug('Getting url content : {0}'.format(url))
        return urlopen(url).read()

    def fetch(self, url, max_age=MAX_AGE_DEFAULT, getcpath=False):
        """
        Return the content located at the ``url`` provided. If a fresh
        cached version exists, it is returned. If not, a fresh one is
        obtained, stored in the cache, and returned.

        :param url: url of the resource to retrieve.
        :param max_age: maximum age in seconds.
        :param getcpath: (default False) if True, returns only the path to
                         the cache file.
        """
        # warnings.warn(
        #     "WWWCachedFetcher() is a part of the urllib2 based "
        #     "www implementation and is deprecated.",
        #     DeprecationWarning
        # )
        return self._accessor(max_age, getcpath, url)


#: The module's :class:`WWWCachedFetcher` instance which should be
#: used whenever cached results are desired. The cache is stored in
#: the directory defined by :data:`WWW_CACHE`.
cached_fetcher = WWWCachedFetcher(cache_dir=WWW_CACHE)

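# Illustrative usage of the cached fetcher (hypothetical URL):
#
#   content = cached_fetcher.fetch('http://www.example.com/part/datasheet',
#                                  max_age=86400)
#   # or, to get only the path to the cached file and handle decoding
#   # yourself:
#   cpath = cached_fetcher.fetch('http://www.example.com/part/datasheet',
#                                max_age=86400, getcpath=True)
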
def get_soup(url):
    """
    Gets a :mod:`bs4` parsed soup for the ``url`` specified by the
    parameter. The :mod:`lxml` parser is used.

    This function returns a soup constructed of the cached page if one
    exists and is valid, or obtains one and dumps it into the cache if it
    doesn't.
    """
    page = cached_fetcher.fetch(url)
    if page is None:
        return None
    soup = BeautifulSoup(page, 'lxml')
    return soup


def _get_proxy_dict():
    """
    Construct a dict containing the proxy settings in a format compatible
    with the :class:`requests.Session`. This function is used to construct
    the :data:`_proxy_dict`.
    """
    if NETWORK_PROXY_TYPE == 'http':
        proxyurl = _get_http_proxy_url()
        return {'http': proxyurl,
                'https': proxyurl}
    else:
        return None


#: A dict containing the proxy settings in a format compatible
#: with the :class:`requests.Session`.
_proxy_dict = _get_proxy_dict()

#: The module's :class:`cachecontrol.caches.FileCache` instance which
#: should be used whenever cached :mod:`requests` responses are desired.
#: The cache is stored in the directory defined by :data:`REQUESTS_CACHE`.
requests_cache = FileCache(REQUESTS_CACHE)


def _get_requests_cache_adapter(heuristic):
    """
    Given a heuristic, constructs and returns a
    :class:`cachecontrol.CacheControlAdapter` attached to the module's
    :data:`requests_cache`.
    """
    return CacheControlAdapter(
        cache=requests_cache,
        heuristic=heuristic,
        cache_etags=False
    )


def get_session(target='http://', heuristic=None):
    """
    Gets a pre-configured :mod:`requests` session.

    This function configures the following behavior into the session:

    - Proxy settings are added to the session.
    - It is configured to use the module's :data:`requests_cache`.
    - Permanent redirect caching is handled by :mod:`CacheControl`.
    - Temporary redirect caching is not supported.

    Each module / class instance which uses this should subsequently
    maintain its own session with whatever modifications it requires
    within a scope which makes sense for the use case (and probably close
    it when it's done).

    The session returned from here uses the module's REQUESTS_CACHE with
    a single - though configurable - heuristic. If additional caches or
    heuristics need to be added, it's the caller's problem to set them up.

    .. note::
        The caching here seems to be pretty bad, particularly for digikey
        passive component search. I don't know why.

    :param target: Defaults to ``'http://'``. String containing a prefix
                   for the targets that should be cached. Use this to set
                   up site-specific heuristics.
    :param heuristic: The heuristic to use for the cache adapter.
    :type heuristic: :class:`cachecontrol.heuristics.BaseHeuristic`
    :rtype: :class:`requests.Session`

    """
    s = requests.session()
    if _proxy_dict is not None:
        s.proxies.update(_proxy_dict)
    if heuristic is None:
        heuristic = ExpiresAfter(seconds=MAX_AGE_DEFAULT)
    s.mount(target, _get_requests_cache_adapter(heuristic))
    return s


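# Illustrative sketch of a site-specific cached session (hypothetical target
# prefix; the heuristic here is a plain cachecontrol ExpiresAfter):
#
#   session = get_session(target='http://www.example.com/',
#                         heuristic=ExpiresAfter(days=1))
#   r = session.get('http://www.example.com/catalog/resistors')
#   session.close()

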
def get_soup_requests(url, session=None):
    """
    Gets a :mod:`bs4` parsed soup for the ``url`` specified by the
    parameter. The :mod:`lxml` parser is used.

    If a ``session`` (previously created from :func:`get_session`) is
    provided, this session is used and left open. If it is not, a new
    session is created for the request and closed before the soup is
    returned.

    Using a caller-defined session allows re-use of a single session
    across multiple requests, therefore taking advantage of HTTP
    keep-alive to speed things up. It also provides a way for the caller
    to modify the cache heuristic, if needed.

    Any exceptions encountered will be raised, and are left for the caller
    to handle. The assumption is that a HTTP or URL error is going to make
    the soup unusable anyway.
    """
    if session is None:
        session = get_session()
        _close_after = True
    else:
        _close_after = False
    r = session.get(url)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'lxml', from_encoding=r.encoding)
    if _close_after is True:
        session.close()
    return soup


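# Illustrative sketch of session re-use across requests (hypothetical URLs):
#
#   session = get_session()
#   soup_a = get_soup_requests('http://www.example.com/page/a',
#                              session=session)
#   soup_b = get_soup_requests('http://www.example.com/page/b',
#                              session=session)
#   session.close()

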
class ThrottledTransport(HttpAuthenticated):
    def __init__(self, **kwargs):
        """
        Provides a throttled HTTP transport for respecting rate limits
        on rate-restricted SOAP APIs using :mod:`suds`.

        This class is a :class:`suds.transport.Transport` subclass based
        on the default ``HttpAuthenticated`` transport.

        :param minimum_spacing: Minimum number of seconds between
                                requests. Default 0.
        :type minimum_spacing: int

        .. todo::
            Use redis or so to coordinate between threads to allow a
            maximum requests per hour/day limit.

        """
        self._minimum_spacing = kwargs.pop('minimum_spacing', 0)
        self._last_called = int(time.time())
        HttpTransport.__init__(self, **kwargs)

    def send(self, request):
        """
        Send a request and return the response. If the minimum number of
        seconds between requests has not yet elapsed, the function sleeps
        for the remaining period and then passes the request along.
        """
        now = int(time.time())
        logger.debug('Getting SOAP response')
        tsincelast = now - self._last_called
        if tsincelast < self._minimum_spacing:
            tleft = self._minimum_spacing - tsincelast
            logger.info("Throttling SOAP client for {0}".format(tleft))
            time.sleep(tleft)
        self._last_called = now
        return HttpAuthenticated.send(self, request)


class CachedTransport(CacheBase, HttpAuthenticated):
    def __init__(self, **kwargs):
        """
        Provides a cached HTTP transport with request-based caching for
        SOAP APIs using :mod:`suds`.

        This is a subclass of :class:`CacheBase` and the default
        ``HttpAuthenticated`` transport.

        :param cache_dir: folder where the cache is located.
        :param max_age: the maximum age in seconds after which a response
                        is considered stale.
        """
        cache_dir = kwargs.pop('cache_dir')
        self._max_age = kwargs.pop('max_age', MAX_AGE_DEFAULT)
        CacheBase.__init__(self, cache_dir=cache_dir)

    def _get_filepath(self, request):
        """
        Return a filename constructed from the md5 hash of a combination
        of the request URL and message content (encoded as ``utf-8`` if
        necessary).

        :param request: the request object for which a cache filename is
                        needed.
        :return: name of the cache file.
        """
        keystring = request.url + request.message
        if six.PY3 or (six.PY2 and isinstance(keystring, unicode)):
            filepath = md5(keystring.encode('utf-8')).hexdigest()
        else:
            filepath = md5(keystring).hexdigest()
        return filepath

    def _get_fresh_content(self, request):
        """
        Retrieve a fresh copy of the resource from the source.

        :param request: the request object for which the response is
                        needed.
        :return: the response to the request
        """
        response = HttpAuthenticated.send(self, request)
        return response

    @staticmethod
    def _serialize(response):
        """
        Serializes the suds response object using :mod:`cPickle`.

        If the response has an error status (anything other than 200),
        raises ``ValueError``. This is used to avoid caching errored
        responses.
        """
        if response.code != 200:
            logger.debug("Bad Status {0}".format(response.code))
            raise ValueError
        return pickle.dumps(response)

    @staticmethod
    def _deserialize(filecontent):
        """
        De-serializes the cache content into a suds response object using
        :mod:`cPickle`.
        """
        return pickle.loads(filecontent)

    def send(self, request):
        """
        Send a request and return the response. If a fresh response to the
        request is available in the cache, that is returned instead. If it
        isn't, a fresh response is obtained, cached, and returned.
        """
        response = self._accessor(self._max_age, False, request)
        return response


class CachedThrottledTransport(ThrottledTransport, CachedTransport):
    def __init__(self, **kwargs):
        """
        A cached HTTP transport with both throttling and request-based
        caching for SOAP APIs using :mod:`suds`.

        This is a subclass of :class:`CachedTransport` and
        :class:`ThrottledTransport`.

        Keyword arguments not handled here are passed on via
        :class:`ThrottledTransport` to :class:`HttpTransport`.

        :param cache_dir: folder where the cache is located.
        :param max_age: the maximum age in seconds after which a response
                        is considered stale.
        :param minimum_spacing: Minimum number of seconds between
                                requests. Default 0.
        """
        cache_dir = kwargs.pop('cache_dir')
        max_age = kwargs.pop('max_age', MAX_AGE_DEFAULT)
        CachedTransport.__init__(self, cache_dir=cache_dir, max_age=max_age)
        ThrottledTransport.__init__(self, **kwargs)

    def _get_fresh_content(self, request):
        """
        Retrieve a fresh copy of the resource from the source via
        :func:`ThrottledTransport.send`.

        :param request: the request object for which the response is
                        needed.
        :return: the response to the request
        """
        response = ThrottledTransport.send(self, request)
        return response

    def send(self, request):
        """
        Send a request and return the response, using
        :func:`CachedTransport.send`.
        """
        return CachedTransport.send(self, request)


def get_soap_client(wsdl, cache_requests=True,
                    max_age=MAX_AGE_DEFAULT, minimum_spacing=0):
    """
    Creates and returns a suds/SOAP client instance bound to the provided
    ``WSDL``.

    If ``cache_requests`` is True, then the client is configured to use a
    :class:`CachedThrottledTransport`. The transport is constructed to use
    :data:`SOAP_CACHE` as the cache folder, along with the ``max_age`` and
    ``minimum_spacing`` parameters if provided.

    If ``cache_requests`` is ``False``, the client uses the default
    :class:`suds.transport.http.HttpAuthenticated` transport.
    """
    if cache_requests is True:
        if _proxy_dict is None:
            soap_transport = CachedThrottledTransport(
                cache_dir=SOAP_CACHE, max_age=max_age,
                minimum_spacing=minimum_spacing,
            )
        else:
            soap_transport = CachedThrottledTransport(
                cache_dir=SOAP_CACHE, max_age=max_age,
                minimum_spacing=minimum_spacing,
                proxy=_proxy_dict,
            )
    else:
        if _proxy_dict is None:
            soap_transport = HttpAuthenticated()
        else:
            soap_transport = HttpAuthenticated(proxy=_proxy_dict)
    return Client(wsdl, transport=soap_transport)

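
# Illustrative sketch (hypothetical WSDL URL and operation name):
#
#   client = get_soap_client('http://api.example.com/service?wsdl',
#                            max_age=600, minimum_spacing=2)
#   result = client.service.SomeOperation('some_argument')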