Package ndg :: Package xacml :: Package utils :: Module urlfetcher
[hide private]

Source Code for Module ndg.xacml.utils.urlfetcher

  1  """NDG XACML data fetch by URL utility 
  2   
  3  NERC DataGrid 
  4  """ 
  5  __author__ = "R B Wilkinson" 
  6  __date__ = "03/11/11" 
  7  __copyright__ = "(C) 2011 Science and Technology Facilities Council" 
  8  __contact__ = "Philip.Kershaw@stfc.ac.uk" 
  9  __license__ = "BSD - see LICENSE file in top-level directory" 
 10  __contact__ = "Philip.Kershaw@stfc.ac.uk" 
 11  __revision__ = "$Id$" 
 12  import logging 
 13  import os 
 14  import urllib2 
 15  import urlparse 
 16   
 17  log = logging.getLogger(__name__) 
 18   
def fetch_stream_from_url(url, debug=False):
    """Opens a URL and hands back the still-open response stream.

    The stream is neither read nor closed here; that is left to the caller.
    Any exception raised by open_url propagates unchanged.

    @param url: URL to attempt to open
    @type url: str
    @param debug: debug flag for urllib2
    @type debug: bool
    @return: response stream obtained from open_url
    @rtype: file derived type
    """
    return open_url(url, debug)
30
def fetch_data_from_url(url, debug=False):
    """Returns the complete body retrieved from a URL.

    The response is opened with open_url, read in full and then closed.
    Any exception raised by open_url or while reading propagates to the
    caller.

    @param url: URL to attempt to open
    @type url: str
    @param debug: debug flag for urllib2
    @type debug: bool
    @return: data read from the response
    @rtype: str
    """
    response = open_url(url, debug)
    try:
        return response.read()
    finally:
        # Always release the connection, even if the read fails part-way
        # through; previously a failing read() skipped the close() call.
        response.close()
44
def open_url(url, debug=False):
    """Attempts to open a connection to a specified URL.

    @param url: URL to attempt to open
    @type url: str
    @param debug: debug flag for urllib2; when true the HTTP handler is
    created with debuglevel 1, otherwise 0
    @type debug: bool
    @return: response object returned by the URL opener
    @rtype: file-like
    @raise Exception: if the opener raises urllib2.HTTPError; the message is
    the string form of the original error
    """
    debuglevel = 1 if debug else 0

    # Set up handlers for URL opener.
    handlers = [urllib2.HTTPHandler(debuglevel=debuglevel)]

    # Explicitly remove proxy handling if the host is one listed in the value
    # of the no_proxy environment variable because urllib2 does use proxy
    # settings set via http_proxy and https_proxy, but does not take the
    # no_proxy value into account.
    if not _should_use_proxy(url):
        # An empty mapping tells ProxyHandler to use no proxies at all.
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")

    opener = urllib2.build_opener(*handlers)

    # Open the URL and check the response.
    try:
        return opener.open(url)
    except urllib2.HTTPError as exc:
        # Re-raise as simple exception
        raise Exception(str(exc))
82
83 -def _should_use_proxy(url):
84 """Determines whether a proxy should be used to open a connection to the 85 specified URL, based on the value of the no_proxy environment variable. 86 @param url: URL 87 @type: str 88 @return: flag indicating whether proxy should be used 89 @rtype: bool 90 """ 91 no_proxy = os.environ.get('no_proxy', '') 92 93 urlObj = urlparse.urlparse(url) 94 for np in [h.strip() for h in no_proxy.split(',')]: 95 if urlObj.hostname == np: 96 return False 97 98 return True
99