Index: recipe_modules/url/api.py |
diff --git a/recipe_modules/url/api.py b/recipe_modules/url/api.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..0f7d238b6c1d93472e5487aba155635dc0b044a3 |
--- /dev/null |
+++ b/recipe_modules/url/api.py |
@@ -0,0 +1,256 @@ |
+# Copyright 2017 The LUCI Authors. All rights reserved. |
+# Use of this source code is governed under the Apache License, Version 2.0 |
+# that can be found in the LICENSE file. |
+ |
+import urllib |
+import urlparse |
+ |
+from recipe_engine import recipe_api |
+ |
+ |
+class UrlApi(recipe_api.RecipeApi): |
+ quote = staticmethod(urllib.quote) |
+ urlencode = staticmethod(urllib.urlencode) |
+ |
+ # JSON prefix used with Gerrit and Gitiles. |
+ GERRIT_JSON_PREFIX = ")]}'\n" |
+ |
+ class HTTPError(recipe_api.StepFailure): |
+ def __init__(self, msg, response): |
+ super(UrlApi.HTTPError, self).__init__(msg) |
+ self.response = response |
+ |
+ |
+ class InfraHTTPError(recipe_api.InfraFailure): |
+ def __init__(self, msg, response): |
+ super(UrlApi.InfraHTTPError, self).__init__(msg) |
+ self.response = response |
+ |
+ |
+ class Response(object): |
+ """Response is an HTTP response object.""" |
+ |
+ def __init__(self, method, output, status, infra_step): |
+ self._method = method |
+ self._status = status |
+ self._output = output |
+ self._infra_step = infra_step |
+ |
+ @property |
+ def method(self): |
+ """Returns (str): The HTTP method, currently always GET.""" |
+ return self._method |
+ |
+ @property |
+ def status_code(self): |
+ """Returns (int): The HTTP status code.""" |
+ return self._status['status_code'] |
+ |
+ @property |
+ def output(self): |
+ """ |
+ Returns: |
+ If JSON, the unmarshalled JSON response object. |
+ If text, the result as a text string. |
+ If file, the output Path. |
+ On error, will be None. |
+ """ |
+ return self._output |
+ |
+ @property |
+ def error_body(self): |
+ """Returns the HTTP body when an error was encountered. |
+ |
+ Returns (str or None): The error body, or None if not an error. |
+ """ |
+ return self._status.get('error_body') |
+ |
+ @property |
+ def size(self): |
+ """Returns (int): The number of bytes in the HTTP response.""" |
+ return self._status.get('size') |
+ |
+ def raise_on_error(self): |
+ """Raises an exception if the HTTP operation was not successful. |
+ |
+ Raises: |
+ UrlApi.HTTPError on HTTP failure, if not an infra step. |
+ UrlApi.InfraHTTPError on HTTP failure, if an infra step. |
+ """ |
+ if not self._status['success']: |
+ cls = UrlApi.InfraHTTPError if self._infra_step else UrlApi.HTTPError |
+ raise cls('HTTP status (%d)' % (self.status_code,), self) |
+ |
+ |
+ def join(self, *parts): |
+ """Constructs a URL path from composite parts. |
+ |
+ Args: |
+ parts (str...): Strings to concastenate. Any leading or trailing slashes |
+ will be stripped from intermediate strings to ensure that they join |
+ together. Trailing slashes will not be stripped from the last part. |
+ """ |
+ if parts: |
+ parts = list(parts) |
+ if len(parts) > 1: |
+ for i, p in enumerate(parts[:-1]): |
+ parts[i] = p.strip('/') |
+ parts[-1] = parts[-1].lstrip('/') |
+ return '/'.join(parts) |
+ |
+ def validate_url(self, v): |
+ """Validates that "v" is a valid URL. |
+ |
+ A valid URL has a scheme and netloc, and must begin with HTTP or HTTPS. |
+ |
+ Args: |
+ v (str): The URL to validate. |
+ |
+ Returns (bool): True if the URL is considered secure, False if not. |
+ |
+ Raises: |
+ ValueError: if "v" is not valid. |
+ """ |
+ u = urlparse.urlparse(v) |
+ if u.scheme.lower() not in ('http', 'https'): |
+ raise ValueError('URL scheme must be either http:// or https://') |
+ if not u.netloc: |
+ raise ValueError('URL must specify a network location.') |
+ return u.scheme.lower() == 'https' |
+ |
+ def get_file(self, url, path, step_name=None, headers=None, |
+ transient_retry=True, strip_prefix=None, timeout=None): |
+ """GET data at given URL and writes it to file. |
+ |
+ Args: |
+ url: URL to request. |
+ path (Path): the Path where the content will be written. |
+ step_name: optional step name, 'fetch <url>' by default. |
+ headers: a {header_name: value} dictionary for HTTP headers. |
+ transient_retry (bool or int): Determines how transient HTTP errorts |
+ (>500) will be retried. If True (default), errors will be retried up |
+ to 10 times. If False, no transient retries will occur. If an integer |
+ is supplied, this is the number of transient retries to perform. All |
+ retries have exponential backoff applied. |
+ strip_prefix (str or None): If not None, this prefix must be present at |
+ the beginning of the response, and will be stripped from the resulting |
+ content (e.g., GERRIT_JSON_PREFIX). |
+ timeout: Timeout (see step.__call__). |
+ |
+ Returns (UrlApi.Response): Response with "path" as its "output" value. |
+ |
+ Raises: |
+ HTTPError, InfraHTTPError: if the request failed. |
+ ValueError: If the request was invalid. |
+ """ |
+ return self._get_step(url, path, step_name, headers, transient_retry, |
+ strip_prefix, False, timeout) |
+ |
+ def get_text(self, url, step_name=None, headers=None, transient_retry=True, |
+ timeout=None): |
+ """GET data at given URL and writes it to file. |
+ |
+ Args: |
+ url: URL to request. |
+ step_name: optional step name, 'fetch <url>' by default. |
+ headers: a {header_name: value} dictionary for HTTP headers. |
+ transient_retry (bool or int): Determines how transient HTTP errorts |
+ (>500) will be retried. If True (default), errors will be retried up |
+ to 10 times. If False, no transient retries will occur. If an integer |
+ is supplied, this is the number of transient retries to perform. All |
+ retries have exponential backoff applied. |
+ timeout: Timeout (see step.__call__). |
+ |
+ Returns (UrlApi.Response): Response with the content as its output value. |
+ |
+ Raises: |
+ HTTPError, InfraHTTPError: if the request failed. |
+ ValueError: If the request was invalid. |
+ """ |
+ return self._get_step(url, None, step_name, headers, transient_retry, |
+ None, False, timeout) |
+ |
+ def get_json(self, url, step_name=None, headers=None, transient_retry=True, |
+ strip_prefix=None, log=False, timeout=None): |
+ """GET data at given URL and writes it to file. |
+ |
+ Args: |
+ url: URL to request. |
+ step_name: optional step name, 'fetch <url>' by default. |
+ headers: a {header_name: value} dictionary for HTTP headers. |
+ transient_retry (bool or int): Determines how transient HTTP errorts |
+ (>500) will be retried. If True (default), errors will be retried up |
+ to 10 times. If False, no transient retries will occur. If an integer |
+ is supplied, this is the number of transient retries to perform. All |
+ retries have exponential backoff applied. |
+ strip_prefix (str or None): If not None, this prefix must be present at |
+ the beginning of the response, and will be stripped from the resulting |
+ content (e.g., GERRIT_JSON_PREFIX). |
+ log (bool): If True, emit the JSON content as a log. |
+ timeout: Timeout (see step.__call__). |
+ |
+ Returns (UrlApi.Response): Response with the JSON as its "output" value. |
+ |
+ Raises: |
+ HTTPError, InfraHTTPError: if the request failed. |
+ ValueError: If the request was invalid. |
+ """ |
+ return self._get_step(url, None, step_name, headers, transient_retry, |
+ strip_prefix, 'log' if log else True, timeout) |
+ |
+ def _get_step(self, url, path, step_name, headers, transient_retry, |
+ strip_prefix, as_json, timeout): |
+ |
+ step_name = step_name or 'GET %s' % url |
+ is_secure = self.validate_url(url) |
+ |
+ args = [ |
+ '--url', url, |
+ '--status-json', self.m.json.output(add_json_log=False, |
+ name='status_json'), |
+ ] |
+ |
+ if as_json: |
+ log = as_json == 'log' |
+ args += ['--outfile', self.m.json.output(add_json_log=log, |
+ name='output')] |
+ else: |
+ args += ['--outfile', self.m.raw_io.output_text(leak_to=path, |
+ name='output')] |
+ |
+ if headers: |
+ has_authorization_header = any(k.lower() == 'authorization' |
+ for k in headers.iterkeys()) |
+ if has_authorization_header and not is_secure: |
+ raise ValueError( |
+ 'Refusing to send authorization header to insecure URL: %s' % ( |
+ url,)) |
+ |
+ args += ['--headers-json', self.m.json.input(headers)] |
+ if strip_prefix: |
+ args += ['--strip-prefix', self.m.json.dumps(strip_prefix)] |
+ |
+ assert isinstance(transient_retry, (bool, int, long)) |
+ if transient_retry is False: |
+ args += ['--transient-retry', '0'] |
+ elif transient_retry is not True: |
+ args += ['--transient-retry', str(transient_retry)] |
+ |
+ result = self.m.python( |
+ step_name, |
+ self.resource('pycurl.py'), |
+ args=args, |
+ venv=True, |
+ timeout=timeout) |
+ status = result.json.outputs['status_json'] |
+ |
+ output = path |
+ if not output: |
+ if as_json: |
+ output = result.json.outputs['output'] |
+ else: |
+ output = result.raw_io.output_texts['output'] |
+ |
+ response = self.Response('GET', output, status, self.m.context.infra_step) |
+ response.raise_on_error() |
+ return response |