OLD | NEW |
(Empty) | |
| 1 # Copyright 2017 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. |
| 4 |
| 5 import urllib |
| 6 import urlparse |
| 7 |
| 8 from recipe_engine import recipe_api |
| 9 |
| 10 |
class UrlApi(recipe_api.RecipeApi):
  """Recipe API for building URLs and fetching their content over HTTP(S)."""

  # Thin aliases for the urllib helpers, exposed as static methods.
  quote = staticmethod(urllib.quote)
  urlencode = staticmethod(urllib.urlencode)

  # JSON prefix used with Gerrit and Gitiles.
  GERRIT_JSON_PREFIX = ")]}'\n"

  class HTTPError(recipe_api.StepFailure):
    """Raised by Response.raise_on_error for a failed non-infra request.

    Attributes:
      response (UrlApi.Response): The response that triggered the error.
    """

    def __init__(self, msg, response):
      super(UrlApi.HTTPError, self).__init__(msg)
      self.response = response


  class InfraHTTPError(recipe_api.InfraFailure):
    """Raised by Response.raise_on_error for a failed infra-step request.

    Attributes:
      response (UrlApi.Response): The response that triggered the error.
    """

    def __init__(self, msg, response):
      super(UrlApi.InfraHTTPError, self).__init__(msg)
      self.response = response


  class Response(object):
    """Response is an HTTP response object."""

    def __init__(self, method, output, status, infra_step):
      """Stores the pieces of a finished request.

      Args:
        method (str): The HTTP method that was used.
        output: The fetched content (or the Path it was written to).
        status (dict): Status mapping for the request. This class reads
          the 'status_code' and 'success' keys and, when present,
          'error_body' and 'size'.
        infra_step (bool): Whether the request ran as an infra step; selects
          the exception type used by raise_on_error.
      """
      self._method = method
      self._status = status
      self._output = output
      self._infra_step = infra_step

    @property
    def method(self):
      """Returns (str): The HTTP method, currently always GET."""
      return self._method

    @property
    def status_code(self):
      """Returns (int): The HTTP status code."""
      return self._status['status_code']

    @property
    def output(self):
      """
      Returns:
        If JSON, the unmarshalled JSON response object.
        If text, the result as a text string.
        If file, the output Path.
        NOTE(review): the original documentation states this is None on
        error; nothing in this class enforces that, so confirm against the
        caller before relying on it.
      """
      return self._output

    @property
    def error_body(self):
      """Returns the HTTP body when an error was encountered.

      Returns (str or None): The error body, or None if not an error.
      """
      return self._status.get('error_body')

    @property
    def size(self):
      """Returns (int): The number of bytes in the HTTP response."""
      return self._status.get('size')

    def raise_on_error(self):
      """Raises an exception if the HTTP operation was not successful.

      Raises:
        UrlApi.HTTPError on HTTP failure, if not an infra step.
        UrlApi.InfraHTTPError on HTTP failure, if an infra step.
      """
      if self._status['success']:
        return
      if self._infra_step:
        error_cls = UrlApi.InfraHTTPError
      else:
        error_cls = UrlApi.HTTPError
      raise error_cls('HTTP status (%d)' % (self.status_code,), self)


  def join(self, *parts):
    """Constructs a URL path from composite parts.

    Args:
      parts (str...): Strings to concatenate. Any leading or trailing slashes
        will be stripped from intermediate strings to ensure that they join
        together. Trailing slashes will not be stripped from the last part.

    Returns (str): The parts joined with a single '/' between each pair, or
      an empty string when no parts are given.
    """
    if not parts:
      return ''
    # Every part but the last loses slashes on both sides; the last part only
    # loses leading slashes so that a deliberate trailing '/' survives.
    pieces = [p.strip('/') for p in parts[:-1]]
    pieces.append(parts[-1].lstrip('/'))
    return '/'.join(pieces)

  def validate_url(self, v):
    """Validates that "v" is a valid URL.

    A valid URL has a scheme and netloc, and must begin with HTTP or HTTPS.

    Args:
      v (str): The URL to validate.

    Returns (bool): True if the URL is considered secure, False if not.

    Raises:
      ValueError: if "v" is not valid.
    """
    u = urlparse.urlparse(v)
    scheme = u.scheme.lower()
    if scheme not in ('http', 'https'):
      raise ValueError('URL scheme must be either http:// or https://')
    if not u.netloc:
      raise ValueError('URL must specify a network location.')
    return scheme == 'https'

  def get_file(self, url, path, step_name=None, headers=None,
               transient_retry=True, strip_prefix=None, timeout=None):
    """GET data at given URL and writes it to file.

    Args:
      url: URL to request.
      path (Path): the Path where the content will be written.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool or int): Determines how transient HTTP errors
        (>500) will be retried. If True (default), errors will be retried up
        to 10 times. If False, no transient retries will occur. If an integer
        is supplied, this is the number of transient retries to perform. All
        retries have exponential backoff applied.
      strip_prefix (str or None): If not None, this prefix must be present at
        the beginning of the response, and will be stripped from the resulting
        content (e.g., GERRIT_JSON_PREFIX).
      timeout: Timeout (see step.__call__).

    Returns (UrlApi.Response): Response with "path" as its "output" value.

    Raises:
      HTTPError, InfraHTTPError: if the request failed.
      ValueError: If the request was invalid.
    """
    return self._get_step(url, path, step_name, headers, transient_retry,
                          strip_prefix, False, timeout)

  def get_text(self, url, step_name=None, headers=None, transient_retry=True,
               timeout=None):
    """GET data at given URL and returns it as text.

    Args:
      url: URL to request.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool or int): Determines how transient HTTP errors
        (>500) will be retried. If True (default), errors will be retried up
        to 10 times. If False, no transient retries will occur. If an integer
        is supplied, this is the number of transient retries to perform. All
        retries have exponential backoff applied.
      timeout: Timeout (see step.__call__).

    Returns (UrlApi.Response): Response with the content as its output value.

    Raises:
      HTTPError, InfraHTTPError: if the request failed.
      ValueError: If the request was invalid.
    """
    return self._get_step(url, None, step_name, headers, transient_retry,
                          None, False, timeout)

  def get_json(self, url, step_name=None, headers=None, transient_retry=True,
               strip_prefix=None, log=False, timeout=None):
    """GET data at given URL and parses it as JSON.

    Args:
      url: URL to request.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool or int): Determines how transient HTTP errors
        (>500) will be retried. If True (default), errors will be retried up
        to 10 times. If False, no transient retries will occur. If an integer
        is supplied, this is the number of transient retries to perform. All
        retries have exponential backoff applied.
      strip_prefix (str or None): If not None, this prefix must be present at
        the beginning of the response, and will be stripped from the resulting
        content (e.g., GERRIT_JSON_PREFIX).
      log (bool): If True, emit the JSON content as a log.
      timeout: Timeout (see step.__call__).

    Returns (UrlApi.Response): Response with the JSON as its "output" value.

    Raises:
      HTTPError, InfraHTTPError: if the request failed.
      ValueError: If the request was invalid.
    """
    return self._get_step(url, None, step_name, headers, transient_retry,
                          strip_prefix, 'log' if log else True, timeout)

  def _get_step(self, url, path, step_name, headers, transient_retry,
                strip_prefix, as_json, timeout):
    """Runs the pycurl.py resource to GET "url" and wraps the result.

    Shared implementation behind get_file, get_text and get_json.

    Args:
      url: URL to request; checked with validate_url first.
      path (Path or None): Where to leak the text output, or None.
      step_name: Step name; defaults to 'GET <url>' when falsy.
      headers: Optional {header_name: value} dictionary.
      transient_retry (bool or int): See get_file.
      strip_prefix (str or None): See get_file.
      as_json: False for text output, True for JSON output, or the string
        'log' for JSON output that is also emitted as a step log.
      timeout: Passed through to the step.

    Returns (UrlApi.Response): The response, after raise_on_error succeeded.

    Raises:
      HTTPError, InfraHTTPError: if the request failed.
      ValueError: if the URL is invalid, or an authorization header would be
        sent to a non-HTTPS URL.
    """
    step_name = step_name or 'GET %s' % url
    is_secure = self.validate_url(url)

    args = [
        '--url', url,
        '--status-json', self.m.json.output(add_json_log=False,
                                            name='status_json'),
    ]

    if as_json:
      log = as_json == 'log'
      args += ['--outfile', self.m.json.output(add_json_log=log,
                                               name='output')]
    else:
      args += ['--outfile', self.m.raw_io.output_text(leak_to=path,
                                                      name='output')]

    if headers:
      # Iterating the mapping yields its keys directly.
      has_authorization_header = any(k.lower() == 'authorization'
                                     for k in headers)
      if has_authorization_header and not is_secure:
        raise ValueError(
            'Refusing to send authorization header to insecure URL: %s' % (
            url,))

      args += ['--headers-json', self.m.json.input(headers)]
    if strip_prefix:
      args += ['--strip-prefix', self.m.json.dumps(strip_prefix)]

    assert isinstance(transient_retry, (bool, int, long))
    # Identity checks, not equality: True == 1 and False == 0, and an integer
    # count must be forwarded as-is. True means "use the script's default", so
    # no flag is passed in that case.
    if transient_retry is False:
      args += ['--transient-retry', '0']
    elif transient_retry is not True:
      args += ['--transient-retry', str(transient_retry)]

    result = self.m.python(
        step_name,
        self.resource('pycurl.py'),
        args=args,
        venv=True,
        timeout=timeout)
    status = result.json.outputs['status_json']

    # A file download reports its destination Path; otherwise the fetched
    # content itself is the output.
    output = path
    if not output:
      if as_json:
        output = result.json.outputs['output']
      else:
        output = result.raw_io.output_texts['output']

    response = self.Response('GET', output, status, self.m.context.infra_step)
    response.raise_on_error()
    return response
OLD | NEW |