Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2017 The LUCI Authors. All rights reserved. | |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | |
| 3 # that can be found in the LICENSE file. | |
| 4 | |
| 5 from recipe_engine import recipe_api | |
| 6 | |
| 7 import urllib | |
| 8 import urlparse | |
| 9 | |
| 10 | |
| 11 class UrlApi(recipe_api.RecipeApi): | |
| 12 quote = staticmethod(urllib.quote) | |
| 13 urlencode = staticmethod(urllib.urlencode) | |
| 14 | |
| 15 # JSON prefix used with Gerrit and Gitiles. | |
| 16 GERRIT_JSON_PREFIX = ')]}\n' | |
| 17 | |
| 18 | |
| 19 class HTTPError(recipe_api.StepFailure): | |
| 20 def __init__(self, msg, response): | |
| 21 super(UrlApi.HTTPError, self).__init__(msg) | |
| 22 self.response = response | |
| 23 | |
| 24 | |
| 25 class InfraHTTPError(recipe_api.InfraFailure): | |
| 26 def __init__(self, msg, response): | |
| 27 super(UrlApi.InfraHTTPError, self).__init__(msg) | |
| 28 self.response = response | |
| 29 | |
| 30 | |
| 31 class Response(object): | |
| 32 """Response is an HTTP response object.""" | |
| 33 | |
| 34 def __init__(self, method, output, status, infra_step): | |
| 35 self._method = method | |
| 36 self._status = status | |
| 37 self._output = output | |
| 38 self._infra_step = infra_step | |
| 39 | |
| 40 @property | |
| 41 def method(self): | |
| 42 """Returns (str): The HTTP method, currently always GET.""" | |
| 43 return self._method | |
| 44 | |
| 45 @property | |
| 46 def status_code(self): | |
| 47 """Returns (int): The HTTP status code.""" | |
| 48 return self._status['status_code'] | |
| 49 | |
| 50 @property | |
| 51 def output(self): | |
| 52 """ | |
| 53 Returns: | |
| 54 If JSON, the unmarshalled JSON response object. | |
| 55 If text, the result as a text string. | |
| 56 If file, the output Path. | |
| 57 On error, will be None. | |
| 58 """ | |
| 59 return self._output | |
| 60 | |
| 61 @property | |
| 62 def error_body(self): | |
| 63 """Returns the HTTP body when an error was encountered. | |
| 64 | |
| 65 Returns (str or None): The error body, or None if not an error. | |
| 66 """ | |
| 67 return self._status.get('error_body') | |
| 68 | |
| 69 @property | |
| 70 def size(self): | |
| 71 """Returns (int): The number of bytes in the HTTP response.""" | |
| 72 return self._status.get('size') | |
| 73 | |
| 74 def raise_on_error(self): | |
| 75 """Raises an exception if the HTTP operation was not successful. | |
| 76 | |
| 77 Raises: | |
| 78 UrlApi.HTTPError on HTTP failure, if not an infra step. | |
| 79 UrlApi.InfraHTTPError on HTTP failure, if an infra step. | |
| 80 """ | |
| 81 if not self._status['success']: | |
| 82 cls = UrlApi.InfraHTTPError if self._infra_step else UrlApi.HTTPError | |
| 83 raise cls('HTTP status (%d)' % (self.status_code,), self) | |
| 84 | |
| 85 | |
| 86 def join(self, *parts): | |
| 87 """Constructs a URL path from composite parts. | |
| 88 | |
| 89 Args: | |
| 90 parts (str...): Strings to concatenate. Any leading or trailing slashes | |
| 91 will be stripped from intermediate strings to ensure that they join | |
| 92 together. Trailing slashes will not be stripped from the last part. | |
| 93 """ | |
| 94 if parts: | |
| 95 parts = list(parts) | |
| 96 if len(parts) > 1: | |
| 97 for i, p in enumerate(parts[:-1]): | |
| 98 parts[i] = p.strip('/') | |
| 99 parts[-1] = parts[-1].lstrip('/') | |
| 100 return '/'.join(parts) | |
| 101 | |
| 102 def validate_url(self, v): | |
| 103 """Validates that "v" is a valid URL. | |
| 104 | |
| 105 A valid URL has a scheme and netloc, and must begin with HTTP or HTTPS. | |
| 106 | |
| 107 Args: | |
| 108 v (str): The URL to validate. | |
| 109 | |
| 110 Returns (bool): True if the URL uses HTTPS (considered secure), False if not. | |
| 111 | |
| 112 Raises: | |
| 113 ValueError: if "v" is not valid. | |
| 114 """ | |
| 115 u = urlparse.urlparse(v) | |
| 116 if u.scheme.lower() not in ('http', 'https'): | |
| 117 raise ValueError('URL scheme must be either http:// or https://') | |
| 118 if not u.netloc: | |
| 119 raise ValueError('URL must specify a network location.') | |
| 120 return u.scheme.lower() == 'https' | |
| 121 | |
| 122 def get_file(self, url, path, step_name=None, headers=None, | |
| 123 transient_retry=True, strip_prefix=None, timeout=None): | |
| 124 """GET data at given URL and writes it to file. | |
| 125 | |
| 126 Args: | |
| 127 url: URL to request. | |
| 128 path (Path): the Path where the content will be written. | |
| 129 step_name: optional step name, 'GET <url>' by default. | |
| 130 headers: a {header_name: value} dictionary for HTTP headers. | |
| 131 transient_retry (bool): If True (default), transient HTTP errors (>500) | |
| 132 will automatically be retried with exponential backoff. If False, | |
| 133 exactly one attempt will be made. | |
| 134 strip_prefix (str or None): If not None, this prefix must be present at | |
| 135 the beginning of the response, and will be stripped from the resulting | |
| 136 content (e.g., GERRIT_JSON_PREFIX). | |
| 137 timeout: Timeout (see step.__call__). | |
| 138 | |
| 139 Returns: | |
| 140 Response with "path" as its "output" value. | |
| 141 """ | |
| 142 return self._get_step(url, path, step_name, headers, transient_retry, | |
| 143 strip_prefix, False, timeout) | |
| 144 | |
| 145 def get_text(self, url, step_name=None, headers=None, transient_retry=True, | |
| 146 timeout=None): | |
| 147 """GETs data at the given URL and returns it as a text string. | |
| 148 | |
| 149 Args: | |
| 150 url: URL to request. | |
| 151 step_name: optional step name, 'GET <url>' by default. | |
| 152 headers: a {header_name: value} dictionary for HTTP headers. | |
| 153 transient_retry (bool): If True (default), transient HTTP errors (>500) | |
| 154 will automatically be retried with exponential backoff. If False, | |
| 155 exactly one attempt will be made. | |
| 156 timeout: Timeout (see step.__call__). | |
| 157 | |
| 158 Returns: | |
| 159 Response with a string "output" value. | |
| 160 """ | |
| 161 return self._get_step(url, None, step_name, headers, transient_retry, | |
| 162 None, False, timeout) | |
| 163 | |
| 164 def get_json(self, url, step_name=None, headers=None, transient_retry=True, | |
| 165 strip_prefix=None, log=False, timeout=None): | |
| 166 """GETs data at the given URL and returns it as unmarshalled JSON. | |
| 167 | |
| 168 Args: | |
| 169 url: URL to request. | |
| 170 step_name: optional step name, 'GET <url>' by default. | |
| 171 headers: a {header_name: value} dictionary for HTTP headers. | |
| 172 transient_retry (bool): If True (default), transient HTTP errors (>500) | |
| 173 will automatically be retried with exponential backoff. If False, | |
| 174 exactly one attempt will be made. | |
| 175 strip_prefix (str or None): If not None, this prefix must be present at | |
| 176 the beginning of the response, and will be stripped from the resulting | |
| 177 content (e.g., GERRIT_JSON_PREFIX). | |
| 178 log (bool): If True, emit the JSON content as a log. | |
| 179 timeout: Timeout (see step.__call__). | |
| 180 | |
| 181 Returns: | |
| 182 Response with JSON "output" value. | |
| 183 """ | |
| 184 return self._get_step(url, None, step_name, headers, transient_retry, | |
| 185 strip_prefix, 'log' if log else True, timeout) | |
| 186 | |
| 187 def _get_step(self, url, path, step_name, headers, transient_retry, | |
| 188 strip_prefix, as_json, timeout): | |
| 189 headers = headers or {} | |
|
iannucci
2017/05/12 07:52:58
not needed? just move the sekuritay check into the
dnj
2017/05/12 15:50:26
Done.
| |
| 190 | |
| 191 # Validate the request. | |
| 192 is_secure = self.validate_url(url) | |
| 193 if (any(k.lower() == 'authorization' for k in headers.iterkeys()) and | |
| 194 not is_secure): | |
| 195 raise ValueError( | |
| 196 'Refusing to send authorization header to insecure URL: %s' % (url,)) | |
| 197 | |
| 198 step_name = step_name or 'GET %s' % url | |
| 199 | |
| 200 args = [ | |
| 201 '--url', url, | |
| 202 '--status-json', self.m.json.output(add_json_log=False, | |
| 203 name='status_json'), | |
| 204 ] | |
| 205 | |
| 206 if as_json: | |
| 207 log = as_json == 'log' | |
| 208 args += ['--outfile', self.m.json.output(add_json_log=log, | |
| 209 name='output')] | |
| 210 else: | |
| 211 args += ['--outfile', self.m.raw_io.output_text(leak_to=path, | |
| 212 name='output')] | |
| 213 | |
| 214 if headers: | |
| 215 args += ['--headers-json', self.m.json.input(headers)] | |
| 216 if strip_prefix: | |
| 217 args += ['--strip-prefix', strip_prefix] | |
| 218 if not transient_retry: | |
| 219 args.append('--no-transient-retry') | |
| 220 | |
| 221 result = self.m.python( | |
| 222 step_name, | |
| 223 self.resource('pycurl.py'), | |
| 224 args=args, | |
| 225 venv=True, | |
| 226 timeout=timeout) | |
| 227 status = result.json.outputs['status_json'] | |
| 228 | |
| 229 output = path | |
| 230 if not output: | |
| 231 if as_json: | |
| 232 output = result.json.outputs['output'] | |
| 233 else: | |
| 234 output = result.raw_io.output_texts['output'] | |
| 235 | |
| 236 response = self.Response('GET', output, status, self.m.context.infra_step) | |
| 237 response.raise_on_error() | |
| 238 return response | |
| OLD | NEW |