recipe_modules/url/api.py - Issue 2868333004: Add URL recipe module from "depot_tools".

Side by Side Diff: recipe_modules/url/api.py

Issue 2868333004: Add URL recipe module from "depot_tools". (Closed)

Patch Set: Update comments with exceptions Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2017 The LUCI Authors. All rights reserved.

	2 # Use of this source code is governed under the Apache License, Version 2.0

	3 # that can be found in the LICENSE file.

	4

	5 import urllib

	6 import urlparse

	7

	8 from recipe_engine import recipe_api

	9

	10

	11 class UrlApi(recipe_api.RecipeApi):

	12 quote = staticmethod(urllib.quote)

	13 urlencode = staticmethod(urllib.urlencode)

	14

	15 # JSON prefix used with Gerrit and Gitiles.

	16 GERRIT_JSON_PREFIX = ")]}'\n"

	17

	18 class HTTPError(recipe_api.StepFailure):

	19 def __init__(self, msg, response):

	20 super(UrlApi.HTTPError, self).__init__(msg)

	21 self.response = response

	22

	23

	24 class InfraHTTPError(recipe_api.InfraFailure):

	25 def __init__(self, msg, response):

	26 super(UrlApi.InfraHTTPError, self).__init__(msg)

	27 self.response = response

	28

	29

	30 class Response(object):

	31 """Response is an HTTP response object."""

	32

	33 def __init__(self, method, output, status, infra_step):

	34 self._method = method

	35 self._status = status

	36 self._output = output

	37 self._infra_step = infra_step

	38

	39 @property

	40 def method(self):

	41 """Returns (str): The HTTP method, currently always GET."""

	42 return self._method

	43

	44 @property

	45 def status_code(self):

	46 """Returns (int): The HTTP status code."""

	47 return self._status['status_code']

	48

	49 @property

	50 def output(self):

	51 """

	52 Returns:

	53 If JSON, the unmarshalled JSON response object.

	54 If text, the result as a text string.

	55 If file, the output Path.

	56 On error, will be None.

	57 """

	58 return self._output

	59

	60 @property

	61 def error_body(self):

	62 """Returns the HTTP body when an error was encountered.

	63

	64 Returns (str or None): The error body, or None if not an error.

	65 """

	66 return self._status.get('error_body')

	67

	68 @property

	69 def size(self):

	70 """Returns (int): The number of bytes in the HTTP response."""

	71 return self._status.get('size')

	72

	73 def raise_on_error(self):

	74 """Raises an exception if the HTTP operation was not successful.

	75

	76 Raises:

	77 UrlApi.HTTPError on HTTP failure, if not an infra step.

	78 UrlApi.InfraHTTPError on HTTP failure, if an infra step.

	79 """

	80 if not self._status['success']:

	81 cls = UrlApi.InfraHTTPError if self._infra_step else UrlApi.HTTPError

	82 raise cls('HTTP status (%d)' % (self.status_code,), self)

	83

	84

	85 def join(self, *parts):

	86 """Constructs a URL path from composite parts.

	87

	88 Args:

	89 parts (str...): Strings to concatenate. Any leading or trailing slashes

	90 will be stripped from intermediate strings to ensure that they join

	91 together. Trailing slashes will not be stripped from the last part.

	92 """

	93 if parts:

	94 parts = list(parts)

	95 if len(parts) > 1:

	96 for i, p in enumerate(parts[:-1]):

	97 parts[i] = p.strip('/')

	98 parts[-1] = parts[-1].lstrip('/')

	99 return '/'.join(parts)

	100

	101 def validate_url(self, v):

	102 """Validates that "v" is a valid URL.

	103

	104 A valid URL has a scheme and netloc, and must begin with HTTP or HTTPS.

	105

	106 Args:

	107 v (str): The URL to validate.

	108

	109 Returns (bool): True if the URL is considered secure, False if not.

	110

	111 Raises:

	112 ValueError: if "v" is not valid.

	113 """

	114 u = urlparse.urlparse(v)

	115 if u.scheme.lower() not in ('http', 'https'):

	116 raise ValueError('URL scheme must be either http:// or https://')

	117 if not u.netloc:

	118 raise ValueError('URL must specify a network location.')

	119 return u.scheme.lower() == 'https'

	120

	121 def get_file(self, url, path, step_name=None, headers=None,

	122 transient_retry=True, strip_prefix=None, timeout=None):

	123 """GET data at given URL and writes it to file.

	124

	125 Args:

	126 url: URL to request.

	127 path (Path): the Path where the content will be written.

	128 step_name: optional step name, 'GET <url>' by default.

	129 headers: a {header_name: value} dictionary for HTTP headers.

	130 transient_retry (bool or int): Determines how transient HTTP errors

	131 (>500) will be retried. If True (default), errors will be retried up

	132 to 10 times. If False, no transient retries will occur. If an integer

	133 is supplied, this is the number of transient retries to perform. All

	134 retries have exponential backoff applied.

	135 strip_prefix (str or None): If not None, this prefix must be present at

	136 the beginning of the response, and will be stripped from the resulting

	137 content (e.g., GERRIT_JSON_PREFIX).

	138 timeout: Timeout (see step.__call__).

	139

	140 Returns (UrlApi.Response): Response with "path" as its "output" value.

	141

	142 Raises:

	143 HTTPError, InfraHTTPError: if the request failed.

	144 ValueError: If the request was invalid.

	145 """

	146 return self._get_step(url, path, step_name, headers, transient_retry,

	147 strip_prefix, False, timeout)

	148

	149 def get_text(self, url, step_name=None, headers=None, transient_retry=True,

	150 timeout=None):

	151 """GET data at given URL and returns it as text.

	152

	153 Args:

	154 url: URL to request.

	155 step_name: optional step name, 'GET <url>' by default.

	156 headers: a {header_name: value} dictionary for HTTP headers.

	157 transient_retry (bool or int): Determines how transient HTTP errors

	158 (>500) will be retried. If True (default), errors will be retried up

	159 to 10 times. If False, no transient retries will occur. If an integer

	160 is supplied, this is the number of transient retries to perform. All

	161 retries have exponential backoff applied.

	162 timeout: Timeout (see step.__call__).

	163

	164 Returns (UrlApi.Response): Response with the content as its output value.

	165

	166 Raises:

	167 HTTPError, InfraHTTPError: if the request failed.

	168 ValueError: If the request was invalid.

	169 """

	170 return self._get_step(url, None, step_name, headers, transient_retry,

	171 None, False, timeout)

	172

	173 def get_json(self, url, step_name=None, headers=None, transient_retry=True,

	174 strip_prefix=None, log=False, timeout=None):

	175 """GET data at given URL and returns it as unmarshalled JSON.

	176

	177 Args:

	178 url: URL to request.

	179 step_name: optional step name, 'GET <url>' by default.

	180 headers: a {header_name: value} dictionary for HTTP headers.

	181 transient_retry (bool or int): Determines how transient HTTP errors

	182 (>500) will be retried. If True (default), errors will be retried up

	183 to 10 times. If False, no transient retries will occur. If an integer

	184 is supplied, this is the number of transient retries to perform. All

	185 retries have exponential backoff applied.

	186 strip_prefix (str or None): If not None, this prefix must be present at

	187 the beginning of the response, and will be stripped from the resulting

	188 content (e.g., GERRIT_JSON_PREFIX).

	189 log (bool): If True, emit the JSON content as a log.

	190 timeout: Timeout (see step.__call__).

	191

	192 Returns (UrlApi.Response): Response with the JSON as its "output" value.

	193

	194 Raises:

	195 HTTPError, InfraHTTPError: if the request failed.

	196 ValueError: If the request was invalid.

	197 """

	198 return self._get_step(url, None, step_name, headers, transient_retry,

	199 strip_prefix, 'log' if log else True, timeout)

	200

	201 def _get_step(self, url, path, step_name, headers, transient_retry,

	202 strip_prefix, as_json, timeout):

	203

	204 step_name = step_name or 'GET %s' % url

	205 is_secure = self.validate_url(url)

	206

	207 args = [

	208 '--url', url,

	209 '--status-json', self.m.json.output(add_json_log=False,

	210 name='status_json'),

	211 ]

	212

	213 if as_json:

	214 log = as_json == 'log'

	215 args += ['--outfile', self.m.json.output(add_json_log=log,

	216 name='output')]

	217 else:

	218 args += ['--outfile', self.m.raw_io.output_text(leak_to=path,

	219 name='output')]

	220

	221 if headers:

	222 has_authorization_header = any(k.lower() == 'authorization'

	223 for k in headers.iterkeys())

	224 if has_authorization_header and not is_secure:

	225 raise ValueError(

	226 'Refusing to send authorization header to insecure URL: %s' % (

	227 url,))

	228

	229 args += ['--headers-json', self.m.json.input(headers)]

	230 if strip_prefix:

	231 args += ['--strip-prefix', self.m.json.dumps(strip_prefix)]

	232

	233 assert isinstance(transient_retry, (bool, int, long))

	234 if transient_retry is False:

	235 args += ['--transient-retry', '0']

	236 elif transient_retry is not True:

	237 args += ['--transient-retry', str(transient_retry)]

	238

	239 result = self.m.python(

	240 step_name,

	241 self.resource('pycurl.py'),

	242 args=args,

	243 venv=True,

	244 timeout=timeout)

	245 status = result.json.outputs['status_json']

	246

	247 output = path

	248 if not output:

	249 if as_json:

	250 output = result.json.outputs['output']

	251 else:

	252 output = result.raw_io.output_texts['output']

	253

	254 response = self.Response('GET', output, status, self.m.context.infra_step)

	255 response.raise_on_error()

	256 return response

OLD	NEW

« no previous file with comments | « recipe_modules/url/__init__.py ('k') | recipe_modules/url/example.py » ('j') | no next file with comments »