OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2017 The LUCI Authors. All rights reserved. | |
2 # Use of this source code is governed under the Apache License, Version 2.0 | |
3 # that can be found in the LICENSE file. | |
4 | |
5 from recipe_engine import recipe_api | |
6 | |
7 import urllib | |
8 import urlparse | |
9 | |
10 | |
class UrlApi(recipe_api.RecipeApi):
  """Recipe module API for validating, composing and fetching HTTP(S) URLs.

  Every fetch is a GET performed by running this module's "pycurl.py"
  resource script as a recipe step.
  """

  # Static aliases for the urllib helpers of the same name.
  quote = staticmethod(urllib.quote)
  urlencode = staticmethod(urllib.urlencode)

  # JSON prefix used with Gerrit and Gitiles.
  GERRIT_JSON_PREFIX = ')]}\n'


  class HTTPError(recipe_api.StepFailure):
    """Raised by Response.raise_on_error() when not in an infra step.

    The failed Response is available as the "response" attribute.
    """

    def __init__(self, msg, response):
      super(UrlApi.HTTPError, self).__init__(msg)
      self.response = response


  class InfraHTTPError(recipe_api.InfraFailure):
    """Raised by Response.raise_on_error() when in an infra step.

    The failed Response is available as the "response" attribute.
    """

    def __init__(self, msg, response):
      super(UrlApi.InfraHTTPError, self).__init__(msg)
      self.response = response


  class Response(object):
    """Response is an HTTP response object."""

    def __init__(self, method, output, status, infra_step):
      """Wraps the result of a single HTTP operation.

      Args:
        method (str): The HTTP method that was used.
        output: The response content (see the "output" property).
        status (dict): Status dictionary for the operation. 'success' and
          'status_code' are read unconditionally; 'error_body' and 'size'
          are optional.
        infra_step (bool): Selects which exception raise_on_error() raises.
      """
      self._method = method
      self._status = status
      self._output = output
      self._infra_step = infra_step

    @property
    def method(self):
      """Returns (str): The HTTP method, currently always GET."""
      return self._method

    @property
    def status_code(self):
      """Returns (int): The HTTP status code."""
      return self._status['status_code']

    @property
    def output(self):
      """
      Returns:
        If JSON, the unmarshalled JSON response object.
        If text, the result as a text string.
        If file, the output Path.
        On error, will be None.
      """
      return self._output

    @property
    def error_body(self):
      """Returns the HTTP body when an error was encountered.

      Returns (str or None): The error body, or None if not an error.
      """
      return self._status.get('error_body')

    @property
    def size(self):
      """Returns (int): The number of bytes in the HTTP response."""
      return self._status.get('size')

    def raise_on_error(self):
      """Raises an exception if the HTTP operation was not successful.

      Raises:
        UrlApi.HTTPError on HTTP failure, if not an infra step.
        UrlApi.InfraHTTPError on HTTP failure, if an infra step.
      """
      if self._status['success']:
        return
      cls = UrlApi.InfraHTTPError if self._infra_step else UrlApi.HTTPError
      raise cls('HTTP status (%d)' % (self.status_code,), self)


  def join(self, *parts):
    """Constructs a URL path from composite parts.

    Args:
      parts (str...): Strings to concatenate. Any leading or trailing slashes
        will be stripped from intermediate strings to ensure that they join
        together. Leading slashes are stripped from the last part, but its
        trailing slashes are kept.

    Returns (str): The parts joined with '/', or '' if no parts were given.
    """
    if not parts:
      return ''
    pieces = [p.strip('/') for p in parts[:-1]]
    # Only the left side of the final part is trimmed so that a deliberate
    # trailing slash survives.
    pieces.append(parts[-1].lstrip('/'))
    return '/'.join(pieces)

  def validate_url(self, v):
    """Validates that "v" is a valid URL.

    A valid URL has a scheme and netloc, and must begin with HTTP or HTTPS.

    Args:
      v (str): The URL to validate.

    Returns (bool): True if the URL is considered secure, False if not.

    Raises:
      ValueError: if "v" is not valid.
    """
    u = urlparse.urlparse(v)
    scheme = u.scheme.lower()
    if scheme not in ('http', 'https'):
      raise ValueError('URL scheme must be either http:// or https://')
    if not u.netloc:
      raise ValueError('URL must specify a network location.')
    return scheme == 'https'

  def get_file(self, url, path, step_name=None, headers=None,
               transient_retry=True, strip_prefix=None, timeout=None):
    """GET data at given URL and writes it to file.

    Args:
      url: URL to request.
      path (Path): the Path where the content will be written.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool): If True (default), transient HTTP errors (>500)
        will automatically be retried with exponential backoff. If False,
        exactly one attempt will be made.
      strip_prefix (str or None): If not None, this prefix must be present at
        the beginning of the response, and will be stripped from the resulting
        content (e.g., GERRIT_JSON_PREFIX).
      timeout: Timeout (see step.__call__).

    Returns:
      Response with "path" as its "output" value.

    Raises:
      ValueError: if the URL is invalid, or an authorization header would be
        sent to a non-HTTPS URL.
      UrlApi.HTTPError or UrlApi.InfraHTTPError: if the request failed.
    """
    return self._get_step(url, path, step_name, headers, transient_retry,
                          strip_prefix, False, timeout)

  def get_text(self, url, step_name=None, headers=None, transient_retry=True,
               timeout=None):
    """GET data at given URL and returns it as text.

    Args:
      url: URL to request.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool): If True (default), transient HTTP errors (>500)
        will automatically be retried with exponential backoff. If False,
        exactly one attempt will be made.
      timeout: Timeout (see step.__call__).

    Returns:
      Response with a string "output" value.

    Raises:
      ValueError: if the URL is invalid, or an authorization header would be
        sent to a non-HTTPS URL.
      UrlApi.HTTPError or UrlApi.InfraHTTPError: if the request failed.
    """
    return self._get_step(url, None, step_name, headers, transient_retry,
                          None, False, timeout)

  def get_json(self, url, step_name=None, headers=None, transient_retry=True,
               strip_prefix=None, log=False, timeout=None):
    """GET data at given URL and returns it as parsed JSON.

    Args:
      url: URL to request.
      step_name: optional step name, 'GET <url>' by default.
      headers: a {header_name: value} dictionary for HTTP headers.
      transient_retry (bool): If True (default), transient HTTP errors (>500)
        will automatically be retried with exponential backoff. If False,
        exactly one attempt will be made.
      strip_prefix (str or None): If not None, this prefix must be present at
        the beginning of the response, and will be stripped from the resulting
        content (e.g., GERRIT_JSON_PREFIX).
      log (bool): If True, emit the JSON content as a log.
      timeout: Timeout (see step.__call__).

    Returns:
      Response with JSON "output" value.

    Raises:
      ValueError: if the URL is invalid, or an authorization header would be
        sent to a non-HTTPS URL.
      UrlApi.HTTPError or UrlApi.InfraHTTPError: if the request failed.
    """
    return self._get_step(url, None, step_name, headers, transient_retry,
                          strip_prefix, 'log' if log else True, timeout)

  def _get_step(self, url, path, step_name, headers, transient_retry,
                strip_prefix, as_json, timeout):
    """Runs the fetch step shared by get_file, get_text and get_json.

    Args:
      url (str): URL to request; must pass validate_url().
      path (Path or None): If set, the text output is leaked to this path and
        the path becomes the Response's "output".
      step_name (str or None): Step name, 'GET <url>' if empty.
      headers (dict or None): HTTP headers to send.
      transient_retry (bool): If False, '--no-transient-retry' is passed to
        the fetch script.
      strip_prefix (str or None): If set, passed to the fetch script as
        '--strip-prefix'.
      as_json: Falsy to collect the output as text. The string 'log' to
        collect it as JSON and emit a JSON log. Any other truthy value to
        collect it as JSON without a log.
      timeout: Timeout (see step.__call__).

    Returns (UrlApi.Response): The successful response.

    Raises:
      ValueError: if the URL is invalid, or an authorization header would be
        sent to a non-HTTPS URL.
      UrlApi.HTTPError or UrlApi.InfraHTTPError: if the request failed.
    """
    # Validate the request before any step placeholders are created.
    is_secure = self.validate_url(url)
    if (headers and
        any(k.lower() == 'authorization' for k in headers) and
        not is_secure):
      raise ValueError(
          'Refusing to send authorization header to insecure URL: %s' % (url,))

    step_name = step_name or 'GET %s' % url

    args = [
        '--url', url,
        '--status-json', self.m.json.output(add_json_log=False,
                                            name='status_json'),
    ]

    if as_json:
      log = as_json == 'log'
      args += ['--outfile', self.m.json.output(add_json_log=log,
                                               name='output')]
    else:
      args += ['--outfile', self.m.raw_io.output_text(leak_to=path,
                                                      name='output')]

    if headers:
      args += ['--headers-json', self.m.json.input(headers)]
    if strip_prefix:
      args += ['--strip-prefix', strip_prefix]
    if not transient_retry:
      args.append('--no-transient-retry')

    result = self.m.python(
        step_name,
        self.resource('pycurl.py'),
        args=args,
        venv=True,
        timeout=timeout)
    status = result.json.outputs['status_json']

    # A caller-supplied path is itself the output; otherwise hand back the
    # content captured by the matching placeholder.
    output = path
    if not output:
      if as_json:
        output = result.json.outputs['output']
      else:
        output = result.raw_io.output_texts['output']

    response = self.Response('GET', output, status, self.m.context.infra_step)
    response.raise_on_error()
    return response
OLD | NEW |