OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2010 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Static data and helper functions.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 import errno | |
20 import logging | |
21 import math | |
22 import multiprocessing | |
23 import os | |
24 import pkgutil | |
25 import re | |
26 import struct | |
27 import sys | |
28 import tempfile | |
29 import textwrap | |
30 import threading | |
31 import traceback | |
32 import xml.etree.ElementTree as ElementTree | |
33 | |
34 import boto | |
35 from boto import config | |
36 import boto.auth | |
37 from boto.exception import NoAuthHandlerFound | |
38 from boto.gs.connection import GSConnection | |
39 from boto.provider import Provider | |
40 from boto.pyami.config import BotoConfigLocations | |
41 import httplib2 | |
42 from oauth2client.client import HAS_CRYPTO | |
43 from retry_decorator import retry_decorator | |
44 | |
45 import gslib | |
46 from gslib.exception import CommandException | |
47 from gslib.storage_url import StorageUrlFromString | |
48 from gslib.translation_helper import AclTranslation | |
49 from gslib.translation_helper import GenerationFromUrlAndString | |
50 from gslib.translation_helper import S3_ACL_MARKER_GUID | |
51 from gslib.translation_helper import S3_DELETE_MARKER_GUID | |
52 from gslib.translation_helper import S3_MARKER_GUIDS | |
53 | |
54 # pylint: disable=g-import-not-at-top | |
try:
  # This module doesn't necessarily exist on Windows.
  import resource
  # True when the POSIX resource module is available; checked before any
  # rlimit manipulation (see _IncreaseSoftLimitForResource below).
  HAS_RESOURCE_MODULE = True
except ImportError, e:  # Python 2 except syntax; 'e' is unused.
  HAS_RESOURCE_MODULE = False
61 | |
# Commonly used byte-size constants.
ONE_KIB = 1024
ONE_MIB = 1024 * 1024
TWO_MIB = 2 * ONE_MIB
EIGHT_MIB = 8 * ONE_MIB
TEN_MIB = 10 * ONE_MIB
# Default buffer size for file I/O; consumers are not visible in this module.
DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB
# Default line count; presumably for truncated/paged output -- usage not
# visible in this module.
_DEFAULT_LINES = 25

# By default, the timeout for SSL read errors is infinite. This could
# cause gsutil to hang on network disconnect, so pick a more reasonable
# timeout.
SSL_TIMEOUT = 60

# Start with a progress callback every 64 KiB during uploads/downloads (JSON
# API). Callback implementation should back off until it hits the maximum size
# so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 1024*64
MAX_CALLBACK_PER_BYTES = 1024*1024*100

# Upload/download files in 8 KiB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 1024*8

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files >= this size, output a message indicating that we're running an
# operation on the file (like hashing or gzipping) so it does not appear to the
# user that the command is hanging.
MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024  # 100 MiB

# Sentinel for "no limit". NOTE: sys.maxint exists only on Python 2.
NO_MAX = sys.maxint

UTF8 = 'utf-8'

# Parses version strings like '4.6' or '3.32pre' into maj/min/suffix groups
# (see CompareVersions below).
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'

# Binary exponentiation strings: (power of two, byte suffix, bit suffix,
# optional short suffix). Used for human-readable size formatting/parsing.
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KiB', 'Kibit', 'K'),
    (20, 'MiB', 'Mibit', 'M'),
    (30, 'GiB', 'Gibit', 'G'),
    (40, 'TiB', 'Tibit', 'T'),
    (50, 'PiB', 'Pibit', 'P'),
    (60, 'EiB', 'Eibit', 'E'),
]


# Declared here; assigned by InitializeMultiprocessingVariables().
global manager  # pylint: disable=global-at-module-level
# Guards mutation of configured_certs_files across threads (see GetCertsFile).
certs_file_lock = threading.Lock()
# Temp certs-file paths created by GetCertsFile; cleaned up via
# GetCleanupFiles.
configured_certs_files = []
115 | |
116 | |
def InitializeMultiprocessingVariables():
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global manager  # pylint: disable=global-variable-undefined
  # Module-level manager for creating shared cross-process state.
  manager = multiprocessing.Manager()
125 | |
126 | |
def _GenerateSuffixRegex():
  """Builds the regex used to parse human-readable byte strings (e.g. '3 GiB').

  Returns:
    Tuple (suffix_to_si, matcher) where suffix_to_si maps each lowercase
    size suffix to its index in _EXP_STRINGS and matcher is the compiled
    regex with 'num' and 'suffix' groups.
  """
  suffix_to_si = {}
  all_suffixes = []
  for exp_index, exp_entry in enumerate(_EXP_STRINGS):
    # Everything after the exponent in each entry is an accepted suffix.
    for lowered in (s.lower() for s in exp_entry[1:]):
      suffix_to_si[lowered] = exp_index
      all_suffixes.append(lowered)
  pattern = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?' % '|'.join(all_suffixes)
  return suffix_to_si, re.compile(pattern)
140 | |
# Lowercase-suffix -> _EXP_STRINGS index map and the compiled matcher for
# human-readable byte strings (used by HumanReadableToBytes).
SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()

SECONDS_PER_DAY = 3600 * 24

# Detect platform types.
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

Retry = retry_decorator.retry  # pylint: disable=invalid-name

# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
cached_multiprocessing_is_available_stack_trace = None
cached_multiprocessing_is_available_message = None
166 | |
167 | |
168 # Enum class for specifying listing style. | |
class ListingStyle(object):
  """Enum-like class of string constants specifying listing detail level."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
173 | |
174 | |
def UsingCrcmodExtension(crcmod):
  """Returns a truthy value iff crcmod's compiled C extension is in use.

  Args:
    crcmod: The crcmod package (or None/any object to probe).

  Returns:
    The truthy/falsy result of the attribute probes (not necessarily a bool),
    truthy only when crcmod.crcmod._usingExtension is set and truthy.
  """
  inner_module = getattr(crcmod, 'crcmod', None)
  if not inner_module:
    # Short-circuit exactly like the original 'and' expression.
    return inner_module
  return getattr(inner_module, '_usingExtension', None)
178 | |
179 | |
def CreateDirIfNeeded(dir_path, mode=0777):
  """Creates a directory, suppressing already-exists errors.

  Args:
    dir_path: Path of the directory to create (intermediate dirs included).
    mode: Permission bits for any newly created directories (Python 2 octal).
  """
  if not os.path.exists(dir_path):
    try:
      # Unfortunately, even though we catch and ignore EEXIST, this call will
      # output a (needless) error message (no way to avoid that in Python).
      os.makedirs(dir_path, mode)
    # Ignore 'already exists' in case user tried to start up several
    # resumable uploads concurrently from a machine where no tracker dir had
    # yet been created.
    except OSError as e:
      if e.errno != errno.EEXIST:
        raise
193 | |
194 | |
def GetGsutilStateDir():
  """Returns the location of the directory for gsutil state files.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed.

  This location should only be used for storing data that is required to be in
  a static location.

  Returns:
    Path to directory for gsutil static state files.
  """
  # Honor an explicit GSUtil:state_dir boto config value; otherwise default
  # to ~/.gsutil. The directory is created on first use.
  config_file_dir = config.get(
      'GSUtil', 'state_dir',
      os.path.expanduser(os.path.join('~', '.gsutil')))
  CreateDirIfNeeded(config_file_dir)
  return config_file_dir
213 | |
214 | |
def GetCredentialStoreFilename():
  """Returns the path of the 'credstore' file in the gsutil state dir."""
  return os.path.join(GetGsutilStateDir(), 'credstore')
217 | |
218 | |
def GetGceCredentialCacheFilename():
  """Returns the path of the 'gcecredcache' file in the gsutil state dir."""
  return os.path.join(GetGsutilStateDir(), 'gcecredcache')
221 | |
222 | |
def GetTabCompletionLogFilename():
  """Returns the path of the tab-completion log file in the state dir."""
  return os.path.join(GetGsutilStateDir(), 'tab-completion-logs')
225 | |
226 | |
def GetTabCompletionCacheFilename():
  """Returns the path of the tab-completion cache file, creating its dir."""
  tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion')
  # Limit read permissions on the directory to owner for privacy.
  CreateDirIfNeeded(tab_completion_dir, mode=0700)
  return os.path.join(tab_completion_dir, 'cache')
232 | |
233 | |
def PrintTrackerDirDeprecationWarningIfNeeded():
  """Warns on stderr if the deprecated resumable_tracker_dir option is set."""
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if config.has_option('GSUtil', 'resumable_tracker_dir'):
    sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
                     '.boto configuration file. This configuration option is '
                     'deprecated; please use the state_dir configuration '
                     'option instead.\n')
242 | |
243 | |
# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
# NOTE: These statements run at import time: emit the deprecation warning (if
# applicable) and ensure the state dir exists before computing the path below.
PrintTrackerDirDeprecationWarningIfNeeded()
CreateDirIfNeeded(GetGsutilStateDir())
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
    os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
250 | |
251 | |
def HasConfiguredCredentials():
  """Determines if boto credential/config file exists.

  Returns:
    True if any explicit credentials (Google HMAC, AWS HMAC, OAuth2 refresh
    token, or service account with crypto support) are present in the boto
    config. Otherwise, a boto auth handler instance (truthy) if one can be
    constructed for Google Storage, or None (falsy) if not.
  """
  has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
                    config.has_option('Credentials', 'gs_secret_access_key'))
  has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
                    config.has_option('Credentials', 'aws_secret_access_key'))
  has_oauth_creds = (
      config.has_option('Credentials', 'gs_oauth2_refresh_token'))
  # Service account credentials are only usable when a crypto library is
  # installed (HAS_CRYPTO).
  has_service_account_creds = (
      HAS_CRYPTO and
      config.has_option('Credentials', 'gs_service_client_id') and
      config.has_option('Credentials', 'gs_service_key_file'))

  if (has_goog_creds or has_amzn_creds or has_oauth_creds or
      has_service_account_creds):
    return True

  valid_auth_handler = None
  try:
    valid_auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
    # Exclude the no-op auth handler as indicating credentials are configured.
    # Note we can't use isinstance() here because the no-op module may not be
    # imported so we can't get a reference to the class type.
    if getattr(getattr(valid_auth_handler, '__class__', None),
               '__name__', None) == 'NoOpAuth':
      valid_auth_handler = None
  except NoAuthHandlerFound:
    pass

  return valid_auth_handler
284 | |
285 | |
def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured.

  Raises:
    CommandException: if a service account is configured but the crypto
        libraries needed to use it (PyOpenSSL or PyCrypto) are missing; the
        message differs depending on whether gsutil is running under the
        Cloud SDK wrapper (CLOUDSDK_WRAPPER env var).
  """
  if not HasConfiguredCredentials():
    if (config.has_option('Credentials', 'gs_service_client_id')
        and not HAS_CRYPTO):
      if os.environ.get('CLOUDSDK_WRAPPER') == '1':
        raise CommandException('\n'.join(textwrap.wrap(
            'Your gsutil is configured with an OAuth2 service account, but '
            'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
            'Service account authentication requires one of these libraries; '
            'please reactivate your service account via the gcloud auth '
            'command and ensure any gcloud packages necessary for '
            'service accounts are present.')))
      else:
        raise CommandException('\n'.join(textwrap.wrap(
            'Your gsutil is configured with an OAuth2 service account, but '
            'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
            'Service account authentication requires one of these libraries; '
            'please install either of them to proceed, or configure a '
            'different type of credentials with "gsutil config".')))
    else:
      # With no boto config file the user can still access publicly readable
      # buckets and objects. Importing the plugin registers the no-op handler.
      from gslib import no_op_auth_plugin  # pylint: disable=unused-variable
310 | |
311 | |
def GetConfigFilePath():
  """Returns the first readable boto config path, or 'no config found'."""
  for candidate_path in BotoConfigLocations:
    try:
      # Probe readability by opening; close immediately on success.
      probe = open(candidate_path, 'r')
    except IOError:
      continue
    probe.close()
    return candidate_path
  return 'no config found'
322 | |
323 | |
def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Considers each path in boto's BotoConfigLocations plus, if set, the
  AWS_CREDENTIAL_FILE environment variable.

  Returns:
    List of existing config file paths, deduplicated.
  """
  # Copy the list: the original code appended AWS_CREDENTIAL_FILE directly to
  # boto.pyami.config.BotoConfigLocations, mutating the shared module-level
  # list on every call.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  seen = set()
  cf_list = []
  for config_path in config_paths:
    # Deduplicate (the original used a dict keyed by path for this).
    if config_path not in seen and os.path.exists(config_path):
      seen.add(config_path)
      cf_list.append(config_path)
  return cf_list
337 | |
338 | |
def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, amend the configuration
  (in boto.config) to use the cert roots distributed with gsutil.

  Returns:
    string filename of the certs file to use.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    # Lock protects reads/writes of the module-level configured_certs_files
    # list across threads.
    with certs_file_lock:
      if configured_certs_files:
        # A temp certs file was already created by an earlier call; reuse it.
        disk_certs_file = configured_certs_files[0]
      else:
        disk_certs_file = os.path.abspath(
            os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
        if not os.path.exists(disk_certs_file):
          # If the file is not present on disk, this means the gslib module
          # doesn't actually exist on disk anywhere. This can happen if it's
          # being imported from a zip file. Unfortunately, we have to copy the
          # certs file to a local temp file on disk because the underlying SSL
          # socket requires it to be a filesystem path.
          certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
          if not certs_data:
            raise CommandException('Certificates file not found. Please '
                                   'reinstall gsutil from scratch')
          fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
          f = os.fdopen(fd, 'w')
          f.write(certs_data)
          f.close()
          # Remember the temp file so it can be reused and later cleaned up
          # (see GetCleanupFiles).
          configured_certs_files.append(fname)
          disk_certs_file = fname
      certs_file = disk_certs_file
  return certs_file
374 | |
375 | |
def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  # Return a copy so callers can't mutate the module-level list.
  return list(configured_certs_files)
382 | |
383 | |
def ProxyInfoFromEnvironmentVar(proxy_env_var):
  """Reads proxy info from the environment and converts to httplib2.ProxyInfo.

  Args:
    proxy_env_var: Environment variable string to read, such as http_proxy or
        https_proxy.

  Returns:
    httplib2.ProxyInfo constructed from the environment string.
  """
  env_value = os.environ.get(proxy_env_var)
  var_is_http_like = proxy_env_var.lower().startswith('http')
  if not (env_value and var_is_http_like):
    # Nothing usable set; return an empty HTTP-type proxy configuration.
    return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0)
  proxy_protocol = proxy_env_var.lower().split('_')[0]
  if not env_value.lower().startswith('http'):
    # proxy_info_from_url requires a protocol, which is always http or https.
    env_value = proxy_protocol + '://' + env_value
  return httplib2.proxy_info_from_url(env_value, method=proxy_protocol)
402 | |
403 | |
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor.

  Returns:
    An initialized httplib2.Http instance.
  """
  # proxy_type=3 is presumably httplib2.socks.PROXY_TYPE_HTTP (matches the
  # default used by ProxyInfoFromEnvironmentVar) -- confirm against httplib2.
  proxy_info = httplib2.ProxyInfo(
      proxy_type=3,
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))

  if not (proxy_info.proxy_host and proxy_info.proxy_port):
    # Fall back to using the environment variable.
    for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
      if proxy_env_var in os.environ and os.environ[proxy_env_var]:
        proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
        # Assume proxy_rnds is True if a proxy environment variable exists.
        proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True)
        break

  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  http = http_class(proxy_info=proxy_info, **kwargs)
  # Respect the Boto https_validate_certificates setting.
  http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates'))
  return http
440 | |
441 | |
def GetNumRetries():
  """Returns the Boto num_retries config value (default 6)."""
  return config.getint('Boto', 'num_retries', 6)
444 | |
445 | |
def GetMaxRetryDelay():
  """Returns the Boto max_retry_delay config value in seconds (default 60)."""
  return config.getint('Boto', 'max_retry_delay', 60)
448 | |
449 | |
450 # Resumable downloads and uploads make one HTTP call per chunk (and must be | |
451 # in multiples of 256KiB). Overridable for testing. | |
452 def GetJsonResumableChunkSize(): | |
453 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size', | |
454 1024*1024*100L) | |
455 if chunk_size == 0: | |
456 chunk_size = 1024*256L | |
457 elif chunk_size % 1024*256L != 0: | |
458 chunk_size += (1024*256L - (chunk_size % (1024*256L))) | |
459 return chunk_size | |
460 | |
461 | |
def _RoundToNearestExponent(num):
  """Finds the largest _EXP_STRINGS entry whose threshold does not exceed num.

  Args:
    num: Nonnegative number (bytes or bits).

  Returns:
    Tuple (index into _EXP_STRINGS, num divided by that entry's power of two,
    rounded to 2 decimal places).
  """
  chosen = 0
  for candidate in range(1, len(_EXP_STRINGS)):
    if num < (2 ** _EXP_STRINGS[candidate][0]):
      break
    chosen = candidate
  return chosen, round(float(num) / 2 ** _EXP_STRINGS[chosen][0], 2)
467 | |
468 | |
def MakeHumanReadable(num):
  """Generates human readable string for a number of bytes.

  Args:
    num: The number, in bytes.

  Returns:
    A string form of the number using size abbreviations (KiB, MiB, etc.).
  """
  exp_index, scaled_value = _RoundToNearestExponent(num)
  byte_suffix = _EXP_STRINGS[exp_index][1]
  return '%g %s' % (scaled_value, byte_suffix)
480 | |
481 | |
def MakeBitsHumanReadable(num):
  """Generates human readable string for a number of bits.

  Args:
    num: The number, in bits.

  Returns:
    A string form of the number using bit size abbreviations (kbit, Mbit, etc.)
  """
  exp_index, scaled_value = _RoundToNearestExponent(num)
  bit_suffix = _EXP_STRINGS[exp_index][2]
  return '%g %s' % (scaled_value, bit_suffix)
493 | |
494 | |
def HumanReadableToBytes(human_string):
  """Tries to convert a human-readable string to a number of bytes.

  Args:
    human_string: A string supplied by user, e.g. '1M', '3 GiB'.
  Returns:
    An integer containing the number of bytes.
  Raises:
    ValueError: on an invalid string.
  """
  lowered = human_string.lower()
  match = MATCH_HUMAN_BYTES.match(lowered)
  if not match:
    raise ValueError('Invalid byte string specified: %s' % lowered)
  value = float(match.group('num'))
  suffix = match.group('suffix')
  if suffix:
    # Scale by the power of two that corresponds to the suffix.
    value *= (2.0 ** _EXP_STRINGS[SUFFIX_TO_SI[suffix]][0])
  return int(round(value))
515 | |
516 | |
def Percentile(values, percent, key=lambda x: x):
  """Find the percentile of a list of values.

  Taken from: http://code.activestate.com/recipes/511478/

  Args:
    values: a list of numeric values. Note that the values MUST BE already
        sorted.
    percent: a float value from 0.0 to 1.0.
    key: optional key function to compute value from each element of the list
        of values.

  Returns:
    The percentile of the values, or None if values is empty.
  """
  if not values:
    return None
  rank = (len(values) - 1) * percent
  lower_idx = int(math.floor(rank))
  upper_idx = int(math.ceil(rank))
  if lower_idx == upper_idx:
    # rank is integral: no interpolation needed.
    return key(values[lower_idx])
  # Linearly interpolate between the two bracketing values.
  lower_part = key(values[lower_idx]) * (upper_idx - rank)
  upper_part = key(values[upper_idx]) * (rank - lower_idx)
  return lower_part + upper_part
542 | |
543 | |
def RemoveCRLFFromString(input_str):
  """Returns the input string with all \\n and \\r removed."""
  return input_str.replace('\r', '').replace('\n', '')
547 | |
548 | |
def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict (child elements in sorted
    key order).

  Raises:
    Exception: if dict contains more than one top-level entry.
  """
  if len(message) != 1:
    raise Exception('Expected dict of size 1, got size %d' % len(message))

  # next(iter(...)) works for both list-style (Python 2) and view-style
  # (Python 3) items(); the previous .items()[0] was Python 2 only.
  name, content = next(iter(message.items()))
  element_type = ElementTree.Element(name)
  for element_property, value in sorted(content.items()):
    node = ElementTree.SubElement(element_type, element_property)
    node.text = value
  return ElementTree.tostring(element_type)
573 | |
574 | |
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None (implicitly, when no match is found or the URL is not
    a cloud URL).
  """
  url = StorageUrlFromString(url_str)
  if url.IsCloudUrl():
    # Only the metadata field is needed; avoid fetching the full object info.
    obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                       provider=url.scheme,
                                       fields=['metadata'])
    if obj.metadata and obj.metadata.additionalProperties:
      for prop in obj.metadata.additionalProperties:
        if prop.key == 'gsutil_version':
          return prop.value
597 | |
598 | |
def GetGsutilVersionModifiedTime():
  """Returns unix timestamp of when the VERSION file was last modified.

  Returns 0 when gslib.VERSION_FILE is unset/falsy.
  """
  if not gslib.VERSION_FILE:
    return 0
  return int(os.path.getmtime(gslib.VERSION_FILE))
604 | |
605 | |
def IsRunningInteractively():
  """Returns True if currently running interactively on a TTY."""
  # All three standard streams must be attached to a terminal.
  return all(
      stream.isatty() for stream in (sys.stdout, sys.stderr, sys.stdin))
609 | |
610 | |
def _HttpsValidateCertifcatesEnabled():
  """Returns the Boto https_validate_certificates config value (default True).

  NOTE: the function name misspells 'Certificates'; kept as-is since it is
  referenced below.
  """
  return config.get('Boto', 'https_validate_certificates', True)

# Cached at import time for all callers.
CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertifcatesEnabled()
615 | |
616 | |
def _BotoIsSecure():
  """Returns the Boto is_secure config value (default True)."""
  return config.get('Boto', 'is_secure', True)

# Cached at import time for all callers.
BOTO_IS_SECURE = _BotoIsSecure()
621 | |
622 | |
def ResumableThreshold():
  """Returns GSUtil:resumable_threshold config value in bytes (default 8 MiB)."""
  return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
625 | |
626 | |
def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers."""
  # setdefault only inserts when the key is absent, matching the original
  # membership check; an existing value is never overwritten.
  headers.setdefault('accept-encoding', 'gzip')
632 | |
633 | |
634 # pylint: disable=too-many-statements | |
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
        Must have ref_type OBJECT and a populated root_object
        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  # S3 delete markers carry no content; count them as zero objects/bytes and
  # tag the printed URL.
  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  # Separate gsutil-internal S3 marker metadata (S3_MARKER_GUIDS) from
  # user-visible metadata; markers are consumed below (e.g. for the ACL).
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
712 | |
713 | |
def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
      g is True if first known to be greater than second, else False.
      m is True if first known to be greater by at least 1 major version,
        else False.
  """
  match1 = VERSION_MATCHER.match(str(first))
  match2 = VERSION_MATCHER.match(str(second))

  # If passed strings we don't know how to handle, be conservative.
  if match1 is None or match2 is None:
    return (False, False)

  def _Parts(match):
    # (major, minor, suffix) with a missing minor defaulting to 0.
    return (int(match.group('maj')),
            int(match.group('min')) if match.group('min') else 0,
            match.group('suffix'))

  maj1, min1, suffix1 = _Parts(match1)
  maj2, min2, suffix2 = _Parts(match2)

  if maj1 != maj2:
    is_greater = maj1 > maj2
    return (is_greater, is_greater)
  if min1 != min2:
    return (min1 > min2, False)
  # Same major.minor: a suffixed version ranks below the bare one.
  return (bool(suffix2) and not suffix1, False)
754 | |
755 | |
def _IncreaseSoftLimitForResource(resource_name, fallback_value):
  """Sets a new soft limit for the maximum number of open files.

  The soft limit is used for this process (and its children), but the
  hard limit is set by the system and cannot be exceeded.

  We will first try to set the soft limit to the hard limit's value; if that
  fails, we will try to set the soft limit to the fallback_value iff this would
  increase the soft limit.

  Args:
    resource_name: Name of the resource to increase the soft limit for
        (a resource.RLIMIT_* constant).
    fallback_value: Fallback value to be used if we couldn't set the
        soft value to the hard value (e.g., if the hard value
        is "unlimited").

  Returns:
    Current soft limit for the resource (after any changes we were able to
    make), or -1 if the resource doesn't exist.
  """

  # Get the value of the resource.
  try:
    (soft_limit, hard_limit) = resource.getrlimit(resource_name)
  except (resource.error, ValueError):
    # The resource wasn't present, so we can't do anything here.
    return -1

  # Try to set the value of the soft limit to the value of the hard limit.
  if hard_limit > soft_limit:  # Some OS's report 0 for "unlimited".
    try:
      resource.setrlimit(resource_name, (hard_limit, hard_limit))
      return hard_limit
    except (resource.error, ValueError):
      # We'll ignore this and try the fallback value.
      pass

  # Try to set the value of the soft limit to the fallback value.
  if soft_limit < fallback_value:
    try:
      resource.setrlimit(resource_name, (fallback_value, hard_limit))
      return fallback_value
    except (resource.error, ValueError):
      # We couldn't change the soft limit, so just report the current
      # value of the soft limit.
      return soft_limit
  else:
    # Soft limit already at or above the fallback; leave it unchanged.
    return soft_limit
804 | |
805 | |
def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
        Cloud API instance.

  Returns:
    gsutil Cloud API instance.
  """
  # A truthy thread_state wins; otherwise fall back to the class's instance.
  if thread_state:
    return thread_state
  return cls.gsutil_api
822 | |
823 | |
def GetFileSize(fp, position_to_eof=False):
  """Returns size of file, optionally leaving fp positioned at EOF."""
  if position_to_eof:
    # Caller wants fp left at EOF, so no need to remember where we were.
    fp.seek(0, os.SEEK_END)
    return fp.tell()
  # Remember the current offset so it can be restored after measuring.
  original_offset = fp.tell()
  fp.seek(0, os.SEEK_END)
  size = fp.tell()
  fp.seek(original_offset)
  return size
833 | |
834 | |
def GetStreamFromFileUrl(storage_url):
  """Returns a read stream for a file URL: stdin for streams, else the file."""
  if not storage_url.IsStream():
    return open(storage_url.object_name, 'rb')
  return sys.stdin
840 | |
841 | |
def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for single provider, False otherwise.
  """
  seen_scheme = None
  for arg in url_args:
    scheme = StorageUrlFromString(arg).scheme
    if not seen_scheme:
      # First (non-empty) scheme encountered becomes the reference.
      seen_scheme = scheme
    elif scheme != seen_scheme:
      return False
  # An empty url_args list yields False: there is no provider at all.
  return seen_scheme is not None
860 | |
861 | |
def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
  return any(
      StorageUrlFromString(arg).IsFileUrl() for arg in args_to_check)
876 | |
877 | |
def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
  # Parse each argument once, then test both predicates on the parsed URL.
  parsed_urls = (StorageUrlFromString(arg) for arg in args_to_check)
  return any(url.IsCloudUrl() and url.IsProvider() for url in parsed_urls)
892 | |
893 | |
def MultiprocessingIsAvailable(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  The result (and any stack trace / warning message) is cached in module
  globals, so the probing work below runs at most once per process.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
          available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  # These cache globals are created on first call (hence the pylint disable).
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  if cached_multiprocessing_is_available is not None:
    # Cached result available: re-emit the diagnostics and return it without
    # re-running any of the probes below.
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return (cached_multiprocessing_is_available,
            cached_multiprocessing_check_stack_trace)

  stack_trace = None
  multiprocessing_is_available = True
  # Base warning text; the failure-specific probes below append to it.
  message = """
You have requested multiple threads or processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Your operations will be performed sequentially, and any requests for
parallelism will be ignored.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      if not IS_WINDOWS:
        message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    # Raising here funnels into the generic handler below, which records the
    # stack trace and marks multiprocessing as unavailable.
    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    # Any failure above (shared memory, Manager, or file limits) lands here:
    # remember the trace, disable parallelism, and warn the user once.
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return (multiprocessing_is_available, stack_trace)
999 | |
1000 | |
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  Use Multiprocessing lock iff we have access to the parts of the
  multiprocessing module that are necessary to enable parallelism in operations.

  Returns:
    Multiprocessing or threading lock.
  """
  parallelism_ok, _ = MultiprocessingIsAvailable()
  if not parallelism_ok:
    # Fall back to a plain thread lock when process-level sharing is broken.
    return threading.Lock()
  return manager.Lock()
1014 | |
1015 | |
def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL is a cloud subdir placeholder.

  This function is needed because GUI tools (like the GCS cloud console) allow
  users to create empty "folders" by creating a placeholder object; and parts
  of gsutil need to treat those placeholder objects specially. For example,
  gsutil rsync needs to avoid downloading those objects because they can cause
  conflicts (see comments in rsync command for details).

  We currently detect two cases:
    - Cloud objects whose name ends with '_$folder$'
    - Cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    return False
  name = url.url_string
  if name.endswith('_$folder$'):
    return True
  # Without a listing ref we cannot know the size, so treat it as zero and
  # rely on the trailing-slash check alone.
  object_size = blr.root_object.size if blr and blr.IsObject() else 0
  return object_size == 0 and name.endswith('/')
1047 | |
1048 | |
def GetTermLines():
  """Returns the terminal's line count, or a default if it can't be found."""
  # Windows has no fcntl/termios; fall back to the default immediately.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def _RowsForFd(descriptor):
    """Returns rows reported by TIOCGWINSZ for descriptor, or 0 on failure."""
    try:
      window = fcntl.ioctl(descriptor, termios.TIOCGWINSZ, '1234')
      return struct.unpack('hh', window)[0]
    except:  # pylint: disable=bare-except
      return 0

  # Probe stdin, stdout, and stderr (in that order) for a usable window size.
  rows = 0
  for descriptor in (0, 1, 2):
    rows = _RowsForFd(descriptor)
    if rows:
      break
  if not rows:
    # None of the standard descriptors worked; ask the controlling terminal.
    try:
      term_fd = os.open(os.ctermid(), os.O_RDONLY)
      rows = _RowsForFd(term_fd)
      os.close(term_fd)
    except:  # pylint: disable=bare-except
      pass
  if not rows:
    # Last resort: honor the LINES environment variable, else the default.
    rows = os.environ.get('LINES', _DEFAULT_LINES)
  return int(rows)
1076 | |
1077 | |
class GsutilStreamHandler(logging.StreamHandler):
  """A subclass of StreamHandler for use in gsutil."""

  def flush(self):
    """Flushes the stream, swallowing errors from an already-closed stream.

    Python 2.6's logging module flushes every stream handler at process
    exit; if the handler's stream is already closed (as happens when unit
    tests redirect stderr to a temporary file that is gone by shutdown),
    that flush raises ValueError. Python 2.7 fixed this upstream, but we
    suppress the error here to keep Python 2.6 from flaking.
    """
    try:
      logging.StreamHandler.flush(self)
    except ValueError:
      pass
1094 | |
1095 | |
def StdinIterator():
  """Yields lines from stdin with trailing whitespace (incl. CRLF) removed."""
  for raw_line in sys.stdin:
    yield raw_line.rstrip()
OLD | NEW |