Chromium Code Reviews

Side by Side Diff: third_party/gsutil/gslib/util.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 2 months ago
1 # -*- coding: utf-8 -*-
2 # Copyright 2010 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Static data and helper functions."""
16
17 from __future__ import absolute_import
18
19 import errno
20 import logging
21 import math
22 import multiprocessing
23 import os
24 import pkgutil
25 import re
26 import struct
27 import sys
28 import tempfile
29 import textwrap
30 import threading
31 import traceback
32 import xml.etree.ElementTree as ElementTree
33
34 import boto
35 from boto import config
36 import boto.auth
37 from boto.exception import NoAuthHandlerFound
38 from boto.gs.connection import GSConnection
39 from boto.provider import Provider
40 from boto.pyami.config import BotoConfigLocations
41 import httplib2
42 from oauth2client.client import HAS_CRYPTO
43 from retry_decorator import retry_decorator
44
45 import gslib
46 from gslib.exception import CommandException
47 from gslib.storage_url import StorageUrlFromString
48 from gslib.translation_helper import AclTranslation
49 from gslib.translation_helper import GenerationFromUrlAndString
50 from gslib.translation_helper import S3_ACL_MARKER_GUID
51 from gslib.translation_helper import S3_DELETE_MARKER_GUID
52 from gslib.translation_helper import S3_MARKER_GUIDS
53
54 # pylint: disable=g-import-not-at-top
55 try:
56 # This module doesn't necessarily exist on Windows.
57 import resource
58 HAS_RESOURCE_MODULE = True
59 except ImportError:
60 HAS_RESOURCE_MODULE = False
61
62 ONE_KIB = 1024
63 ONE_MIB = 1024 * 1024
64 TWO_MIB = 2 * ONE_MIB
65 EIGHT_MIB = 8 * ONE_MIB
66 TEN_MIB = 10 * ONE_MIB
67 DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB
68 _DEFAULT_LINES = 25
69
70 # By default, the timeout for SSL read errors is infinite. This could
71 # cause gsutil to hang on network disconnect, so pick a more reasonable
72 # timeout.
73 SSL_TIMEOUT = 60
74
75 # Start with a progress callback every 64 KiB during uploads/downloads (JSON
76 # API). The callback implementation should back off until it reaches the
77 # maximum interval so that callbacks do not create huge amounts of log output.
78 START_CALLBACK_PER_BYTES = 1024*64
79 MAX_CALLBACK_PER_BYTES = 1024*1024*100
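# A conforming implementation might double the per-callback interval until it
# reaches the maximum, e.g. (an illustrative sketch, not the actual schedule):
#   interval = START_CALLBACK_PER_BYTES
#   while bytes_remaining:
#     ... fire progress callback ...
#     interval = min(interval * 2, MAX_CALLBACK_PER_BYTES)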
80
81 # Upload/download files in 8 KiB chunks over the HTTP connection.
82 TRANSFER_BUFFER_SIZE = 1024*8
83
84 # Default number of progress callbacks during transfer (XML API).
85 XML_PROGRESS_CALLBACKS = 10
86
87 # For files >= this size, output a message indicating that we're running an
88 # operation on the file (like hashing or gzipping) so it does not appear to the
89 # user that the command is hanging.
90 MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MiB
91
92 NO_MAX = sys.maxint
93
94 UTF8 = 'utf-8'
95
96 VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')
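# For example (illustrative), VERSION_MATCHER.match('4.19pre1') yields
# maj='4', min='19', suffix='pre1'.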
97
98 RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
99
100 # Binary exponentiation strings.
101 _EXP_STRINGS = [
102 (0, 'B', 'bit'),
103 (10, 'KiB', 'Kibit', 'K'),
104 (20, 'MiB', 'Mibit', 'M'),
105 (30, 'GiB', 'Gibit', 'G'),
106 (40, 'TiB', 'Tibit', 'T'),
107 (50, 'PiB', 'Pibit', 'P'),
108 (60, 'EiB', 'Eibit', 'E'),
109 ]
110
111
112 global manager # pylint: disable=global-at-module-level
113 certs_file_lock = threading.Lock()
114 configured_certs_files = []
115
116
117 def InitializeMultiprocessingVariables():
118 """Perform necessary initialization for multiprocessing.
119
120 See gslib.command.InitializeMultiprocessingVariables for an explanation
121 of why this is necessary.
122 """
123 global manager # pylint: disable=global-variable-undefined
124 manager = multiprocessing.Manager()
125
126
127 def _GenerateSuffixRegex():
128 """Creates a suffix regex for human-readable byte counts."""
129 human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
130 suffixes = []
131 suffix_to_si = {}
132 for i, si in enumerate(_EXP_STRINGS):
133 si_suffixes = [s.lower() for s in list(si)[1:]]
134 for suffix in si_suffixes:
135 suffix_to_si[suffix] = i
136 suffixes.extend(si_suffixes)
137 human_bytes_re %= '|'.join(suffixes)
138 matcher = re.compile(human_bytes_re)
139 return suffix_to_si, matcher
140
141 SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()
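# Illustrative use (matching is done against lowercased input; see
# HumanReadableToBytes below):
#   >>> MATCH_HUMAN_BYTES.match('3.5 gib').group('num', 'suffix')
#   ('3.5', 'gib')
#   >>> SUFFIX_TO_SI['gib']  # Index into _EXP_STRINGS: 2**30 bytes.
#   3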
142
143 SECONDS_PER_DAY = 3600 * 24
144
145 # Detect platform types.
146 PLATFORM = str(sys.platform).lower()
147 IS_WINDOWS = 'win32' in PLATFORM
148 IS_CYGWIN = 'cygwin' in PLATFORM
149 IS_LINUX = 'linux' in PLATFORM
150 IS_OSX = 'darwin' in PLATFORM
151
152 # On Unix-like systems, we raise the soft limit on the number of open files so
153 # parallel operations don't exhaust file descriptors. Obtained experimentally.
154 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000
155
156 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'
157
158 Retry = retry_decorator.retry # pylint: disable=invalid-name
159
160 # Cache the values from this check such that they're available to all callers
161 # without needing to run all the checks again (some of these, such as calling
162 # multiprocessing.Manager(), are expensive operations).
163 cached_multiprocessing_is_available = None
164 cached_multiprocessing_check_stack_trace = None
165 cached_multiprocessing_is_available_message = None
166
167
168 # Enum class for specifying listing style.
169 class ListingStyle(object):
170 SHORT = 'SHORT'
171 LONG = 'LONG'
172 LONG_LONG = 'LONG_LONG'
173
174
175 def UsingCrcmodExtension(crcmod):
176 return (getattr(crcmod, 'crcmod', None) and
177 getattr(crcmod.crcmod, '_usingExtension', None))
178
179
180 def CreateDirIfNeeded(dir_path, mode=0777):
181 """Creates a directory, suppressing already-exists errors."""
182 if not os.path.exists(dir_path):
183 try:
184 # Unfortunately, even though we catch and ignore EEXIST, this call will
185 # output a (needless) error message (no way to avoid that in Python).
186 os.makedirs(dir_path, mode)
187 # Ignore 'already exists' in case user tried to start up several
188 # resumable uploads concurrently from a machine where no tracker dir had
189 # yet been created.
190 except OSError as e:
191 if e.errno != errno.EEXIST:
192 raise
193
194
195 def GetGsutilStateDir():
196 """Returns the location of the directory for gsutil state files.
197
198 Certain operations, such as cross-process credential sharing and
199 resumable transfer tracking, need a known location for state files which
200 are created by gsutil as-needed.
201
202 This location should only be used for storing data that is required to be in
203 a static location.
204
205 Returns:
206 Path to directory for gsutil static state files.
207 """
208 config_file_dir = config.get(
209 'GSUtil', 'state_dir',
210 os.path.expanduser(os.path.join('~', '.gsutil')))
211 CreateDirIfNeeded(config_file_dir)
212 return config_file_dir
213
214
215 def GetCredentialStoreFilename():
216 return os.path.join(GetGsutilStateDir(), 'credstore')
217
218
219 def GetGceCredentialCacheFilename():
220 return os.path.join(GetGsutilStateDir(), 'gcecredcache')
221
222
223 def GetTabCompletionLogFilename():
224 return os.path.join(GetGsutilStateDir(), 'tab-completion-logs')
225
226
227 def GetTabCompletionCacheFilename():
228 tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion')
229 # Limit read permissions on the directory to owner for privacy.
230 CreateDirIfNeeded(tab_completion_dir, mode=0700)
231 return os.path.join(tab_completion_dir, 'cache')
232
233
234 def PrintTrackerDirDeprecationWarningIfNeeded():
235 # TODO: Remove this along with the tracker_dir config value 1 year after
236 # 4.6 release date. Use state_dir instead.
237 if config.has_option('GSUtil', 'resumable_tracker_dir'):
238 sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
239 '.boto configuration file. This configuration option is '
240 'deprecated; please use the state_dir configuration '
241 'option instead.\n')
242
243
244 # Name of file where we keep the timestamp for the last time we checked whether
245 # a new version of gsutil is available.
246 PrintTrackerDirDeprecationWarningIfNeeded()
247 CreateDirIfNeeded(GetGsutilStateDir())
248 LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
249 os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
250
251
252 def HasConfiguredCredentials():
253 """Determines if boto credential/config file exists."""
254 has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
255 config.has_option('Credentials', 'gs_secret_access_key'))
256 has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
257 config.has_option('Credentials', 'aws_secret_access_key'))
258 has_oauth_creds = (
259 config.has_option('Credentials', 'gs_oauth2_refresh_token'))
260 has_service_account_creds = (
261 HAS_CRYPTO and
262 config.has_option('Credentials', 'gs_service_client_id') and
263 config.has_option('Credentials', 'gs_service_key_file'))
264
265 if (has_goog_creds or has_amzn_creds or has_oauth_creds or
266 has_service_account_creds):
267 return True
268
269 valid_auth_handler = None
270 try:
271 valid_auth_handler = boto.auth.get_auth_handler(
272 GSConnection.DefaultHost, config, Provider('google'),
273 requested_capability=['s3'])
274 # Exclude the no-op auth handler as indicating credentials are configured.
275 # Note we can't use isinstance() here because the no-op module may not be
276 # imported so we can't get a reference to the class type.
277 if getattr(getattr(valid_auth_handler, '__class__', None),
278 '__name__', None) == 'NoOpAuth':
279 valid_auth_handler = None
280 except NoAuthHandlerFound:
281 pass
282
283 return valid_auth_handler
284
285
286 def ConfigureNoOpAuthIfNeeded():
287 """Sets up no-op auth handler if no boto credentials are configured."""
288 if not HasConfiguredCredentials():
289 if (config.has_option('Credentials', 'gs_service_client_id')
290 and not HAS_CRYPTO):
291 if os.environ.get('CLOUDSDK_WRAPPER') == '1':
292 raise CommandException('\n'.join(textwrap.wrap(
293 'Your gsutil is configured with an OAuth2 service account, but '
294 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
295 'Service account authentication requires one of these libraries; '
296 'please reactivate your service account via the gcloud auth '
297 'command and ensure any gcloud packages necessary for '
298 'service accounts are present.')))
299 else:
300 raise CommandException('\n'.join(textwrap.wrap(
301 'Your gsutil is configured with an OAuth2 service account, but '
302 'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
303 'Service account authentication requires one of these libraries; '
304 'please install either of them to proceed, or configure a '
305 'different type of credentials with "gsutil config".')))
306 else:
307 # With no boto config file the user can still access publicly readable
308 # buckets and objects.
309 from gslib import no_op_auth_plugin # pylint: disable=unused-variable
310
311
312 def GetConfigFilePath():
313 config_path = 'no config found'
314 for path in BotoConfigLocations:
315 try:
316 with open(path, 'r'):
317 config_path = path
318 break
319 except IOError:
320 pass
321 return config_path
322
323
324 def GetBotoConfigFileList():
325 """Returns list of boto config files that exist."""
326 config_paths = boto.pyami.config.BotoConfigLocations
327 if 'AWS_CREDENTIAL_FILE' in os.environ:
328 config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
329 config_files = {}
330 for config_path in config_paths:
331 if os.path.exists(config_path):
332 config_files[config_path] = 1
333 cf_list = []
334 for config_file in config_files:
335 cf_list.append(config_file)
336 return cf_list
337
338
339 def GetCertsFile():
340 """Configures and returns the CA Certificates file.
341
342 If one is already configured, use it. Otherwise, amend the configuration
343 (in boto.config) to use the cert roots distributed with gsutil.
344
345 Returns:
346 string filename of the certs file to use.
347 """
348 certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
349 if not certs_file:
350 with certs_file_lock:
351 if configured_certs_files:
352 disk_certs_file = configured_certs_files[0]
353 else:
354 disk_certs_file = os.path.abspath(
355 os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
356 if not os.path.exists(disk_certs_file):
357 # If the file is not present on disk, this means the gslib module
358 # doesn't actually exist on disk anywhere. This can happen if it's
359 # being imported from a zip file. Unfortunately, we have to copy the
360 # certs file to a local temp file on disk because the underlying SSL
361 # socket requires it to be a filesystem path.
362 certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
363 if not certs_data:
364 raise CommandException('Certificates file not found. Please '
365 'reinstall gsutil from scratch')
366 fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
367 f = os.fdopen(fd, 'w')
368 f.write(certs_data)
369 f.close()
370 configured_certs_files.append(fname)
371 disk_certs_file = fname
372 certs_file = disk_certs_file
373 return certs_file
374
375
376 def GetCleanupFiles():
377 """Returns a list of temp files to delete (if possible) when program exits."""
378 cleanup_files = []
379 if configured_certs_files:
380 cleanup_files += configured_certs_files
381 return cleanup_files
382
383
384 def ProxyInfoFromEnvironmentVar(proxy_env_var):
385 """Reads proxy info from the environment and converts to httplib2.ProxyInfo.
386
387 Args:
388 proxy_env_var: Environment variable string to read, such as http_proxy or
389 https_proxy.
390
391 Returns:
392 httplib2.ProxyInfo constructed from the environment string.
393 """
394 proxy_url = os.environ.get(proxy_env_var)
395 if not proxy_url or not proxy_env_var.lower().startswith('http'):
396 return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0)
397 proxy_protocol = proxy_env_var.lower().split('_')[0]
398 if not proxy_url.lower().startswith('http'):
399 # proxy_info_from_url requires a protocol, which is always http or https.
400 proxy_url = proxy_protocol + '://' + proxy_url
401 return httplib2.proxy_info_from_url(proxy_url, method=proxy_protocol)
402
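# For example (hypothetical host and port), with
# os.environ['http_proxy'] == 'proxy.example.com:3128' this returns the
# equivalent of httplib2.proxy_info_from_url('http://proxy.example.com:3128',
# method='http').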
403
404 def GetNewHttp(http_class=httplib2.Http, **kwargs):
405 """Creates and returns a new httplib2.Http instance.
406
407 Args:
408 http_class: Optional custom Http class to use.
409 **kwargs: Arguments to pass to http_class constructor.
410
411 Returns:
412 An initialized httplib2.Http instance.
413 """
414 proxy_info = httplib2.ProxyInfo(
415 proxy_type=3,
416 proxy_host=boto.config.get('Boto', 'proxy', None),
417 proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
418 proxy_user=boto.config.get('Boto', 'proxy_user', None),
419 proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
420 proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
421
422 if not (proxy_info.proxy_host and proxy_info.proxy_port):
423 # Fall back to using the environment variable.
424 for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
425 if proxy_env_var in os.environ and os.environ[proxy_env_var]:
426 proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
427 # Assume proxy_rdns is True if a proxy environment variable exists.
428 proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True)
429 break
430
431 # Some installers don't package a certs file with httplib2, so use the
432 # one included with gsutil.
433 kwargs['ca_certs'] = GetCertsFile()
434 # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
435 kwargs['timeout'] = SSL_TIMEOUT
436 http = http_class(proxy_info=proxy_info, **kwargs)
437 http.disable_ssl_certificate_validation = (not config.getbool(
438 'Boto', 'https_validate_certificates'))
439 return http
440
441
442 def GetNumRetries():
443 return config.getint('Boto', 'num_retries', 6)
444
445
446 def GetMaxRetryDelay():
447 return config.getint('Boto', 'max_retry_delay', 60)
448
449
450 # Resumable downloads and uploads make one HTTP call per chunk, and the chunk
451 # size must be a multiple of 256 KiB. Overridable for testing.
452 def GetJsonResumableChunkSize():
453 chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
454 1024*1024*100L)
455 if chunk_size == 0:
456 chunk_size = 1024*256L
457 elif chunk_size % (1024*256L) != 0:
458 chunk_size += (1024*256L - (chunk_size % (1024*256L)))
459 return chunk_size
460
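# Illustrative rounding (values hypothetical): a configured chunk size of
# 1000000 is rounded up to the next 256 KiB multiple, 4 * 262144 = 1048576;
# a configured value of 0 yields 262144.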
461
462 def _RoundToNearestExponent(num):
463 i = 0
464 while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]):
465 i += 1
466 return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2)
467
468
469 def MakeHumanReadable(num):
470 """Generates human readable string for a number of bytes.
471
472 Args:
473 num: The number, in bytes.
474
475 Returns:
476 A string form of the number using size abbreviations (KiB, MiB, etc.).
477 """
478 i, rounded_val = _RoundToNearestExponent(num)
479 return '%g %s' % (rounded_val, _EXP_STRINGS[i][1])
480
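# Doctest-style examples (illustrative only):
#   >>> MakeHumanReadable(2048)
#   '2 KiB'
#   >>> MakeHumanReadable(1536)
#   '1.5 KiB'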
481
482 def MakeBitsHumanReadable(num):
483 """Generates human readable string for a number of bits.
484
485 Args:
486 num: The number, in bits.
487
488 Returns:
489 A string form of the number using bit size abbreviations (kbit, Mbit, etc.)
490 """
491 i, rounded_val = _RoundToNearestExponent(num)
492 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2])
493
494
495 def HumanReadableToBytes(human_string):
496 """Tries to convert a human-readable string to a number of bytes.
497
498 Args:
499 human_string: A string supplied by user, e.g. '1M', '3 GiB'.
500 Returns:
501 An integer containing the number of bytes.
502 Raises:
503 ValueError: on an invalid string.
504 """
505 human_string = human_string.lower()
506 m = MATCH_HUMAN_BYTES.match(human_string)
507 if m:
508 num = float(m.group('num'))
509 if m.group('suffix'):
510 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0]
511 num *= (2.0 ** power)
512 num = int(round(num))
513 return num
514 raise ValueError('Invalid byte string specified: %s' % human_string)
515
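# Doctest-style examples (illustrative only):
#   >>> HumanReadableToBytes('1M')
#   1048576
#   >>> HumanReadableToBytes('2.5 KiB')
#   2560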
516
517 def Percentile(values, percent, key=lambda x: x):
518 """Find the percentile of a list of values.
519
520 Taken from: http://code.activestate.com/recipes/511478/
521
522 Args:
523 values: a list of numeric values. Note that the values MUST BE already
524 sorted.
525 percent: a float value from 0.0 to 1.0.
526 key: optional key function to compute value from each element of the list
527 of values.
528
529 Returns:
530 The percentile of the values.
531 """
532 if not values:
533 return None
534 k = (len(values) - 1) * percent
535 f = math.floor(k)
536 c = math.ceil(k)
537 if f == c:
538 return key(values[int(k)])
539 d0 = key(values[int(f)]) * (c-k)
540 d1 = key(values[int(c)]) * (k-f)
541 return d0 + d1
542
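# Doctest-style examples (illustrative only); with an even number of values,
# the result is linearly interpolated between the two straddling elements:
#   >>> Percentile([1, 2, 3, 4, 5], 0.5)
#   3
#   >>> Percentile([1, 2, 3, 4], 0.5)
#   2.5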
543
544 def RemoveCRLFFromString(input_str):
545 """Returns the input string with all \\n and \\r removed."""
546 return re.sub(r'[\r\n]', '', input_str)
547
548
549 def UnaryDictToXml(message):
550 """Generates XML representation of a nested dict.
551
552 This dict contains exactly one top-level entry and an arbitrary number of
553 2nd-level entries, e.g. capturing a WebsiteConfiguration message.
554
555 Args:
556 message: The dict encoding the message.
557
558 Returns:
559 XML string representation of the input dict.
560
561 Raises:
562 Exception: if dict contains more than one top-level entry.
563 """
564 if len(message) != 1:
565 raise Exception('Expected dict of size 1, got size %d' % len(message))
566
567 name, content = message.items()[0]
568 element_type = ElementTree.Element(name)
569 for element_property, value in sorted(content.items()):
570 node = ElementTree.SubElement(element_type, element_property)
571 node.text = value
572 return ElementTree.tostring(element_type)
573
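# Illustrative example (hypothetical message):
#   >>> UnaryDictToXml({'WebsiteConfiguration': {'MainPageSuffix': 'index.html'}})
#   '<WebsiteConfiguration><MainPageSuffix>index.html</MainPageSuffix></WebsiteConfiguration>'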
574
575 def LookUpGsutilVersion(gsutil_api, url_str):
576 """Looks up the gsutil version of the specified gsutil tarball URL.
577
578 Version is specified in the metadata field set on that object.
579
580 Args:
581 gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
582 url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').
583
584 Returns:
585 Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
586 metadata, else None.
587 """
588 url = StorageUrlFromString(url_str)
589 if url.IsCloudUrl():
590 obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
591 provider=url.scheme,
592 fields=['metadata'])
593 if obj.metadata and obj.metadata.additionalProperties:
594 for prop in obj.metadata.additionalProperties:
595 if prop.key == 'gsutil_version':
596 return prop.value
597
598
599 def GetGsutilVersionModifiedTime():
600 """Returns unix timestamp of when the VERSION file was last modified."""
601 if not gslib.VERSION_FILE:
602 return 0
603 return int(os.path.getmtime(gslib.VERSION_FILE))
604
605
606 def IsRunningInteractively():
607 """Returns True if currently running interactively on a TTY."""
608 return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()
609
610
611 def _HttpsValidateCertificatesEnabled():
612 return config.get('Boto', 'https_validate_certificates', True)
613
614 CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
615
616
617 def _BotoIsSecure():
618 return config.get('Boto', 'is_secure', True)
619
620 BOTO_IS_SECURE = _BotoIsSecure()
621
622
623 def ResumableThreshold():
624 return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
625
626
627 def AddAcceptEncoding(headers):
628 """Adds accept-encoding:gzip to the dictionary of headers."""
629 # If Accept-Encoding is not already set, set it to enable gzip.
630 if 'accept-encoding' not in headers:
631 headers['accept-encoding'] = 'gzip'
632
633
634 # pylint: disable=too-many-statements
635 def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
636 """Print full info for given object (like what displays for gsutil ls -L).
637
638 Args:
639 bucket_listing_ref: BucketListingRef being listed.
640 Must have ref_type OBJECT and a populated root_object
641 with the desired fields.
642 incl_acl: True if ACL info should be output.
643
644 Returns:
645 Tuple (number of objects, object_length)
646
647 Raises:
648 Exception: if calling bug encountered.
649 """
650 url_str = bucket_listing_ref.url_string
651 storage_url = StorageUrlFromString(url_str)
652 obj = bucket_listing_ref.root_object
653
654 if (obj.metadata and S3_DELETE_MARKER_GUID in
655 obj.metadata.additionalProperties):
656 num_bytes = 0
657 num_objs = 0
658 url_str += '<DeleteMarker>'
659 else:
660 num_bytes = obj.size
661 num_objs = 1
662
663 print '%s:' % url_str.encode(UTF8)
664 if obj.updated:
665 print '\tCreation time:\t\t%s' % obj.updated.strftime(
666 '%a, %d %b %Y %H:%M:%S GMT')
667 if obj.cacheControl:
668 print '\tCache-Control:\t\t%s' % obj.cacheControl
669 if obj.contentDisposition:
670 print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
671 if obj.contentEncoding:
672 print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
673 if obj.contentLanguage:
674 print '\tContent-Language:\t%s' % obj.contentLanguage
675 print '\tContent-Length:\t\t%s' % obj.size
676 print '\tContent-Type:\t\t%s' % obj.contentType
677 if obj.componentCount:
678 print '\tComponent-Count:\t%d' % obj.componentCount
679 marker_props = {}
680 if obj.metadata and obj.metadata.additionalProperties:
681 non_marker_props = []
682 for add_prop in obj.metadata.additionalProperties:
683 if add_prop.key not in S3_MARKER_GUIDS:
684 non_marker_props.append(add_prop)
685 else:
686 marker_props[add_prop.key] = add_prop.value
687 if non_marker_props:
688 print '\tMetadata:'
689 for ap in non_marker_props:
690 meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
691 print meta_string.encode(UTF8)
692 if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
693 if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
694 print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
695 if obj.generation:
696 generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
697 print '\tGeneration:\t\t%s' % generation_str
698 if obj.metageneration:
699 print '\tMetageneration:\t\t%s' % obj.metageneration
700 if incl_acl:
701 # JSON API won't return acls as part of the response unless we have
702 # full control scope
703 if obj.acl:
704 print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
705 elif S3_ACL_MARKER_GUID in marker_props:
706 print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
707 else:
708 print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
709 'permission\n\t\t\t\ton the object to read its ACL.')
710
711 return (num_objs, num_bytes)
712
713
714 def CompareVersions(first, second):
715 """Compares the first and second gsutil version strings.
716
717 For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
718 Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
719 (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
720 less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).
721
722 Args:
723 first: First gsutil version string.
724 second: Second gsutil version string.
725
726 Returns:
727 (g, m):
728 g is True if first known to be greater than second, else False.
729 m is True if first known to be greater by at least 1 major version,
730 else False.
731 """
732 m1 = VERSION_MATCHER.match(str(first))
733 m2 = VERSION_MATCHER.match(str(second))
734
735 # If passed strings we don't know how to handle, be conservative.
736 if not m1 or not m2:
737 return (False, False)
738
739 major_ver1 = int(m1.group('maj'))
740 minor_ver1 = int(m1.group('min')) if m1.group('min') else 0
741 suffix_ver1 = m1.group('suffix')
742 major_ver2 = int(m2.group('maj'))
743 minor_ver2 = int(m2.group('min')) if m2.group('min') else 0
744 suffix_ver2 = m2.group('suffix')
745
746 if major_ver1 > major_ver2:
747 return (True, True)
748 elif major_ver1 == major_ver2:
749 if minor_ver1 > minor_ver2:
750 return (True, False)
751 elif minor_ver1 == minor_ver2:
752 return (bool(suffix_ver2) and not suffix_ver1, False)
753 return (False, False)
754
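# Doctest-style examples (illustrative only):
#   >>> CompareVersions('4.1', '3.33')  # Greater by a major version.
#   (True, True)
#   >>> CompareVersions('3.33', '3.7')  # Greater by a minor version only.
#   (True, False)
#   >>> CompareVersions('3.32', '3.32pre')  # Suffix sorts below non-suffix.
#   (True, False)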
755
756 def _IncreaseSoftLimitForResource(resource_name, fallback_value):
757 """Sets a new soft limit for the maximum number of open files.
758
759 The soft limit is used for this process (and its children), but the
760 hard limit is set by the system and cannot be exceeded.
761
762 We will first try to set the soft limit to the hard limit's value; if that
763 fails, we will try to set the soft limit to the fallback_value iff this would
764 increase the soft limit.
765
766 Args:
767 resource_name: Name of the resource to increase the soft limit for.
768 fallback_value: Fallback value to be used if we couldn't set the
769 soft value to the hard value (e.g., if the hard value
770 is "unlimited").
771
772 Returns:
773 Current soft limit for the resource (after any changes we were able to
774 make), or -1 if the resource doesn't exist.
775 """
776
777 # Get the value of the resource.
778 try:
779 (soft_limit, hard_limit) = resource.getrlimit(resource_name)
780 except (resource.error, ValueError):
781 # The resource wasn't present, so we can't do anything here.
782 return -1
783
784 # Try to set the value of the soft limit to the value of the hard limit.
785 if hard_limit > soft_limit: # Some OS's report 0 for "unlimited".
786 try:
787 resource.setrlimit(resource_name, (hard_limit, hard_limit))
788 return hard_limit
789 except (resource.error, ValueError):
790 # We'll ignore this and try the fallback value.
791 pass
792
793 # Try to set the value of the soft limit to the fallback value.
794 if soft_limit < fallback_value:
795 try:
796 resource.setrlimit(resource_name, (fallback_value, hard_limit))
797 return fallback_value
798 except (resource.error, ValueError):
799 # We couldn't change the soft limit, so just report the current
800 # value of the soft limit.
801 return soft_limit
802 else:
803 return soft_limit
804
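# Illustrative call (Unix-like systems only; mirrors the usage in
# MultiprocessingIsAvailable below):
#   new_limit = _IncreaseSoftLimitForResource(
#       resource.RLIMIT_NOFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT)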
805
806 def GetCloudApiInstance(cls, thread_state=None):
807 """Gets a gsutil Cloud API instance.
808
809 Since Cloud API implementations are not guaranteed to be thread-safe, each
810 thread needs its own instance. These instances are passed to each thread
811 via the thread pool logic in command.
812
813 Args:
814 cls: Command class to be used for single-threaded case.
815 thread_state: Per thread state from this thread containing a gsutil
816 Cloud API instance.
817
818 Returns:
819 gsutil Cloud API instance.
820 """
821 return thread_state or cls.gsutil_api
822
823
824 def GetFileSize(fp, position_to_eof=False):
825 """Returns size of file, optionally leaving fp positioned at EOF."""
826 if not position_to_eof:
827 cur_pos = fp.tell()
828 fp.seek(0, os.SEEK_END)
829 cur_file_size = fp.tell()
830 if not position_to_eof:
831 fp.seek(cur_pos)
832 return cur_file_size
833
834
835 def GetStreamFromFileUrl(storage_url):
836 if storage_url.IsStream():
837 return sys.stdin
838 else:
839 return open(storage_url.object_name, 'rb')
840
841
842 def UrlsAreForSingleProvider(url_args):
843 """Tests whether the URLs are all for a single provider.
844
845 Args:
846 url_args: Strings to check.
847
848 Returns:
849 True if URLs are for single provider, False otherwise.
850 """
851 provider = None
852 url = None
853 for url_str in url_args:
854 url = StorageUrlFromString(url_str)
855 if not provider:
856 provider = url.scheme
857 elif url.scheme != provider:
858 return False
859 return provider is not None
860
861
862 def HaveFileUrls(args_to_check):
863 """Checks whether args_to_check contain any file URLs.
864
865 Args:
866 args_to_check: Command-line argument subset to check.
867
868 Returns:
869 True if args_to_check contains any file URLs.
870 """
871 for url_str in args_to_check:
872 storage_url = StorageUrlFromString(url_str)
873 if storage_url.IsFileUrl():
874 return True
875 return False
876
877
878 def HaveProviderUrls(args_to_check):
879 """Checks whether args_to_check contains any provider URLs (like 'gs://').
880
881 Args:
882 args_to_check: Command-line argument subset to check.
883
884 Returns:
885 True if args_to_check contains any provider URLs.
886 """
887 for url_str in args_to_check:
888 storage_url = StorageUrlFromString(url_str)
889 if storage_url.IsCloudUrl() and storage_url.IsProvider():
890 return True
891 return False
892
893
894 def MultiprocessingIsAvailable(logger=None):
895 """Checks if multiprocessing is available.
896
897 There are some environments in which there is no way to use multiprocessing
898 logic that's built into Python (e.g., if /dev/shm is not available, then
899 we can't create semaphores). This simply tries out a few things that will be
900 needed to make sure the environment can support the pieces of the
901 multiprocessing module that we need.
902
903 Args:
904 logger: logging.logger to use for debug output.
905
906 Returns:
907 (multiprocessing_is_available, stack_trace):
908 multiprocessing_is_available: True iff the multiprocessing module is
909 available for use.
910 stack_trace: The stack trace generated by the call we tried that failed.
911 """
912 # pylint: disable=global-variable-undefined
913 global cached_multiprocessing_is_available
914 global cached_multiprocessing_check_stack_trace
915 global cached_multiprocessing_is_available_message
916 if cached_multiprocessing_is_available is not None:
917 if logger:
918 logger.debug(cached_multiprocessing_check_stack_trace)
919 logger.warn(cached_multiprocessing_is_available_message)
920 return (cached_multiprocessing_is_available,
921 cached_multiprocessing_check_stack_trace)
922
923 stack_trace = None
924 multiprocessing_is_available = True
925 message = """
926 You have requested multiple threads or processes for an operation, but the
927 required functionality of Python's multiprocessing module is not available.
928 Your operations will be performed sequentially, and any requests for
929 parallelism will be ignored.
930 """
931 try:
932 # Fails if /dev/shm (or some equivalent thereof) is not available for use
933 # (e.g., there's no implementation, or we can't write to it, etc.).
934 try:
935 multiprocessing.Value('i', 0)
936 except:
937 if not IS_WINDOWS:
938 message += """
939 Please ensure that you have write access to both /dev/shm and /run/shm.
940 """
941 raise # We'll handle this in one place below.
942
943 # Manager objects and Windows are generally a pain to work with, so try it
944 # out as a sanity check. This definitely works on some versions of Windows,
945 # but it's certainly possible that there is some unknown configuration for
946 # which it won't.
947 multiprocessing.Manager()
948
949 # Check that the max number of open files is reasonable. Always check this
950 # after we're sure that the basic multiprocessing functionality is
951 # available, since this won't matter unless that's true.
952 limit = -1
953 if HAS_RESOURCE_MODULE:
954 # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
955 # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
956 # "resource" module is not guaranteed to know about these names.
957 try:
958 limit = max(limit,
959 _IncreaseSoftLimitForResource(
960 resource.RLIMIT_NOFILE,
961 MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
962 except AttributeError:
963 pass
964 try:
965 limit = max(limit,
966 _IncreaseSoftLimitForResource(
967 resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
968 except AttributeError:
969 pass
970
971 if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
972 message += ("""
973 Your max number of open files, %s, is too low to allow safe multiprocessing.
974 On Linux you can fix this by adding something like "ulimit -n 10000" to your
975 ~/.bashrc or equivalent file and opening a new terminal.
976
977 On MacOS, you may also need to run a command like this once (in addition to the
978 above instructions), which might require a restart of your system to take
979 effect:
980 launchctl limit maxfiles 10000
981
982 Alternatively, edit /etc/launchd.conf with something like:
983 limit maxfiles 10000 10000
984
985 """ % limit)
986 raise Exception('Max number of open files, %s, is too low.' % limit)
987 except: # pylint: disable=bare-except
988 stack_trace = traceback.format_exc()
989 multiprocessing_is_available = False
990 if logger is not None:
991 logger.debug(stack_trace)
992 logger.warn(message)
993
994 # Set the cached values so that we never need to do this check again.
995 cached_multiprocessing_is_available = multiprocessing_is_available
996 cached_multiprocessing_check_stack_trace = stack_trace
997 cached_multiprocessing_is_available_message = message
998 return (multiprocessing_is_available, stack_trace)
999
1000
1001 def CreateLock():
1002 """Returns either a multiprocessing lock or a threading lock.
1003
1004 Use Multiprocessing lock iff we have access to the parts of the
1005 multiprocessing module that are necessary to enable parallelism in operations.
1006
1007 Returns:
1008 Multiprocessing or threading lock.
1009 """
1010 if MultiprocessingIsAvailable()[0]:
1011 return manager.Lock()
1012 else:
1013 return threading.Lock()
1014
1015
1016 def IsCloudSubdirPlaceholder(url, blr=None):
1017 """Determines if URL is a cloud subdir placeholder.
1018
1019 This function is needed because GUI tools (like the GCS cloud console) allow
1020 users to create empty "folders" by creating a placeholder object; and parts
1021 of gsutil need to treat those placeholder objects specially. For example,
1022 gsutil rsync needs to avoid downloading those objects because they can cause
1023 conflicts (see comments in rsync command for details).
1024
1025 We currently detect two cases:
1026 - Cloud objects whose name ends with '_$folder$'
1027 - Cloud objects whose name ends with '/'
1028
1029 Args:
1030 url: The URL to be checked.
1031 blr: BucketListingRef to check, or None if not available.
1032 If None, size won't be checked.
1033
1034 Returns:
1035 True/False.
1036 """
1037 if not url.IsCloudUrl():
1038 return False
1039 url_str = url.url_string
1040 if url_str.endswith('_$folder$'):
1041 return True
1042 if blr and blr.IsObject():
1043 size = blr.root_object.size
1044 else:
1045 size = 0
1046 return size == 0 and url_str.endswith('/')
1047
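# Illustrative examples (hypothetical bucket): a URL ending in '_$folder$',
# e.g. StorageUrlFromString('gs://bucket/dir_$folder$'), is a placeholder;
# a size-0 object whose name ends in '/' (e.g. 'gs://bucket/dir/') is too.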
1048
1049 def GetTermLines():
1050 """Returns number of terminal lines."""
1051 # fcntl isn't supported in Windows.
1052 try:
1053 import fcntl # pylint: disable=g-import-not-at-top
1054 import termios # pylint: disable=g-import-not-at-top
1055 except ImportError:
1056 return _DEFAULT_LINES
1057 def ioctl_GWINSZ(fd): # pylint: disable=invalid-name
1058 try:
1059 return struct.unpack(
1060 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
1061 except: # pylint: disable=bare-except
1062 return 0 # Failure (so will retry on different file descriptor below).
1063 # Try to find a valid number of lines from termio for stdin, stdout,
1064 # or stderr, in that order.
1065 ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
1066 if not ioc:
1067 try:
1068 fd = os.open(os.ctermid(), os.O_RDONLY)
1069 ioc = ioctl_GWINSZ(fd)
1070 os.close(fd)
1071 except: # pylint: disable=bare-except
1072 pass
1073 if not ioc:
1074 ioc = os.environ.get('LINES', _DEFAULT_LINES)
1075 return int(ioc)
1076
1077
1078 class GsutilStreamHandler(logging.StreamHandler):
1079 """A subclass of StreamHandler for use in gsutil."""
1080
1081 def flush(self):
1082 # Note: we override the flush method here due to a python 2.6 bug. The
1083 # python logging module will try to flush all stream handlers at exit.
1084 # If the StreamHandler is pointing to a file that is already closed, the
1085 # method throws an exception. Our unit tests temporarily redirect stderr,
1086 # which causes the default StreamHandler to open its stream against a
1087 # temporary file. By the time the process shuts down, the underlying file
1088 # is closed, causing an exception. This was fixed in Python 2.7, but to
1089 # remove the flake from Python 2.6, we maintain this here.
1090 try:
1091 logging.StreamHandler.flush(self)
1092 except ValueError:
1093 pass
1094
1095
1096 def StdinIterator():
1097 """A generator function that returns lines from stdin."""
1098 for line in sys.stdin:
1099 # Strip CRLF.
1100 yield line.rstrip()