Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(437)

Side by Side Diff: gslib/util.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/translation_helper.py ('k') | gslib/wildcard_iterator.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # -*- coding: utf-8 -*-
1 # Copyright 2010 Google Inc. All Rights Reserved. 2 # Copyright 2010 Google Inc. All Rights Reserved.
2 # 3 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
6 # 7 #
7 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
8 # 9 #
9 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and 13 # See the License for the specific language governing permissions and
13 # limitations under the License. 14 # limitations under the License.
14
15 """Static data and helper functions.""" 15 """Static data and helper functions."""
16 16
17 import binascii 17 from __future__ import absolute_import
18 import boto 18
19 import boto.auth
20 import errno 19 import errno
21 import gslib
22 import math 20 import math
23 import multiprocessing 21 import multiprocessing
24 import os 22 import os
23 import pkgutil
25 import re 24 import re
25 import struct
26 import sys 26 import sys
27 import tempfile
27 import textwrap 28 import textwrap
28 import threading 29 import threading
29 import traceback 30 import traceback
30 import xml.etree.ElementTree as ElementTree 31 import xml.etree.ElementTree as ElementTree
31 32
33 import boto
32 from boto import config 34 from boto import config
35 import boto.auth
33 from boto.exception import NoAuthHandlerFound 36 from boto.exception import NoAuthHandlerFound
34 from boto.gs.connection import GSConnection 37 from boto.gs.connection import GSConnection
35 from boto.provider import Provider 38 from boto.provider import Provider
36 from boto.pyami.config import BotoConfigLocations 39 from boto.pyami.config import BotoConfigLocations
40 import httplib2
41 from oauth2client.client import HAS_CRYPTO
42 from retry_decorator import retry_decorator
43
44 import gslib
37 from gslib.exception import CommandException 45 from gslib.exception import CommandException
38 from retry_decorator import retry_decorator 46 from gslib.storage_url import StorageUrlFromString
39 from oauth2client.client import HAS_CRYPTO 47 from gslib.translation_helper import AclTranslation
48 from gslib.translation_helper import GenerationFromUrlAndString
49 from gslib.translation_helper import S3_ACL_MARKER_GUID
50 from gslib.translation_helper import S3_DELETE_MARKER_GUID
51 from gslib.translation_helper import S3_MARKER_GUIDS
40 52
# pylint: disable=g-import-not-at-top
try:
  # This module doesn't necessarily exist on Windows.
  import resource
  HAS_RESOURCE_MODULE = True
except ImportError:
  # Py2/py3-compatible except syntax; the bound exception object was unused.
  HAS_RESOURCE_MODULE = False
47 60
61 ONE_KB = 1024
48 TWO_MB = 2 * 1024 * 1024 62 TWO_MB = 2 * 1024 * 1024
63 TEN_MB = 10 * 1024 * 1024
64 DEFAULT_FILE_BUFFER_SIZE = 8192
65 _DEFAULT_LINES = 25
66
67 # By default, the timeout for SSL read errors is infinite. This could
68 # cause gsutil to hang on network disconnect, so pick a more reasonable
69 # timeout.
70 SSL_TIMEOUT = 60
71
72 # Start with a progress callback every 64KB during uploads/downloads (JSON API).
73 # Callback implementation should back off until it hits the maximum size
74 # so that callbacks do not create huge amounts of log output.
75 START_CALLBACK_PER_BYTES = 1024*64
76 MAX_CALLBACK_PER_BYTES = 1024*1024*100
77
78 # Upload/download files in 8KB chunks over the HTTP connection.
79 TRANSFER_BUFFER_SIZE = 1024*8
80
81 # Default number of progress callbacks during transfer (XML API).
82 XML_PROGRESS_CALLBACKS = 10
83
84 # For files >= this size, output a message indicating that we're running an
85 # operation on the file (like hashing or gzipping) so it does not appear to the
86 # user that the command is hanging.
87 MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024 # 100 MB
49 88
50 NO_MAX = sys.maxint 89 NO_MAX = sys.maxint
51 90
91 UTF8 = 'utf-8'
92
52 VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?') 93 VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')
53 94
54 RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt' 95 RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
55 96
56 # Binary exponentiation strings. 97 # Binary exponentiation strings.
57 _EXP_STRINGS = [ 98 _EXP_STRINGS = [
58 (0, 'B', 'bit'), 99 (0, 'B', 'bit'),
59 (10, 'KB', 'Kbit', 'K'), 100 (10, 'KB', 'Kbit', 'K'),
60 (20, 'MB', 'Mbit', 'M'), 101 (20, 'MB', 'Mbit', 'M'),
61 (30, 'GB', 'Gbit', 'G'), 102 (30, 'GB', 'Gbit', 'G'),
62 (40, 'TB', 'Tbit', 'T'), 103 (40, 'TB', 'Tbit', 'T'),
63 (50, 'PB', 'Pbit', 'P'), 104 (50, 'PB', 'Pbit', 'P'),
64 (60, 'EB', 'Ebit', 'E'), 105 (60, 'EB', 'Ebit', 'E'),
65 ] 106 ]
66 107
67 global manager 108 configured_certs_file = None
109
110 global manager # pylint: disable=global-at-module-level
111
68 112
def InitializeMultiprocessingVariables():
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  # 'manager' is shared process-wide; declared global at module level above.
  global manager  # pylint: disable=global-variable-undefined
  manager = multiprocessing.Manager()
77 121
122
def _GenerateSuffixRegex():
  """Creates a suffix regex for human-readable byte counts.

  Returns:
    Tuple of (dict mapping lowercase suffix -> _EXP_STRINGS index,
    compiled regex matching '<number> <suffix>').
  """
  suffix_to_si = {}
  all_suffixes = []
  for index, si_tuple in enumerate(_EXP_STRINGS):
    # Everything after the exponent in each tuple is a suffix spelling.
    lowered = [suffix.lower() for suffix in si_tuple[1:]]
    for suffix in lowered:
      suffix_to_si[suffix] = index
    all_suffixes.extend(lowered)
  human_bytes_re = (r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
                    % '|'.join(all_suffixes))
  return suffix_to_si, re.compile(human_bytes_re)

SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()
92 138
93 SECONDS_PER_DAY = 3600 * 24 139 SECONDS_PER_DAY = 3600 * 24
94 140
95 # Detect platform types. 141 # Detect platform types.
96 PLATFORM = str(sys.platform).lower() 142 PLATFORM = str(sys.platform).lower()
97 IS_WINDOWS = 'win32' in PLATFORM 143 IS_WINDOWS = 'win32' in PLATFORM
98 IS_CYGWIN = 'cygwin' in PLATFORM 144 IS_CYGWIN = 'cygwin' in PLATFORM
99 IS_LINUX = 'linux' in PLATFORM 145 IS_LINUX = 'linux' in PLATFORM
100 IS_OSX = 'darwin' in PLATFORM 146 IS_OSX = 'darwin' in PLATFORM
101 147
102 # On Unix-like systems, we will set the maximum number of open files to avoid 148 # On Unix-like systems, we will set the maximum number of open files to avoid
103 # hitting the limit imposed by the OS. This number was obtained experimentally. 149 # hitting the limit imposed by the OS. This number was obtained experimentally.
104 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000 150 MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000
105 151
106 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz' 152 GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'
107 153
108 Retry = retry_decorator.retry 154 Retry = retry_decorator.retry # pylint: disable=invalid-name
109 155
110 # Cache the values from this check such that they're available to all callers 156 # Cache the values from this check such that they're available to all callers
111 # without needing to run all the checks again (some of these, such as calling 157 # without needing to run all the checks again (some of these, such as calling
112 # multiprocessing.Manager(), are expensive operations). 158 # multiprocessing.Manager(), are expensive operations).
113 cached_multiprocessing_is_available = None 159 cached_multiprocessing_is_available = None
114 cached_multiprocessing_is_available_stack_trace = None 160 cached_multiprocessing_is_available_stack_trace = None
115 cached_multiprocessing_is_available_message = None 161 cached_multiprocessing_is_available_message = None
116 162
163
class ListingStyle(object):
  """Enum-like class for specifying listing style."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
122 169
123 170
def UsingCrcmodExtension(crcmod):
  """Returns a truthy value iff the compiled crcmod extension is in use.

  Args:
    crcmod: The crcmod module object to inspect.
  """
  inner_module = getattr(crcmod, 'crcmod', None)
  return inner_module and getattr(inner_module, '_usingExtension', None)
127 174
128 175
def CreateDirIfNeeded(dir_path):
  """Creates a directory, suppressing already-exists errors."""
  if os.path.exists(dir_path):
    return
  try:
    # Another process may create the directory between the exists() check
    # and this call (e.g. concurrent resumable uploads); that race surfaces
    # as EEXIST, which is ignored below.
    os.makedirs(dir_path)
  except OSError as e:
    if e.errno != errno.EEXIST:
      raise
189
190
def GetGsutilStateDir():
  """Returns the location of the directory for gsutil state files.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed.

  This location should only be used for storing data that is required to be in
  a static location.

  Returns:
    Path to directory for gsutil static state files.
  """
  default_dir = os.path.expanduser(os.path.join('~', '.gsutil'))
  state_dir = config.get('GSUtil', 'state_dir', default_dir)
  CreateDirIfNeeded(state_dir)
  return state_dir
209
210
def GetCredentialStoreFilename():
  """Returns the path of the 'credcache' file inside the gsutil state dir."""
  return os.path.join(GetGsutilStateDir(), 'credcache')
213
214
def CreateTrackerDirIfNeeded():
  """Looks up or creates the gsutil tracker file directory.

  This is the configured directory where gsutil keeps its resumable transfer
  tracker files. This function creates it if it doesn't already exist.

  Returns:
    The pathname to the tracker directory.
  """
  # Note: the default is computed unconditionally, so the state dir is
  # created even when resumable_tracker_dir overrides it (matches the
  # eager evaluation of the default argument in config.get).
  default_dir = os.path.join(GetGsutilStateDir(), 'tracker-files')
  tracker_dir = config.get('GSUtil', 'resumable_tracker_dir', default_dir)
  CreateDirIfNeeded(tracker_dir)
  return tracker_dir
151 229
152 230
def PrintTrackerDirDeprecationWarningIfNeeded():
  """Warns on stderr if the deprecated resumable_tracker_dir option is set."""
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if not config.has_option('GSUtil', 'resumable_tracker_dir'):
    return
  sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
                   '.boto configuration file. This configuration option is '
                   'deprecated; please use the state_dir configuration '
                   'option instead.\n')
239
240
153 # Name of file where we keep the timestamp for the last time we checked whether 241 # Name of file where we keep the timestamp for the last time we checked whether
154 # a new version of gsutil is available. 242 # a new version of gsutil is available.
243 PrintTrackerDirDeprecationWarningIfNeeded()
244 CreateDirIfNeeded(GetGsutilStateDir())
155 LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = ( 245 LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
156 os.path.join(CreateTrackerDirIfNeeded(), '.last_software_update_check')) 246 os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
157 247
158 248
def HasConfiguredCredentials():
  """Determines if boto credential/config file exists."""
  def _HasAll(*option_names):
    # True iff every named option exists in the [Credentials] section.
    return all(config.has_option('Credentials', name)
               for name in option_names)

  has_goog_creds = _HasAll('gs_access_key_id', 'gs_secret_access_key')
  has_amzn_creds = _HasAll('aws_access_key_id', 'aws_secret_access_key')
  has_oauth_creds = _HasAll('gs_oauth2_refresh_token')
  has_service_account_creds = (
      HAS_CRYPTO and
      _HasAll('gs_service_client_id', 'gs_service_key_file'))

  valid_auth_handler = None
  try:
    valid_auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
    # Exclude the no-op auth handler as indicating credentials are configured.
    # Note we can't use isinstance() here because the no-op module may not be
    # imported so we can't get a reference to the class type.
    if getattr(getattr(valid_auth_handler, '__class__', None),
               '__name__', None) == 'NoOpAuth':
      valid_auth_handler = None
  except NoAuthHandlerFound:
    pass

  return (has_goog_creds or has_amzn_creds or has_oauth_creds
          or has_service_account_creds or valid_auth_handler)
188 278
189 279
def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured."""
  if HasConfiguredCredentials():
    return
  if (config.has_option('Credentials', 'gs_service_client_id')
      and not HAS_CRYPTO):
    raise CommandException('\n'.join(textwrap.wrap(
        'Your gsutil is configured with an OAuth2 service account, but you '
        'do not have PyOpenSSL or PyCrypto 2.6 or later installed. Service '
        'account authentication requires one of these libraries; please '
        'install either of them to proceed, or configure a different type '
        'of credentials with "gsutil config".')))
  # With no boto config file the user can still access publicly readable
  # buckets and objects.
  from gslib import no_op_auth_plugin  # pylint: disable=unused-variable
208 295
209 296
def GetConfigFilePath():
  """Returns the first readable boto config path, or 'no config found'."""
  for candidate in BotoConfigLocations:
    try:
      # Opening (and immediately closing) proves the file is readable.
      with open(candidate, 'r'):
        return candidate
    except IOError:
      pass
  return 'no config found'
220 307
221 308
def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Returns:
    List of filesystem paths (deduplicated) of existing boto config files,
    including AWS_CREDENTIAL_FILE if that environment variable is set.
  """
  # Copy: appending directly to boto.pyami.config.BotoConfigLocations would
  # mutate the shared module-level list, adding a duplicate
  # AWS_CREDENTIAL_FILE entry on every call.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  seen = set()
  cf_list = []
  for config_path in config_paths:
    if os.path.exists(config_path) and config_path not in seen:
      seen.add(config_path)
      cf_list.append(config_path)
  return cf_list
235 322
236 323
def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, amend the configuration
  (in boto.config) to use the cert roots distributed with gsutil.

  Returns:
    string filename of the certs file to use.

  Raises:
    CommandException: if no certs data can be located at all.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    # NOTE(review): configured_certs_file appears to cache a previously
    # materialized temp certs file (set elsewhere at module level) — verify.
    if configured_certs_file:
      disk_certs_file = configured_certs_file
    else:
      disk_certs_file = os.path.abspath(
          os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
      if not os.path.exists(disk_certs_file):
        # If the file is not present on disk, this means the gslib module
        # doesn't actually exist on disk anywhere. This can happen if it's
        # being imported from a zip file. Unfortunately, we have to copy the
        # certs file to a local temp file on disk because the underlying SSL
        # socket requires it to be a filesystem path.
        certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
        if not certs_data:
          raise CommandException('Certificates file not found. Please '
                                 'reinstall gsutil from scratch')
        fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
        f = os.fdopen(fd, 'w')
        f.write(certs_data)
        f.close()
        disk_certs_file = fname
    certs_file = disk_certs_file
  return certs_file
357
358
def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  return [configured_certs_file] if configured_certs_file else []
365
366
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor.

  Returns:
    An initialized httplib2.Http instance.
  """
  # Pull any proxy settings out of the Boto config file.
  boto_proxy_info = httplib2.ProxyInfo(
      proxy_type=3,
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  new_http = http_class(proxy_info=boto_proxy_info, **kwargs)
  new_http.disable_ssl_certificate_validation = (
      not config.getbool('Boto', 'https_validate_certificates'))
  return new_http
393
394
def GetNumRetries():
  """Returns the Boto 'num_retries' config value, defaulting to 6."""
  return config.getint('Boto', 'num_retries', 6)
397
398
def GetMaxRetryDelay():
  """Returns the Boto 'max_retry_delay' config value, defaulting to 60."""
  return config.getint('Boto', 'max_retry_delay', 60)
401
402
def _RoundToNearestExponent(num):
  """Finds the largest _EXP_STRINGS entry whose power of 2 is <= num.

  Returns:
    Tuple of (index into _EXP_STRINGS, num scaled by that power, rounded
    to 2 decimal places).
  """
  exp_index = 0
  while (exp_index + 1 < len(_EXP_STRINGS)
         and num >= (2 ** _EXP_STRINGS[exp_index + 1][0])):
    exp_index += 1
  scaled = round(float(num) / 2 ** _EXP_STRINGS[exp_index][0], 2)
  return exp_index, scaled
242 408
243 409
244 def MakeHumanReadable(num): 410 def MakeHumanReadable(num):
245 """Generates human readable string for a number of bytes. 411 """Generates human readable string for a number of bytes.
246 412
(...skipping 20 matching lines...) Expand all
267 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) 433 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2])
268 434
269 435
270 def HumanReadableToBytes(human_string): 436 def HumanReadableToBytes(human_string):
271 """Tries to convert a human-readable string to a number of bytes. 437 """Tries to convert a human-readable string to a number of bytes.
272 438
273 Args: 439 Args:
274 human_string: A string supplied by user, e.g. '1M', '3 GB'. 440 human_string: A string supplied by user, e.g. '1M', '3 GB'.
275 Returns: 441 Returns:
276 An integer containing the number of bytes. 442 An integer containing the number of bytes.
443 Raises:
444 ValueError: on an invalid string.
277 """ 445 """
278 human_string = human_string.lower() 446 human_string = human_string.lower()
279 m = MATCH_HUMAN_BYTES.match(human_string) 447 m = MATCH_HUMAN_BYTES.match(human_string)
280 if m: 448 if m:
281 num = float(m.group('num')) 449 num = float(m.group('num'))
282 if m.group('suffix'): 450 if m.group('suffix'):
283 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] 451 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0]
284 num *= (2.0 ** power) 452 num *= (2.0 ** power)
285 num = int(round(num)) 453 num = int(round(num))
286 return num 454 return num
(...skipping 20 matching lines...) Expand all
307 k = (len(values) - 1) * percent 475 k = (len(values) - 1) * percent
308 f = math.floor(k) 476 f = math.floor(k)
309 c = math.ceil(k) 477 c = math.ceil(k)
310 if f == c: 478 if f == c:
311 return key(values[int(k)]) 479 return key(values[int(k)])
312 d0 = key(values[int(f)]) * (c-k) 480 d0 = key(values[int(f)]) * (c-k)
313 d1 = key(values[int(c)]) * (k-f) 481 d1 = key(values[int(c)]) * (k-f)
314 return d0 + d1 482 return d0 + d1
315 483
316 484
def RemoveCRLFFromString(input_str):
  """Returns the input string with all \\n and \\r removed."""
  return input_str.replace('\r', '').replace('\n', '')
319 488
320 Args:
321 e: The GSResponseError that includes XML to be parsed.
322
323 Returns:
324 (exception_name, m, d), where m is <Message> text or None,
325 and d is <Details> text or None.
326 """
327 exc_name_parts = re.split("[\.']", str(type(e)))
328 if len(exc_name_parts) < 2:
329 # Shouldn't happen, but have fallback in case.
330 exc_name = str(type(e))
331 else:
332 exc_name = exc_name_parts[-2]
333 if not hasattr(e, 'body') or e.body is None:
334 return (exc_name, None)
335
336 match = re.search(r'<Message>(?P<message>.*)</Message>', e.body)
337 m = match.group('message') if match else None
338 match = re.search(r'<Details>(?P<details>.*)</Details>', e.body)
339 d = match.group('details') if match else None
340 return (exc_name, m, d)
341
342 def FormatErrorMessage(exc_name, status, code, reason, message, detail):
343 """Formats an error message from components parsed by ParseErrorDetail."""
344 if message and detail:
345 return('%s: status=%d, code=%s, reason="%s", message="%s", detail="%s"' %
346 (exc_name, status, code, reason, message, detail))
347 if message:
348 return('%s: status=%d, code=%s, reason="%s", message="%s"' %
349 (exc_name, status, code, reason, message))
350 if detail:
351 return('%s: status=%d, code=%s, reason="%s", detail="%s"' %
352 (exc_name, status, code, reason, detail))
353 return('%s: status=%d, code=%s, reason="%s"' %
354 (exc_name, status, code, reason))
355 489
def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict.

  Raises:
    Exception: if dict contains more than one top-level entry.
  """
  if len(message) != 1:
    raise Exception('Expected dict of size 1, got size %d' % len(message))

  # next(iter(...)) works on Python 2 and 3; items()[0] is py2-only since
  # items() returns a non-indexable view in py3.
  name, content = next(iter(message.items()))
  root_element = ElementTree.Element(name)
  for element_property, value in sorted(content.items()):
    node = ElementTree.SubElement(root_element, element_property)
    node.text = value
  return ElementTree.tostring(root_element)
376 514
377 515
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if not url.IsCloudUrl():
    return None
  obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                     provider=url.scheme,
                                     fields=['metadata'])
  if not (obj.metadata and obj.metadata.additionalProperties):
    return None
  for prop in obj.metadata.additionalProperties:
    if prop.key == 'gsutil_version':
      return prop.value
  return None
394 539
def GetGsutilVersionModifiedTime():
  """Returns unix timestamp of when the VERSION file was last modified."""
  version_file = gslib.VERSION_FILE
  if not version_file:
    return 0
  return int(os.path.getmtime(version_file))
400 545
401 546
def IsRunningInteractively():
  """Returns True if currently running interactively on a TTY."""
  return all(stream.isatty() for stream in (sys.stdout, sys.stderr, sys.stdin))
405 550
406 551
def _HttpsValidateCertificatesEnabled():
  """Returns the Boto 'https_validate_certificates' value, defaulting to True.

  NOTE(review): config.get returns the raw configured value (a string when
  set in a .boto file), so treat the result as truthy/falsy rather than a
  strict bool.
  """
  return config.get('Boto', 'https_validate_certificates', True)

# Cached once at import time. The helper name's typo ('Certifcates') is fixed;
# the helper is private and referenced only here.
CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
556
557
def _BotoIsSecure():
  """Returns the Boto 'is_secure' config value, defaulting to True."""
  # NOTE(review): config.get returns the raw config value (a string when set
  # in a .boto file), so this is truthy/falsy rather than a strict bool —
  # verify against boto.pyami.config before tightening.
  return config.get('Boto', 'is_secure', True)

# Cached once at import time.
BOTO_IS_SECURE = _BotoIsSecure()
415 562
416 563
def ResumableThreshold():
  """Returns the GSUtil 'resumable_threshold' config value (bytes, default TWO_MB)."""
  return config.getint('GSUtil', 'resumable_threshold', TWO_MB)
566
567
def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers."""
  # setdefault only inserts when the key is absent, matching the original
  # "if not already set" behavior.
  headers.setdefault('accept-encoding', 'gzip')
422 573
423 574
424 def PrintFullInfoAboutUri(uri, incl_acl, headers): 575 # pylint: disable=too-many-statements
425 """Print full info for given URI (like what displays for gsutil ls -L). 576 def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
577 """Print full info for given object (like what displays for gsutil ls -L).
426 578
427 Args: 579 Args:
428 uri: StorageUri being listed. 580 bucket_listing_ref: BucketListingRef being listed.
581 Must have ref_type OBJECT and a populated root_object
582 with the desired fields.
429 incl_acl: True if ACL info should be output. 583 incl_acl: True if ACL info should be output.
430 headers: The headers to pass to boto, if any.
431 584
432 Returns: 585 Returns:
433 Tuple (number of objects, 586 Tuple (number of objects, object_length)
434 object length, if listing_style is one of the long listing formats)
435 587
436 Raises: 588 Raises:
437 Exception: if calling bug encountered. 589 Exception: if calling bug encountered.
438 """ 590 """
439 # Run in a try/except clause so we can continue listings past 591 url_str = bucket_listing_ref.url_string
440 # access-denied errors (which can happen because user may have READ 592 storage_url = StorageUrlFromString(url_str)
441 # permission on object and thus see the bucket listing data, but lack 593 obj = bucket_listing_ref.root_object
442 # FULL_CONTROL over individual objects and thus not be able to read 594
443 # their ACLs). 595 if (obj.metadata and S3_DELETE_MARKER_GUID in
444 # TODO: Switch this code to use string formatting instead of tabs. 596 obj.metadata.additionalProperties):
445 try: 597 num_bytes = 0
446 print '%s:' % uri.uri.encode('utf-8') 598 num_objs = 0
447 headers = headers.copy() 599 url_str += '<DeleteMarker>'
448 # Add accept encoding so that the HEAD request matches what would be 600 else:
449 # sent for a GET request. 601 num_bytes = obj.size
450 AddAcceptEncoding(headers) 602 num_objs = 1
451 got_key = False 603
452 obj = uri.get_key(False, headers=headers) 604 print '%s:' % url_str.encode(UTF8)
453 got_key = True 605 if obj.updated:
454 print '\tCreation time:\t\t%s' % obj.last_modified 606 print '\tCreation time:\t\t%s' % obj.updated.strftime(
455 if obj.cache_control: 607 '%a, %d %b %Y %H:%M:%S GMT')
456 print '\tCache-Control:\t\t%s' % obj.cache_control 608 if obj.cacheControl:
457 if obj.content_disposition: 609 print '\tCache-Control:\t\t%s' % obj.cacheControl
458 print '\tContent-Disposition:\t\t%s' % obj.content_disposition 610 if obj.contentDisposition:
459 if obj.content_encoding: 611 print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
460 print '\tContent-Encoding:\t%s' % obj.content_encoding 612 if obj.contentEncoding:
461 if obj.content_language: 613 print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
462 print '\tContent-Language:\t%s' % obj.content_language 614 if obj.contentLanguage:
463 print '\tContent-Length:\t\t%s' % obj.size 615 print '\tContent-Language:\t%s' % obj.contentLanguage
464 print '\tContent-Type:\t\t%s' % obj.content_type 616 print '\tContent-Length:\t\t%s' % obj.size
465 if hasattr(obj, 'component_count') and obj.component_count: 617 print '\tContent-Type:\t\t%s' % obj.contentType
466 print '\tComponent-Count:\t%d' % obj.component_count 618 if obj.componentCount:
467 if obj.metadata: 619 print '\tComponent-Count:\t%d' % obj.componentCount
468 prefix = uri.get_provider().metadata_prefix 620 marker_props = {}
469 for name in obj.metadata: 621 if obj.metadata and obj.metadata.additionalProperties:
470 meta_string = '\t%s%s:\t%s' % (prefix, name, obj.metadata[name]) 622 non_marker_props = []
471 print meta_string.encode('utf-8') 623 for add_prop in obj.metadata.additionalProperties:
472 if hasattr(obj, 'cloud_hashes'): 624 if add_prop.key not in S3_MARKER_GUIDS:
473 for alg in obj.cloud_hashes: 625 non_marker_props.append(add_prop)
474 print '\tHash (%s):\t\t%s' % (
475 alg, binascii.b2a_hex(obj.cloud_hashes[alg]))
476 print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
477 if hasattr(obj, 'generation'):
478 print '\tGeneration:\t\t%s' % obj.generation
479 if hasattr(obj, 'metageneration'):
480 print '\tMetageneration:\t\t%s' % obj.metageneration
481 if incl_acl:
482 print '\tACL:\t\t%s' % (uri.get_acl(False, headers))
483 return (1, obj.size)
484 except boto.exception.GSResponseError as e:
485 if e.status == 403:
486 if got_key:
487 print ('\tACL:\t\t\tACCESS DENIED. Note: you need FULL_CONTROL '
488 'permission\n\t\t\ton the object to read its ACL.')
489 return (1, obj.size)
490 else: 626 else:
491 print "You aren't authorized to read %s - skipping" % uri 627 marker_props[add_prop.key] = add_prop.value
492 return (1, 0) 628 if non_marker_props:
629 print '\tMetadata:'
630 for ap in non_marker_props:
631 meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
632 print meta_string.encode(UTF8)
633 if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
634 if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
635 print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
636 if obj.generation:
637 generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
638 print '\tGeneration:\t\t%s' % generation_str
639 if obj.metageneration:
640 print '\tMetageneration:\t\t%s' % obj.metageneration
641 if incl_acl:
642 # JSON API won't return acls as part of the response unless we have
643 # full control scope
644 if obj.acl:
645 print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
646 elif S3_ACL_MARKER_GUID in marker_props:
647 print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
493 else: 648 else:
494 raise e 649 print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
495 return (numobjs, numbytes) 650 'permission\n\t\t\t\ton the object to read its ACL.')
651
652 return (num_objs, num_bytes)
653
496 654
497 def CompareVersions(first, second): 655 def CompareVersions(first, second):
498 """Compares the first and second gsutil version strings. 656 """Compares the first and second gsutil version strings.
499 657
500 For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33. 658 For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
501 Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes 659 Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
502 (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as 660 (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
503 less than its non-suffix counterpart (e.g. 3.32 > 3.32pre). 661 less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).
504 662
663 Args:
664 first: First gsutil version string.
665 second: Second gsutil version string.
666
505 Returns: 667 Returns:
506 (g, m): 668 (g, m):
507 g is True if first known to be greater than second, else False. 669 g is True if first known to be greater than second, else False.
508 m is True if first known to be greater by at least 1 major version, 670 m is True if first known to be greater by at least 1 major version,
509 else False. 671 else False.
510 """ 672 """
511 m1 = VERSION_MATCHER.match(str(first)) 673 m1 = VERSION_MATCHER.match(str(first))
512 m2 = VERSION_MATCHER.match(str(second)) 674 m2 = VERSION_MATCHER.match(str(second))
513 675
514 # If passed strings we don't know how to handle, be conservative. 676 # If passed strings we don't know how to handle, be conservative.
515 if not m1 or not m2: 677 if not m1 or not m2:
516 return (False, False) 678 return (False, False)
517 679
518 major_ver1 = int(m1.group('maj')) 680 major_ver1 = int(m1.group('maj'))
519 minor_ver1 = int(m1.group('min')) if m1.group('min') else 0 681 minor_ver1 = int(m1.group('min')) if m1.group('min') else 0
520 suffix_ver1 = m1.group('suffix') 682 suffix_ver1 = m1.group('suffix')
521 major_ver2 = int(m2.group('maj')) 683 major_ver2 = int(m2.group('maj'))
522 minor_ver2 = int(m2.group('min')) if m2.group('min') else 0 684 minor_ver2 = int(m2.group('min')) if m2.group('min') else 0
523 suffix_ver2 = m2.group('suffix') 685 suffix_ver2 = m2.group('suffix')
524 686
525 if major_ver1 > major_ver2: 687 if major_ver1 > major_ver2:
526 return (True, True) 688 return (True, True)
527 elif major_ver1 == major_ver2: 689 elif major_ver1 == major_ver2:
528 if minor_ver1 > minor_ver2: 690 if minor_ver1 > minor_ver2:
529 return (True, False) 691 return (True, False)
530 elif minor_ver1 == minor_ver2: 692 elif minor_ver1 == minor_ver2:
531 return (bool(suffix_ver2) and not suffix_ver1, False) 693 return (bool(suffix_ver2) and not suffix_ver1, False)
532 return (False, False) 694 return (False, False)
533 695
534 def _IncreaseSoftLimitForResource(resource_name): 696
535 """Sets a new soft limit for the maximum number of open files. 697 def _IncreaseSoftLimitForResource(resource_name, fallback_value):
536 The soft limit is used for this process (and its children), but the 698 """Sets a new soft limit for the maximum number of open files.
537 hard limit is set by the system and cannot be exceeded. 699
700 The soft limit is used for this process (and its children), but the
701 hard limit is set by the system and cannot be exceeded.
702
703 We will first try to set the soft limit to the hard limit's value; if that
704 fails, we will try to set the soft limit to the fallback_value iff this would
705 increase the soft limit.
706
707 Args:
708 resource_name: Name of the resource to increase the soft limit for.
709 fallback_value: Fallback value to be used if we couldn't set the
710 soft value to the hard value (e.g., if the hard value
711 is "unlimited").
712
713 Returns:
714 Current soft limit for the resource (after any changes we were able to
715 make), or -1 if the resource doesn't exist.
538 """ 716 """
717
718 # Get the value of the resource.
539 try: 719 try:
540 (soft_limit, hard_limit) = resource.getrlimit(resource_name) 720 (soft_limit, hard_limit) = resource.getrlimit(resource_name)
541 resource.setrlimit(resource_name, (hard_limit, hard_limit)) 721 except (resource.error, ValueError):
542 return hard_limit 722 # The resource wasn't present, so we can't do anything here.
543 except (resource.error, ValueError), e: 723 return -1
544 return 0 724
725 # Try to set the value of the soft limit to the value of the hard limit.
726 if hard_limit > soft_limit: # Some OS's report 0 for "unlimited".
727 try:
728 resource.setrlimit(resource_name, (hard_limit, hard_limit))
729 return hard_limit
730 except (resource.error, ValueError):
731 # We'll ignore this and try the fallback value.
732 pass
733
734 # Try to set the value of the soft limit to the fallback value.
735 if soft_limit < fallback_value:
736 try:
737 resource.setrlimit(resource_name, (fallback_value, hard_limit))
738 return fallback_value
739 except (resource.error, ValueError):
740 # We couldn't change the soft limit, so just report the current
741 # value of the soft limit.
742 return soft_limit
743 else:
744 return soft_limit
745
746
747 def GetCloudApiInstance(cls, thread_state=None):
748 """Gets a gsutil Cloud API instance.
749
750 Since Cloud API implementations are not guaranteed to be thread-safe, each
751 thread needs its own instance. These instances are passed to each thread
752 via the thread pool logic in command.
753
754 Args:
755 cls: Command class to be used for single-threaded case.
756 thread_state: Per thread state from this thread containing a gsutil
757 Cloud API instance.
758
759 Returns:
760 gsutil Cloud API instance.
761 """
762 return thread_state or cls.gsutil_api
763
764
765 def GetFileSize(fp, position_to_eof=False):
766 """Returns size of file, optionally leaving fp positioned at EOF."""
767 if not position_to_eof:
768 cur_pos = fp.tell()
769 fp.seek(0, os.SEEK_END)
770 cur_file_size = fp.tell()
771 if not position_to_eof:
772 fp.seek(cur_pos)
773 return cur_file_size
774
775
776 def GetStreamFromFileUrl(storage_url):
777 if storage_url.IsStream():
778 return sys.stdin
779 else:
780 return open(storage_url.object_name, 'rb')
781
782
783 def UrlsAreForSingleProvider(url_args):
784 """Tests whether the URLs are all for a single provider.
785
786 Args:
787 url_args: Strings to check.
788
789 Returns:
790 True if URLs are for single provider, False otherwise.
791 """
792 provider = None
793 url = None
794 for url_str in url_args:
795 url = StorageUrlFromString(url_str)
796 if not provider:
797 provider = url.scheme
798 elif url.scheme != provider:
799 return False
800 return provider is not None
801
802
803 def HaveFileUrls(args_to_check):
804 """Checks whether args_to_check contain any file URLs.
805
806 Args:
807 args_to_check: Command-line argument subset to check.
808
809 Returns:
810 True if args_to_check contains any file URLs.
811 """
812 for url_str in args_to_check:
813 storage_url = StorageUrlFromString(url_str)
814 if storage_url.IsFileUrl():
815 return True
816 return False
817
818
819 def HaveProviderUrls(args_to_check):
820 """Checks whether args_to_check contains any provider URLs (like 'gs://').
821
822 Args:
823 args_to_check: Command-line argument subset to check.
824
825 Returns:
826 True if args_to_check contains any provider URLs.
827 """
828 for url_str in args_to_check:
829 storage_url = StorageUrlFromString(url_str)
830 if storage_url.IsCloudUrl() and storage_url.IsProvider():
831 return True
832 return False
833
545 834
546 def MultiprocessingIsAvailable(logger=None): 835 def MultiprocessingIsAvailable(logger=None):
547 """ 836 """Checks if multiprocessing is available.
837
548 There are some environments in which there is no way to use multiprocessing 838 There are some environments in which there is no way to use multiprocessing
549 logic that's built into Python (e.g., if /dev/shm is not available, then 839 logic that's built into Python (e.g., if /dev/shm is not available, then
550 we can't create semaphores). This simply tries out a few things that will be 840 we can't create semaphores). This simply tries out a few things that will be
551 needed to make sure the environment can support the pieces of the 841 needed to make sure the environment can support the pieces of the
552 multiprocessing module that we need. 842 multiprocessing module that we need.
553 843
844 Args:
845 logger: logging.logger to use for debug output.
846
554 Returns: 847 Returns:
555 (multiprocessing_is_available, stack_trace): 848 (multiprocessing_is_available, stack_trace):
556 multiprocessing_is_available: True iff the multiprocessing module is 849 multiprocessing_is_available: True iff the multiprocessing module is
557 available for use. 850 available for use.
558 stack_trace: The stack trace generated by the call we tried that failed. 851 stack_trace: The stack trace generated by the call we tried that failed.
559 """ 852 """
853 # pylint: disable=global-variable-undefined
560 global cached_multiprocessing_is_available 854 global cached_multiprocessing_is_available
561 global cached_multiprocessing_check_stack_trace 855 global cached_multiprocessing_check_stack_trace
562 global cached_multiprocessing_is_available_message 856 global cached_multiprocessing_is_available_message
563 if cached_multiprocessing_is_available is not None: 857 if cached_multiprocessing_is_available is not None:
564 if logger: 858 if logger:
565 logger.debug(cached_multiprocessing_check_stack_trace) 859 logger.debug(cached_multiprocessing_check_stack_trace)
566 logger.warn('\n'.join(textwrap.wrap( 860 logger.warn(cached_multiprocessing_is_available_message)
567 cached_multiprocessing_is_available_message + '\n')))
568 return (cached_multiprocessing_is_available, 861 return (cached_multiprocessing_is_available,
569 cached_multiprocessing_check_stack_trace) 862 cached_multiprocessing_check_stack_trace)
570 863
571 stack_trace = None 864 stack_trace = None
572 multiprocessing_is_available = True 865 multiprocessing_is_available = True
573 message = ( 866 message = """
574 'You have requested multiple threads or processes for an operation,' 867 You have requested multiple threads or processes for an operation, but the
575 ' but the required functionality of Python\'s multiprocessing ' 868 required functionality of Python\'s multiprocessing module is not available.
576 'module is not available. Your operations will be performed ' 869 Your operations will be performed sequentially, and any requests for
577 'sequentially, and any requests for parallelism will be ignored.') 870 parallelism will be ignored.
871 """
578 try: 872 try:
579 # Fails if /dev/shm (or some equivalent thereof) is not available for use 873 # Fails if /dev/shm (or some equivalent thereof) is not available for use
580 # (e.g., there's no implementation, or we can't write to it, etc.). 874 # (e.g., there's no implementation, or we can't write to it, etc.).
581 try: 875 try:
582 multiprocessing.Value('i', 0) 876 multiprocessing.Value('i', 0)
583 except: 877 except:
584 if not IS_WINDOWS: 878 if not IS_WINDOWS:
585 message += ('\nPlease ensure that you have write access to both ' 879 message += """
586 '/dev/shm and /run/shm.') 880 Please ensure that you have write access to both /dev/shm and /run/shm.
881 """
587 raise # We'll handle this in one place below. 882 raise # We'll handle this in one place below.
588 883
589 # Manager objects and Windows are generally a pain to work with, so try it 884 # Manager objects and Windows are generally a pain to work with, so try it
590 # out as a sanity check. This definitely works on some versions of Windows, 885 # out as a sanity check. This definitely works on some versions of Windows,
591 # but it's certainly possible that there is some unknown configuration for 886 # but it's certainly possible that there is some unknown configuration for
592 # which it won't. 887 # which it won't.
593 multiprocessing.Manager() 888 multiprocessing.Manager()
594 889
595 # Check that the max number of open files is reasonable. Always check this 890 # Check that the max number of open files is reasonable. Always check this
596 # after we're sure that the basic multiprocessing functionality is 891 # after we're sure that the basic multiprocessing functionality is
597 # available, since this won't matter unless that's true. 892 # available, since this won't matter unless that's true.
598 limit = 0 893 limit = -1
599 if HAS_RESOURCE_MODULE: 894 if HAS_RESOURCE_MODULE:
600 # Try to set this with both resource names - RLIMIT_NOFILE for most Unix 895 # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
601 # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the 896 # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
602 # "resource" module is not guaranteed to know about these names. 897 # "resource" module is not guaranteed to know about these names.
603 try: 898 try:
604 limit = max(limit, 899 limit = max(limit,
605 _IncreaseSoftLimitForResource(resource.RLIMIT_NOFILE)) 900 _IncreaseSoftLimitForResource(
606 except AttributeError, e: 901 resource.RLIMIT_NOFILE,
902 MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
903 except AttributeError:
607 pass 904 pass
608 try: 905 try:
609 limit = max(limit, 906 limit = max(limit,
610 _IncreaseSoftLimitForResource(resource.RLIMIT_OFILE)) 907 _IncreaseSoftLimitForResource(
611 except AttributeError, e: 908 resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
909 except AttributeError:
612 pass 910 pass
911
613 if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS: 912 if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
614 message += ( 913 message += ("""
615 '\nYour max number of open files, %s, is too low to allow safe ' 914 Your max number of open files, %s, is too low to allow safe multiprocessing.
616 'multiprocessing calls. If you are on a Unix-like OS, then you can ' 915 On Linux you can fix this by adding something like "ulimit -n 10000" to your
617 'fix this by adding something like "ulimit -n 10000" to your ' 916 ~/.bashrc or equivalent file and opening a new terminal.
618 '~/.bashrc (Linux), ~/.bash_profile (OS X), or equivalent file, ' 917
619 'and opening a new terminal.' % limit) 918 On MacOS, you may also need to run a command like this once (in addition to the
919 above instructions), which might require a restart of your system to take
920 effect:
921 launchctl limit maxfiles 10000
922
923 Alternatively, edit /etc/launchd.conf with something like:
924 limit maxfiles 10000 10000
925
926 """ % limit)
620 raise Exception('Max number of open files, %s, is too low.' % limit) 927 raise Exception('Max number of open files, %s, is too low.' % limit)
621 except: 928 except: # pylint: disable=bare-except
622 stack_trace = traceback.format_exc() 929 stack_trace = traceback.format_exc()
623 multiprocessing_is_available = False 930 multiprocessing_is_available = False
624 if logger is not None: 931 if logger is not None:
625 logger.debug(stack_trace) 932 logger.debug(stack_trace)
626 logger.warn('\n'.join(textwrap.wrap(message + '\n'))) 933 logger.warn(message)
627 934
628 # Set the cached values so that we never need to do this check again. 935 # Set the cached values so that we never need to do this check again.
629 cached_multiprocessing_is_available = multiprocessing_is_available 936 cached_multiprocessing_is_available = multiprocessing_is_available
630 cached_multiprocessing_check_stack_trace = stack_trace 937 cached_multiprocessing_check_stack_trace = stack_trace
631 cached_multiprocessing_is_available_message = message 938 cached_multiprocessing_is_available_message = message
632 return (multiprocessing_is_available, stack_trace) 939 return (multiprocessing_is_available, stack_trace)
633 940
941
634 def CreateLock(): 942 def CreateLock():
635 """ 943 """Returns either a multiprocessing lock or a threading lock.
636 Returns either a multiprocessing lock or a threading lock. We will use the 944
637 former iff we have access to the parts of the multiprocessing module that 945 Use Multiprocessing lock iff we have access to the parts of the
638 are necessary to enable parallelism in operations. 946 multiprocessing module that are necessary to enable parallelism in operations.
947
948 Returns:
949 Multiprocessing or threading lock.
639 """ 950 """
640 if MultiprocessingIsAvailable()[0]: 951 if MultiprocessingIsAvailable()[0]:
641 return manager.Lock() 952 return manager.Lock()
642 else: 953 else:
643 return threading.Lock() 954 return threading.Lock()
955
956
957 def IsCloudSubdirPlaceholder(url, blr=None):
958 """Determines if URL is a cloud subdir placeholder.
959
960 This function is needed because GUI tools (like the GCS cloud console) allow
961 users to create empty "folders" by creating a placeholder object; and parts
962 of gsutil need to treat those placeholder objects specially. For example,
963 gsutil rsync needs to avoid downloading those objects because they can cause
964 conflicts (see comments in rsync command for details).
965
966 We currently detect two cases:
967 - Cloud objects whose name ends with '_$folder$'
968 - Cloud objects whose name ends with '/'
969
970 Args:
971 url: The URL to be checked.
972 blr: BucketListingRef to check, or None if not available.
973 If None, size won't be checked.
974
975 Returns:
976 True/False.
977 """
978 if not url.IsCloudUrl():
979 return False
980 url_str = url.url_string
981 if url_str.endswith('_$folder$'):
982 return True
983 if blr and blr.IsObject():
984 size = blr.root_object.size
985 else:
986 size = 0
987 return size == 0 and url_str.endswith('/')
988
989
990 def GetTermLines():
991 """Returns number of terminal lines."""
992 # fcntl isn't supported in Windows.
993 try:
994 import fcntl # pylint: disable=g-import-not-at-top
995 import termios # pylint: disable=g-import-not-at-top
996 except ImportError:
997 return _DEFAULT_LINES
998 def ioctl_GWINSZ(fd): # pylint: disable=invalid-name
999 try:
1000 return struct.unpack(
1001 'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
1002 except: # pylint: disable=bare-except
1003 return 0 # Failure (so will retry on different file descriptor below).
1004 # Try to find a valid number of lines from termio for stdin, stdout,
1005 # or stderr, in that order.
1006 ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
1007 if not ioc:
1008 try:
1009 fd = os.open(os.ctermid(), os.O_RDONLY)
1010 ioc = ioctl_GWINSZ(fd)
1011 os.close(fd)
1012 except: # pylint: disable=bare-except
1013 pass
1014 if not ioc:
1015 ioc = os.environ.get('LINES', _DEFAULT_LINES)
1016 return int(ioc)
OLDNEW
« no previous file with comments | « gslib/translation_helper.py ('k') | gslib/wildcard_iterator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698