| OLD | NEW |
| 1 # -*- coding: utf-8 -*- |
| 1 # Copyright 2010 Google Inc. All Rights Reserved. | 2 # Copyright 2010 Google Inc. All Rights Reserved. |
| 2 # | 3 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); | 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. | 5 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at | 6 # You may obtain a copy of the License at |
| 6 # | 7 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 | 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # | 9 # |
| 9 # Unless required by applicable law or agreed to in writing, software | 10 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, | 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and | 13 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. | 14 # limitations under the License. |
| 14 | |
| 15 """Static data and helper functions.""" | 15 """Static data and helper functions.""" |
| 16 | 16 |
| 17 import binascii | 17 from __future__ import absolute_import |
| 18 import boto | 18 |
| 19 import boto.auth | |
| 20 import errno | 19 import errno |
| 21 import gslib | |
| 22 import math | 20 import math |
| 23 import multiprocessing | 21 import multiprocessing |
| 24 import os | 22 import os |
| 23 import pkgutil |
| 25 import re | 24 import re |
| 25 import struct |
| 26 import sys | 26 import sys |
| 27 import tempfile |
| 27 import textwrap | 28 import textwrap |
| 28 import threading | 29 import threading |
| 29 import traceback | 30 import traceback |
| 30 import xml.etree.ElementTree as ElementTree | 31 import xml.etree.ElementTree as ElementTree |
| 31 | 32 |
| 33 import boto |
| 32 from boto import config | 34 from boto import config |
| 35 import boto.auth |
| 33 from boto.exception import NoAuthHandlerFound | 36 from boto.exception import NoAuthHandlerFound |
| 34 from boto.gs.connection import GSConnection | 37 from boto.gs.connection import GSConnection |
| 35 from boto.provider import Provider | 38 from boto.provider import Provider |
| 36 from boto.pyami.config import BotoConfigLocations | 39 from boto.pyami.config import BotoConfigLocations |
| 40 import httplib2 |
| 41 from oauth2client.client import HAS_CRYPTO |
| 42 from retry_decorator import retry_decorator |
| 43 |
| 44 import gslib |
| 37 from gslib.exception import CommandException | 45 from gslib.exception import CommandException |
| 38 from retry_decorator import retry_decorator | 46 from gslib.storage_url import StorageUrlFromString |
| 39 from oauth2client.client import HAS_CRYPTO | 47 from gslib.translation_helper import AclTranslation |
| 48 from gslib.translation_helper import GenerationFromUrlAndString |
| 49 from gslib.translation_helper import S3_ACL_MARKER_GUID |
| 50 from gslib.translation_helper import S3_DELETE_MARKER_GUID |
| 51 from gslib.translation_helper import S3_MARKER_GUIDS |
| 40 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
| 41 try: | 54 try: |
| 42 # This module doesn't necessarily exist on Windows. | 55 # This module doesn't necessarily exist on Windows. |
| 43 import resource | 56 import resource |
| 44 HAS_RESOURCE_MODULE = True | 57 HAS_RESOURCE_MODULE = True |
| 45 except ImportError, e: | 58 except ImportError, e: |
| 46 HAS_RESOURCE_MODULE = False | 59 HAS_RESOURCE_MODULE = False |
| 47 | 60 |
# Commonly used byte-size constants.
ONE_KB = 1024
TWO_MB = 2 * 1024 * 1024
TEN_MB = 10 * 1024 * 1024
DEFAULT_FILE_BUFFER_SIZE = 8192
_DEFAULT_LINES = 25

# By default, the timeout for SSL read errors is infinite. This could
# cause gsutil to hang on network disconnect, so pick a more reasonable
# timeout.
SSL_TIMEOUT = 60

# Start with a progress callback every 64KB during uploads/downloads (JSON API).
# Callback implementation should back off until it hits the maximum size
# so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 1024*64
MAX_CALLBACK_PER_BYTES = 1024*1024*100

# Upload/download files in 8KB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 1024*8

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files >= this size, output a message indicating that we're running an
# operation on the file (like hashing or gzipping) so it does not appear to the
# user that the command is hanging.
MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024  # 100 MB

NO_MAX = sys.maxint  # NOTE: Python 2 only; sys.maxsize is the Py3 equivalent.

UTF8 = 'utf-8'

# Matches a version string like "4.6" or "4.6beta", capturing major/minor
# components and any trailing suffix.
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'

# Binary exponentiation strings: (exponent of 2, byte suffix, bit suffix,
# optional short suffix).
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KB', 'Kbit', 'K'),
    (20, 'MB', 'Mbit', 'M'),
    (30, 'GB', 'Gbit', 'G'),
    (40, 'TB', 'Tbit', 'T'),
    (50, 'PB', 'Pbit', 'P'),
    (60, 'EB', 'Ebit', 'E'),
]

# Path of a certs file extracted to a temp location at runtime (if any);
# set elsewhere, consumed by GetCertsFile/GetCleanupFiles.
configured_certs_file = None

global manager  # pylint: disable=global-at-module-level
| 68 | 112 |
def InitializeMultiprocessingVariables():
  """Creates the module-global multiprocessing manager.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this initialization is necessary.
  """
  global manager  # pylint: disable=global-variable-undefined
  manager = multiprocessing.Manager()
| 77 | 121 |
| 122 |
def _GenerateSuffixRegex():
  """Builds a regex and suffix->index map for human-readable byte counts.

  Returns:
    Tuple of (dict mapping lowercase suffix -> index into _EXP_STRINGS,
    compiled regex matching a number plus optional size suffix).
  """
  suffix_to_si = {}
  suffixes = []
  for index, exp_tuple in enumerate(_EXP_STRINGS):
    # The first element is the exponent; the rest are suffix spellings.
    for suffix in [s.lower() for s in exp_tuple[1:]]:
      suffix_to_si[suffix] = index
      suffixes.append(suffix)
  pattern = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?' % '|'.join(suffixes)
  return suffix_to_si, re.compile(pattern)
| 90 | 136 |
SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()

SECONDS_PER_DAY = 3600 * 24

# Detect platform types.
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

Retry = retry_decorator.retry  # pylint: disable=invalid-name

# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
cached_multiprocessing_is_available_stack_trace = None
cached_multiprocessing_is_available_message = None
| 116 | 162 |
| 163 |
class ListingStyle(object):
  """Enum-like class specifying how much detail a listing should include."""
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'
| 122 | 169 |
| 123 | 170 |
def UsingCrcmodExtension(crcmod):
  """Returns truthy iff the crcmod package is using its C extension."""
  inner_module = getattr(crcmod, 'crcmod', None)
  return inner_module and getattr(inner_module, '_usingExtension', None)
| 127 | 174 |
| 128 | 175 |
def CreateDirIfNeeded(dir_path):
  """Creates dir_path (and parents), suppressing already-exists errors."""
  if os.path.exists(dir_path):
    return
  try:
    # Unfortunately, even though we catch and ignore EEXIST, this call can
    # still output a (needless) error message (no way to avoid that in Python).
    os.makedirs(dir_path)
  except OSError as e:
    # Ignore 'already exists' in case another process created the directory
    # concurrently (e.g. several resumable uploads starting at once on a
    # machine where no tracker dir had yet been created).
    if e.errno != errno.EEXIST:
      raise
| 189 |
| 190 |
def GetGsutilStateDir():
  """Returns the location of the directory for gsutil state files.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed. The directory is created if missing.

  This location should only be used for storing data that is required to be
  in a static location.

  Returns:
    Path to directory for gsutil static state files.
  """
  state_dir = config.get(
      'GSUtil', 'state_dir',
      os.path.expanduser(os.path.join('~', '.gsutil')))
  CreateDirIfNeeded(state_dir)
  return state_dir
| 209 |
| 210 |
def GetCredentialStoreFilename():
  """Returns the path of the file used for cross-process credential caching."""
  return os.path.join(GetGsutilStateDir(), 'credcache')
| 213 |
| 214 |
def CreateTrackerDirIfNeeded():
  """Looks up or creates the gsutil tracker file directory.

  This is the configured directory where gsutil keeps its resumable transfer
  tracker files. It defaults to a 'tracker-files' subdirectory of the gsutil
  state dir and is created if it doesn't already exist.

  Returns:
    The pathname to the tracker directory.
  """
  tracker_dir = config.get(
      'GSUtil', 'resumable_tracker_dir',
      os.path.join(GetGsutilStateDir(), 'tracker-files'))
  CreateDirIfNeeded(tracker_dir)
  return tracker_dir
| 151 | 229 |
| 152 | 230 |
def PrintTrackerDirDeprecationWarningIfNeeded():
  """Warns on stderr if the deprecated resumable_tracker_dir option is set."""
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if config.has_option('GSUtil', 'resumable_tracker_dir'):
    sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
                     '.boto configuration file. This configuration option is '
                     'deprecated; please use the state_dir configuration '
                     'option instead.\n')
| 239 |
| 240 |
# Emit the deprecation warning (if applicable) and make sure the state
# directory exists before building paths inside it.
PrintTrackerDirDeprecationWarningIfNeeded()
CreateDirIfNeeded(GetGsutilStateDir())

# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
    os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
| 157 | 247 |
| 158 | 248 |
def HasConfiguredCredentials():
  """Determines if boto credential/config file exists."""
  def _HasBoth(option_a, option_b):
    # True iff both Credentials options are present in the boto config.
    return (config.has_option('Credentials', option_a) and
            config.has_option('Credentials', option_b))

  has_goog_creds = _HasBoth('gs_access_key_id', 'gs_secret_access_key')
  has_amzn_creds = _HasBoth('aws_access_key_id', 'aws_secret_access_key')
  has_oauth_creds = config.has_option('Credentials', 'gs_oauth2_refresh_token')
  # Service account creds only count if a crypto library is available to
  # actually use them.
  has_service_account_creds = (
      HAS_CRYPTO and _HasBoth('gs_service_client_id', 'gs_service_key_file'))

  auth_handler = None
  try:
    auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
    # Exclude the no-op auth handler as indicating credentials are configured.
    # Note we can't use isinstance() here because the no-op module may not be
    # imported so we can't get a reference to the class type.
    handler_cls = getattr(auth_handler, '__class__', None)
    if getattr(handler_cls, '__name__', None) == 'NoOpAuth':
      auth_handler = None
  except NoAuthHandlerFound:
    pass

  return (has_goog_creds or has_amzn_creds or has_oauth_creds
          or has_service_account_creds or auth_handler)
| 188 | 278 |
| 189 | 279 |
def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured."""
  if HasConfiguredCredentials():
    return
  if (config.has_option('Credentials', 'gs_service_client_id')
      and not HAS_CRYPTO):
    # A service account is configured but can't be used without a crypto
    # library; tell the user how to proceed rather than failing obscurely.
    raise CommandException('\n'.join(textwrap.wrap(
        'Your gsutil is configured with an OAuth2 service account, but you '
        'do not have PyOpenSSL or PyCrypto 2.6 or later installed. Service '
        'account authentication requires one of these libraries; please '
        'install either of them to proceed, or configure a different type '
        'of credentials with "gsutil config".')))
  # With no boto config file the user can still access publicly readable
  # buckets and objects.
  from gslib import no_op_auth_plugin  # pylint: disable=unused-variable
| 208 | 295 |
| 209 | 296 |
def GetConfigFilePath():
  """Returns the first openable boto config path, or 'no config found'."""
  for candidate in BotoConfigLocations:
    try:
      with open(candidate, 'r'):
        return candidate
    except IOError:
      pass
  return 'no config found'
| 220 | 307 |
| 221 | 308 |
def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Returns:
    De-duplicated list of existing boto config file paths.
  """
  # Copy the module-level list: the original code appended directly to
  # boto.pyami.config.BotoConfigLocations, permanently mutating boto's
  # shared state (and growing it on every call when AWS_CREDENTIAL_FILE
  # is set in the environment).
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  # Use a dict keyed by path to de-duplicate while keeping existence checks.
  existing_files = {}
  for config_path in config_paths:
    if os.path.exists(config_path):
      existing_files[config_path] = 1
  return list(existing_files)
| 235 | 322 |
| 236 | 323 |
def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, amend the configuration
  (in boto.config) to use the cert roots distributed with gsutil.

  Returns:
    string filename of the certs file to use.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    if configured_certs_file:
      # A certs file was already extracted during this run; reuse it.
      disk_certs_file = configured_certs_file
    else:
      disk_certs_file = os.path.abspath(
          os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
      if not os.path.exists(disk_certs_file):
        # If the file is not present on disk, this means the gslib module
        # doesn't actually exist on disk anywhere. This can happen if it's
        # being imported from a zip file. Unfortunately, we have to copy the
        # certs file to a local temp file on disk because the underlying SSL
        # socket requires it to be a filesystem path.
        certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
        if not certs_data:
          raise CommandException('Certificates file not found. Please '
                                 'reinstall gsutil from scratch')
        # NOTE(review): pkgutil.get_data returns bytes; writing with mode 'w'
        # is fine on Python 2 but would need 'wb' on Python 3 — confirm.
        fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
        f = os.fdopen(fd, 'w')
        f.write(certs_data)
        f.close()
        disk_certs_file = fname
    certs_file = disk_certs_file
  return certs_file
| 357 |
| 358 |
def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  # Only the dynamically-extracted certs file (if any) needs cleanup.
  return [configured_certs_file] if configured_certs_file else []
| 365 |
| 366 |
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor.

  Returns:
    An initialized httplib2.Http instance.
  """
  # proxy_type=3 is the HTTP proxy type (socks.PROXY_TYPE_HTTP).
  proxy_info = httplib2.ProxyInfo(
      proxy_type=3,
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))
  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  new_http = http_class(proxy_info=proxy_info, **kwargs)
  new_http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates'))
  return new_http
| 393 |
| 394 |
def GetNumRetries():
  """Returns the configured number of retries (Boto num_retries, default 6)."""
  return config.getint('Boto', 'num_retries', 6)
| 397 |
| 398 |
def GetMaxRetryDelay():
  """Returns the max retry delay in seconds (Boto max_retry_delay, default 60)."""
  return config.getint('Boto', 'max_retry_delay', 60)
| 401 |
| 402 |
def _RoundToNearestExponent(num):
  """Returns (index into _EXP_STRINGS, num scaled to that exponent, 2 d.p.)."""
  i = 0
  # Advance to the largest exponent that does not exceed num.
  while i + 1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i + 1][0]):
    i += 1
  return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2)
| 242 | 408 |
| 243 | 409 |
| 244 def MakeHumanReadable(num): | 410 def MakeHumanReadable(num): |
| 245 """Generates human readable string for a number of bytes. | 411 """Generates human readable string for a number of bytes. |
| 246 | 412 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 267 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) | 433 return '%g %s' % (rounded_val, _EXP_STRINGS[i][2]) |
| 268 | 434 |
| 269 | 435 |
| 270 def HumanReadableToBytes(human_string): | 436 def HumanReadableToBytes(human_string): |
| 271 """Tries to convert a human-readable string to a number of bytes. | 437 """Tries to convert a human-readable string to a number of bytes. |
| 272 | 438 |
| 273 Args: | 439 Args: |
| 274 human_string: A string supplied by user, e.g. '1M', '3 GB'. | 440 human_string: A string supplied by user, e.g. '1M', '3 GB'. |
| 275 Returns: | 441 Returns: |
| 276 An integer containing the number of bytes. | 442 An integer containing the number of bytes. |
| 443 Raises: |
| 444 ValueError: on an invalid string. |
| 277 """ | 445 """ |
| 278 human_string = human_string.lower() | 446 human_string = human_string.lower() |
| 279 m = MATCH_HUMAN_BYTES.match(human_string) | 447 m = MATCH_HUMAN_BYTES.match(human_string) |
| 280 if m: | 448 if m: |
| 281 num = float(m.group('num')) | 449 num = float(m.group('num')) |
| 282 if m.group('suffix'): | 450 if m.group('suffix'): |
| 283 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] | 451 power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0] |
| 284 num *= (2.0 ** power) | 452 num *= (2.0 ** power) |
| 285 num = int(round(num)) | 453 num = int(round(num)) |
| 286 return num | 454 return num |
| (...skipping 20 matching lines...) Expand all Loading... |
| 307 k = (len(values) - 1) * percent | 475 k = (len(values) - 1) * percent |
| 308 f = math.floor(k) | 476 f = math.floor(k) |
| 309 c = math.ceil(k) | 477 c = math.ceil(k) |
| 310 if f == c: | 478 if f == c: |
| 311 return key(values[int(k)]) | 479 return key(values[int(k)]) |
| 312 d0 = key(values[int(f)]) * (c-k) | 480 d0 = key(values[int(f)]) * (c-k) |
| 313 d1 = key(values[int(c)]) * (k-f) | 481 d1 = key(values[int(c)]) * (k-f) |
| 314 return d0 + d1 | 482 return d0 + d1 |
| 315 | 483 |
| 316 | 484 |
def RemoveCRLFFromString(input_str):
  """Returns input_str with every carriage return and newline removed."""
  return input_str.replace('\r', '').replace('\n', '')
| 319 | 488 |
| 320 Args: | |
| 321 e: The GSResponseError that includes XML to be parsed. | |
| 322 | |
| 323 Returns: | |
| 324 (exception_name, m, d), where m is <Message> text or None, | |
| 325 and d is <Details> text or None. | |
| 326 """ | |
| 327 exc_name_parts = re.split("[\.']", str(type(e))) | |
| 328 if len(exc_name_parts) < 2: | |
| 329 # Shouldn't happen, but have fallback in case. | |
| 330 exc_name = str(type(e)) | |
| 331 else: | |
| 332 exc_name = exc_name_parts[-2] | |
| 333 if not hasattr(e, 'body') or e.body is None: | |
| 334 return (exc_name, None) | |
| 335 | |
| 336 match = re.search(r'<Message>(?P<message>.*)</Message>', e.body) | |
| 337 m = match.group('message') if match else None | |
| 338 match = re.search(r'<Details>(?P<details>.*)</Details>', e.body) | |
| 339 d = match.group('details') if match else None | |
| 340 return (exc_name, m, d) | |
| 341 | |
| 342 def FormatErrorMessage(exc_name, status, code, reason, message, detail): | |
| 343 """Formats an error message from components parsed by ParseErrorDetail.""" | |
| 344 if message and detail: | |
| 345 return('%s: status=%d, code=%s, reason="%s", message="%s", detail="%s"' % | |
| 346 (exc_name, status, code, reason, message, detail)) | |
| 347 if message: | |
| 348 return('%s: status=%d, code=%s, reason="%s", message="%s"' % | |
| 349 (exc_name, status, code, reason, message)) | |
| 350 if detail: | |
| 351 return('%s: status=%d, code=%s, reason="%s", detail="%s"' % | |
| 352 (exc_name, status, code, reason, detail)) | |
| 353 return('%s: status=%d, code=%s, reason="%s"' % | |
| 354 (exc_name, status, code, reason)) | |
| 355 | 489 |
def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict.

  Raises:
    Exception: if dict contains more than one top-level entry.
  """
  if len(message) != 1:
    raise Exception('Expected dict of size 1, got size %d' % len(message))

  # next(iter(...)) works on both Python 2 and 3; dict.items()[0] is
  # Python-2-only because items() returns a non-indexable view in Python 3.
  name, content = next(iter(message.items()))
  element_type = ElementTree.Element(name)
  for element_property, value in sorted(content.items()):
    node = ElementTree.SubElement(element_type, element_property)
    node.text = value
  return ElementTree.tostring(element_type)
| 376 | 514 |
| 377 | 515 |
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  The version is read from the gsutil_version metadata entry set on that
  object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if not url.IsCloudUrl():
    return None
  obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                     provider=url.scheme,
                                     fields=['metadata'])
  if obj.metadata and obj.metadata.additionalProperties:
    for prop in obj.metadata.additionalProperties:
      if prop.key == 'gsutil_version':
        return prop.value
| 393 | 538 |
| 394 | 539 |
def GetGsutilVersionModifiedTime():
  """Returns the unix mtime of the gsutil VERSION file, or 0 if unset."""
  version_file = gslib.VERSION_FILE
  if version_file:
    return int(os.path.getmtime(version_file))
  return 0
| 400 | 545 |
| 401 | 546 |
| 402 def IsRunningInteractively(): | 547 def IsRunningInteractively(): |
| 403 """Returns True if currently running interactively on a TTY.""" | 548 """Returns True if currently running interactively on a TTY.""" |
| 404 return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty() | 549 return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty() |
| 405 | 550 |
| 406 | 551 |
def _HttpsValidateCertificatesEnabled():
  """Returns the Boto https_validate_certificates setting (default True)."""
  # NOTE: renamed from the misspelled _HttpsValidateCertifcatesEnabled;
  # the helper is module-private and its only use is the constant below.
  return config.get('Boto', 'https_validate_certificates', True)

# Evaluated once at import time.
CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()
| 556 |
| 557 |
def _BotoIsSecure():
  """Returns the Boto 'is_secure' config value (defaults to True)."""
  return config.get('Boto', 'is_secure', True)

# Cached once at import time.
BOTO_IS_SECURE = _BotoIsSecure()
| 415 | 562 |
| 416 | 563 |
def ResumableThreshold():
  """Returns the [GSUtil] resumable_threshold config value (default TWO_MB)."""
  return config.getint('GSUtil', 'resumable_threshold', TWO_MB)
| 566 |
| 567 |
def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers, if absent.

  Args:
    headers: Mutable dict of HTTP headers; modified in place. Any
        caller-supplied 'accept-encoding' value is left untouched.
  """
  # setdefault is the idiomatic "set only if not already present".
  headers.setdefault('accept-encoding', 'gzip')
| 422 | 573 |
| 423 | 574 |
# pylint: disable=too-many-statements
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  # S3 delete markers carry no content; count them as 0 objects / 0 bytes
  # and tag the printed URL so the listing makes this visible.
  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  # Split metadata into ordinary user metadata (printed below) and the
  # special S3 marker entries (consumed by the ACL logic further down).
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration
  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      # S3 ACLs arrive via the marker metadata entry rather than obj.acl.
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
| 653 |
| 496 | 654 |
def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
      g is True if first known to be greater than second, else False.
      m is True if first known to be greater by at least 1 major version,
      else False.
  """
  match1 = VERSION_MATCHER.match(str(first))
  match2 = VERSION_MATCHER.match(str(second))

  # Strings we can't parse compare as "unknown": report not-greater.
  if not (match1 and match2):
    return (False, False)

  def _Components(match):
    # (major, minor, suffix); a missing minor component counts as 0.
    minor = int(match.group('min')) if match.group('min') else 0
    return (int(match.group('maj')), minor, match.group('suffix'))

  maj1, min1, suffix1 = _Components(match1)
  maj2, min2, suffix2 = _Components(match2)

  if maj1 != maj2:
    greater = maj1 > maj2
    return (greater, greater)
  if min1 != min2:
    return (min1 > min2, False)
  # Same major.minor: a suffixed version is older than the plain one.
  return (bool(suffix2) and not suffix1, False)
| 533 | 695 |
| 534 def _IncreaseSoftLimitForResource(resource_name): | 696 |
| 535 """Sets a new soft limit for the maximum number of open files. | 697 def _IncreaseSoftLimitForResource(resource_name, fallback_value): |
| 536 The soft limit is used for this process (and its children), but the | 698 """Sets a new soft limit for the maximum number of open files. |
| 537 hard limit is set by the system and cannot be exceeded. | 699 |
| 700 The soft limit is used for this process (and its children), but the |
| 701 hard limit is set by the system and cannot be exceeded. |
| 702 |
| 703 We will first try to set the soft limit to the hard limit's value; if that |
| 704 fails, we will try to set the soft limit to the fallback_value iff this would |
| 705 increase the soft limit. |
| 706 |
| 707 Args: |
| 708 resource_name: Name of the resource to increase the soft limit for. |
| 709 fallback_value: Fallback value to be used if we couldn't set the |
| 710 soft value to the hard value (e.g., if the hard value |
| 711 is "unlimited"). |
| 712 |
| 713 Returns: |
| 714 Current soft limit for the resource (after any changes we were able to |
| 715 make), or -1 if the resource doesn't exist. |
| 538 """ | 716 """ |
| 717 |
| 718 # Get the value of the resource. |
| 539 try: | 719 try: |
| 540 (soft_limit, hard_limit) = resource.getrlimit(resource_name) | 720 (soft_limit, hard_limit) = resource.getrlimit(resource_name) |
| 541 resource.setrlimit(resource_name, (hard_limit, hard_limit)) | 721 except (resource.error, ValueError): |
| 542 return hard_limit | 722 # The resource wasn't present, so we can't do anything here. |
| 543 except (resource.error, ValueError), e: | 723 return -1 |
| 544 return 0 | 724 |
| 725 # Try to set the value of the soft limit to the value of the hard limit. |
| 726 if hard_limit > soft_limit: # Some OS's report 0 for "unlimited". |
| 727 try: |
| 728 resource.setrlimit(resource_name, (hard_limit, hard_limit)) |
| 729 return hard_limit |
| 730 except (resource.error, ValueError): |
| 731 # We'll ignore this and try the fallback value. |
| 732 pass |
| 733 |
| 734 # Try to set the value of the soft limit to the fallback value. |
| 735 if soft_limit < fallback_value: |
| 736 try: |
| 737 resource.setrlimit(resource_name, (fallback_value, hard_limit)) |
| 738 return fallback_value |
| 739 except (resource.error, ValueError): |
| 740 # We couldn't change the soft limit, so just report the current |
| 741 # value of the soft limit. |
| 742 return soft_limit |
| 743 else: |
| 744 return soft_limit |
| 745 |
| 746 |
def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
                  Cloud API instance.

  Returns:
    gsutil Cloud API instance.
  """
  if thread_state:
    return thread_state
  return cls.gsutil_api
| 763 |
| 764 |
def GetFileSize(fp, position_to_eof=False):
  """Returns the size of the file underlying fp.

  Args:
    fp: Seekable file-like object.
    position_to_eof: If True, leave fp positioned at end-of-file;
        otherwise restore fp's original position before returning.

  Returns:
    Size of the file in bytes.
  """
  saved_pos = None if position_to_eof else fp.tell()
  fp.seek(0, os.SEEK_END)
  size = fp.tell()
  if saved_pos is not None:
    fp.seek(saved_pos)
  return size
| 774 |
| 775 |
def GetStreamFromFileUrl(storage_url):
  """Opens a read stream for the given file URL.

  Args:
    storage_url: StorageUrl naming a local file or a stream.

  Returns:
    sys.stdin when the URL denotes a stream; otherwise a newly opened
    binary file object for storage_url.object_name (caller closes it).
  """
  if not storage_url.IsStream():
    return open(storage_url.object_name, 'rb')
  return sys.stdin
| 781 |
| 782 |
def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for a single provider; False if they span multiple
    providers or url_args is empty.
  """
  # Removed the dead pre-loop local ('url = None'), whose value was never
  # read after the loop.
  provider = None
  for url_str in url_args:
    scheme = StorageUrlFromString(url_str).scheme
    if not provider:
      # First URL establishes the provider the rest must match.
      provider = scheme
    elif scheme != provider:
      return False
  return provider is not None
| 801 |
| 802 |
def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains at least one file URL.
  """
  # any() short-circuits on the first file URL, like the original loop.
  return any(
      StorageUrlFromString(url_str).IsFileUrl() for url_str in args_to_check)
| 817 |
| 818 |
def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains at least one provider URL.
  """
  # any() short-circuits on the first provider URL, like the original loop.
  return any(
      url.IsCloudUrl() and url.IsProvider()
      for url in (StorageUrlFromString(s) for s in args_to_check))
| 833 |
| 545 | 834 |
def MultiprocessingIsAvailable(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
                                    available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  # The probe below is expensive (creates a Manager process), so its result
  # is cached module-wide after the first call.
  if cached_multiprocessing_is_available is not None:
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return (cached_multiprocessing_is_available,
            cached_multiprocessing_check_stack_trace)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple threads or processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Your operations will be performed sequentially, and any requests for
parallelism will be ignored.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      if not IS_WINDOWS:
        message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    # Any failure above means we must run sequentially; record why.
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return (multiprocessing_is_available, stack_trace)
| 633 | 940 |
| 941 |
def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  A multiprocessing lock is used iff the parts of the multiprocessing
  module necessary for operation-level parallelism are available;
  otherwise a threading lock suffices.

  Returns:
    Multiprocessing or threading lock.
  """
  use_multiprocessing = MultiprocessingIsAvailable()[0]
  return manager.Lock() if use_multiprocessing else threading.Lock()
| 955 |
| 956 |
def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL names a cloud subdir placeholder object.

  GUI tools (like the GCS cloud console) allow users to create empty
  "folders" by creating a placeholder object, and parts of gsutil need to
  treat those placeholder objects specially. For example, gsutil rsync
  needs to avoid downloading them because they can cause conflicts (see
  comments in the rsync command for details).

  Two placeholder forms are detected:
    - cloud objects whose name ends with '_$folder$'
    - zero-size cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    return False
  name = url.url_string
  if name.endswith('_$folder$'):
    return True
  # Without a listing ref we can't know the size; treat it as 0.
  size = blr.root_object.size if blr and blr.IsObject() else 0
  return name.endswith('/') and size == 0
| 988 |
| 989 |
def GetTermLines():
  """Returns number of terminal lines."""
  # fcntl/termios aren't supported on Windows.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def _LinesViaIoctl(fd):  # pylint: disable=invalid-name
    # Returns the row count reported by TIOCGWINSZ for fd, or 0 on failure
    # (so the caller falls through to the next file descriptor).
    try:
      winsize = fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')
      return struct.unpack('hh', winsize)[0]
    except:  # pylint: disable=bare-except
      return 0

  # Probe stdin, stdout and stderr, in that order.
  lines = _LinesViaIoctl(0) or _LinesViaIoctl(1) or _LinesViaIoctl(2)
  if not lines:
    # Fall back to the controlling terminal.
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      try:
        lines = _LinesViaIoctl(fd)
      finally:
        os.close(fd)
    except:  # pylint: disable=bare-except
      pass
  if not lines:
    lines = os.environ.get('LINES', _DEFAULT_LINES)
  return int(lines)
| OLD | NEW |