upload_to_google_storage.py - Issue 1048103002: In upload_to_google_storage, pass -z argument through to gsutil.

Side by Side Diff: upload_to_google_storage.py

Issue 1048103002: In upload_to_google_storage, pass -z argument through to gsutil. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: "Address comments from hinoka" (created 5 years, 8 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Uploads files to Google Storage content addressed."""	6 """Uploads files to Google Storage content addressed."""

7	7

8 import hashlib	8 import hashlib

9 import optparse	9 import optparse

10 import os	10 import os

(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 return md5_match.group(1)	60 return md5_match.group(1)

61 else:	61 else:

62 md5_hash = get_md5(filename)	62 md5_hash = get_md5(filename)

63 with open('%s.md5' % filename, 'wb') as f:	63 with open('%s.md5' % filename, 'wb') as f:

64 f.write(md5_hash)	64 f.write(md5_hash)

65 return md5_hash	65 return md5_hash

66	66

67	67

68 def _upload_worker(	68 def _upload_worker(

69 thread_num, upload_queue, base_url, gsutil, md5_lock, force,	69 thread_num, upload_queue, base_url, gsutil, md5_lock, force,

70 use_md5, stdout_queue, ret_codes):	70 use_md5, stdout_queue, ret_codes, gzip):

71 while True:	71 while True:

72 filename, sha1_sum = upload_queue.get()	72 filename, sha1_sum = upload_queue.get()

73 if not filename:	73 if not filename:

74 break	74 break

75 file_url = '%s/%s' % (base_url, sha1_sum)	75 file_url = '%s/%s' % (base_url, sha1_sum)

76 if gsutil.check_call('ls', file_url)[0] == 0 and not force:	76 if gsutil.check_call('ls', file_url)[0] == 0 and not force:

77 # File exists, check MD5 hash.	77 # File exists, check MD5 hash.

78 _, out, _ = gsutil.check_call('ls', '-L', file_url)	78 _, out, _ = gsutil.check_call('ls', '-L', file_url)

79 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out)	79 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out)

80 if etag_match:	80 if etag_match:

81 remote_md5 = etag_match.group(1)	81 remote_md5 = etag_match.group(1)

82 # Calculate the MD5 checksum to match it to Google Storage's ETag.	82 # Calculate the MD5 checksum to match it to Google Storage's ETag.

83 with md5_lock:	83 with md5_lock:

84 if use_md5:	84 if use_md5:

85 local_md5 = get_md5_cached(filename)	85 local_md5 = get_md5_cached(filename)

86 else:	86 else:

87 local_md5 = get_md5(filename)	87 local_md5 = get_md5(filename)

88 if local_md5 == remote_md5:	88 if local_md5 == remote_md5:

89 stdout_queue.put(	89 stdout_queue.put(

90 '%d> File %s already exists and MD5 matches, upload skipped' %	90 '%d> File %s already exists and MD5 matches, upload skipped' %

91 (thread_num, filename))	91 (thread_num, filename))

92 continue	92 continue

93 stdout_queue.put('%d> Uploading %s...' % (	93 stdout_queue.put('%d> Uploading %s...' % (

94 thread_num, filename))	94 thread_num, filename))

95 code, _, err = gsutil.check_call('cp', filename, file_url)	95 gsutil_args = ['cp']

	96 if gzip:

	97 gsutil_args.extend(['-z', gzip])

	98 gsutil_args.extend([filename, file_url])

	99 code, _, err = gsutil.check_call(*gsutil_args)

96 if code != 0:	100 if code != 0:

97 ret_codes.put(	101 ret_codes.put(

98 (code,	102 (code,

99 'Encountered error on uploading %s to %s\n%s' %	103 'Encountered error on uploading %s to %s\n%s' %

100 (filename, file_url, err)))	104 (filename, file_url, err)))

101 continue	105 continue

102	106

103 # Mark executable files with the header "x-goog-meta-executable: 1" which	107 # Mark executable files with the header "x-goog-meta-executable: 1" which

104 # the download script will check for to preserve the executable bit.	108 # the download script will check for to preserve the executable bit.

105 if not sys.platform.startswith('win'):	109 if not sys.platform.startswith('win'):

(...skipping 16 matching lines...) Expand all
122 if use_null_terminator:	126 if use_null_terminator:

123 return sys.stdin.read().split('\0')	127 return sys.stdin.read().split('\0')

124 else:	128 else:

125 return sys.stdin.read().splitlines()	129 return sys.stdin.read().splitlines()

126 else:	130 else:

127 return args	131 return args

128	132

129	133

130 def upload_to_google_storage(	134 def upload_to_google_storage(

131 input_filenames, base_url, gsutil, force,	135 input_filenames, base_url, gsutil, force,

132 use_md5, num_threads, skip_hashing):	136 use_md5, num_threads, skip_hashing, gzip):

133 # We only want one MD5 calculation happening at a time to avoid HD thrashing.	137 # We only want one MD5 calculation happening at a time to avoid HD thrashing.

134 md5_lock = threading.Lock()	138 md5_lock = threading.Lock()

135	139

136 # Start up all the worker threads plus the printer thread.	140 # Start up all the worker threads plus the printer thread.

137 all_threads = []	141 all_threads = []

138 ret_codes = Queue.Queue()	142 ret_codes = Queue.Queue()

139 ret_codes.put((0, None))	143 ret_codes.put((0, None))

140 upload_queue = Queue.Queue()	144 upload_queue = Queue.Queue()

141 upload_timer = time.time()	145 upload_timer = time.time()

142 stdout_queue = Queue.Queue()	146 stdout_queue = Queue.Queue()

143 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])	147 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])

144 printer_thread.daemon = True	148 printer_thread.daemon = True

145 printer_thread.start()	149 printer_thread.start()

146 for thread_num in range(num_threads):	150 for thread_num in range(num_threads):

147 t = threading.Thread(	151 t = threading.Thread(

148 target=_upload_worker,	152 target=_upload_worker,

149 args=[thread_num, upload_queue, base_url, gsutil, md5_lock,	153 args=[thread_num, upload_queue, base_url, gsutil, md5_lock,

150 force, use_md5, stdout_queue, ret_codes])	154 force, use_md5, stdout_queue, ret_codes, gzip])

151 t.daemon = True	155 t.daemon = True

152 t.start()	156 t.start()

153 all_threads.append(t)	157 all_threads.append(t)

154	158

155 # We want to hash everything in a single thread since its faster.	159 # We want to hash everything in a single thread since its faster.

156 # The bottleneck is in disk IO, not CPU.	160 # The bottleneck is in disk IO, not CPU.

157 hashing_start = time.time()	161 hashing_start = time.time()

158 for filename in input_filenames:	162 for filename in input_filenames:

159 if not os.path.exists(filename):	163 if not os.path.exists(filename):

160 stdout_queue.put('Main> Error: %s not found, skipping.' % filename)	164 stdout_queue.put('Main> Error: %s not found, skipping.' % filename)

(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 help='Generate MD5 files when scanning, and don\'t check '	220 help='Generate MD5 files when scanning, and don\'t check '

217 'the MD5 checksum if a .md5 file is found.')	221 'the MD5 checksum if a .md5 file is found.')

218 parser.add_option('-t', '--num_threads', default=1, type='int',	222 parser.add_option('-t', '--num_threads', default=1, type='int',

219 help='Number of uploader threads to run.')	223 help='Number of uploader threads to run.')

220 parser.add_option('-s', '--skip_hashing', action='store_true',	224 parser.add_option('-s', '--skip_hashing', action='store_true',

221 help='Skip hashing if .sha1 file exists.')	225 help='Skip hashing if .sha1 file exists.')

222 parser.add_option('-0', '--use_null_terminator', action='store_true',	226 parser.add_option('-0', '--use_null_terminator', action='store_true',

223 help='Use \\0 instead of \\n when parsing '	227 help='Use \\0 instead of \\n when parsing '

224 'the file list from stdin. This is useful if the input '	228 'the file list from stdin. This is useful if the input '

225 'is coming from "find ... -print0".')	229 'is coming from "find ... -print0".')

	230 parser.add_option('-z', '--gzip', metavar='ext',

	231 help='Gzip files which end in ext. '

	232 'ext is a comma-separated list')

226 (options, args) = parser.parse_args()	233 (options, args) = parser.parse_args()

227	234

228 # Enumerate our inputs.	235 # Enumerate our inputs.

229 input_filenames = get_targets(args, parser, options.use_null_terminator)	236 input_filenames = get_targets(args, parser, options.use_null_terminator)

230	237

231 # Make sure we can find a working instance of gsutil.	238 # Make sure we can find a working instance of gsutil.

232 if os.path.exists(GSUTIL_DEFAULT_PATH):	239 if os.path.exists(GSUTIL_DEFAULT_PATH):

233 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto)	240 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto)

234 else:	241 else:

235 gsutil = None	242 gsutil = None

236 for path in os.environ["PATH"].split(os.pathsep):	243 for path in os.environ["PATH"].split(os.pathsep):

237 if os.path.exists(path) and 'gsutil' in os.listdir(path):	244 if os.path.exists(path) and 'gsutil' in os.listdir(path):

238 gsutil = Gsutil(os.path.join(path, 'gsutil'), boto_path=options.boto)	245 gsutil = Gsutil(os.path.join(path, 'gsutil'), boto_path=options.boto)

239 if not gsutil:	246 if not gsutil:

240 parser.error('gsutil not found in %s, bad depot_tools checkout?' %	247 parser.error('gsutil not found in %s, bad depot_tools checkout?' %

241 GSUTIL_DEFAULT_PATH)	248 GSUTIL_DEFAULT_PATH)

242	249

243 base_url = 'gs://%s' % options.bucket	250 base_url = 'gs://%s' % options.bucket

244	251

245 return upload_to_google_storage(	252 return upload_to_google_storage(

246 input_filenames, base_url, gsutil, options.force, options.use_md5,	253 input_filenames, base_url, gsutil, options.force, options.use_md5,

247 options.num_threads, options.skip_hashing)	254 options.num_threads, options.skip_hashing, options.gzip)

248	255

249	256

250 if __name__ == '__main__':	257 if __name__ == '__main__':

251 try:	258 try:

252 sys.exit(main())	259 sys.exit(main())

253 except KeyboardInterrupt:	260 except KeyboardInterrupt:

254 sys.stderr.write('interrupted\n')	261 sys.stderr.write('interrupted\n')

255 sys.exit(1)	262 sys.exit(1)

OLD	NEW

« no previous file with comments | « tests/upload_to_google_storage_unittests.py ('k') | no next file » | no next file with comments »