Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(421)

Unified Diff: common.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | download_from_google_storage.py » ('j') | download_from_google_storage.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: common.py
diff --git a/common.py b/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..f476facf1a88ccf817bffbbb876ddd272ab43b2f
--- /dev/null
+++ b/common.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# common.py: Utility functions common to the Google storage scripts.
M-A Ruel 2013/02/22 01:15:56 docstring
Ryan Tseng 2013/02/22 02:38:00 Done.
+
+import subprocess2
+import hashlib
M-A Ruel 2013/02/22 01:15:56 sort
Ryan Tseng 2013/02/22 02:38:00 Done.
+import os
+import sys
+import re
+
M-A Ruel 2013/02/22 01:15:56 2 lines
Ryan Tseng 2013/02/22 02:38:00 Done.
class Gsutil(object):
  """Calls gsutil with some predefined settings.

  The settings are captured once by the constructor and reused for every
  invocation:
    path: location of the gsutil script; it is run with sys.executable.
    boto_path: optional file path exported to the child process as
        AWS_CREDENTIAL_FILE.
    timeout: optional timeout handed to subprocess2 unchanged.
  """

  def __init__(self, path, boto_path=None, timeout=None):
    """Records the settings used by later gsutil invocations.

    Raises:
      OSError: if |path| does not exist.
    """
    if not os.path.exists(path):
      raise OSError('GSUtil not found in %s' % path)
    self.path = path
    self.timeout = timeout
    self.boto_path = boto_path

  def _get_env(self):
    """Returns a copy of os.environ, plus AWS_CREDENTIAL_FILE if configured."""
    env = os.environ.copy()
    if self.boto_path is not None:
      env['AWS_CREDENTIAL_FILE'] = self.boto_path
    return env

  @staticmethod
  def _status_from_stderr(err, code):
    """Derives the status reported by check_call() from gsutil's stderr.

    Args:
      err: text gsutil wrote to stderr.
      code: exit code of the gsutil process, used when stderr has no hint.

    Returns:
      The number following 'status=' if stderr contains one, 403 for the
      missing-credentials message, 404 for 'No such object', else |code|.
    """
    status_code_match = re.search('status=([0-9]+)', err)
    if status_code_match:
      # group(1) is the captured digits; groups() would return a tuple, which
      # int() cannot convert.
      return int(status_code_match.group(1))
    if ('You are attempting to access protected data with '
        'no configured credentials.' in err):
      return 403
    if 'No such object' in err:
      return 404
    return code

  def call(self, *args):
    """Runs gsutil with |args| and returns the result of subprocess2.call()."""
    return subprocess2.call((sys.executable, self.path) + args,
                            env=self._get_env(),
                            timeout=self.timeout)

  def check_call(self, *args):
    """Runs gsutil with |args|, capturing stdout and stderr.

    Returns:
      A (status, out, err) tuple in every case. |status| is computed by
      _status_from_stderr() from the captured stderr and the exit code.
    """
    ((out, err), code) = subprocess2.communicate(
        (sys.executable, self.path) + args,
        stdout=subprocess2.PIPE,
        stderr=subprocess2.PIPE,
        env=self._get_env(),
        timeout=self.timeout)
    return (self._status_from_stderr(err, code), out, err)

  def clone(self):
    """Returns a new Gsutil built from the same path, boto_path and timeout."""
    return Gsutil(self.path, self.boto_path, self.timeout)
+
+
def GetSHA1(filename):
  """Returns the hex SHA-1 digest of the contents of |filename|.

  The file is opened in binary mode and hashed in 1 MiB chunks, so it never
  has to be loaded into memory all at once.
  """
  sha1 = hashlib.sha1()
  with open(filename, 'rb') as f:
    # iter() with a sentinel keeps calling f.read() until it returns the empty
    # bytes object, i.e. end of file.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha1.update(chunk)
  return sha1.hexdigest()
+
+
def CheckSHA1(sha1_sum, filename):
  """Returns True if GetSHA1(filename) equals |sha1_sum|, else False.

  The comparison is a plain equality test against the digest string returned
  by GetSHA1().
  """
  return sha1_sum == GetSHA1(filename)
+
+
def _ReadCachedMD5(filename):
  """Returns the MD5 sum stored in '<filename>.md5', or None if unavailable.

  None is returned when the cache file does not exist or contains no run of
  32 characters matching [a-z0-9].
  """
  cache_path = '%s.md5' % filename
  if not os.path.exists(cache_path):
    return None
  with open(cache_path) as f:
    md5_match = re.search('([a-z0-9]{32})', f.read())
  if md5_match:
    return md5_match.group(1)
  return None


def _WriteCachedMD5(filename, md5_sum):
  """Stores |md5_sum| in '<filename>.md5', replacing any previous content."""
  with open('%s.md5' % filename, 'w') as f:
    f.write(md5_sum)


def _CalculateMD5(filename):
  """Returns the hex MD5 digest of |filename|, read in 1 MiB chunks."""
  md5_calculator = hashlib.md5()
  with open(filename, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      md5_calculator.update(chunk)
  return md5_calculator.hexdigest()


def GetMD5(filename, lock=None, use_md5=False):
  """Returns the hex MD5 digest of |filename|, optionally cached on disk.

  Args:
    filename: path of the file to hash.
    lock: context manager held while the file is read and hashed. None means
        no locking is done.
    use_md5: if true, a digest found in '<filename>.md5' is returned without
        reading |filename|, and a freshly computed digest is written to that
        file.

  Returns:
    The MD5 digest as a hex string.
  """
  # See if we can find an existing MD5 sum stored in a file.
  if use_md5:
    cached_md5 = _ReadCachedMD5(filename)
    if cached_md5:
      return cached_md5

  # Calculate the MD5 checksum of the file.
  if lock is None:
    local_md5 = _CalculateMD5(filename)
  else:
    with lock:
      local_md5 = _CalculateMD5(filename)

  if use_md5:
    _WriteCachedMD5(filename, local_md5)
  return local_md5
« no previous file with comments | « no previous file | download_from_google_storage.py » ('j') | download_from_google_storage.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698