Chromium Code Reviews| Index: tools/win/toolchain/get_toolchain_if_necessary.py |
| diff --git a/tools/win/toolchain/get_toolchain_if_necessary.py b/tools/win/toolchain/get_toolchain_if_necessary.py |
| index e93e6f029fa4352b3f9d39ec5242c25d18df6f97..5e63c649acf654e818973945a0f2f32051700175 100644 |
| --- a/tools/win/toolchain/get_toolchain_if_necessary.py |
| +++ b/tools/win/toolchain/get_toolchain_if_necessary.py |
| @@ -3,6 +3,7 @@ |
| # found in the LICENSE file. |
| import hashlib |
| +import json |
| import os |
| import subprocess |
| import sys |
| @@ -11,23 +12,69 @@ import sys |
| BASEDIR = os.path.dirname(os.path.abspath(__file__)) |
| -def CalculateHash(root): |
| - """Calculates the sha1 of the paths to all files in the given |root| and the |
| - contents of those files, and returns as a hex string.""" |
| +def GetFileList(root): |
| + """Gets a normalized list of files under |root|.""" |
| assert not os.path.isabs(root) |
| assert os.path.normpath(root) == root |
| - digest = hashlib.sha1() |
| - count = 0 |
| + file_list = [] |
| for root, dirs, files in os.walk(root): |
| dirs.sort() |
| for name in sorted(f.lower() for f in files): |
|
M-A Ruel
2013/11/29 00:33:30
file_list = []
for root, dirs, files in os.walk(ro
scottmg
2013/11/29 01:54:10
Interestingly, it results in a different order. Pr
M-A Ruel
2013/11/29 02:07:28
Ah! I didn't think about that. As you prefer.
|
| - path = os.path.join(root, name) |
| - digest.update(path.lower()) |
| - with open(path, 'rb') as f: |
| - digest.update(f.read()) |
| + file_list.append(os.path.join(root, name).lower()) |
| + return file_list |
| + |
| + |
| +def MakeTimestampsFileName(root): |
| + return os.path.join(root, '..', '.timestamps') |
| + |
| + |
| +def CalculateHash(root): |
| + """Calculates the sha1 of the paths to all files in the given |root| and the |
| + contents of those files, and returns as a hex string.""" |
| + file_list = GetFileList(root) |
| + |
| + # Check whether we previously saved timestamps in $root/../.timestamps. If |
| + # we didn't, or they don't match, then do the full calculation, otherwise |
| + # return the saved value. |
| + timestamps_file = MakeTimestampsFileName(root) |
| + timestamps_data = {'files': [], 'sha1': ''} |
| + if os.path.exists(timestamps_file): |
| + with open(timestamps_file, 'rb') as f: |
| + try: |
| + timestamps_data = json.load(f) |
| + except ValueError: |
| + # json couldn't be loaded, empty data will force a re-hash. |
| + pass |
| + |
| + matches = len(file_list) == len(timestamps_data['files']) |
|
M-A Ruel
2013/11/29 00:33:30
I wonder about "legitimate" junk files, like thumb
scottmg
2013/11/29 01:54:10
Yeah... If it comes up we could blacklist some fil
|
| + if matches: |
| + for disk, cached in zip(file_list, timestamps_data['files']): |
| + if disk != cached[0] or os.stat(disk).st_mtime != cached[1]: |
| + matches = False |
| + break |
| + if matches: |
| + return timestamps_data['sha1'] |
| + |
| + digest = hashlib.sha1() |
| + for path in file_list: |
| + digest.update(path) |
| + with open(path, 'rb') as f: |
| + digest.update(f.read()) |
| return digest.hexdigest() |
| +def SaveTimestampsAndHash(root, sha1): |
| + """Save timestamps and the final hash to be able to early-out more quickly |
| + next time.""" |
| + file_list = GetFileList(root) |
| + timestamps_data = { |
| + 'files': [[f, os.stat(f).st_mtime] for f in file_list], |
| + 'sha1': sha1, |
| + } |
| + with open(MakeTimestampsFileName(root), 'wb') as f: |
| + json.dump(timestamps_data, f) |
| + |
| + |
| def main(): |
| if sys.platform not in ('win32', 'cygwin'): |
| return 0 |
| @@ -48,9 +95,9 @@ def main(): |
| desired_hash = f.read().strip() |
| # If the current hash doesn't match what we want in the file, nuke and pave. |
| - # Note that this script is only run when a .sha1 file is updated (per DEPS) |
| - # so this relatively expensive step of hashing everything only happens when |
| - # the toolchain is updated. |
| + # Typically this script is only run when the .sha1 file is updated, but |
| + # directly calling "gclient runhooks" will also run it, so we cache |
| + # based on timestamps to make that case fast. |
| current_hash = CalculateHash(target_dir) |
| if current_hash != desired_hash: |
| print 'Windows toolchain out of date or doesn\'t exist, updating...' |
| @@ -60,14 +107,15 @@ def main(): |
| sys.executable, |
| 'src\\tools\\win\\toolchain\\toolchain2013.py', |
| '--targetdir', target_dir]) |
| + current_hash = CalculateHash(target_dir) |
| + if current_hash != desired_hash: |
| + print >> sys.stderr, ( |
| + 'Got wrong hash after pulling a new toolchain. ' |
| + 'Wanted \'%s\', got \'%s\'.' % ( |
| + desired_hash, current_hash)) |
| + return 1 |
| + SaveTimestampsAndHash(target_dir, current_hash) |
| - current_hash = CalculateHash(target_dir) |
| - if current_hash != desired_hash: |
| - print >> sys.stderr, ( |
| - 'Got wrong hash after pulling a new toolchain. ' |
| - 'Wanted \'%s\', got \'%s\'.' % ( |
| - desired_hash, current_hash)) |
| - return 1 |
| return 0 |