Index: win_toolchain/treehash/treehash.cc |
diff --git a/win_toolchain/treehash/treehash.cc b/win_toolchain/treehash/treehash.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d9528a1b6d0880362a880863e1ca478c274947b1 |
--- /dev/null |
+++ b/win_toolchain/treehash/treehash.cc |
@@ -0,0 +1,337 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// The equivalent Python program |
+// (http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/win_toolchain/get_toolchain_if_necessary.py?revision=259915) |
+// and treehash.py here takes about 1s on a fast, hot SSD to hash the windows |
+// toolchain tree. This is annoying when used inside GN as its runtime is |
+// otherwise ~0s. |
+ |
+// What this tool does: |
+// |
// Calculates (recursively) the sha1 of a directory tree. Because the actual
// sha1'ing takes non-zero time, this tool saves a cache of the sha1 and the
// mtimes of all the files in the tree, and uses it the next time. If the
// cache file exists and no mtimes have changed, the sha1 of the previous run
// is returned. If the tool actually did the sha1'ing, it updates the cache
// file for next time.
+ |
#include <windows.h>
#include <wincrypt.h>

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <iostream>
#include <stack>
#include <string>
#include <vector>
+ |
+#pragma warning(disable : 4127) // Conditional expression is constant. |
+#pragma warning(disable : 4706) // Assignment within conditional. |
+#pragma warning(disable : 4800) // Forcing value to bool. |
+ |
+#define SHA1LEN 20 |
+#define SHA1LEN_HEXBYTES (SHA1LEN * 2) |
+ |
+using namespace std; |
+ |
// A file path paired with its last-write time. Used both as input to the
// tree hash and as the record format of the mtime cache.
struct FileAndTimestamp {
  string file;          // Relative to the scanned root, lower-cased by GetFileList.
  FILETIME timestamp;   // ftLastWriteTime from the directory enumeration.
};
+ |
// Aborts the process with the failed expression, source line, and
// GetLastError() when |condition| is false.
// Note: no semicolon after "while (0)" -- the caller supplies it, so that
// "if (a) CHECK(b); else ..." parses as a single statement. GetLastError()
// returns a DWORD (unsigned long), hence %lu.
#define CHECK(condition)                   \
  do {                                     \
    if (!(condition)) {                    \
      fprintf(stderr,                      \
              "%s failed, line %d: %lu\n", \
              #condition,                  \
              __LINE__,                    \
              GetLastError());             \
      exit(1);                             \
    }                                      \
  } while (0)
+ |
// Folds the given file's name, then its entire contents, into |hash|.
// Any I/O or hashing failure aborts via CHECK.
void UpdateHashWithFile(const string& filename,
                        HCRYPTHASH hash) {
  HANDLE file = CreateFile(filename.c_str(),
                           GENERIC_READ,
                           FILE_SHARE_READ,
                           NULL,
                           OPEN_EXISTING,
                           FILE_FLAG_SEQUENTIAL_SCAN,
                           NULL);
  CHECK(file != INVALID_HANDLE_VALUE);

  // Hash the name first so that renames change the overall digest.
  const BYTE* name_bytes = reinterpret_cast<const BYTE*>(filename.c_str());
  CHECK(CryptHashData(hash, name_bytes, static_cast<DWORD>(filename.size()), 0));

  // Then hash the contents in 32K chunks until EOF.
  BYTE chunk[1 << 15];
  for (;;) {
    DWORD chunk_size = 0;
    BOOL ok = ReadFile(file, chunk, sizeof(chunk), &chunk_size, NULL);
    CHECK(ok);
    if (chunk_size == 0)
      break;  // EOF.
    CHECK(CryptHashData(hash, chunk, chunk_size, 0));
  }

  CloseHandle(file);
}
+ |
// Use CryptoAPI to calculate SHA1 of a file tree (both the file names and
// the file contents). This isn't particularly on the fast path because in
// our standard usage there should always be a matching timestamp cache
// (except when the toolchain is rev'd). Returns the SHA1 as a hex string.
string CalculateDigestOfTree(const string& root,
                             const vector<FileAndTimestamp>& files) {
  HCRYPTPROV prov = 0;
  HCRYPTHASH hash = 0;

  // Acquire a crypto provider and create a SHA1 hash object.
  CHECK(CryptAcquireContext(
      &prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT));
  CHECK(CryptCreateHash(prov, CALG_SHA1, 0, 0, &hash));

  // Fold every file (name and contents), in the given order, into the hash.
  for (size_t index = 0; index < files.size(); ++index)
    UpdateHashWithFile(root + "\\" + files[index].file, hash);

  // Extract the raw digest and render it as lower-case hex.
  BYTE raw_digest[SHA1LEN];
  DWORD digest_size = SHA1LEN;
  CHECK(CryptGetHashParam(hash, HP_HASHVAL, raw_digest, &digest_size, 0));

  const char kHexDigits[] = "0123456789abcdef";
  string hex;
  for (DWORD index = 0; index < digest_size; ++index) {
    hex.push_back(kHexDigits[raw_digest[index] >> 4]);
    hex.push_back(kHexDigits[raw_digest[index] & 0xf]);
  }

  CryptDestroyHash(hash);
  CryptReleaseContext(prov, 0);

  return hex;
}
+ |
// Gets a list of files under the specified root that are not marked
// hidden/system. File paths returned are relative to given root, converted to
// lower case, and the entire result is sorted. This is to make the hash
// consistent when the file names as well as the contents are included in the
// digest.
void GetFileList(string root, vector<FileAndTimestamp>* files) {
  HANDLE find_handle = INVALID_HANDLE_VALUE;
  WIN32_FIND_DATA ffd;
  string spec;
  // Iterative depth-first traversal: directories still to be enumerated.
  stack<string> directories;
  const string original_root = root;
  // Length of the root prefix; stripped (plus the '\\') from each result
  // below to make paths relative.
  size_t original_length = original_root.size();
  FINDEX_INFO_LEVELS info_level_id = FindExInfoBasic;
  DWORD additional_flags = FIND_FIRST_EX_LARGE_FETCH;

  // info_level_id and additional_flags need to be 0 on <= Vista.
  OSVERSIONINFO version_info = {};
  version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
#pragma warning(push)
#pragma warning(disable : 4996)  // GetVersionEx is deprecated.
  // 6.0 is Vista; anything below major 6, or exactly 6.0, gets the
  // conservative (pre-Win7) FindFirstFileEx arguments.
  if (!GetVersionEx(&version_info) ||
      (version_info.dwMajorVersion < 6 || (version_info.dwMajorVersion == 6 &&
                                           version_info.dwMinorVersion == 0))) {
    info_level_id = static_cast<FINDEX_INFO_LEVELS>(0);
    additional_flags = 0;
  }
#pragma warning(pop)

  directories.push(root);
  files->clear();

  while (!directories.empty()) {
    root = directories.top();
    spec = root + "\\*";
    directories.pop();

    find_handle = FindFirstFileEx(spec.c_str(),
                                  info_level_id,
                                  &ffd,
                                  FindExSearchNameMatch,
                                  NULL,
                                  additional_flags);
    CHECK(find_handle != INVALID_HANDLE_VALUE);

    do {
      // Skip the "." and ".." pseudo-entries.
      if (strcmp(ffd.cFileName, ".") != 0 && strcmp(ffd.cFileName, "..") != 0) {
        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
          // Defer subdirectories; they're enumerated by later iterations.
          directories.push(root + "\\" + ffd.cFileName);
        } else {
          // Only include ordinary files (not hidden/system), per the
          // function comment above.
          if ((ffd.dwFileAttributes &
               (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
            // +1 also strips the separator after the root.
            string relative_to_root =
                (root + "\\" + ffd.cFileName).substr(original_length + 1);
            // Lower-case so the digest doesn't depend on on-disk casing.
            transform(relative_to_root.begin(),
                      relative_to_root.end(),
                      relative_to_root.begin(),
                      ::tolower);
            FileAndTimestamp file_data;
            file_data.file = relative_to_root;
            file_data.timestamp = ffd.ftLastWriteTime;
            files->push_back(file_data);
          }
        }
      }
    } while (FindNextFile(find_handle, &ffd) != 0);
    // FindNextFile returning 0 for any reason other than exhausting the
    // directory is an error.
    CHECK(GetLastError() == ERROR_NO_MORE_FILES);

    FindClose(find_handle);
    find_handle = INVALID_HANDLE_VALUE;
  }

  // Sort by relative path so the order (and thus the digest) is stable.
  sort(files->begin(),
       files->end(),
       [](const FileAndTimestamp& a, const FileAndTimestamp& b) {
         return a.file < b.file;
       });
}
+ |
// Parses the cache body (see SaveTimestamps for the format) from an open
// |file| handle into |files| and |digest|. Returns false on any truncated or
// malformed data. No CHECKs here: the file contents could be garbage and in
// that case we want to ignore it, not abort.
static bool ReadTimestampCache(HANDLE file,
                               vector<FileAndTimestamp>* files,
                               string* digest) {
  DWORD bytes_read;

  // Fixed-size hex digest first.
  char digest_buffer[SHA1LEN_HEXBYTES];
  if (!ReadFile(file, digest_buffer, sizeof(digest_buffer), &bytes_read, NULL))
    return false;
  if (sizeof(digest_buffer) != bytes_read)
    return false;
  // The digest is stored without a NUL terminator, so the length must be
  // passed explicitly. (Assigning the raw char array would scan for a NUL
  // past the end of the buffer.)
  digest->assign(digest_buffer, sizeof(digest_buffer));

  // Then (timestamp, name length, name) records until EOF.
  for (;;) {
    FileAndTimestamp file_data;
    BOOL result = ReadFile(file,
                           &file_data.timestamp,
                           sizeof(file_data.timestamp),
                           &bytes_read,
                           NULL);
    if (result && bytes_read == 0) {
      // At EOF.
      return true;
    }
    if (bytes_read != sizeof(file_data.timestamp))
      return false;

    WORD filename_length;
    if (!ReadFile(
            file, &filename_length, sizeof(filename_length), &bytes_read, NULL))
      return false;
    if (bytes_read != sizeof(filename_length))
      return false;

    char filename_buffer[1 << 15];
    // A WORD length can be up to 64K while the buffer is 32K; reject such
    // (corrupt) records instead of overflowing the stack.
    if (filename_length > sizeof(filename_buffer))
      return false;
    if (!ReadFile(file, filename_buffer, filename_length, &bytes_read, NULL))
      return false;
    if (bytes_read != filename_length)
      return false;
    file_data.file = string(filename_buffer, filename_length);
    files->push_back(file_data);
  }
}

// Loads the timestamp cache written by SaveTimestamps. On success fills in
// |files| and |digest| and returns true. On any failure returns false with
// |files| left empty, which causes the caller to re-hash.
bool LoadTimestamps(const string& filename,
                    vector<FileAndTimestamp>* files,
                    string* digest) {
  files->clear();
  HANDLE file = CreateFile(filename.c_str(),
                           GENERIC_READ,
                           FILE_SHARE_READ,
                           NULL,
                           OPEN_EXISTING,
                           FILE_FLAG_SEQUENTIAL_SCAN,
                           NULL);
  // Not existing is fine, emptying from above will cause a re-hash.
  if (file == INVALID_HANDLE_VALUE)
    return false;

  // Parse in a helper so the handle is closed on every exit path (it was
  // previously leaked whenever the cache was malformed).
  bool ok = ReadTimestampCache(file, files, digest);
  CloseHandle(file);
  if (!ok)
    files->clear();  // Don't hand back a partially parsed list.
  return ok;
}
+ |
// Writes the mtime cache to |filename|: the hex digest (SHA1LEN_HEXBYTES
// bytes, no terminator), then per file a 64-bit FILETIME, a 16-bit name
// length, and the name bytes (no terminator). Any failure aborts via CHECK;
// LoadTimestamps is the reader.
void SaveTimestamps(const string& digest,
                    const vector<FileAndTimestamp>& files,
                    const string& filename) {
  HANDLE file = CreateFile(
      filename.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
  CHECK(file != INVALID_HANDLE_VALUE);
  DWORD bytes_written;
  CHECK(digest.size() == SHA1LEN_HEXBYTES);
  CHECK(WriteFile(file,
                  digest.c_str(),
                  static_cast<DWORD>(digest.size()),
                  &bytes_written,
                  NULL));
  CHECK(bytes_written == digest.size());
  for (vector<FileAndTimestamp>::const_iterator i(files.begin());
       i != files.end();
       ++i) {
    // 64 bits of timestamp.
    CHECK(WriteFile(
        file, &i->timestamp, sizeof(i->timestamp), &bytes_written, NULL));
    CHECK(bytes_written == sizeof(i->timestamp));

    // 16 bits of filename length. A path longer than 64K would silently
    // truncate and corrupt the cache format, so fail loudly instead.
    CHECK(i->file.size() <= 0xffff);
    WORD filename_length = static_cast<WORD>(i->file.size());
    CHECK(WriteFile(file,
                    &filename_length,
                    sizeof(filename_length),
                    &bytes_written,
                    NULL));
    CHECK(bytes_written == sizeof(filename_length));

    // Filename (no terminator). This write's size was previously unchecked,
    // unlike every other write above.
    CHECK(WriteFile(
        file, i->file.c_str(), filename_length, &bytes_written, NULL));
    CHECK(bytes_written == filename_length);
  }
  CloseHandle(file);
}
+ |
// Entry point: prints the (possibly cached) tree digest for argv[1] and
// refreshes the mtime cache at argv[2] when a re-hash was needed.
int main(int argc, char* argv[]) {
  if (argc < 3) {
    fprintf(stderr, "usage: treehash root_dir timestamps_file\n\n");
    fprintf(
        stderr,
        "prints hash of directory tree rooted at |root_dir| to stdout, and \n"
        "saves mtime cache to |timestamps_file|.\n");
    return 1;
  }

  // Enumerate the current state of the tree.
  vector<FileAndTimestamp> files;
  GetFileList(argv[1], &files);

  // If the cached snapshot matches exactly (same count, same names, same
  // mtimes, in order), the previous digest is still valid: print it and skip
  // the expensive hashing entirely.
  vector<FileAndTimestamp> cached_files;
  string cached_digest;
  if (LoadTimestamps(argv[2], &cached_files, &cached_digest) &&
      cached_files.size() == files.size()) {
    bool unchanged = true;
    for (size_t i = 0; unchanged && i < files.size(); ++i) {
      unchanged =
          cached_files[i].file == files[i].file &&
          cached_files[i].timestamp.dwLowDateTime ==
              files[i].timestamp.dwLowDateTime &&
          cached_files[i].timestamp.dwHighDateTime ==
              files[i].timestamp.dwHighDateTime;
    }
    if (unchanged) {
      printf("%s\n", cached_digest.c_str());
      return 0;
    }
  }

  // No usable cache (missing, corrupt, or stale): rehash and refresh it.
  string digest = CalculateDigestOfTree(argv[1], files);
  SaveTimestamps(digest, files, argv[2]);
  printf("%s\n", digest.c_str());
  return 0;
}