Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Unified Diff: win_toolchain/treehash/treehash.cc

Issue 228093002: Add treehash tool for win_toolchain (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: tidying Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « win_toolchain/treehash/m.bat ('k') | win_toolchain/treehash/treehash.exe » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: win_toolchain/treehash/treehash.cc
diff --git a/win_toolchain/treehash/treehash.cc b/win_toolchain/treehash/treehash.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d9528a1b6d0880362a880863e1ca478c274947b1
--- /dev/null
+++ b/win_toolchain/treehash/treehash.cc
@@ -0,0 +1,337 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The equivalent Python program
+// (http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/win_toolchain/get_toolchain_if_necessary.py?revision=259915)
+// and treehash.py here takes about 1s on a fast, hot SSD to hash the windows
+// toolchain tree. This is annoying when used inside GN as its runtime is
+// otherwise ~0s.
+
+// What this tool does:
+//
+// Recursively calculates the SHA-1 of a directory tree. Because the actual
+// hashing takes non-zero time, it saves a cache containing the digest and the
+// mtime of every file in the tree, and consults that cache on the next run. If
+// the cache file exists and neither the file list nor any mtime has changed,
+// the digest from the previous run is returned. If it actually had to hash,
+// it updates the cache file for next time.
+
+#include <windows.h>
+#include <wincrypt.h>
+
+#include <algorithm>
+#include <iostream>
+#include <stack>
+#include <string>
+#include <vector>
+
+#pragma warning(disable : 4127) // Conditional expression is constant.
+#pragma warning(disable : 4706) // Assignment within conditional.
+#pragma warning(disable : 4800) // Forcing value to bool.
+
+// Size in bytes of the raw SHA-1 digest buffer.
+#define SHA1LEN 20
+// Length of the digest when written as hex text (two characters per byte).
+// This count does not include a terminating NUL.
+#define SHA1LEN_HEXBYTES (SHA1LEN * 2)
+
+using namespace std;
+
+// One entry of the file list: a file name paired with a timestamp. Entries
+// come either from scanning the tree (GetFileList) or from the on-disk cache
+// (LoadTimestamps), and main() compares the two lists field by field.
+struct FileAndTimestamp {
+ // File path. GetFileList stores it relative to the scanned root and
+ // lower-cased.
+ string file;
+ // Timestamp of the file. GetFileList fills it from ftLastWriteTime.
+ FILETIME timestamp;
+};
+
+// Fatal assertion. If |condition| evaluates to false, prints the condition
+// text, the source line and the calling thread's GetLastError() value to
+// stderr, then terminates the process with exit code 1.
+//
+// The expansion deliberately has no trailing semicolon, so that
+// "CHECK(x);" is exactly one statement and remains safe inside an unbraced
+// if/else. GetLastError() returns a DWORD (unsigned long), hence the %lu
+// conversion and the explicit cast.
+#define CHECK(condition)                                        \
+  do {                                                          \
+    if (!(condition)) {                                         \
+      fprintf(stderr,                                           \
+              "%s failed, line %d: %lu\n",                      \
+              #condition,                                       \
+              __LINE__,                                         \
+              static_cast<unsigned long>(GetLastError()));      \
+      exit(1);                                                  \
+    }                                                           \
+  } while (0)
+
+// Feeds one file into |hash|: first the bytes of |filename| exactly as
+// passed in, then the file's contents. Every failure (open, read, hash
+// update) is fatal via CHECK.
+void UpdateHashWithFile(const string& filename,
+                        HCRYPTHASH hash) {
+  HANDLE file = CreateFile(filename.c_str(),
+                           GENERIC_READ,
+                           FILE_SHARE_READ,
+                           NULL,
+                           OPEN_EXISTING,
+                           FILE_FLAG_SEQUENTIAL_SCAN,
+                           NULL);
+  CHECK(file != INVALID_HANDLE_VALUE);
+
+  // The name goes in ahead of the data.
+  CHECK(CryptHashData(hash,
+                      reinterpret_cast<const BYTE*>(filename.c_str()),
+                      static_cast<DWORD>(filename.size() * sizeof(char)),
+                      0));
+
+  // Stream the contents through a fixed-size buffer. A successful read that
+  // returns zero bytes marks the end of the file.
+  BYTE file_data[1 << 15];
+  for (;;) {
+    DWORD bytes_read = 0;
+    BOOL result =
+        ReadFile(file, file_data, sizeof(file_data), &bytes_read, NULL);
+    CHECK(result);
+    if (bytes_read == 0)
+      break;
+    CHECK(CryptHashData(hash, file_data, bytes_read, 0));
+  }
+
+  CloseHandle(file);
+}
+
+// Computes a single SHA-1 over every entry of |files| using CryptoAPI. For
+// each entry, the path "root\file" and then that file's contents are added to
+// the hash, in the order the entries appear in |files|. Returns the digest as
+// a lower-case hex string. This is not on the fast path: in standard usage a
+// matching timestamp cache exists (except when the toolchain is rev'd).
+// Any CryptoAPI failure is fatal via CHECK.
+string CalculateDigestOfTree(const string& root,
+                             const vector<FileAndTimestamp>& files) {
+  // Acquire the provider, then a SHA-1 hash object from it.
+  HCRYPTPROV prov = 0;
+  CHECK(CryptAcquireContext(
+      &prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT));
+  HCRYPTHASH hash = 0;
+  CHECK(CryptCreateHash(prov, CALG_SHA1, 0, 0, &hash));
+
+  for (const FileAndTimestamp& entry : files)
+    UpdateHashWithFile(root + "\\" + entry.file, hash);
+
+  // Pull out the raw digest; the handles are not needed after this.
+  BYTE hash_result[SHA1LEN];
+  DWORD hash_bytes = SHA1LEN;
+  CHECK(CryptGetHashParam(hash, HP_HASHVAL, hash_result, &hash_bytes, 0));
+  CryptDestroyHash(hash);
+  CryptReleaseContext(prov, 0);
+
+  // Hex-encode, high nibble first.
+  const char hex_digits[] = "0123456789abcdef";
+  string hex;
+  for (DWORD n = 0; n < hash_bytes; ++n) {
+    const BYTE value = hash_result[n];
+    hex.push_back(hex_digits[value >> 4]);
+    hex.push_back(hex_digits[value & 0xf]);
+  }
+  return hex;
+}
+
+// Gets a list of files under the specified root that are not marked
+// hidden/system. File paths returned are relative to given root, converted to
+// lower case, and the entire result is sorted by path. This is to make the
+// hash consistent when the file names as well as the contents are included in
+// the digest.
+//
+// |files| is cleared first. Each entry's timestamp is the file's last-write
+// time as reported by the directory enumeration. Sub-directories are always
+// descended into; the hidden/system filter applies to files only. Any
+// enumeration failure is fatal via CHECK.
+void GetFileList(string root, vector<FileAndTimestamp>* files) {
+  HANDLE find_handle = INVALID_HANDLE_VALUE;
+  WIN32_FIND_DATA ffd;
+  string spec;
+  stack<string> directories;
+  // Length of the prefix (the original root) to strip from every full path.
+  const size_t original_length = root.size();
+  FINDEX_INFO_LEVELS info_level_id = FindExInfoBasic;
+  DWORD additional_flags = FIND_FIRST_EX_LARGE_FETCH;
+
+  // info_level_id and additional_flags need to be 0 on <= Vista.
+  OSVERSIONINFO version_info = {};
+  version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+#pragma warning(push)
+#pragma warning(disable : 4996)  // GetVersionEx is deprecated.
+  if (!GetVersionEx(&version_info) ||
+      (version_info.dwMajorVersion < 6 || (version_info.dwMajorVersion == 6 &&
+                                           version_info.dwMinorVersion == 0))) {
+    info_level_id = static_cast<FINDEX_INFO_LEVELS>(0);
+    additional_flags = 0;
+  }
+#pragma warning(pop)
+
+  directories.push(root);
+  files->clear();
+
+  // Iterative depth-first walk; |root| is reused as "current directory".
+  while (!directories.empty()) {
+    root = directories.top();
+    spec = root + "\\*";
+    directories.pop();
+
+    find_handle = FindFirstFileEx(spec.c_str(),
+                                  info_level_id,
+                                  &ffd,
+                                  FindExSearchNameMatch,
+                                  NULL,
+                                  additional_flags);
+    CHECK(find_handle != INVALID_HANDLE_VALUE);
+
+    do {
+      if (strcmp(ffd.cFileName, ".") == 0 || strcmp(ffd.cFileName, "..") == 0)
+        continue;
+
+      const string full_path = root + "\\" + ffd.cFileName;
+      if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+        directories.push(full_path);
+        continue;
+      }
+      if (ffd.dwFileAttributes &
+          (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) {
+        continue;
+      }
+
+      FileAndTimestamp file_data;
+      // +1 also drops the separator that follows the original root.
+      file_data.file = full_path.substr(original_length + 1);
+      // tolower() has undefined behaviour for negative arguments other than
+      // EOF, and plain char may be negative for non-ASCII path bytes, so go
+      // through unsigned char.
+      transform(file_data.file.begin(),
+                file_data.file.end(),
+                file_data.file.begin(),
+                [](char c) {
+                  return static_cast<char>(
+                      ::tolower(static_cast<unsigned char>(c)));
+                });
+      file_data.timestamp = ffd.ftLastWriteTime;
+      files->push_back(file_data);
+    } while (FindNextFile(find_handle, &ffd) != 0);
+    // The enumeration must have stopped because it ran out of entries, not
+    // because of some other error.
+    CHECK(GetLastError() == ERROR_NO_MORE_FILES);
+
+    FindClose(find_handle);
+    find_handle = INVALID_HANDLE_VALUE;
+  }
+
+  sort(files->begin(),
+       files->end(),
+       [](const FileAndTimestamp& a, const FileAndTimestamp& b) {
+         return a.file < b.file;
+       });
+}
+
+// Reads exactly |size| bytes from |file| into |buffer|. Returns false on a
+// read error or a short read.
+static bool ReadExactly(HANDLE file, void* buffer, DWORD size) {
+  DWORD bytes_read = 0;
+  if (!ReadFile(file, buffer, size, &bytes_read, NULL))
+    return false;
+  return bytes_read == size;
+}
+
+// Parses the contents of an already opened timestamp cache (see
+// SaveTimestamps for the format) into |digest| and |files|. Returns false as
+// soon as the data turns out to be truncated or unreadable.
+static bool ParseTimestampCache(HANDLE file,
+                                vector<FileAndTimestamp>* files,
+                                string* digest) {
+  // The digest is stored as SHA1LEN_HEXBYTES characters with no terminator,
+  // so it has to be copied with an explicit length.
+  char digest_buffer[SHA1LEN_HEXBYTES];
+  if (!ReadExactly(file, digest_buffer, sizeof(digest_buffer)))
+    return false;
+  digest->assign(digest_buffer, sizeof(digest_buffer));
+
+  for (;;) {
+    FileAndTimestamp file_data;
+
+    // A successful zero-byte read at a record boundary is a clean EOF;
+    // anything else short of a full timestamp means the file is corrupt.
+    DWORD bytes_read = 0;
+    if (!ReadFile(file,
+                  &file_data.timestamp,
+                  sizeof(file_data.timestamp),
+                  &bytes_read,
+                  NULL)) {
+      return false;
+    }
+    if (bytes_read == 0)
+      return true;
+    if (bytes_read != sizeof(file_data.timestamp))
+      return false;
+
+    WORD filename_length;
+    if (!ReadExactly(file, &filename_length, sizeof(filename_length)))
+      return false;
+
+    // Read straight into a string sized from the stored length, so that no
+    // length value found in a garbage file can overrun a fixed-size buffer.
+    file_data.file.assign(filename_length, '\0');
+    if (filename_length != 0 &&
+        !ReadExactly(file, &file_data.file[0], filename_length)) {
+      return false;
+    }
+    files->push_back(file_data);
+  }
+}
+
+// Loads the timestamp cache stored in |filename|. On success returns true
+// with |digest| set to the cached hex digest and |files| holding the cached
+// (file, timestamp) entries in stored order. Returns false, with |files|
+// empty, when the file cannot be opened or its contents are
+// truncated/garbage; the caller is expected to re-hash in that case.
+bool LoadTimestamps(const string& filename,
+                    vector<FileAndTimestamp>* files,
+                    string* digest) {
+  files->clear();
+  HANDLE file = CreateFile(filename.c_str(),
+                           GENERIC_READ,
+                           FILE_SHARE_READ,
+                           NULL,
+                           OPEN_EXISTING,
+                           FILE_FLAG_SEQUENTIAL_SCAN,
+                           NULL);
+  // Not existing is fine, emptying from above will cause a re-hash.
+  // No CHECKs in this function as the file contents could be garbage and in
+  // that case we want to ignore it.
+  if (file == INVALID_HANDLE_VALUE)
+    return false;
+
+  const bool loaded = ParseTimestampCache(file, files, digest);
+  // Close on every path, including parse failures.
+  CloseHandle(file);
+  if (!loaded)
+    files->clear();
+  return loaded;
+}
+
+// Writes the timestamp cache to |filename|, replacing any existing file.
+//
+// Format (binary, no padding, no terminators):
+//   - SHA1LEN_HEXBYTES characters: |digest| as hex text.
+//   - then, for every entry of |files| in order:
+//       * the FILETIME timestamp, written as its in-memory bytes (64 bits)
+//       * a 16-bit length of the file name
+//       * that many bytes of file name
+//
+// File names longer than 65535 bytes are truncated to the low 16 bits of
+// their length. Every failure is fatal via CHECK.
+void SaveTimestamps(const string& digest,
+                    const vector<FileAndTimestamp>& files,
+                    const string& filename) {
+  HANDLE file = CreateFile(
+      filename.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
+  CHECK(file != INVALID_HANDLE_VALUE);
+  DWORD bytes_written;
+  CHECK(digest.size() == SHA1LEN_HEXBYTES);
+  CHECK(WriteFile(file,
+                  digest.c_str(),
+                  static_cast<DWORD>(digest.size()),
+                  &bytes_written,
+                  NULL));
+  CHECK(bytes_written == digest.size());
+  for (vector<FileAndTimestamp>::const_iterator i(files.begin());
+       i != files.end();
+       ++i) {
+    // 64 bits of timestamp.
+    CHECK(WriteFile(
+        file, &i->timestamp, sizeof(i->timestamp), &bytes_written, NULL));
+    CHECK(bytes_written == sizeof(i->timestamp));
+
+    // 16 bits of filename length.
+    WORD filename_length = static_cast<WORD>(i->file.size());
+    CHECK(WriteFile(file,
+                    &filename_length,
+                    sizeof(filename_length),
+                    &bytes_written,
+                    NULL));
+    CHECK(bytes_written == sizeof(filename_length));
+
+    // Filename. Verified like every other write so that a short write
+    // cannot silently leave a corrupt cache behind.
+    CHECK(WriteFile(
+        file, i->file.c_str(), filename_length, &bytes_written, NULL));
+    CHECK(bytes_written == filename_length);
+  }
+  CloseHandle(file);
+}
+
+// Entry point: treehash root_dir timestamps_file
+//
+// Scans |root_dir|, and if the cache in |timestamps_file| lists exactly the
+// same files with the same timestamps, prints the cached digest. Otherwise
+// hashes the tree, rewrites the cache, and prints the fresh digest. Returns 1
+// when too few arguments are given, 0 otherwise.
+int main(int argc, char* argv[]) {
+  if (argc < 3) {
+    fprintf(stderr, "usage: treehash root_dir timestamps_file\n\n");
+    fprintf(
+        stderr,
+        "prints hash of directory tree rooted at |root_dir| to stdout, and \n"
+        "saves mtime cache to |timestamps_file|.\n");
+    return 1;
+  }
+  const char* const root_dir = argv[1];
+  const char* const cache_path = argv[2];
+
+  vector<FileAndTimestamp> current;
+  GetFileList(root_dir, &current);
+
+  // The cache is usable only if it loads and matches the scan entry for
+  // entry: same count, same names, same timestamps, same order.
+  vector<FileAndTimestamp> cached;
+  string cached_digest;
+  bool cache_valid = LoadTimestamps(cache_path, &cached, &cached_digest) &&
+                     cached.size() == current.size();
+  for (size_t n = 0; cache_valid && n < current.size(); ++n) {
+    const FileAndTimestamp& before = cached[n];
+    const FileAndTimestamp& now = current[n];
+    cache_valid =
+        before.file == now.file &&
+        before.timestamp.dwLowDateTime == now.timestamp.dwLowDateTime &&
+        before.timestamp.dwHighDateTime == now.timestamp.dwHighDateTime;
+  }
+
+  if (cache_valid) {
+    printf("%s\n", cached_digest.c_str());
+    return 0;
+  }
+
+  // Cache missing, unreadable or stale: hash for real and refresh the cache.
+  const string fresh_digest = CalculateDigestOfTree(root_dir, current);
+  SaveTimestamps(fresh_digest, current, cache_path);
+  printf("%s\n", fresh_digest.c_str());
+  return 0;
+}
« no previous file with comments | « win_toolchain/treehash/m.bat ('k') | win_toolchain/treehash/treehash.exe » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698