| Index: win_toolchain/treehash/treehash.cc
|
| diff --git a/win_toolchain/treehash/treehash.cc b/win_toolchain/treehash/treehash.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d9528a1b6d0880362a880863e1ca478c274947b1
|
| --- /dev/null
|
| +++ b/win_toolchain/treehash/treehash.cc
|
| @@ -0,0 +1,337 @@
|
| +// Copyright 2014 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +// The equivalent Python program
|
| +// (http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/win_toolchain/get_toolchain_if_necessary.py?revision=259915)
|
| +// and the treehash.py here take about 1s on a fast, hot SSD to hash the
|
| +// Windows toolchain tree. That is annoying when run from GN, whose own
|
| +// runtime is otherwise ~0s.
|
| +
|
| +// What this tool does:
|
| +//
|
| +// Calculates (recursively) the SHA-1 of a directory tree. Because the actual
|
| +// hashing takes non-trivial time, it also saves a cache of the SHA-1 and the
|
| +// mtime of every file in the tree and consults it on the next run. If the
|
| +// cache file exists and no mtimes have changed, the SHA-1 of the previous
|
| +// run is returned. If the tree did have to be rehashed, the cache file is
|
| +// rewritten for next time.
|
| +
|
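| +// Usage: treehash <root_dir> <timestamps_file>
|
| +// Prints the hex SHA-1 of the tree rooted at |root_dir| to stdout and saves
|
| +// the mtime cache to |timestamps_file|.
|
| +
|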
| +#include <windows.h>
|
| +#include <wincrypt.h>
|
| +
|
| +#include <algorithm>
|
| +#include <iostream>
|
| +#include <stack>
|
| +#include <string>
|
| +#include <vector>
|
| +
|
| +#pragma warning(disable : 4127) // Conditional expression is constant.
|
| +#pragma warning(disable : 4706) // Assignment within conditional.
|
| +#pragma warning(disable : 4800) // Forcing value to bool.
|
| +
|
| +#define SHA1LEN 20
|
| +#define SHA1LEN_HEXBYTES (SHA1LEN * 2)
|
| +
|
| +using namespace std;
|
| +
|
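| +// A file path relative to the tree root (lowercased) and the file's
|
| +// last-write time, as produced by GetFileList or loaded from the cache.
|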
| +struct FileAndTimestamp {
|
| + string file;
|
| + FILETIME timestamp;
|
| +};
|
| +
|
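| +// Prints the failed condition, the source line, and GetLastError(), then
|
| +// exits. Any failure in this tool is treated as fatal.
|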
| +#define CHECK(condition) \
|
| + do { \
|
| + if (!(condition)) { \
|
| + fprintf(stderr, \
|
| +              "%s failed, line %d: %lu\n", \
|
| + #condition, \
|
| + __LINE__, \
|
| + GetLastError()); \
|
| + exit(1); \
|
| + } \
|
| +  } while (0)
|
| +
|
| +// Adds the name and contents of the given file to the hash.
|
| +void UpdateHashWithFile(const string& filename,
|
| + HCRYPTHASH hash) {
|
| + HANDLE file = CreateFile(filename.c_str(),
|
| + GENERIC_READ,
|
| + FILE_SHARE_READ,
|
| + NULL,
|
| + OPEN_EXISTING,
|
| + FILE_FLAG_SEQUENTIAL_SCAN,
|
| + NULL);
|
| + CHECK(file != INVALID_HANDLE_VALUE);
|
| +
|
| + // Filename.
|
| + CHECK(CryptHashData(hash,
|
| + reinterpret_cast<const BYTE*>(filename.c_str()),
|
| + static_cast<DWORD>(filename.size() * sizeof(char)),
|
| + 0));
|
| +
|
| + // File data.
|
| + BOOL result;
|
| + BYTE file_data[1 << 15];
|
| + DWORD bytes_read = 0;
|
| + while (result =
|
| + ReadFile(file, file_data, sizeof(file_data), &bytes_read, NULL)) {
|
| + if (bytes_read == 0)
|
| + break;
|
| + CHECK(CryptHashData(hash, file_data, bytes_read, 0));
|
| + }
|
| + CHECK(result);
|
| +
|
| + CloseHandle(file);
|
| +}
|
| +
|
| +// Uses CryptoAPI to calculate the SHA-1 of a file tree (covering both the
|
| +// file names and the file contents). This is not on the fast path: in normal
|
| +// usage there should always be a matching timestamp cache, except when the
|
| +// toolchain is revved. Returns the SHA-1 as a hex string.
|
| +string CalculateDigestOfTree(const string& root,
|
| + const vector<FileAndTimestamp>& files) {
|
| + HCRYPTPROV prov = 0;
|
| + HCRYPTHASH hash = 0;
|
| +
|
| + // Get handle to the crypto provider
|
| + CHECK(CryptAcquireContext(
|
| + &prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT));
|
| + CHECK(CryptCreateHash(prov, CALG_SHA1, 0, 0, &hash));
|
| +
|
| + for (vector<FileAndTimestamp>::const_iterator i(files.begin());
|
| + i != files.end();
|
| + ++i) {
|
| + UpdateHashWithFile(root + "\\" + i->file, hash);
|
| + }
|
| +
|
| + DWORD hash_bytes = SHA1LEN;
|
| + BYTE hash_result[SHA1LEN];
|
| + CHECK(CryptGetHashParam(hash, HP_HASHVAL, hash_result, &hash_bytes, 0));
|
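| +  // Convert the raw 20-byte digest into a lowercase hex string.
|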
| + string result;
|
| + const char digits[] = "0123456789abcdef";
|
| + for (DWORD i = 0; i < hash_bytes; ++i) {
|
| + result += digits[hash_result[i] >> 4];
|
| + result += digits[hash_result[i] & 0xf];
|
| + }
|
| +
|
| + CryptDestroyHash(hash);
|
| + CryptReleaseContext(prov, 0);
|
| +
|
| + return result;
|
| +}
|
| +
|
| +// Gets a list of files under the specified root that are not marked
|
| +// hidden/system. The returned paths are relative to the given root, converted
|
| +// to lower case, and sorted. Lowercasing and sorting keep the hash stable,
|
| +// since the file names as well as the contents go into the digest.
|
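| +// The traversal is iterative, using an explicit stack of directories.
|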
| +void GetFileList(string root, vector<FileAndTimestamp>* files) {
|
| + HANDLE find_handle = INVALID_HANDLE_VALUE;
|
| + WIN32_FIND_DATA ffd;
|
| + string spec;
|
| + stack<string> directories;
|
| + const string original_root = root;
|
| + size_t original_length = original_root.size();
|
| + FINDEX_INFO_LEVELS info_level_id = FindExInfoBasic;
|
| + DWORD additional_flags = FIND_FIRST_EX_LARGE_FETCH;
|
| +
|
| +  // FindExInfoBasic and FIND_FIRST_EX_LARGE_FETCH are only available on
|
| +  // Windows 7 and later, so both need to fall back to 0 on Vista and earlier.
|
| + OSVERSIONINFO version_info = {};
|
| + version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
|
| +#pragma warning(push)
|
| +#pragma warning(disable : 4996) // GetVersionEx is deprecated.
|
| + if (!GetVersionEx(&version_info) ||
|
| + (version_info.dwMajorVersion < 6 || (version_info.dwMajorVersion == 6 &&
|
| + version_info.dwMinorVersion == 0))) {
|
| +    info_level_id = FindExInfoStandard;
|
| + additional_flags = 0;
|
| + }
|
| +#pragma warning(pop)
|
| +
|
| + directories.push(root);
|
| + files->clear();
|
| +
|
| + while (!directories.empty()) {
|
| + root = directories.top();
|
| + spec = root + "\\*";
|
| + directories.pop();
|
| +
|
| + find_handle = FindFirstFileEx(spec.c_str(),
|
| + info_level_id,
|
| + &ffd,
|
| + FindExSearchNameMatch,
|
| + NULL,
|
| + additional_flags);
|
| + CHECK(find_handle != INVALID_HANDLE_VALUE);
|
| +
|
| + do {
|
| + if (strcmp(ffd.cFileName, ".") != 0 && strcmp(ffd.cFileName, "..") != 0) {
|
| + if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
|
| + directories.push(root + "\\" + ffd.cFileName);
|
| + } else {
|
| + if ((ffd.dwFileAttributes &
|
| + (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
|
| + string relative_to_root =
|
| + (root + "\\" + ffd.cFileName).substr(original_length + 1);
|
| + transform(relative_to_root.begin(),
|
| + relative_to_root.end(),
|
| + relative_to_root.begin(),
|
| + ::tolower);
|
| + FileAndTimestamp file_data;
|
| + file_data.file = relative_to_root;
|
| + file_data.timestamp = ffd.ftLastWriteTime;
|
| + files->push_back(file_data);
|
| + }
|
| + }
|
| + }
|
| + } while (FindNextFile(find_handle, &ffd) != 0);
|
| + CHECK(GetLastError() == ERROR_NO_MORE_FILES);
|
| +
|
| + FindClose(find_handle);
|
| + find_handle = INVALID_HANDLE_VALUE;
|
| + }
|
| +
|
| + sort(files->begin(),
|
| + files->end(),
|
| + [](const FileAndTimestamp& a, const FileAndTimestamp& b) {
|
| + return a.file < b.file;
|
| + });
|
| +}
|
| +
|
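| +// Loads the timestamp cache written by SaveTimestamps into |files| and
|
| +// |digest|. Returns false if the file is missing or malformed; the caller
|
| +// then falls back to a full re-hash.
|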
| +bool LoadTimestamps(const string& filename,
|
| + vector<FileAndTimestamp>* files,
|
| + string* digest) {
|
| + files->clear();
|
| + HANDLE file = CreateFile(filename.c_str(),
|
| + GENERIC_READ,
|
| + FILE_SHARE_READ,
|
| + NULL,
|
| + OPEN_EXISTING,
|
| + FILE_FLAG_SEQUENTIAL_SCAN,
|
| + NULL);
|
| +  // A missing cache file is fine: the clear() above leaves |files| empty,
|
| +  // which forces a re-hash. No CHECKs here, because the file contents could
|
| +  // be garbage, and in that case we want to ignore the cache entirely.
|
| + if (file == INVALID_HANDLE_VALUE)
|
| + return false;
|
| +
|
| + // See SaveTimestamps for format.
|
| + DWORD bytes_read;
|
| + char digest_buffer[SHA1LEN_HEXBYTES];
|
| + if (!ReadFile(file, digest_buffer, sizeof(digest_buffer), &bytes_read, NULL))
|
| + return false;
|
| + if (sizeof(digest_buffer) != bytes_read)
|
| + return false;
|
| +  // |digest_buffer| is not NUL-terminated, so pass its length explicitly.
|
| +  *digest = string(digest_buffer, sizeof(digest_buffer));
|
| +
|
| + for (;;) {
|
| + FileAndTimestamp file_data;
|
| + BOOL result = ReadFile(file,
|
| + &file_data.timestamp,
|
| + sizeof(file_data.timestamp),
|
| + &bytes_read,
|
| + NULL);
|
| + if (result && bytes_read == 0) {
|
| + // At EOF.
|
| + break;
|
| + }
|
| + if (bytes_read != sizeof(file_data.timestamp))
|
| + return false;
|
| +
|
| + WORD filename_length;
|
| + if (!ReadFile(
|
| + file, &filename_length, sizeof(filename_length), &bytes_read, NULL))
|
| + return false;
|
| + if (bytes_read != sizeof(filename_length))
|
| + return false;
|
| +
|
| +    // A WORD length is at most 65535, so this buffer can never overflow.
|
| +    char filename_buffer[1 << 16];
|
| + if (!ReadFile(file, filename_buffer, filename_length, &bytes_read, NULL))
|
| + return false;
|
| + if (bytes_read != filename_length)
|
| + return false;
|
| + file_data.file = string(filename_buffer, filename_length);
|
| + files->push_back(file_data);
|
| + }
|
| +
|
| + CloseHandle(file);
|
| + return true;
|
| +}
|
| +
|
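| +// Writes |digest| and the (path, mtime) list to |filename| so that the next
|
| +// run can skip hashing when nothing has changed. On-disk format: 40 hex
|
| +// characters of SHA-1 digest, then for each file an 8-byte FILETIME (raw
|
| +// struct bytes), a 16-bit filename length, and the filename bytes with no
|
| +// terminator.
|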
| +void SaveTimestamps(const string& digest,
|
| + const vector<FileAndTimestamp>& files,
|
| + const string& filename) {
|
| + HANDLE file = CreateFile(
|
| + filename.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
|
| + CHECK(file != INVALID_HANDLE_VALUE);
|
| + DWORD bytes_written;
|
| + CHECK(digest.size() == SHA1LEN_HEXBYTES);
|
| + CHECK(WriteFile(file, digest.c_str(), digest.size(), &bytes_written, NULL));
|
| + CHECK(bytes_written == digest.size());
|
| + for (vector<FileAndTimestamp>::const_iterator i(files.begin());
|
| + i != files.end();
|
| + ++i) {
|
| + // 64 bits of timestamp.
|
| + CHECK(WriteFile(
|
| + file, &i->timestamp, sizeof(i->timestamp), &bytes_written, NULL));
|
| + CHECK(bytes_written == sizeof(i->timestamp));
|
| +
|
| + // 16 bits of filename length.
|
| + WORD filename_length = static_cast<WORD>(i->file.size());
|
| + CHECK(WriteFile(file,
|
| + &filename_length,
|
| + sizeof(filename_length),
|
| + &bytes_written,
|
| + NULL));
|
| + CHECK(bytes_written == sizeof(filename_length));
|
| +
|
| + // Filename.
|
| +    CHECK(WriteFile(
|
| +        file, i->file.c_str(), filename_length, &bytes_written, NULL));
|
| +    CHECK(bytes_written == filename_length);
|
| + }
|
| + CloseHandle(file);
|
| +}
|
| +
|
| +int main(int argc, char* argv[]) {
|
| + if (argc < 3) {
|
| + fprintf(stderr, "usage: treehash root_dir timestamps_file\n\n");
|
| + fprintf(
|
| + stderr,
|
| + "prints hash of directory tree rooted at |root_dir| to stdout, and \n"
|
| + "saves mtime cache to |timestamps_file|.\n");
|
| + return 1;
|
| + }
|
| +
|
| + vector<FileAndTimestamp> files;
|
| + GetFileList(argv[1], &files);
|
| +
|
| + vector<FileAndTimestamp> cached_files;
|
| + string cached_digest;
|
| + if (LoadTimestamps(argv[2], &cached_files, &cached_digest)) {
|
| +    // The cache loaded; it counts as a hit only if the sorted file list is
|
| +    // identical and every file's last-write time is unchanged.
|
| + bool matches = cached_files.size() == files.size();
|
| + if (matches) {
|
| + for (size_t i = 0; i < files.size(); ++i) {
|
| + if (cached_files[i].file != files[i].file ||
|
| + cached_files[i].timestamp.dwLowDateTime !=
|
| + files[i].timestamp.dwLowDateTime ||
|
| + cached_files[i].timestamp.dwHighDateTime !=
|
| + files[i].timestamp.dwHighDateTime) {
|
| + matches = false;
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + if (matches) {
|
| + printf("%s\n", cached_digest.c_str());
|
| + return 0;
|
| + }
|
| + }
|
| +
|
| + // Otherwise we need to rehash.
|
| + string digest = CalculateDigestOfTree(argv[1], files);
|
| + SaveTimestamps(digest, files, argv[2]);
|
| + printf("%s\n", digest.c_str());
|
| + return 0;
|
| +}
|
|
|