Index: win_toolchain/treehash/treehash.cc |
diff --git a/win_toolchain/treehash/treehash.cc b/win_toolchain/treehash/treehash.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d9528a1b6d0880362a880863e1ca478c274947b1 |
--- /dev/null |
+++ b/win_toolchain/treehash/treehash.cc |
@@ -0,0 +1,337 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// The equivalent Python program |
+// (http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/win_toolchain/get_toolchain_if_necessary.py?revision=259915) |
+// and treehash.py here takes about 1s on a fast, hot SSD to hash the windows |
+// toolchain tree. This is annoying when used inside GN as its runtime is |
+// otherwise ~0s. |
+ |
+// What this tool does: |
+// |
// Calculates (recursively) the sha1 of a directory tree. Because the actual
// sha1'ing takes non-zero time, this tool saves a cache of the sha1 and the
// mtimes of all the files in the tree, and uses it the next time. If the
// cache file exists and no mtimes have changed, the sha1 of the previous run
// is returned. If the tool actually did the sha1'ing, it updates the cache
// file for next time.
+ |
#include <windows.h>
#include <wincrypt.h>

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <iostream>
#include <stack>
#include <string>
#include <vector>
+ |
+#pragma warning(disable : 4127) // Conditional expression is constant. |
+#pragma warning(disable : 4706) // Assignment within conditional. |
+#pragma warning(disable : 4800) // Forcing value to bool. |
+ |
+#define SHA1LEN 20 |
+#define SHA1LEN_HEXBYTES (SHA1LEN * 2) |
+ |
+using namespace std; |
+ |
// A file path paired with its last-write time. Used both as input to the
// tree hash and as the record format of the mtime cache.
struct FileAndTimestamp {
  string file;          // Relative to the scanned root, lower-cased by GetFileList.
  FILETIME timestamp;   // ftLastWriteTime from the directory enumeration.
};
+ |
// Aborts the process with the failed expression, source line, and
// GetLastError() when |condition| is false.
// Note: no semicolon after "while (0)" -- the caller supplies it, so that
// "if (a) CHECK(b); else ..." parses as a single statement. GetLastError()
// returns a DWORD (unsigned long), hence %lu.
#define CHECK(condition)                   \
  do {                                     \
    if (!(condition)) {                    \
      fprintf(stderr,                      \
              "%s failed, line %d: %lu\n", \
              #condition,                  \
              __LINE__,                    \
              GetLastError());             \
      exit(1);                             \
    }                                      \
  } while (0)
+ |
// Folds the given file's name, then its entire contents, into |hash|.
// Any I/O or hashing failure aborts via CHECK.
void UpdateHashWithFile(const string& filename,
                        HCRYPTHASH hash) {
  HANDLE file = CreateFile(filename.c_str(),
                           GENERIC_READ,
                           FILE_SHARE_READ,
                           NULL,
                           OPEN_EXISTING,
                           FILE_FLAG_SEQUENTIAL_SCAN,
                           NULL);
  CHECK(file != INVALID_HANDLE_VALUE);

  // Hash the name first so that renames change the overall digest.
  const BYTE* name_bytes = reinterpret_cast<const BYTE*>(filename.c_str());
  CHECK(CryptHashData(hash, name_bytes, static_cast<DWORD>(filename.size()), 0));

  // Then hash the contents in 32K chunks until EOF.
  BYTE chunk[1 << 15];
  for (;;) {
    DWORD chunk_size = 0;
    BOOL ok = ReadFile(file, chunk, sizeof(chunk), &chunk_size, NULL);
    CHECK(ok);
    if (chunk_size == 0)
      break;  // EOF.
    CHECK(CryptHashData(hash, chunk, chunk_size, 0));
  }

  CloseHandle(file);
}
+ |
// Use CryptoAPI to calculate SHA1 of a file tree (both the file names and
// the file contents). This isn't particularly on the fast path because in
// our standard usage there should always be a matching timestamp cache
// (except when the toolchain is rev'd). Returns the SHA1 as a hex string.
string CalculateDigestOfTree(const string& root,
                             const vector<FileAndTimestamp>& files) {
  HCRYPTPROV prov = 0;
  HCRYPTHASH hash = 0;

  // Acquire a crypto provider and create a SHA1 hash object.
  CHECK(CryptAcquireContext(
      &prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT));
  CHECK(CryptCreateHash(prov, CALG_SHA1, 0, 0, &hash));

  // Fold every file (name and contents), in the given order, into the hash.
  for (size_t index = 0; index < files.size(); ++index)
    UpdateHashWithFile(root + "\\" + files[index].file, hash);

  // Extract the raw digest and render it as lower-case hex.
  BYTE raw_digest[SHA1LEN];
  DWORD digest_size = SHA1LEN;
  CHECK(CryptGetHashParam(hash, HP_HASHVAL, raw_digest, &digest_size, 0));

  const char kHexDigits[] = "0123456789abcdef";
  string hex;
  for (DWORD index = 0; index < digest_size; ++index) {
    hex.push_back(kHexDigits[raw_digest[index] >> 4]);
    hex.push_back(kHexDigits[raw_digest[index] & 0xf]);
  }

  CryptDestroyHash(hash);
  CryptReleaseContext(prov, 0);

  return hex;
}
+ |
// Gets a list of files under the specified root that are not marked
// hidden/system. File paths returned are relative to given root, converted to
// lower case, and the entire result is sorted. This is to make the hash
// consistent when the file names as well as the contents are included in the
// digest.
void GetFileList(string root, vector<FileAndTimestamp>* files) {
  HANDLE find_handle = INVALID_HANDLE_VALUE;
  WIN32_FIND_DATA ffd;
  string spec;
  // Iterative depth-first traversal: directories still to be enumerated.
  stack<string> directories;
  const string original_root = root;
  // Length of the root prefix; stripped (plus the '\\') from each result
  // below to make paths relative.
  size_t original_length = original_root.size();
  FINDEX_INFO_LEVELS info_level_id = FindExInfoBasic;
  DWORD additional_flags = FIND_FIRST_EX_LARGE_FETCH;

  // info_level_id and additional_flags need to be 0 on <= Vista.
  OSVERSIONINFO version_info = {};
  version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
#pragma warning(push)
#pragma warning(disable : 4996)  // GetVersionEx is deprecated.
  // 6.0 is Vista; anything below major 6, or exactly 6.0, gets the
  // conservative (pre-Win7) FindFirstFileEx arguments.
  if (!GetVersionEx(&version_info) ||
      (version_info.dwMajorVersion < 6 || (version_info.dwMajorVersion == 6 &&
                                           version_info.dwMinorVersion == 0))) {
    info_level_id = static_cast<FINDEX_INFO_LEVELS>(0);
    additional_flags = 0;
  }
#pragma warning(pop)

  directories.push(root);
  files->clear();

  while (!directories.empty()) {
    root = directories.top();
    spec = root + "\\*";
    directories.pop();

    find_handle = FindFirstFileEx(spec.c_str(),
                                  info_level_id,
                                  &ffd,
                                  FindExSearchNameMatch,
                                  NULL,
                                  additional_flags);
    CHECK(find_handle != INVALID_HANDLE_VALUE);

    do {
      // Skip the "." and ".." pseudo-entries.
      if (strcmp(ffd.cFileName, ".") != 0 && strcmp(ffd.cFileName, "..") != 0) {
        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
          // Defer subdirectories; they're enumerated by later iterations.
          directories.push(root + "\\" + ffd.cFileName);
        } else {
          // Only include ordinary files (not hidden/system), per the
          // function comment above.
          if ((ffd.dwFileAttributes &
               (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
            // +1 also strips the separator after the root.
            string relative_to_root =
                (root + "\\" + ffd.cFileName).substr(original_length + 1);
            // Lower-case so the digest doesn't depend on on-disk casing.
            transform(relative_to_root.begin(),
                      relative_to_root.end(),
                      relative_to_root.begin(),
                      ::tolower);
            FileAndTimestamp file_data;
            file_data.file = relative_to_root;
            file_data.timestamp = ffd.ftLastWriteTime;
            files->push_back(file_data);
          }
        }
      }
    } while (FindNextFile(find_handle, &ffd) != 0);
    // FindNextFile returning 0 for any reason other than exhausting the
    // directory is an error.
    CHECK(GetLastError() == ERROR_NO_MORE_FILES);

    FindClose(find_handle);
    find_handle = INVALID_HANDLE_VALUE;
  }

  // Sort by relative path so the order (and thus the digest) is stable.
  sort(files->begin(),
       files->end(),
       [](const FileAndTimestamp& a, const FileAndTimestamp& b) {
         return a.file < b.file;
       });
}
+ |
// Parses the cache body (see SaveTimestamps for the format) from an open
// |file| handle into |files| and |digest|. Returns false on any truncated or
// malformed data. No CHECKs here: the file contents could be garbage and in
// that case we want to ignore it, not abort.
static bool ReadTimestampCache(HANDLE file,
                               vector<FileAndTimestamp>* files,
                               string* digest) {
  DWORD bytes_read;

  // Fixed-size hex digest first.
  char digest_buffer[SHA1LEN_HEXBYTES];
  if (!ReadFile(file, digest_buffer, sizeof(digest_buffer), &bytes_read, NULL))
    return false;
  if (sizeof(digest_buffer) != bytes_read)
    return false;
  // The digest is stored without a NUL terminator, so the length must be
  // passed explicitly. (Assigning the raw char array would scan for a NUL
  // past the end of the buffer.)
  digest->assign(digest_buffer, sizeof(digest_buffer));

  // Then (timestamp, name length, name) records until EOF.
  for (;;) {
    FileAndTimestamp file_data;
    BOOL result = ReadFile(file,
                           &file_data.timestamp,
                           sizeof(file_data.timestamp),
                           &bytes_read,
                           NULL);
    if (result && bytes_read == 0) {
      // At EOF.
      return true;
    }
    if (bytes_read != sizeof(file_data.timestamp))
      return false;

    WORD filename_length;
    if (!ReadFile(
            file, &filename_length, sizeof(filename_length), &bytes_read, NULL))
      return false;
    if (bytes_read != sizeof(filename_length))
      return false;

    char filename_buffer[1 << 15];
    // A WORD length can be up to 64K while the buffer is 32K; reject such
    // (corrupt) records instead of overflowing the stack.
    if (filename_length > sizeof(filename_buffer))
      return false;
    if (!ReadFile(file, filename_buffer, filename_length, &bytes_read, NULL))
      return false;
    if (bytes_read != filename_length)
      return false;
    file_data.file = string(filename_buffer, filename_length);
    files->push_back(file_data);
  }
}

// Loads the timestamp cache written by SaveTimestamps. On success fills in
// |files| and |digest| and returns true. On any failure returns false with
// |files| left empty, which causes the caller to re-hash.
bool LoadTimestamps(const string& filename,
                    vector<FileAndTimestamp>* files,
                    string* digest) {
  files->clear();
  HANDLE file = CreateFile(filename.c_str(),
                           GENERIC_READ,
                           FILE_SHARE_READ,
                           NULL,
                           OPEN_EXISTING,
                           FILE_FLAG_SEQUENTIAL_SCAN,
                           NULL);
  // Not existing is fine, emptying from above will cause a re-hash.
  if (file == INVALID_HANDLE_VALUE)
    return false;

  // Parse in a helper so the handle is closed on every exit path (it was
  // previously leaked whenever the cache was malformed).
  bool ok = ReadTimestampCache(file, files, digest);
  CloseHandle(file);
  if (!ok)
    files->clear();  // Don't hand back a partially parsed list.
  return ok;
}
+ |
// Writes the mtime cache to |filename|: the hex digest (SHA1LEN_HEXBYTES
// bytes, no terminator), then per file a 64-bit FILETIME, a 16-bit name
// length, and the name bytes (no terminator). Any failure aborts via CHECK;
// LoadTimestamps is the reader.
void SaveTimestamps(const string& digest,
                    const vector<FileAndTimestamp>& files,
                    const string& filename) {
  HANDLE file = CreateFile(
      filename.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
  CHECK(file != INVALID_HANDLE_VALUE);
  DWORD bytes_written;
  CHECK(digest.size() == SHA1LEN_HEXBYTES);
  CHECK(WriteFile(file,
                  digest.c_str(),
                  static_cast<DWORD>(digest.size()),
                  &bytes_written,
                  NULL));
  CHECK(bytes_written == digest.size());
  for (vector<FileAndTimestamp>::const_iterator i(files.begin());
       i != files.end();
       ++i) {
    // 64 bits of timestamp.
    CHECK(WriteFile(
        file, &i->timestamp, sizeof(i->timestamp), &bytes_written, NULL));
    CHECK(bytes_written == sizeof(i->timestamp));

    // 16 bits of filename length. A path longer than 64K would silently
    // truncate and corrupt the cache format, so fail loudly instead.
    CHECK(i->file.size() <= 0xffff);
    WORD filename_length = static_cast<WORD>(i->file.size());
    CHECK(WriteFile(file,
                    &filename_length,
                    sizeof(filename_length),
                    &bytes_written,
                    NULL));
    CHECK(bytes_written == sizeof(filename_length));

    // Filename (no terminator). This write's size was previously unchecked,
    // unlike every other write above.
    CHECK(WriteFile(
        file, i->file.c_str(), filename_length, &bytes_written, NULL));
    CHECK(bytes_written == filename_length);
  }
  CloseHandle(file);
}
+ |
// Entry point: prints the (possibly cached) tree digest for argv[1] and
// refreshes the mtime cache at argv[2] when a re-hash was needed.
int main(int argc, char* argv[]) {
  if (argc < 3) {
    fprintf(stderr, "usage: treehash root_dir timestamps_file\n\n");
    fprintf(
        stderr,
        "prints hash of directory tree rooted at |root_dir| to stdout, and \n"
        "saves mtime cache to |timestamps_file|.\n");
    return 1;
  }

  // Enumerate the current state of the tree.
  vector<FileAndTimestamp> files;
  GetFileList(argv[1], &files);

  // If the cached snapshot matches exactly (same count, same names, same
  // mtimes, in order), the previous digest is still valid: print it and skip
  // the expensive hashing entirely.
  vector<FileAndTimestamp> cached_files;
  string cached_digest;
  if (LoadTimestamps(argv[2], &cached_files, &cached_digest) &&
      cached_files.size() == files.size()) {
    bool unchanged = true;
    for (size_t i = 0; unchanged && i < files.size(); ++i) {
      unchanged =
          cached_files[i].file == files[i].file &&
          cached_files[i].timestamp.dwLowDateTime ==
              files[i].timestamp.dwLowDateTime &&
          cached_files[i].timestamp.dwHighDateTime ==
              files[i].timestamp.dwHighDateTime;
    }
    if (unchanged) {
      printf("%s\n", cached_digest.c_str());
      return 0;
    }
  }

  // No usable cache (missing, corrupt, or stale): rehash and refresh it.
  string digest = CalculateDigestOfTree(argv[1], files);
  SaveTimestamps(digest, files, argv[2]);
  printf("%s\n", digest.c_str());
  return 0;
}