Index: tools/win/ShowGlobals/ShowGlobals.cc |
diff --git a/tools/win/ShowGlobals/ShowGlobals.cc b/tools/win/ShowGlobals/ShowGlobals.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..748b36602d81252d7af1cc561cf5bdbd613bfa77 |
--- /dev/null |
+++ b/tools/win/ShowGlobals/ShowGlobals.cc |
@@ -0,0 +1,249 @@ |
+// Copyright (c) 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+// This tool scans a PDB file and prints out information about 'interesting' |
+// global variables. This includes duplicates and large globals. This is often |
+// helpful inunderstanding code bloat or finding inefficient globals. |
+// |
+// Duplicate global variables often happen when constructs like this are placed |
+// in a header file: |
+// |
+// const double sqrt_two = sqrt(2.0); |
+// |
+// Many (although usually not all) of the translation units that include this |
+// header file will get a copy of sqrt_two, possibly including an initializer. |
+// Because 'const' implies 'static' there are no warnings or errors from the |
+// linker. This duplication can happen with float/double, structs and classes, |
+// and arrays - any non-integral type. |
+// |
+// Global variables are not necessarily a problem but it is useful to understand |
+// them, and monitoring their changes can be instructive. |
+ |
+#include <dia2.h> |
+#include <stdio.h> |
+ |
+#include <algorithm> |
+#include <vector> |
+ |
+// Helper function for comparing strings - returns a strcmp/wcscmp compatible |
+// value. |
+int StringCompare(const std::wstring& lhs, const std::wstring& rhs) { |
+ return wcscmp(lhs.c_str(), rhs.c_str()); |
+} |
+ |
+// Use this struct to record data about symbols for sorting and analysis. |
+struct SymbolData { |
+ SymbolData(DWORD size, DWORD section, const wchar_t* name) |
+ : size(size), section(section), name(name) {} |
+ |
+ DWORD size; |
+ DWORD section; |
+ std::wstring name; |
+}; |
+ |
+// Comparison function for when sorting symbol data by name, in order to allow |
+// looking for duplicate symbols. It uses the symbol size as a tiebreaker. This |
+// is necessary because sometimes there are symbols with matching names but |
+// different sizes in which case they aren't actually duplicates. These false |
+// positives happen because namespaces are omitted from the symbol names that |
+// DIA2 returns. |
+bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) { |
+ int nameCompare = StringCompare(lhs.name, rhs.name); |
+ if (nameCompare == 0) |
+ return lhs.size < rhs.size; |
+ return nameCompare < 0; |
+} |
+ |
+// Comparison function for when sorting symbols by size, in order to allow |
+// finding the largest global variables. Use the symbol names as a tiebreaker |
+// in order to get consistent ordering. |
+bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) { |
+ if (lhs.size == rhs.size) { |
+ return StringCompare(lhs.name, rhs.name) < 0; |
+ } |
stanisc
2016/12/19 22:11:40
Maybe skip braces to make this similar to NameComp
brucedawson
2016/12/19 23:00:51
Done.
|
+ return lhs.size < rhs.size; |
+} |
+ |
+// Use this struct to store data about repeated globals, for later sorting. |
+struct RepeatData { |
+ RepeatData(size_t repeat_count, DWORD bytes_wasted, const std::wstring& name) |
+ : repeat_count(repeat_count), bytes_wasted(bytes_wasted), name(name) {} |
+ bool operator<(const RepeatData& rhs) { |
+ return bytes_wasted < rhs.bytes_wasted; |
+ } |
+ |
+ size_t repeat_count; |
+ DWORD bytes_wasted; |
+ std::wstring name; |
+}; |
+ |
+bool DumpInterestingGlobals(IDiaSymbol* global, const wchar_t* filename) { |
+ wprintf(L"#Dups\tDupSize\tSize\tSection\tSymbol name\tPDB name\n"); |
+ |
+ // How many bytes must be wasted on repeats before being listed. |
+ const int kWastageThreshold = 100; |
+ // How big must an individual symbol be before being listed. |
+ const int kBigSizeThreshold = 500; |
+ |
+ std::vector<SymbolData> symbols; |
+ std::vector<RepeatData> repeats; |
+ |
+ IDiaEnumSymbols* enum_symbols; |
+ HRESULT result = |
+ global->findChildren(SymTagData, NULL, nsNone, &enum_symbols); |
+ if (FAILED(result)) { |
+ wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n"); |
+ return false; |
+ } |
+ |
+ IDiaSymbol* symbol; |
+ ULONG celt = 0; |
+ while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1)) { |
+ // If we get get_length on symbol it works for functions but not for |
+ // data. For some reason for data we have to call get_type() to get |
+ // another IDiaSymbol object which we can query for length. |
+ IDiaSymbol* type_symbol; |
chengx
2016/12/19 21:48:02
I think type_symbol needs to be freed to avoid mem
brucedawson
2016/12/19 23:00:51
Yep. The original sample code used manual calls to
|
+ symbol->get_type(&type_symbol); |
+ |
+ ULONGLONG size = 0; |
+ type_symbol->get_length(&size); |
+ |
+ // Use -1 and -2 as canary values to indicate various failures. |
+ DWORD section = (DWORD)-1; |
+ if (symbol->get_addressSection(§ion) != S_OK) |
+ section = -2; |
stanisc
2016/12/19 22:11:40
Should this need (DWORD) cast as well?
brucedawson
2016/12/19 23:00:51
Done. And changed to static_cast.
|
+ |
+ BSTR name; |
+ if (symbol->get_name(&name) == S_OK) { |
+ symbols.push_back(SymbolData((DWORD)size, section, name)); |
chengx
2016/12/19 21:48:02
Not very familiar with BSTR. But no conversion nee
brucedawson
2016/12/19 23:00:51
BSTR is basically a null-terminated string with we
|
+ ::SysFreeString(name); |
+ } |
+ |
+ symbol->Release(); |
+ } |
+ |
+ // Sort the symbols by name/size so that we can print a report about duplicate |
+ // variables. |
+ std::sort(symbols.begin(), symbols.end(), NameCompare); |
+ for (auto p = symbols.begin(); p != symbols.end(); /**/) { |
+ auto pScan = p; |
+ // Scan the data looking for symbols that have the same name |
+ // and size. |
+ while (pScan != symbols.end() && p->size == pScan->size && |
+ StringCompare(p->name, pScan->name) == 0) |
+ ++pScan; |
+ |
+ // Calculate how many times the symbol name/size appears in this PDB. |
+ size_t repeat_count = pScan - p; |
+ if (repeat_count > 1) { |
+ // Change the count from how many instances of this variable there are to |
+ // how many *excess* instances there are. |
+ --repeat_count; |
+ DWORD bytes_wasted = repeat_count * p->size; |
+ if (bytes_wasted > kWastageThreshold) { |
+ repeats.push_back(RepeatData(repeat_count, bytes_wasted, p->name)); |
+ } |
+ } |
+ |
+ p = pScan; |
+ } |
+ |
+ // Print a summary of duplicated variables, sorted to put the worst offenders |
+ // first. |
+ std::sort(repeats.begin(), repeats.end()); |
+ std::reverse(repeats.begin(), repeats.end()); |
chengx
2016/12/19 21:48:02
Another way is to define a comparison function usi
stanisc
2016/12/19 22:11:40
nit: You could avoid reversing by changing the com
brucedawson
2016/12/19 23:00:51
I had that originally for RepeatData and decided I
|
+ for (auto& repeat : repeats) { |
chengx
2016/12/19 21:48:02
Is "auto const &" better?
brucedawson
2016/12/19 23:00:51
Yes. const all the things.
|
+ // The empty field contain a zero so that Excel/sheets will more easily |
+ // create the pivot tables that I want. |
+ wprintf(L"%d\t%u\t0\t0\t%s\t%s\n", repeat.repeat_count, repeat.bytes_wasted, |
stanisc
2016/12/19 22:11:40
nit: I think this might not align with the header.
brucedawson
2016/12/19 23:00:51
Yeah, PDB name doesn't align. But it generally doe
|
+ repeat.name.c_str(), filename); |
+ } |
+ wprintf(L"\n"); |
+ |
+ // Print a summary of the largest global variables |
+ std::sort(symbols.begin(), symbols.end(), SizeCompare); |
+ std::reverse(symbols.begin(), symbols.end()); |
chengx
2016/12/19 21:48:02
Same as line 155, reverse operation can be avoided
stanisc
2016/12/19 22:11:40
The same as above.
|
+ for (auto p = symbols.begin(); p != symbols.end(); ++p) { |
stanisc
2016/12/19 22:11:40
Could use for (auto& p : symbols) cycle
brucedawson
2016/12/19 23:00:51
Done.
|
+ if (p->size < kBigSizeThreshold) |
+ break; |
+ // The empty fields contain a zero so that the columns line up which can |
+ // be important when pasting the data into a spreadsheet. |
+ wprintf(L"0\t0\t%6d\t%d\t%s\t%s\n", p->size, p->section, p->name.c_str(), |
+ filename); |
+ } |
+ |
+ return true; |
+} |
+ |
+bool Initialize(const wchar_t* filename, |
+ IDiaDataSource** source, |
+ IDiaSession** session, |
+ IDiaSymbol** global) { |
+ HRESULT hr = CoInitialize(NULL); |
+ if (FAILED(hr)) { |
+ wprintf(L"CoInitialize failed - %08X.", hr); |
+ return false; |
+ } |
+ |
+ // Initialize DIA2 |
+ hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER, |
+ __uuidof(IDiaDataSource), (void**)source); |
+ if (FAILED(hr)) { |
+ wprintf(L"Failed to initialized DIA2 - %08X.\n", hr); |
+ return false; |
+ } |
+ |
+ // Open the PDB |
+ hr = (*source)->loadDataFromPdb(filename); |
+ if (FAILED(hr)) { |
+ wprintf(L"LoadDataFromPdb failed - %08X.\n", hr); |
+ return false; |
+ } |
+ |
+ hr = (*source)->openSession(session); |
+ if (FAILED(hr)) { |
+ wprintf(L"OpenSession failed - %08X.\n", hr); |
+ return false; |
+ } |
+ |
+ // Retrieve a reference to the global scope |
+ hr = (*session)->get_globalScope(global); |
+ if (hr != S_OK) { |
+ wprintf(L"Get_globalScope failed - %08X.\n", hr); |
+ return false; |
+ } |
+ |
+ return true; |
+} |
+ |
+void Cleanup(IDiaSession* session, IDiaSymbol* global) { |
+ if (global) { |
+ global->Release(); |
+ } |
+ |
+ if (session) { |
+ session->Release(); |
+ } |
+ |
+ CoUninitialize(); |
+} |
+ |
+int wmain(int argc, wchar_t* argv[]) { |
+ if (argc < 2) { |
+ wprintf(L"Usage: ShowGlobals file.pdb"); |
+ return -1; |
+ } |
+ |
+ const wchar_t* filename = argv[1]; |
+ |
+ IDiaDataSource* source = nullptr; |
+ IDiaSession* session = nullptr; |
+ IDiaSymbol* global = nullptr; |
+ if (!(Initialize(filename, &source, &session, &global))) |
+ return -1; |
+ |
+ DumpInterestingGlobals(global, filename); |
+ |
+ Cleanup(session, global); |
stanisc
2016/12/19 22:11:40
What about releasing the source?
brucedawson
2016/12/19 23:00:51
Sample code didn't, but I do now.
|
+} |