Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This tool scans a PDB file and prints out information about 'interesting' | |
| 6 // global variables. This includes duplicates and large globals. This is often | |
| 7 // helpful in understanding code bloat or finding inefficient globals. | |
| 8 // | |
| 9 // Duplicate global variables often happen when constructs like this are placed | |
| 10 // in a header file: | |
| 11 // | |
| 12 // const double sqrt_two = sqrt(2.0); | |
| 13 // | |
| 14 // Many (although usually not all) of the translation units that include this | |
| 15 // header file will get a copy of sqrt_two, possibly including an initializer. | |
| 16 // Because 'const' implies 'static' there are no warnings or errors from the | |
| 17 // linker. This duplication can happen with float/double, structs and classes, | |
| 18 // and arrays - any non-integral type. | |
| 19 // | |
| 20 // Global variables are not necessarily a problem but it is useful to understand | |
| 21 // them, and monitoring their changes can be instructive. | |
| 22 | |
| 23 #include <dia2.h> | |
| 24 #include <stdio.h> | |
| 25 | |
| 26 #include <algorithm> | |
| 27 #include <vector> | |
| 28 | |
| 29 // Helper function for comparing strings - returns a strcmp/wcscmp compatible | |
| 30 // value. | |
| 31 int StringCompare(const std::wstring& lhs, const std::wstring& rhs) { | |
| 32 return wcscmp(lhs.c_str(), rhs.c_str()); | |
| 33 } | |
| 34 | |
| 35 // Use this struct to record data about symbols for sorting and analysis. | |
| 36 struct SymbolData { | |
| 37 SymbolData(DWORD size, DWORD section, const wchar_t* name) | |
| 38 : size(size), section(section), name(name) {} | |
| 39 | |
| 40 DWORD size; | |
| 41 DWORD section; | |
| 42 std::wstring name; | |
| 43 }; | |
| 44 | |
| 45 // Comparison function for when sorting symbol data by name, in order to allow | |
| 46 // looking for duplicate symbols. It uses the symbol size as a tiebreaker. This | |
| 47 // is necessary because sometimes there are symbols with matching names but | |
| 48 // different sizes in which case they aren't actually duplicates. These false | |
| 49 // positives happen because namespaces are omitted from the symbol names that | |
| 50 // DIA2 returns. | |
| 51 bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) { | |
| 52 int nameCompare = StringCompare(lhs.name, rhs.name); | |
| 53 if (nameCompare == 0) | |
| 54 return lhs.size < rhs.size; | |
| 55 return nameCompare < 0; | |
| 56 } | |
| 57 | |
| 58 // Comparison function for when sorting symbols by size, in order to allow | |
| 59 // finding the largest global variables. Use the symbol names as a tiebreaker | |
| 60 // in order to get consistent ordering. | |
| 61 bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) { | |
| 62 if (lhs.size == rhs.size) { | |
| 63 return StringCompare(lhs.name, rhs.name) < 0; | |
| 64 } | |
|
stanisc
2016/12/19 22:11:40
Maybe skip braces to make this similar to NameComp
brucedawson
2016/12/19 23:00:51
Done.
| |
| 65 return lhs.size < rhs.size; | |
| 66 } | |
| 67 | |
| 68 // Use this struct to store data about repeated globals, for later sorting. | |
| 69 struct RepeatData { | |
| 70 RepeatData(size_t repeat_count, DWORD bytes_wasted, const std::wstring& name) | |
| 71 : repeat_count(repeat_count), bytes_wasted(bytes_wasted), name(name) {} | |
| 72 bool operator<(const RepeatData& rhs) { | |
| 73 return bytes_wasted < rhs.bytes_wasted; | |
| 74 } | |
| 75 | |
| 76 size_t repeat_count; | |
| 77 DWORD bytes_wasted; | |
| 78 std::wstring name; | |
| 79 }; | |
| 80 | |
| 81 bool DumpInterestingGlobals(IDiaSymbol* global, const wchar_t* filename) { | |
| 82 wprintf(L"#Dups\tDupSize\tSize\tSection\tSymbol name\tPDB name\n"); | |
| 83 | |
| 84 // How many bytes must be wasted on repeats before being listed. | |
| 85 const int kWastageThreshold = 100; | |
| 86 // How big must an individual symbol be before being listed. | |
| 87 const int kBigSizeThreshold = 500; | |
| 88 | |
| 89 std::vector<SymbolData> symbols; | |
| 90 std::vector<RepeatData> repeats; | |
| 91 | |
| 92 IDiaEnumSymbols* enum_symbols; | |
| 93 HRESULT result = | |
| 94 global->findChildren(SymTagData, NULL, nsNone, &enum_symbols); | |
| 95 if (FAILED(result)) { | |
| 96 wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n"); | |
| 97 return false; | |
| 98 } | |
| 99 | |
| 100 IDiaSymbol* symbol; | |
| 101 ULONG celt = 0; | |
| 102 while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1)) { | |
| 103 // If we call get_length on symbol it works for functions but not for | |
| 104 // data. For some reason for data we have to call get_type() to get | |
| 105 // another IDiaSymbol object which we can query for length. | |
| 106 IDiaSymbol* type_symbol; | |
|
chengx
2016/12/19 21:48:02
I think type_symbol needs to be freed to avoid mem
brucedawson
2016/12/19 23:00:51
Yep. The original sample code used manual calls to
| |
| 107 symbol->get_type(&type_symbol); | |
| 108 | |
| 109 ULONGLONG size = 0; | |
| 110 type_symbol->get_length(&size); | |
| 111 | |
| 112 // Use -1 and -2 as canary values to indicate various failures. | |
| 113 DWORD section = (DWORD)-1; | |
| 114 if (symbol->get_addressSection(&section) != S_OK) | |
| 115 section = -2; | |
|
stanisc
2016/12/19 22:11:40
Should this need (DWORD) cast as well?
brucedawson
2016/12/19 23:00:51
Done. And changed to static_cast.
| |
| 116 | |
| 117 BSTR name; | |
| 118 if (symbol->get_name(&name) == S_OK) { | |
| 119 symbols.push_back(SymbolData((DWORD)size, section, name)); | |
|
chengx
2016/12/19 21:48:02
Not very familiar with BSTR. But no conversion nee
brucedawson
2016/12/19 23:00:51
BSTR is basically a null-terminated string with we
| |
| 120 ::SysFreeString(name); | |
| 121 } | |
| 122 | |
| 123 symbol->Release(); | |
| 124 } | |
| 125 | |
| 126 // Sort the symbols by name/size so that we can print a report about duplicate | |
| 127 // variables. | |
| 128 std::sort(symbols.begin(), symbols.end(), NameCompare); | |
| 129 for (auto p = symbols.begin(); p != symbols.end(); /**/) { | |
| 130 auto pScan = p; | |
| 131 // Scan the data looking for symbols that have the same name | |
| 132 // and size. | |
| 133 while (pScan != symbols.end() && p->size == pScan->size && | |
| 134 StringCompare(p->name, pScan->name) == 0) | |
| 135 ++pScan; | |
| 136 | |
| 137 // Calculate how many times the symbol name/size appears in this PDB. | |
| 138 size_t repeat_count = pScan - p; | |
| 139 if (repeat_count > 1) { | |
| 140 // Change the count from how many instances of this variable there are to | |
| 141 // how many *excess* instances there are. | |
| 142 --repeat_count; | |
| 143 DWORD bytes_wasted = repeat_count * p->size; | |
| 144 if (bytes_wasted > kWastageThreshold) { | |
| 145 repeats.push_back(RepeatData(repeat_count, bytes_wasted, p->name)); | |
| 146 } | |
| 147 } | |
| 148 | |
| 149 p = pScan; | |
| 150 } | |
| 151 | |
| 152 // Print a summary of duplicated variables, sorted to put the worst offenders | |
| 153 // first. | |
| 154 std::sort(repeats.begin(), repeats.end()); | |
| 155 std::reverse(repeats.begin(), repeats.end()); | |
|
chengx
2016/12/19 21:48:02
Another way is to define a comparison function usi
stanisc
2016/12/19 22:11:40
nit: You could avoid reversing by changing the com
brucedawson
2016/12/19 23:00:51
I had that originally for RepeatData and decided I
| |
| 156 for (auto& repeat : repeats) { | |
|
chengx
2016/12/19 21:48:02
Is "auto const &" better?
brucedawson
2016/12/19 23:00:51
Yes. const all the things.
| |
| 157 // The empty fields contain a zero so that Excel/sheets will more easily | |
| 158 // create the pivot tables that I want. | |
| 159 wprintf(L"%d\t%u\t0\t0\t%s\t%s\n", repeat.repeat_count, repeat.bytes_wasted, | |
|
stanisc
2016/12/19 22:11:40
nit: I think this might not align with the header.
brucedawson
2016/12/19 23:00:51
Yeah, PDB name doesn't align. But it generally doe
| |
| 160 repeat.name.c_str(), filename); | |
| 161 } | |
| 162 wprintf(L"\n"); | |
| 163 | |
| 164 // Print a summary of the largest global variables | |
| 165 std::sort(symbols.begin(), symbols.end(), SizeCompare); | |
| 166 std::reverse(symbols.begin(), symbols.end()); | |
|
chengx
2016/12/19 21:48:02
Same as line 155, reverse operation can be avoided
stanisc
2016/12/19 22:11:40
The same as above.
| |
| 167 for (auto p = symbols.begin(); p != symbols.end(); ++p) { | |
|
stanisc
2016/12/19 22:11:40
Could use for (auto& p : symbols) cycle
brucedawson
2016/12/19 23:00:51
Done.
| |
| 168 if (p->size < kBigSizeThreshold) | |
| 169 break; | |
| 170 // The empty fields contain a zero so that the columns line up which can | |
| 171 // be important when pasting the data into a spreadsheet. | |
| 172 wprintf(L"0\t0\t%6d\t%d\t%s\t%s\n", p->size, p->section, p->name.c_str(), | |
| 173 filename); | |
| 174 } | |
| 175 | |
| 176 return true; | |
| 177 } | |
| 178 | |
| 179 bool Initialize(const wchar_t* filename, | |
| 180 IDiaDataSource** source, | |
| 181 IDiaSession** session, | |
| 182 IDiaSymbol** global) { | |
| 183 HRESULT hr = CoInitialize(NULL); | |
| 184 if (FAILED(hr)) { | |
| 185 wprintf(L"CoInitialize failed - %08X.", hr); | |
| 186 return false; | |
| 187 } | |
| 188 | |
| 189 // Initialize DIA2 | |
| 190 hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER, | |
| 191 __uuidof(IDiaDataSource), (void**)source); | |
| 192 if (FAILED(hr)) { | |
| 193 wprintf(L"Failed to initialized DIA2 - %08X.\n", hr); | |
| 194 return false; | |
| 195 } | |
| 196 | |
| 197 // Open the PDB | |
| 198 hr = (*source)->loadDataFromPdb(filename); | |
| 199 if (FAILED(hr)) { | |
| 200 wprintf(L"LoadDataFromPdb failed - %08X.\n", hr); | |
| 201 return false; | |
| 202 } | |
| 203 | |
| 204 hr = (*source)->openSession(session); | |
| 205 if (FAILED(hr)) { | |
| 206 wprintf(L"OpenSession failed - %08X.\n", hr); | |
| 207 return false; | |
| 208 } | |
| 209 | |
| 210 // Retrieve a reference to the global scope | |
| 211 hr = (*session)->get_globalScope(global); | |
| 212 if (hr != S_OK) { | |
| 213 wprintf(L"Get_globalScope failed - %08X.\n", hr); | |
| 214 return false; | |
| 215 } | |
| 216 | |
| 217 return true; | |
| 218 } | |
| 219 | |
| 220 void Cleanup(IDiaSession* session, IDiaSymbol* global) { | |
| 221 if (global) { | |
| 222 global->Release(); | |
| 223 } | |
| 224 | |
| 225 if (session) { | |
| 226 session->Release(); | |
| 227 } | |
| 228 | |
| 229 CoUninitialize(); | |
| 230 } | |
| 231 | |
| 232 int wmain(int argc, wchar_t* argv[]) { | |
| 233 if (argc < 2) { | |
| 234 wprintf(L"Usage: ShowGlobals file.pdb"); | |
| 235 return -1; | |
| 236 } | |
| 237 | |
| 238 const wchar_t* filename = argv[1]; | |
| 239 | |
| 240 IDiaDataSource* source = nullptr; | |
| 241 IDiaSession* session = nullptr; | |
| 242 IDiaSymbol* global = nullptr; | |
| 243 if (!(Initialize(filename, &source, &session, &global))) | |
| 244 return -1; | |
| 245 | |
| 246 DumpInterestingGlobals(global, filename); | |
| 247 | |
| 248 Cleanup(session, global); | |
|
stanisc
2016/12/19 22:11:40
What about releasing the source?
brucedawson
2016/12/19 23:00:51
Sample code didn't, but I do now.
| |
| 249 } | |
| OLD | NEW |