OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This tool scans a PDB file and prints out information about 'interesting' | |
6 // global variables. This includes duplicates and large globals. This is often | |
7 // helpful in understanding code bloat or finding inefficient globals. | |
8 // | |
9 // Duplicate global variables often happen when constructs like this are placed | |
10 // in a header file: | |
11 // | |
12 // const double sqrt_two = sqrt(2.0); | |
13 // | |
14 // Many (although usually not all) of the translation units that include this | |
15 // header file will get a copy of sqrt_two, possibly including an initializer. | |
16 // Because 'const' implies 'static' there are no warnings or errors from the | |
17 // linker. This duplication can happen with float/double, structs and classes, | |
18 // and arrays - any non-integral type. | |
19 // | |
20 // Global variables are not necessarily a problem but it is useful to understand | |
21 // them, and monitoring their changes can be instructive. | |
22 | |
23 #include <dia2.h> | |
24 #include <stdio.h> | |
25 | |
26 #include <algorithm> | |
27 #include <vector> | |
28 | |
// Three-way comparison of two wide strings. The result follows the
// strcmp/wcscmp convention: negative, zero or positive depending on whether
// |lhs| orders before, the same as, or after |rhs|.
int StringCompare(const std::wstring& lhs, const std::wstring& rhs) {
  const wchar_t* const left = lhs.c_str();
  const wchar_t* const right = rhs.c_str();
  return wcscmp(left, right);
}
34 | |
35 // Use this struct to record data about symbols for sorting and analysis. | |
36 struct SymbolData { | |
37 SymbolData(DWORD size, DWORD section, const wchar_t* name) | |
38 : size(size), section(section), name(name) {} | |
39 | |
40 DWORD size; | |
41 DWORD section; | |
42 std::wstring name; | |
43 }; | |
44 | |
45 // Comparison function for when sorting symbol data by name, in order to allow | |
46 // looking for duplicate symbols. It uses the symbol size as a tiebreaker. This | |
47 // is necessary because sometimes there are symbols with matching names but | |
48 // different sizes in which case they aren't actually duplicates. These false | |
49 // positives happen because namespaces are omitted from the symbol names that | |
50 // DIA2 returns. | |
51 bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) { | |
52 int nameCompare = StringCompare(lhs.name, rhs.name); | |
53 if (nameCompare == 0) | |
54 return lhs.size < rhs.size; | |
55 return nameCompare < 0; | |
56 } | |
57 | |
58 // Comparison function for when sorting symbols by size, in order to allow | |
59 // finding the largest global variables. Use the symbol names as a tiebreaker | |
60 // in order to get consistent ordering. | |
61 bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) { | |
62 if (lhs.size == rhs.size) { | |
63 return StringCompare(lhs.name, rhs.name) < 0; | |
64 } | |
stanisc
2016/12/19 22:11:40
Maybe skip braces to make this similar to NameComp
brucedawson
2016/12/19 23:00:51
Done.
| |
65 return lhs.size < rhs.size; | |
66 } | |
67 | |
68 // Use this struct to store data about repeated globals, for later sorting. | |
69 struct RepeatData { | |
70 RepeatData(size_t repeat_count, DWORD bytes_wasted, const std::wstring& name) | |
71 : repeat_count(repeat_count), bytes_wasted(bytes_wasted), name(name) {} | |
72 bool operator<(const RepeatData& rhs) { | |
73 return bytes_wasted < rhs.bytes_wasted; | |
74 } | |
75 | |
76 size_t repeat_count; | |
77 DWORD bytes_wasted; | |
78 std::wstring name; | |
79 }; | |
80 | |
81 bool DumpInterestingGlobals(IDiaSymbol* global, const wchar_t* filename) { | |
82 wprintf(L"#Dups\tDupSize\tSize\tSection\tSymbol name\tPDB name\n"); | |
83 | |
84 // How many bytes must be wasted on repeats before being listed. | |
85 const int kWastageThreshold = 100; | |
86 // How big must an individual symbol be before being listed. | |
87 const int kBigSizeThreshold = 500; | |
88 | |
89 std::vector<SymbolData> symbols; | |
90 std::vector<RepeatData> repeats; | |
91 | |
92 IDiaEnumSymbols* enum_symbols; | |
93 HRESULT result = | |
94 global->findChildren(SymTagData, NULL, nsNone, &enum_symbols); | |
95 if (FAILED(result)) { | |
96 wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n"); | |
97 return false; | |
98 } | |
99 | |
100 IDiaSymbol* symbol; | |
101 ULONG celt = 0; | |
102 while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1)) { | |
103 // If we get get_length on symbol it works for functions but not for | |
104 // data. For some reason for data we have to call get_type() to get | |
105 // another IDiaSymbol object which we can query for length. | |
106 IDiaSymbol* type_symbol; | |
chengx
2016/12/19 21:48:02
I think type_symbol needs to be freed to avoid mem
brucedawson
2016/12/19 23:00:51
Yep. The original sample code used manual calls to
| |
107 symbol->get_type(&type_symbol); | |
108 | |
109 ULONGLONG size = 0; | |
110 type_symbol->get_length(&size); | |
111 | |
112 // Use -1 and -2 as canary values to indicate various failures. | |
113 DWORD section = (DWORD)-1; | |
114 if (symbol->get_addressSection(§ion) != S_OK) | |
115 section = -2; | |
stanisc
2016/12/19 22:11:40
Should this need (DWORD) cast as well?
brucedawson
2016/12/19 23:00:51
Done. And changed to static_cast.
| |
116 | |
117 BSTR name; | |
118 if (symbol->get_name(&name) == S_OK) { | |
119 symbols.push_back(SymbolData((DWORD)size, section, name)); | |
chengx
2016/12/19 21:48:02
Not very familiar with BSTR. But no conversion nee
brucedawson
2016/12/19 23:00:51
BSTR is basically a null-terminated string with we
| |
120 ::SysFreeString(name); | |
121 } | |
122 | |
123 symbol->Release(); | |
124 } | |
125 | |
126 // Sort the symbols by name/size so that we can print a report about duplicate | |
127 // variables. | |
128 std::sort(symbols.begin(), symbols.end(), NameCompare); | |
129 for (auto p = symbols.begin(); p != symbols.end(); /**/) { | |
130 auto pScan = p; | |
131 // Scan the data looking for symbols that have the same name | |
132 // and size. | |
133 while (pScan != symbols.end() && p->size == pScan->size && | |
134 StringCompare(p->name, pScan->name) == 0) | |
135 ++pScan; | |
136 | |
137 // Calculate how many times the symbol name/size appears in this PDB. | |
138 size_t repeat_count = pScan - p; | |
139 if (repeat_count > 1) { | |
140 // Change the count from how many instances of this variable there are to | |
141 // how many *excess* instances there are. | |
142 --repeat_count; | |
143 DWORD bytes_wasted = repeat_count * p->size; | |
144 if (bytes_wasted > kWastageThreshold) { | |
145 repeats.push_back(RepeatData(repeat_count, bytes_wasted, p->name)); | |
146 } | |
147 } | |
148 | |
149 p = pScan; | |
150 } | |
151 | |
152 // Print a summary of duplicated variables, sorted to put the worst offenders | |
153 // first. | |
154 std::sort(repeats.begin(), repeats.end()); | |
155 std::reverse(repeats.begin(), repeats.end()); | |
chengx
2016/12/19 21:48:02
Another way is to define a comparison function usi
stanisc
2016/12/19 22:11:40
nit: You could avoid reversing by changing the com
brucedawson
2016/12/19 23:00:51
I had that originally for RepeatData and decided I
| |
156 for (auto& repeat : repeats) { | |
chengx
2016/12/19 21:48:02
Is "auto const &" better?
brucedawson
2016/12/19 23:00:51
Yes. const all the things.
| |
157 // The empty field contain a zero so that Excel/sheets will more easily | |
158 // create the pivot tables that I want. | |
159 wprintf(L"%d\t%u\t0\t0\t%s\t%s\n", repeat.repeat_count, repeat.bytes_wasted, | |
stanisc
2016/12/19 22:11:40
nit: I think this might not align with the header.
brucedawson
2016/12/19 23:00:51
Yeah, PDB name doesn't align. But it generally doe
| |
160 repeat.name.c_str(), filename); | |
161 } | |
162 wprintf(L"\n"); | |
163 | |
164 // Print a summary of the largest global variables | |
165 std::sort(symbols.begin(), symbols.end(), SizeCompare); | |
166 std::reverse(symbols.begin(), symbols.end()); | |
chengx
2016/12/19 21:48:02
Same as line 155, reverse operation can be avoided
stanisc
2016/12/19 22:11:40
The same as above.
| |
167 for (auto p = symbols.begin(); p != symbols.end(); ++p) { | |
stanisc
2016/12/19 22:11:40
Could use for (auto& p : symbols) cycle
brucedawson
2016/12/19 23:00:51
Done.
| |
168 if (p->size < kBigSizeThreshold) | |
169 break; | |
170 // The empty fields contain a zero so that the columns line up which can | |
171 // be important when pasting the data into a spreadsheet. | |
172 wprintf(L"0\t0\t%6d\t%d\t%s\t%s\n", p->size, p->section, p->name.c_str(), | |
173 filename); | |
174 } | |
175 | |
176 return true; | |
177 } | |
178 | |
179 bool Initialize(const wchar_t* filename, | |
180 IDiaDataSource** source, | |
181 IDiaSession** session, | |
182 IDiaSymbol** global) { | |
183 HRESULT hr = CoInitialize(NULL); | |
184 if (FAILED(hr)) { | |
185 wprintf(L"CoInitialize failed - %08X.", hr); | |
186 return false; | |
187 } | |
188 | |
189 // Initialize DIA2 | |
190 hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER, | |
191 __uuidof(IDiaDataSource), (void**)source); | |
192 if (FAILED(hr)) { | |
193 wprintf(L"Failed to initialized DIA2 - %08X.\n", hr); | |
194 return false; | |
195 } | |
196 | |
197 // Open the PDB | |
198 hr = (*source)->loadDataFromPdb(filename); | |
199 if (FAILED(hr)) { | |
200 wprintf(L"LoadDataFromPdb failed - %08X.\n", hr); | |
201 return false; | |
202 } | |
203 | |
204 hr = (*source)->openSession(session); | |
205 if (FAILED(hr)) { | |
206 wprintf(L"OpenSession failed - %08X.\n", hr); | |
207 return false; | |
208 } | |
209 | |
210 // Retrieve a reference to the global scope | |
211 hr = (*session)->get_globalScope(global); | |
212 if (hr != S_OK) { | |
213 wprintf(L"Get_globalScope failed - %08X.\n", hr); | |
214 return false; | |
215 } | |
216 | |
217 return true; | |
218 } | |
219 | |
220 void Cleanup(IDiaSession* session, IDiaSymbol* global) { | |
221 if (global) { | |
222 global->Release(); | |
223 } | |
224 | |
225 if (session) { | |
226 session->Release(); | |
227 } | |
228 | |
229 CoUninitialize(); | |
230 } | |
231 | |
232 int wmain(int argc, wchar_t* argv[]) { | |
233 if (argc < 2) { | |
234 wprintf(L"Usage: ShowGlobals file.pdb"); | |
235 return -1; | |
236 } | |
237 | |
238 const wchar_t* filename = argv[1]; | |
239 | |
240 IDiaDataSource* source = nullptr; | |
241 IDiaSession* session = nullptr; | |
242 IDiaSymbol* global = nullptr; | |
243 if (!(Initialize(filename, &source, &session, &global))) | |
244 return -1; | |
245 | |
246 DumpInterestingGlobals(global, filename); | |
247 | |
248 Cleanup(session, global); | |
stanisc
2016/12/19 22:11:40
What about releasing the source?
brucedawson
2016/12/19 23:00:51
Sample code didn't, but I do now.
| |
249 } | |
OLD | NEW |