tools/win/ShowGlobals/ShowGlobals.cc - Issue 2580833003: ShowGlobals tool to dump large/repeated Win32 globals

Unified Diff: tools/win/ShowGlobals/ShowGlobals.cc

Issue 2580833003: ShowGlobals tool to dump large/repeated Win32 globals (Closed)

Patch Set: Fixing copyright date Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/win/ShowGlobals/ShowGlobals.cc

diff --git a/tools/win/ShowGlobals/ShowGlobals.cc b/tools/win/ShowGlobals/ShowGlobals.cc

new file mode 100644

index 0000000000000000000000000000000000000000..748b36602d81252d7af1cc561cf5bdbd613bfa77

--- /dev/null

+++ b/tools/win/ShowGlobals/ShowGlobals.cc

@@ -0,0 +1,249 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// This tool scans a PDB file and prints out information about 'interesting'

+// global variables. This includes duplicates and large globals. This is often

+// helpful inunderstanding code bloat or finding inefficient globals.

+//

+// Duplicate global variables often happen when constructs like this are placed

+// in a header file:

+//

+// const double sqrt_two = sqrt(2.0);

+//

+// Many (although usually not all) of the translation units that include this

+// header file will get a copy of sqrt_two, possibly including an initializer.

+// Because 'const' implies 'static' there are no warnings or errors from the

+// linker. This duplication can happen with float/double, structs and classes,

+// and arrays - any non-integral type.

+//

+// Global variables are not necessarily a problem but it is useful to understand

+// them, and monitoring their changes can be instructive.

+#include <dia2.h>

+#include <stdio.h>

+#include <algorithm>

+#include <vector>

+// Helper function for comparing strings - returns a strcmp/wcscmp compatible

+// value.

+int StringCompare(const std::wstring& lhs, const std::wstring& rhs) {

+ return wcscmp(lhs.c_str(), rhs.c_str());

+// Use this struct to record data about symbols for sorting and analysis.

+struct SymbolData {

+ SymbolData(DWORD size, DWORD section, const wchar_t* name)

+ : size(size), section(section), name(name) {}

+ DWORD size;

+ DWORD section;

+ std::wstring name;

+};

+// Comparison function for when sorting symbol data by name, in order to allow

+// looking for duplicate symbols. It uses the symbol size as a tiebreaker. This

+// is necessary because sometimes there are symbols with matching names but

+// different sizes in which case they aren't actually duplicates. These false

+// positives happen because namespaces are omitted from the symbol names that

+// DIA2 returns.

+bool NameCompare(const SymbolData& lhs, const SymbolData& rhs) {

+ int nameCompare = StringCompare(lhs.name, rhs.name);

+ if (nameCompare == 0)

+ return lhs.size < rhs.size;

+ return nameCompare < 0;

+// Comparison function for when sorting symbols by size, in order to allow

+// finding the largest global variables. Use the symbol names as a tiebreaker

+// in order to get consistent ordering.

+bool SizeCompare(const SymbolData& lhs, const SymbolData& rhs) {

+ if (lhs.size == rhs.size) {

+ return StringCompare(lhs.name, rhs.name) < 0;

+ }

stanisc 2016/12/19 22:11:40 Maybe skip braces to make this similar to NameComp

brucedawson 2016/12/19 23:00:51 Done.

+ return lhs.size < rhs.size;

+// Use this struct to store data about repeated globals, for later sorting.

+struct RepeatData {

+ RepeatData(size_t repeat_count, DWORD bytes_wasted, const std::wstring& name)

+ : repeat_count(repeat_count), bytes_wasted(bytes_wasted), name(name) {}

+ bool operator<(const RepeatData& rhs) {

+ return bytes_wasted < rhs.bytes_wasted;

+ }

+ size_t repeat_count;

+ DWORD bytes_wasted;

+ std::wstring name;

+};

+bool DumpInterestingGlobals(IDiaSymbol* global, const wchar_t* filename) {

+ wprintf(L"#Dups\tDupSize\tSize\tSection\tSymbol name\tPDB name\n");

+ // How many bytes must be wasted on repeats before being listed.

+ const int kWastageThreshold = 100;

+ // How big must an individual symbol be before being listed.

+ const int kBigSizeThreshold = 500;

+ std::vector<SymbolData> symbols;

+ std::vector<RepeatData> repeats;

+ IDiaEnumSymbols* enum_symbols;

+ HRESULT result =

+ global->findChildren(SymTagData, NULL, nsNone, &enum_symbols);

+ if (FAILED(result)) {

+ wprintf(L"ERROR - DumpInterestingGlobals() returned no symbols.\n");

+ return false;

+ }

+ IDiaSymbol* symbol;

+ ULONG celt = 0;

+ while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && (celt == 1)) {

+ // If we get get_length on symbol it works for functions but not for

+ // data. For some reason for data we have to call get_type() to get

+ // another IDiaSymbol object which we can query for length.

+ IDiaSymbol* type_symbol;

chengx 2016/12/19 21:48:02 I think type_symbol needs to be freed to avoid mem

brucedawson 2016/12/19 23:00:51 Yep. The original sample code used manual calls to

+ symbol->get_type(&type_symbol);

+ ULONGLONG size = 0;

+ type_symbol->get_length(&size);

+ // Use -1 and -2 as canary values to indicate various failures.

+ DWORD section = (DWORD)-1;

+ if (symbol->get_addressSection(&section) != S_OK)

+ section = -2;

stanisc 2016/12/19 22:11:40 Should this need (DWORD) cast as well?

brucedawson 2016/12/19 23:00:51 Done. And changed to static_cast.

+ BSTR name;

+ if (symbol->get_name(&name) == S_OK) {

+ symbols.push_back(SymbolData((DWORD)size, section, name));

chengx 2016/12/19 21:48:02 Not very familiar with BSTR. But no conversion nee

brucedawson 2016/12/19 23:00:51 BSTR is basically a null-terminated string with we

+ ::SysFreeString(name);

+ }

+ symbol->Release();

+ }

+ // Sort the symbols by name/size so that we can print a report about duplicate

+ // variables.

+ std::sort(symbols.begin(), symbols.end(), NameCompare);

+ for (auto p = symbols.begin(); p != symbols.end(); /**/) {

+ auto pScan = p;

+ // Scan the data looking for symbols that have the same name

+ // and size.

+ while (pScan != symbols.end() && p->size == pScan->size &&

+ StringCompare(p->name, pScan->name) == 0)

+ ++pScan;

+ // Calculate how many times the symbol name/size appears in this PDB.

+ size_t repeat_count = pScan - p;

+ if (repeat_count > 1) {

+ // Change the count from how many instances of this variable there are to

+ // how many *excess* instances there are.

+ --repeat_count;

+ DWORD bytes_wasted = repeat_count * p->size;

+ if (bytes_wasted > kWastageThreshold) {

+ repeats.push_back(RepeatData(repeat_count, bytes_wasted, p->name));

+ }

+ p = pScan;

+ }

+ // Print a summary of duplicated variables, sorted to put the worst offenders

+ // first.

+ std::sort(repeats.begin(), repeats.end());

+ std::reverse(repeats.begin(), repeats.end());

chengx 2016/12/19 21:48:02 Another way is to define a comparison function usi

stanisc 2016/12/19 22:11:40 nit: You could avoid reversing by changing the com

brucedawson 2016/12/19 23:00:51 I had that originally for RepeatData and decided I

+ for (auto& repeat : repeats) {

chengx 2016/12/19 21:48:02 Is "auto const &" better?

brucedawson 2016/12/19 23:00:51 Yes. const all the things.

+ // The empty field contain a zero so that Excel/sheets will more easily

+ // create the pivot tables that I want.

+ wprintf(L"%d\t%u\t0\t0\t%s\t%s\n", repeat.repeat_count, repeat.bytes_wasted,

stanisc 2016/12/19 22:11:40 nit: I think this might not align with the header.

brucedawson 2016/12/19 23:00:51 Yeah, PDB name doesn't align. But it generally doe

+ repeat.name.c_str(), filename);

+ }

+ wprintf(L"\n");

+ // Print a summary of the largest global variables

+ std::sort(symbols.begin(), symbols.end(), SizeCompare);

+ std::reverse(symbols.begin(), symbols.end());

chengx 2016/12/19 21:48:02 Same as line 155, reverse operation can be avoided

stanisc 2016/12/19 22:11:40 The same as above.

+ for (auto p = symbols.begin(); p != symbols.end(); ++p) {

stanisc 2016/12/19 22:11:40 Could use for (auto& p : symbols) cycle

brucedawson 2016/12/19 23:00:51 Done.

+ if (p->size < kBigSizeThreshold)

+ break;

+ // The empty fields contain a zero so that the columns line up which can

+ // be important when pasting the data into a spreadsheet.

+ wprintf(L"0\t0\t%6d\t%d\t%s\t%s\n", p->size, p->section, p->name.c_str(),

+ filename);

+ }

+ return true;

+bool Initialize(const wchar_t* filename,

+ IDiaDataSource** source,

+ IDiaSession** session,

+ IDiaSymbol** global) {

+ HRESULT hr = CoInitialize(NULL);

+ if (FAILED(hr)) {

+ wprintf(L"CoInitialize failed - %08X.", hr);

+ return false;

+ }

+ // Initialize DIA2

+ hr = CoCreateInstance(__uuidof(DiaSource), NULL, CLSCTX_INPROC_SERVER,

+ __uuidof(IDiaDataSource), (void**)source);

+ if (FAILED(hr)) {

+ wprintf(L"Failed to initialized DIA2 - %08X.\n", hr);

+ return false;

+ }

+ // Open the PDB

+ hr = (*source)->loadDataFromPdb(filename);

+ if (FAILED(hr)) {

+ wprintf(L"LoadDataFromPdb failed - %08X.\n", hr);

+ return false;

+ }

+ hr = (*source)->openSession(session);

+ if (FAILED(hr)) {

+ wprintf(L"OpenSession failed - %08X.\n", hr);

+ return false;

+ }

+ // Retrieve a reference to the global scope

+ hr = (*session)->get_globalScope(global);

+ if (hr != S_OK) {

+ wprintf(L"Get_globalScope failed - %08X.\n", hr);

+ return false;

+ }

+ return true;

+void Cleanup(IDiaSession* session, IDiaSymbol* global) {

+ if (global) {

+ global->Release();

+ }

+ if (session) {

+ session->Release();

+ }

+ CoUninitialize();

+int wmain(int argc, wchar_t* argv[]) {

+ if (argc < 2) {

+ wprintf(L"Usage: ShowGlobals file.pdb");

+ return -1;

+ }

+ const wchar_t* filename = argv[1];

+ IDiaDataSource* source = nullptr;

+ IDiaSession* session = nullptr;

+ IDiaSymbol* global = nullptr;

+ if (!(Initialize(filename, &source, &session, &global)))

+ return -1;

+ DumpInterestingGlobals(global, filename);

+ Cleanup(session, global);

stanisc 2016/12/19 22:11:40 What about releasing the source?

brucedawson 2016/12/19 23:00:51 Sample code didn't, but I do now.

« no previous file with comments | « no previous file | tools/win/ShowGlobals/ShowGlobals.sln » ('j') | no next file with comments »