tools/traceline/traceline/main.cc - Issue 20494: Import Traceline, a Windows performance trace event logger.

Unified Diff: tools/traceline/traceline/main.cc

Issue 20494: Import Traceline, a Windows performance trace event logger. (Closed)

Patch Set: Feedback. Created 11 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/traceline/traceline/main.cc

diff --git a/tools/traceline/traceline/main.cc b/tools/traceline/traceline/main.cc

new file mode 100755

index 0000000000000000000000000000000000000000..d0e220bdc17f8578a0bcba0476c015ab32d02ef4

--- /dev/null

+++ b/tools/traceline/traceline/main.cc

@@ -0,0 +1,1335 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// TODO

+// - Make capturing system call arguments optional and the number configurable.

+// - Lots of places depend on the ABI so that we can modify EAX or EDX, this

+// is safe, but these could be moved to be saved and restored anyway.

+// - Understand the loader better, and make some more meaningful hooks with

+// proper data collection and durations. Right now it's just noise.

+// - Get the returned pointer from AllocateHeap.

+#include <windows.h>

+#include <stdio.h>

+#include <map>

+#include <string>

+#include "assembler.h"

+#include "logging.h"

+#include "rdtsc.h"

+#include "sym_resolver.h"

+#include "syscall_map.h"

+#include "sidestep/mini_disassembler.h"

+namespace {

+std::string JSONString(const std::string& str) {

+ static const char hextable[] = "0123456789abcdef";

+ std::string out;

+ out.push_back('"');

+ for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {

+ unsigned char c = static_cast<unsigned char>(*it);

+ switch (c) {

+ case '\\':

+ case '"':

+ case '\'':

+ out.push_back('\\'); out.push_back(c);

+ break;

+ default:

+ if (c < 20 || c >= 127) {

+ out.push_back('\\'); out.push_back('x');

+ out.push_back(hextable[c >> 4]); out.push_back(hextable[c & 0xf]);

+ } else {

+ // Unescaped.

+ out.push_back(c);

+ }

+ break;

+ }

+ out.push_back('"');

+ return out;

+} // namespace

+class Playground {

+ public:

+ static const int kPlaygroundSize = 64 * 1024 * 1024;

+ // Encapsulate the configuration options to the playground.

+ class Options {

+ public:

+ Options()

+ : stack_unwind_depth_(0),

+ log_heap_(false),

+ log_lock_(false),

+ vista_(false) { }

+ // The maximum amount of frames we should unwind from the call stack.

+ int stack_unwind_depth() { return stack_unwind_depth_; }

+ void set_stack_unwind_depth(int depth) { stack_unwind_depth_ = depth; }

+ // Whether we should log heap operations (alloc / free).

+ bool log_heap() { return log_heap_; }

+ void set_log_heap(bool x) { log_heap_ = x; }

+ // Whether we should log lock (critical section) operations.

+ bool log_lock() { return log_lock_; }

+ void set_log_lock(bool x) { log_lock_ = x; }

+ // Whether we are running on Vista.

+ bool vista() { return vista_; }

+ void set_vista(bool x) { vista_ = x; }

+ private:

+ int stack_unwind_depth_;

+ bool log_heap_;

+ bool log_lock_;

+ bool vista_;

+ };

+ Playground(HANDLE proc, const Options& options)

+ : proc_(proc),

+ remote_addr_(NULL),

+ resolver_("ntdll.dll"),

+ options_(options) {

+ // We copy the entire playground into the remote process, and we have

+ // fields that we expect to be zero. TODO this could be a lot better.

+ memset(buf_, 0, sizeof(buf_));

+ }

+ void AllocateInRemote() {

+ // Try to get something out of the way and easy to debug.

+ static void* kPlaygroundAddr = reinterpret_cast<void*>(0x66660000);

+ // Allocate our playground memory in the target process. This is a big

+ // slab of read/write/execute memory that we use for our code

+ // instrumentation, and the memory for writing out our logging events.

+ remote_addr_ = reinterpret_cast<char*>(

+ VirtualAllocEx(proc_,

+ kPlaygroundAddr,

+ kPlaygroundSize,

+ MEM_COMMIT | MEM_RESERVE,

+ PAGE_EXECUTE_READWRITE));

+ if (remote_addr_ == NULL || remote_addr_ != kPlaygroundAddr) {

+ NOTREACHED("Falied to allocate playground: 0x%08x", remote_addr_);

+ }

+ void CopyToRemote() {

+ WriteProcessMemory(proc_,

+ remote_addr_,

+ buf_,

+ sizeof(buf_),

+ NULL);

+ }

+ void CopyFromRemote() {

+ SIZE_T size = 0;

+ ReadProcessMemory(proc_,

+ remote_addr_,

+ buf_,

+ sizeof(buf_),

+ &size);

+ }

+ enum EventRecordType {

+ EVENT_TYPE_LDR = 0,

+ EVENT_TYPE_THREADBEGIN = 1,

+ EVENT_TYPE_THREADNAME = 2,

+ EVENT_TYPE_EXCEPTION = 3,

+ EVENT_TYPE_PROCESSEXIT = 4,

+ EVENT_TYPE_CREATETHREAD = 5,

+ EVENT_TYPE_THREADEXIT = 6,

+ EVENT_TYPE_ALLOCHEAP = 7,

+ EVENT_TYPE_FREEHEAP = 8,

+ EVENT_TYPE_SYSCALL = 9,

+ EVENT_TYPE_ENTER_CS = 10,

+ EVENT_TYPE_TRYENTER_CS = 11,

+ EVENT_TYPE_LEAVE_CS = 12,

+ EVENT_TYPE_APC = 13

+ };

+ static const int kThreadNameBufSize = 64;

+ static const int kLdrBufSize = 512; // Looks like internal buffer is 512.

+ static const int kCodeBlockSize = 256;

+ static const int kOffLdrCode = 0 * kCodeBlockSize;

+ static const int kOffCreateThreadCode = 1 * kCodeBlockSize;

+ static const int kOffThreadCode = 2 * kCodeBlockSize;

+ static const int kOffExpCode = 3 * kCodeBlockSize;

+ static const int kOffExitCode = 4 * kCodeBlockSize;

+ static const int kOffThreadExitCode = 5 * kCodeBlockSize;

+ static const int kOffAllocHeapCode = 6 * kCodeBlockSize;

+ static const int kOffFreeHeapCode = 7 * kCodeBlockSize;

+ static const int kOffSyscallCode = 8 * kCodeBlockSize;

+ static const int kOffEnterCritSecCode = 9 * kCodeBlockSize;

+ static const int kOffTryEnterCritSecCode = 10 * kCodeBlockSize;

+ static const int kOffLeaveCritSecCode = 11 * kCodeBlockSize;

+ static const int kOffApcDispCode = 12 * kCodeBlockSize;

+ static const int kOffLogAreaPtr = 4096;

+ static const int kOffLogAreaData = 4096 + 4;

+ static const int kRecordHeaderSize = 8 + 4 + 4 + 4;

+ // Given the address to the start of a function, patch the function to jump

+ // to a given offset into the playground. This function will try to take

+ // advantage of hotpatch code, if the function is prefixed with 5 0x90 bytes.

+ // Returns a std::string of any assembly instructions that must be relocated,

+ // as they were overwritten during patching.

+ std::string PatchPreamble(int func_addr, int playground_off) {

+ sidestep::MiniDisassembler disas;

+ int stub_addr = reinterpret_cast<int>(remote_addr_ + playground_off);

+ std::string instrs;

+ char buf[15];

+ if (ReadProcessMemory(proc_,

+ reinterpret_cast<void*>(func_addr - 5),

+ buf,

+ sizeof(buf),

+ NULL) == 0) {

+ NOTREACHED("ReadProcessMemory(0x%08x) failed: %d",

+ func_addr - 5, GetLastError());

+ }

+ if (memcmp(buf, "\x90\x90\x90\x90\x90", 5) == 0 ||

+ memcmp(buf, "\x00\x8D\x64\x24\x00", 5) == 0) {

+ unsigned int instr_bytes = 0;

+ // We might have a hotpatch no-op of mov edi, edi "\x8b\xff". It is a

+ // bit of a waste to relocate it, but it makes everything simpler.

+ while (instr_bytes < 2) {

+ if (disas.Disassemble(

+ reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),

+ &instr_bytes) != sidestep::IT_GENERIC) {

+ NOTREACHED("Could not disassemble or relocate instruction.");

+ }

+ // We only read 10 bytes worth of instructions.

+ CHECK(instr_bytes < 10);

+ }

+ instrs.assign(buf + 5, instr_bytes);

+ // We have a hotpatch prefix of 5 nop bytes. We can use this for our

+ // long jump, and then overwrite the first 2 bytes to jump back to there.

+ CodeBuffer patch(buf);

+ int off = stub_addr - func_addr;

+ patch.jmp_rel(off);

+ patch.jmp_rel_short(-2 - 5);

+ } else {

+ // We need a full 5 bytes for the jump.

+ unsigned int instr_bytes = 0;

+ while (instr_bytes < 5) {

+ if (disas.Disassemble(

+ reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),

+ &instr_bytes) != sidestep::IT_GENERIC) {

+ NOTREACHED("Could not disassemble or relocate instruction.");

+ }

+ // We only read 10 bytes worth of instructions.

+ CHECK(instr_bytes < 10);

+ }

+ instrs.assign(buf + 5, instr_bytes);

+ // Overwrite the first 5 bytes with a relative jump to our stub.

+ CodeBuffer patch(buf + 5);

+ int off = stub_addr - (func_addr + 5);

+ patch.jmp_rel(off);

+ }

+ // Write back the bytes, we are really probably writing more back than we

+ // need to, but it shouldn't really matter.

+ if (WriteProcessMemory(proc_,

+ reinterpret_cast<void*>(func_addr - 5),

+ buf,

+ sizeof(buf),

+ NULL) == 0) {

+ NOTREACHED("WriteProcessMemory(0x%08x) failed: %d",

+ func_addr - 5, GetLastError());

+ }

+ return instrs;

+ }

+ std::string PatchPreamble(const char* func_name, int playground_off) {

+ return PatchPreamble(

+ reinterpret_cast<int>(resolver_.Resolve(func_name)), playground_off);

+ }

+ // Restore any instructions that needed to be moved to make space for our

+ // patch and jump back to the original code.

+ void ResumeOriginalFunction(const char* func_name,

+ const std::string& moved_instructions,

+ int stub_offset,

+ CodeBuffer* cb) {

+ cb->emit_bytes(moved_instructions);

+ int off = resolver_.Resolve(func_name) +

+ moved_instructions.size() -

+ (remote_addr_ + stub_offset + cb->size() + 5);

+ cb->jmp_rel(off);

+ }

+ // Makes a call to NtQueryPerformanceCounter, writing the timestamp to the

+ // buffer pointed to by EDI. EDI it not incremented. EAX is not preserved.

+ void AssembleQueryPerformanceCounter(CodeBuffer* cb) {

+ // Make a call to NtQueryPerformanceCounter and write the result into

+ // the log area. The buffer we write to should be aligned, but we should

+ // garantee that anyway for the logging area for performance.

+ cb->push_imm(0); // PerformanceFrequency

+ cb->push(EDI); // PerformanceCounter

+ cb->mov_imm(EAX, reinterpret_cast<int>(

+ resolver_.Resolve("ntdll!NtQueryPerformanceCounter")));

+ cb->call(EAX);

+ }

+ // This is the common log setup routine. It will allocate a new log entry,

+ // and write out the common log header to the event entry. The header is:

+ // is [ 64bit QPC ] [ 32bit cpu id ] [ 32bit thread id ] [ 32bit rec id ]

+ // EDI will be left pointing to the log entry, with |space| bytes left for

+ // type specific data. All other registers should not be clobbered.

+ void AssembleHeaderCode(CodeBuffer* cb, EventRecordType rt, int space) {

+ cb->push(EAX);

+ cb->push(EDX);

+ cb->push(ECX);

+ cb->push(ESI);

+ int unwind_depth = options_.stack_unwind_depth();

+ // Load EDI with the number of bytes we want for our log entry, this will

+ // be used in the atomic increment to allocate the log entry.

+ cb->mov_imm(EDI, kRecordHeaderSize + (unwind_depth * 4) + space);

+ // Do the increment and have EDI point to our log entry buffer space.

+ cb->mov_imm(EDX, reinterpret_cast<int>(remote_addr_ + kOffLogAreaPtr));

+ cb->inc_atomic(EDX, EDI);

+ // EDI is the buffer offset, make it a pointer to the record entry.

+ cb->add_imm(EDI, reinterpret_cast<int>(remote_addr_ + kOffLogAreaData));

+ AssembleQueryPerformanceCounter(cb);

+ cb->add_imm(EDI, 8);

+ cb->which_cpu();

+ cb->stosd();

+ cb->which_thread();

+ cb->stosd();

+ // Stack unwinding, follow EBP to the maximum number of frames, and make

+ // sure that it stays on the stack (between ESP and TEB.StackBase).

+ if (unwind_depth > 0) {

+ cb->mov_imm(ECX, unwind_depth);

+ cb->fs(); cb->mov(EDX, Operand(0x04)); // get TEB.StackBase

+ // Start at EBP.

+ cb->mov(EAX, EBP);

+ Label unwind_loop, bail;

+ cb->bind(&unwind_loop);

+ // Bail if (EAX < ESP) (below the stack)

+ cb->cmp(EAX, ESP);

+ cb->jcc(below, &bail);

+ // Bail if (EAX >= EDX) (above the stack)

+ cb->cmp(EAX, EDX);

+ cb->jcc(above_equal, &bail);

+ // We have a valid stack pointer, it should point to something like:

+ // [ saved frame pointer ] [ return address ] [ arguments ... ]

+ cb->mov(ESI, EAX);

+ cb->lodsd(); // Get the new stack pointer to follow in EAX

+ cb->movsd(); // Copy the return address to the log area.

+ cb->loop(&unwind_loop);

+ cb->bind(&bail);

+ // If we did managed to unwind to the max, fill the rest with 0 (really

+ // we just want to inc EDI to the end, and this is an easy way).

+ cb->mov_imm(EAX, 0); // TODO use an xor

+ cb->rep(); cb->stosd();

+ }

+ // Store the type for this record entry.

+ cb->mov_imm(EAX, rt);

+ cb->stosd();

+ cb->pop(ESI);

+ cb->pop(ECX);

+ cb->pop(EDX);

+ cb->pop(EAX);

+ }

+ void PatchLoader() {

+ static const EventRecordType kRecordType = EVENT_TYPE_LDR;

+ static const char* kFuncName = "ntdll!DebugPrint";

+ static const int kStubOffset = kOffLdrCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ // Set ShowSnaps to one to get the print routines to be called.

+ char enabled = 1;

+ WriteProcessMemory(

+ proc_, resolver_.Resolve("ntdll!ShowSnaps"), &enabled, 1, NULL);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.pop(EDX); // return address

+ cb.pop(EAX); // First param in eax

+ cb.push(ESI);

+ cb.push(EDI);

+ cb.push(EDX);

+ cb.mov(ESI, EAX); // ESI points at the string structure.

+ // We used to do variable length based on the length supplied in the str

+ // structure, but it's easier (and sloppier) to just copy a fixed amount.

+ AssembleHeaderCode(&cb, kRecordType, kLdrBufSize);

+ cb.lodsd(); // Load the character count

+ cb.lodsd(); // Load the char*

+ cb.mov(ESI, EAX);

+ cb.mov_imm(ECX, kLdrBufSize / 4); // load the char count as the rep count

+ cb.rep(); cb.movsb(); // Copy the string to the logging buffer

+ // Return

+ cb.pop(EDX);

+ cb.pop(EDI);

+ cb.pop(ESI);

+ cb.pop(ECX); // don't care

+ cb.jmp(EDX);

+ }

+ void PatchCreateThread() {

+ static const EventRecordType kRecordType = EVENT_TYPE_CREATETHREAD;

+ static const char* kFuncName =

+ options_.vista() ? "ntdll!NtCreateThreadEx" : "ntdll!NtCreateThread";

+ static const int kStubOffset = kOffCreateThreadCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ cb.push(ESI);

+ AssembleHeaderCode(&cb, kRecordType, 8);

+ cb.mov(EAX, Operand(ESP, 0x18 + 8));

+ // Super ugly hack. To coorrelate between creating a thread and the new

+ // thread running, we stash something to identify the creating event when

+ // we log the created event. We just use a pointer to the event log data

+ // since this will be unique and can tie the two events together. We pass

+ // it by writing into the context structure, so it will be passed in ESI.

+ cb.add_imm(EAX, 0xa0);

+ cb.push(EDI);

+ cb.mov(EDI, EAX);

+ cb.pop(EAX);

+ cb.push(EAX);

+ cb.stosd();

+ // Get and save CONTEXT.Eip

+ cb.mov(ESI, EDI);

+ cb.add_imm(ESI, 20);

+ cb.pop(EDI);

+ cb.mov(EAX, EDI);

+ cb.stosd(); // Record the event identifier to tie together the events.

+ cb.movsd(); // write Eip to the log event

+ cb.pop(ESI);

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void PatchThreadBegin() {

+ static const EventRecordType kRecordType = EVENT_TYPE_THREADBEGIN;

+ static const char* kFuncName = "ntdll!CsrNewThread";

+ static const int kStubOffset = kOffThreadCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 8);

+ cb.mov(EAX, ESI); // We stashed the creator's eventid in the context ESI.

+ cb.stosd();

+ // TODO(deanm): The pointer is going to point into the CRT or something,

+ // should we dig deeper to get more information about the real entry?

+ cb.mov(EAX, Operand(EBP, 0x8));

+ cb.stosd();

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void PatchThreadBeginVista() {

+ static const EventRecordType kRecordType = EVENT_TYPE_THREADBEGIN;

+ static const char* kFuncName = "ntdll!_RtlUserThreadStart";

+ static const int kStubOffset = kOffThreadCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 8);

+ cb.mov(EAX, ESI); // We stashed the creator's eventid in the context ESI.

+ cb.stosd();

+ // TODO(deanm): The pointer is going to point into the CRT or something,

+ // should we dig deeper to get more information about the real entry?

+ //cb.mov(EAX, Operand(EBP, 0x8));

+ cb.mov_imm(EAX, 0);

+ cb.stosd();

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ // Intercept exception dispatching so we can catch when threads set a thread

+ // name (which is an exception with a special code). TODO it could be

+ // useful to log all exceptions.

+ void PatchSetThreadName() {

+ static const EventRecordType kRecordType = EVENT_TYPE_THREADNAME;

+ static const char* kFuncName = "ntdll!RtlDispatchException";

+ static const int kStubOffset = kOffExpCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.pop(EDX); // return address

+ cb.pop(EAX); // ExceptionRecord

+ cb.push(EAX);

+ cb.push(EDX);

+ cb.push(ESI);

+ cb.mov(ESI, EAX);

+ cb.lodsd();

+ Label bail;

+ // exception code

+ cb.cmp_imm(EAX, 0x406D1388);

+ cb.jcc(not_equal, &bail);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, kThreadNameBufSize);

+ // Fetch the second parameter.

+ for (int i = 0; i < 6; ++i) {

+ cb.lodsd();

+ }

+ // TODO This is sloppy and we could run into unmapped memory...

+ cb.mov(ESI, EAX);

+ cb.mov_imm(ECX, kThreadNameBufSize / 4);

+ cb.rep(); cb.movsd();

+ cb.pop(EDI);

+ cb.bind(&bail);

+ cb.pop(ESI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void PatchThreadExit() {

+ static const EventRecordType kRecordType = EVENT_TYPE_THREADEXIT;

+ static const char* kFuncName = "ntdll!LdrShutdownThread";

+ static const int kStubOffset = kOffThreadExitCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 0);

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void PatchAllocateHeap() {

+ static const EventRecordType kRecordType = EVENT_TYPE_ALLOCHEAP;

+ static const char* kFuncName = "ntdll!RtlAllocateHeap";

+ static const int kStubOffset = kOffAllocHeapCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ cb.push(ESI);

+ AssembleHeaderCode(&cb, kRecordType, 12);

+ cb.mov(ESI, ESP);

+ cb.add_imm(ESI, 12); // Skip over our saved and the return address

+ cb.movsd(); cb.movsd(); cb.movsd(); // Copy the 3 parameters

+ cb.pop(ESI);

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void PatchFreeHeap() {

+ static const EventRecordType kRecordType = EVENT_TYPE_FREEHEAP;

+ static const char* kFuncName = "ntdll!RtlFreeHeap";

+ static const int kStubOffset = kOffFreeHeapCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ cb.push(ESI);

+ AssembleHeaderCode(&cb, kRecordType, 12);

+ cb.mov(ESI, ESP);

+ cb.add_imm(ESI, 12); // Skip over our saved and the return address

+ cb.movsd(); cb.movsd(); cb.movsd(); // Copy the 3 parameters

+ cb.pop(ESI);

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ // Don't even bother going back to the original code, just implement our

+ // own KiFastSystemCall. The original looks like:

+ // .text:7C90EB8B mov edx, esp

+ // .text:7C90EB8D sysenter

+ // .text:7C90EB8F nop

+ // .text:7C90EB90 nop

+ // .text:7C90EB91 nop

+ // .text:7C90EB92 nop

+ // .text:7C90EB93 nop

+ void PatchSyscall() {

+ static const EventRecordType kRecordType = EVENT_TYPE_SYSCALL;

+ static const char* kFuncName = "ntdll!KiFastSystemCall";

+ static const int kStubOffset = kOffSyscallCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ {

+ CodeBuffer cb(buf_ + kStubOffset);

+ Label skip;

+ // Skip 0xa5 which is QueryPerformanceCounter, to make sure we don't log

+ // our own logging's QPC. Disabled for now, using ret addr check...

+ // cb.cmp_imm(EAX, 0xa5);

+ // cb.jcc(equal, &skip);

+ // Check if the return address is from 0x6666 (our code region).

+ // 66817C24066666 cmp word [esp+0x6],0x6666

+ cb.emit(0x66); cb.emit(0x81); cb.emit(0x7C);

+ cb.emit(0x24); cb.emit(0x06); cb.emit(0x66); cb.emit(0x66);

+ cb.jcc(equal, &skip);

+ // This is all a bit shit. Originally I thought I could store some state

+ // on the stack above ESP, however, it seems that when APCs, etc are

+ // queued, they will use the stack above ESP. Well, not above ESP, above

+ // what was passed in as EDX into the systemcall, not matter if ESP was

+ // different than this :(. So we need to store our state in the event

+ // log record, and then we stick a pointer to that over a ret addr...

+ // Our stack starts like:

+ // [ ret addr ] [ ret addr 2 ] [ arguments ]

+ // We will update it to look like

+ // [ ret stub addr ] [ event entry ptr ] [ arguments ]

+ cb.push(EDI); // save EDI since we're using it

+ AssembleHeaderCode(&cb, kRecordType, 16 + 16 + 8);

+ cb.mov(EDX, EAX); // Save EAX...

+ cb.stosd(); // eax is the syscall number

+ cb.pop(EAX);

+ cb.stosd(); // store the saved EDI

+ cb.pop(EAX);

+ cb.stosd(); // store the real return address

+ cb.pop(EAX);

+ cb.stosd(); // store the real (secondary) return address;

+ cb.push(ESI);

+ cb.mov(ESI, ESP);

+ cb.lodsd();

+ cb.movsd(); // argument 1

+ cb.movsd(); // argument 2

+ cb.movsd(); // argument 3

+ cb.pop(ESI);

+ cb.push(EDI); // store our event ptr over the secondary ret addr.

+ cb.push_imm(reinterpret_cast<int>(remote_addr_ + kOffSyscallCode + 200));

+ cb.mov(EAX, EDX); // restore EAX

+ cb.bind(&skip);

+ cb.mov(EDX, ESP);

+ cb.sysenter();

+ if (cb.size() > 200) {

+ NOTREACHED("code too big: %d", cb.size());

+ }

+ {

+ CodeBuffer cb(buf_ + kStubOffset + 200);

+ // TODO share the QPC code, this is a copy and paste...

+ cb.pop(EDI); // get the log area

+ cb.stosd(); // Log the system call return value.

+ // QPC will clobber EAX, and it's very important to save it since it

+ // is the return value from the system call. TODO validate if there is

+ // anything else we need to save...

+ cb.push(EAX);

+ AssembleQueryPerformanceCounter(&cb);

+ cb.pop(EAX);

+ // We need to:

+ // - Restore the original "seconary" return address

+ // - Restore the original value of the EDI register

+ // - Jump control flow to the original return address

+ // All 3 of these values are stored in the log record...

+ // [ syscall num ] [ saved edi ] [ real rets ] [ args ] [ retval ] [ ts ]

+ // currently edi points here ----^

+ cb.push(Operand(EDI, -4 - 16)); // push the real 2nd ret

+ cb.push(Operand(EDI, -8 - 16)); // push the real ret

+ cb.push(Operand(EDI, -12 - 16)); // push the saved EDI

+ cb.pop(EDI); // restore EDI that was saved in the record

+ cb.ret(); // jmp back to the real ret ...

+ if (cb.size() > 56) {

+ NOTREACHED("ug");

+ }

+ // Patch lock (criticial section) holding.

+ void PatchEnterCriticalSection() {

+ static const EventRecordType kRecordType = EVENT_TYPE_ENTER_CS;

+ static const char* kFuncName = "ntdll!RtlEnterCriticalSection";

+ static const int kStubOffset = kOffEnterCritSecCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ // We just want to capture the return address and original argument, so

+ // we know when EnterCriticalSection returned, we don't want to know when

+ // it entered because it could sit waiting. We want to know when the lock

+ // actually started being held. The compiler will sometimes generated code

+ // that overwrites arguments, so we'll keep a copy of the argument just in

+ // case code like this is ever generated in the future. TODO is it enough

+ // to just assume a LPCRITICAL_SECTION uniquely identifies the lock, or

+ // can the same lock have multiple different copies, I would assume not.

+ {

+ CodeBuffer cb(buf_ + kStubOffset);

+ // Set up an additional frame so that we capture the return.

+ // TODO use memory instructions instead of using registers.

+ cb.pop(EAX); // return address

+ cb.pop(EDX); // first argument (critical section pointer)

+ cb.push(EDX);

+ cb.push(EAX);

+ cb.push(EDX);

+ cb.push_imm(

+ reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ CHECK(cb.size() < 40);

+ }

+ {

+ CodeBuffer cb(buf_ + kStubOffset + 40);

+ cb.push(ESI);

+ cb.mov(ESI, ESP);

+ cb.push(EAX);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 4);

+ cb.lodsd(); // Skip over our saved ESI

+ cb.lodsd(); // Skip over the return address

+ cb.movsd(); // Write the CRITICAL_SECTION* to the event record.

+ cb.pop(EDI);

+ cb.pop(EAX);

+ cb.pop(ESI);

+ cb.ret(0x04);

+ }

+ void PatchTryEnterCriticalSection() {

+ static const EventRecordType kRecordType = EVENT_TYPE_TRYENTER_CS;

+ static const char* kFuncName = "ntdll!RtlTryEnterCriticalSection";

+ static const int kStubOffset = kOffTryEnterCritSecCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ {

+ CodeBuffer cb(buf_ + kStubOffset);

+ // Set up an additional frame so that we capture the return.

+ // TODO use memory instructions instead of using registers.

+ cb.pop(EAX); // return address

+ cb.pop(EDX); // first argument (critical section pointer)

+ cb.push(EDX);

+ cb.push(EAX);

+ cb.push(EDX);

+ cb.push_imm(reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ CHECK(cb.size() < 40);

+ }

+ {

+ CodeBuffer cb(buf_ + kStubOffset + 40);

+ cb.push(ESI);

+ cb.mov(ESI, ESP);

+ cb.push(EDI);

+ cb.push(EAX);

+ AssembleHeaderCode(&cb, kRecordType, 8);

+ cb.lodsd(); // Skip over our saved ESI

+ cb.lodsd(); // Skip over the return address

+ cb.movsd(); // Write the CRITICAL_SECTION* to the event record.

+ cb.pop(EAX);

+ cb.stosd(); // Write the return value to the event record.

+ cb.pop(EDI);

+ cb.pop(ESI);

+ cb.ret(0x04);

+ }

+ void PatchLeaveCriticalSection() {

+ static const EventRecordType kRecordType = EVENT_TYPE_LEAVE_CS;

+ static const char* kFuncName = "ntdll!RtlLeaveCriticalSection";

+ static const int kStubOffset = kOffLeaveCritSecCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ // TODO use memory instructions instead of using registers.

+ cb.pop(EDX); // return address

+ cb.pop(EAX); // first argument (critical section pointer)

+ cb.push(EAX);

+ cb.push(EDX);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 4);

+ cb.stosd(); // Write the CRITICAL_SECTION* to the event record.

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ // Patch APC dispatching. This is a bit hacky, since the return to kernel

+ // mode is done with NtContinue, we have to shim in a stub return address to

+ // catch when the callback is finished. It is probably a bit fragile.

+ void PatchApcDispatcher() {

+ static const EventRecordType kRecordType = EVENT_TYPE_APC;

+ static const char* kFuncName = "ntdll!KiUserApcDispatcher";

+ static const int kStubOffset = kOffApcDispCode;

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ {

+ CodeBuffer cb(buf_ + kStubOffset);

+ // We don't really need to preserve these since we're the first thing

+ // executing from the kernel dispatch, but yeah, it is good practice.

+ cb.push(EDI);

+ cb.push(EAX);

+ AssembleHeaderCode(&cb, kRecordType, 4 + 4 + 8);

+ cb.mov_imm(EAX, reinterpret_cast<int>(remote_addr_ + kStubOffset + 140));

+ cb.xchg(EAX, Operand(ESP, 8)); // Swap the callback address with ours.

+ cb.stosd(); // Store the original callback function address.

+ // TODO for now we're lazy and depend that ESI will be preserved, and we

+ // use it to store the pointer into our log record. EDI isn't preserved.

+ cb.mov(ESI, EDI);

+ cb.pop(EAX);

+ cb.pop(EDI);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ CHECK(cb.size() < 140);

+ }

+ {

+ CodeBuffer cb(buf_ + kStubOffset + 140);

+ // This is our shim, we need to call the original callback function, then

+ // we can catch the return and log when it was completed.

+ cb.pop(EAX); // The real return address, safe to use EAX w/ the ABI?

+ cb.push(EDI);

+ cb.mov(EDI, ESI);

+ cb.stosd(); // Store the real return address, we'll need it.

+ cb.add_imm(ESI, -4);

+ cb.lodsd(); // Load the real callback address.

+ cb.mov(ESI, EDI);

+ cb.pop(EDI);

+ cb.call(EAX); // Call the original callback address.

+ cb.push(EAX);

+ cb.push(EDI);

+ cb.mov(EDI, ESI);

+ AssembleQueryPerformanceCounter(&cb);

+ cb.pop(EDI);

+ cb.pop(EAX);

+ cb.push(Operand(ESI, -4)); // Push the real return address.

+ cb.ret(); // Return back to the APC Dispatcher.

+ CHECK(cb.size() < 50);

+ }

+ // We need to hook into process shutdown for two reasons. Most importantly,

+ // we need to copy the playground back from the process before the address

+ // space goes away. We could avoid this with shared memory, however, there

+ // is a reason two. In order to capture symbols for all of the libraries

+ // loaded into arbitrary applications, on shutdown we do an instrusive load

+ // of symbols into the traced process.

+ //

+ // ntdll!LdrShutdownProcess

+ // - NtSetEvent(event, 0);

+ // - NtWaitForSingleObject(event, FALSE, NULL);

+ // - jmp back

+ void PatchExit(HANDLE exiting, HANDLE exited) {

+ static const EventRecordType kRecordType = EVENT_TYPE_PROCESSEXIT;

+ static const char* kFuncName = "ntdll!LdrShutdownProcess";

+ static const int kStubOffset = kOffExitCode;

+ HANDLE rexiting, rexited;

+ if (!DuplicateHandle(::GetCurrentProcess(),

+ exiting,

+ proc_,

+ &rexiting,

+ 0,

+ FALSE,

+ DUPLICATE_SAME_ACCESS)) {

+ NOTREACHED("");

+ }

+ if (!DuplicateHandle(::GetCurrentProcess(),

+ exited,

+ proc_,

+ &rexited,

+ 0,

+ FALSE,

+ DUPLICATE_SAME_ACCESS)) {

+ NOTREACHED("");

+ }

+ std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);

+ CodeBuffer cb(buf_ + kStubOffset);

+ cb.push(EDI);

+ AssembleHeaderCode(&cb, kRecordType, 0);

+ cb.pop(EDI);

+ // NtSetEvent(exiting, 0);

+ cb.push_imm(0);

+ cb.push_imm(reinterpret_cast<int>(rexiting));

+ cb.mov_imm(EAX, reinterpret_cast<int>(

+ resolver_.Resolve("ntdll!NtSetEvent")));

+ cb.call(EAX);

+ // NtWaitForSingleObject(exited, FALSE, INFINITE);

+ cb.push_imm(0);

+ cb.push_imm(reinterpret_cast<int>(rexited));

+ cb.mov_imm(EAX, reinterpret_cast<int>(

+ resolver_.Resolve("ntdll!NtWaitForSingleObject")));

+ cb.call(EAX);

+ ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);

+ }

+ void Patch() {

+ //PatchCreateThread();

+ if (options_.vista()) {

+ PatchThreadBeginVista();

+ } else {

+ PatchThreadBegin();

+ }

+ PatchThreadExit();

+ PatchSetThreadName();

+#if 0

+ // FIXME

+ PatchSyscall();

+#endif

+ PatchApcDispatcher();

+ // The loader logging needs to be improved a bit to really be useful.

+ //PatchLoader();

+ // These are interesting, but will collect a ton of data:

+ if (options_.log_heap()) {

+ PatchAllocateHeap();

+ PatchFreeHeap();

+ }

+ if (options_.log_lock()) {

+ PatchEnterCriticalSection();

+ PatchTryEnterCriticalSection();

+ PatchLeaveCriticalSection();

+ }

+ // Dump the event records from the playground to stdout in a JSON format.

+ // TODO: Drop RDTSCNormalizer, it was from old code that tried to use the

+ // rdtsc counters from the CPU, and this required a bunch of normalization

+ // to account for non-syncronized timestamps across different cores, etc.

+ void DumpJSON(RDTSCNormalizer* rdn, SymResolver* res) {

+ int pos = kOffLogAreaPtr;

+ int i = IntAt(pos);

+ pos += 4;

+ std::map<int, const char*> syscalls = CreateSyscallMap();

+ printf("parseEvents([\n");

+ for (int end = pos + i; pos < end; ) {

+ printf("{\n");

+ __int64 ts = Int64At(pos);

+ pos += 8;

+ void* cpuid = reinterpret_cast<void*>(IntAt(pos));

+ pos += 4;

+ printf("'ms': %f,\n", rdn->MsFromStart(cpuid, ts));

+ printf("'cpu': 0x%x,\n'thread': 0x%x,\n", cpuid, IntAt(pos));

+ pos += 4;

+ if (options_.stack_unwind_depth() > 0) {

+ printf("'stacktrace': [\n");

+ for (int i = 0; i < options_.stack_unwind_depth(); ++i) {

+ int retaddr = IntAt(pos + (i * 4));

+ if (!retaddr)

+ break;

+ printf(" [ 0x%x, %s ],\n",

+ retaddr,

+ res ? JSONString(res->Unresolve(retaddr)).c_str() : "\"\"");

+ }

+ printf("],\n");

+ pos += (options_.stack_unwind_depth() * 4);

+ }

+ EventRecordType rt = static_cast<EventRecordType>(IntAt(pos));

+ pos += 4;

+ switch (rt) {

+ case EVENT_TYPE_LDR:

+ {

+ printf("'eventtype': 'EVENT_TYPE_LDR',\n");

+ std::string str(&buf_[pos], kLdrBufSize);

+ str = str.substr(0, str.find('\0'));

+ printf("'ldrinfo': %s,\n", JSONString(str).c_str());

+ pos += kLdrBufSize;

+ break;

+ }

+ case EVENT_TYPE_CREATETHREAD:

+ {

+ printf("'eventtype': 'EVENT_TYPE_CREATETHREAD',\n"

+ "'eventid': 0x%x,\n"

+ "'startaddr': 0x%x,\n",

+ IntAt(pos), IntAt(pos+4));

+ pos += 8;

+ break;

+ }

+ case EVENT_TYPE_THREADBEGIN:

+ {

+ printf("'eventtype': 'EVENT_TYPE_THREADBEGIN',\n"

+ "'parenteventid': 0x%x,\n"

+ "'startaddr': 0x%x,\n",

+ IntAt(pos), IntAt(pos+4));

+ pos += 8;

+ break;

+ }

+ case EVENT_TYPE_THREADNAME:

+ {

+ std::string str(&buf_[pos], kThreadNameBufSize);

+ str = str.substr(0, str.find('\0'));

+ printf("'eventtype': 'EVENT_TYPE_THREADNAME',\n"

+ "'threadname': %s,\n",

+ JSONString(str).c_str());

+ pos += kThreadNameBufSize;

+ break;

+ }

+ case EVENT_TYPE_PROCESSEXIT:

+ {

+ printf("'eventtype': 'EVENT_TYPE_PROCESSEXIT',\n");

+ break;

+ }

+ case EVENT_TYPE_THREADEXIT:

+ {

+ printf("'eventtype': 'EVENT_TYPE_THREADEXIT',\n");

+ break;

+ }

+ case EVENT_TYPE_ALLOCHEAP:

+ {

+ printf("'eventtype': 'EVENT_TYPE_ALLOCHEAP',\n"

+ "'heaphandle': 0x%x,\n"

+ "'heapflags': 0x%x,\n"

+ "'heapsize': %d,\n",

+ IntAt(pos), IntAt(pos+4), IntAt(pos+8));

+ pos += 12;

+ break;

+ }

+ case EVENT_TYPE_FREEHEAP:

+ {

+ printf("'eventtype': 'EVENT_TYPE_FREEHEAP',\n"

+ "'heaphandle': 0x%x,\n"

+ "'heapflags': 0x%x,\n"

+ "'heapptr': %d,\n",

+ IntAt(pos), IntAt(pos+4), IntAt(pos+8));

+ pos += 12;

+ break;

+ }

+ case EVENT_TYPE_SYSCALL:

+ {

+ int syscall = IntAt(pos);

+ printf("'eventtype': 'EVENT_TYPE_SYSCALL',\n"

+ "'syscall': 0x%x,\n", syscall);

+ pos += 16;

+ printf("'syscallargs': [\n");

+ for (int i = 0; i < 3; ++i) {

+ printf(" 0x%x,\n", IntAt(pos));

+ pos += 4;

+ }

+ printf("],\n");

+ printf("'retval': 0x%x,\n"

+ "'done': %f,\n",

+ IntAt(pos), rdn->MsFromStart(0, Int64At(pos+4)));

+ pos += 12;

+ if (syscalls.count(syscall) == 1) {

+ std::string sname = syscalls[syscall];

+ printf("'syscallname': %s,\n",

+ JSONString(sname).c_str());

+ // Mark system calls that we should consider "waiting" system

+ // calls, where we are not actually active.

+ if (sname.find("WaitFor") != std::string::npos ||

+ sname.find("RemoveIoCompletion") != std::string::npos) {

+ printf("'waiting': 1,\n");

+ }

+ break;

+ }

+ case EVENT_TYPE_ENTER_CS:

+ {

+ printf("'eventtype': 'EVENT_TYPE_ENTER_CS',\n"

+ "'critical_section': 0x%x,\n", IntAt(pos));

+ pos += 4;

+ break;

+ }

+ case EVENT_TYPE_TRYENTER_CS:

+ {

+ printf("'eventtype': 'EVENT_TYPE_TRYENTER_CS',\n"

+ "'critical_section': 0x%x,\n"

+ "'retval': 0x%x,\n",

+ IntAt(pos), IntAt(pos+4));

+ pos += 8;

+ break;

+ }

+ case EVENT_TYPE_LEAVE_CS:

+ {

+ printf("'eventtype': 'EVENT_TYPE_LEAVE_CS',\n"

+ "'critical_section': 0x%x,\n", IntAt(pos));

+ pos += 4;

+ break;

+ }

+ case EVENT_TYPE_APC:

+ {

+ int func_addr = IntAt(pos);

+ printf("'eventtype': 'EVENT_TYPE_APC',\n"

+ "'func_addr': 0x%x,\n"

+ "'func_addr_name': %s,\n"

+ "'ret_addr': 0x%x,\n"

+ "'done': %f,\n",

+ func_addr,

+ res ? JSONString(res->Unresolve(func_addr)).c_str() : "\"\"",

+ IntAt(pos+4), rdn->MsFromStart(0, Int64At(pos+8)));

+ pos += 16;

+ break;

+ }

+ default:

+ NOTREACHED("Unknown event type: %d", rt);

+ break;

+ }

+ printf("},\n");

+ }

+ printf("]);");

+ }

+ int IntAt(int pos) { return *reinterpret_cast<int*>(&buf_[pos]); }

+ __int64 Int64At(int pos) { return *reinterpret_cast<__int64*>(&buf_[pos]); }

+ private:

+ // Handle the process we install into or read back from.

+ HANDLE proc_;

+ // The address where we will keep our playground in the remote process.

+ char* remote_addr_;

+ // Lookup addresses from symbol names for ntdll.dll.

+ SymResolver resolver_;

+ Options options_;

+ // A local copy of the playground data, we copy it into the remote process.

+ char buf_[kPlaygroundSize];

+};

+int main(int argc, char** argv) {

+ std::string command_line;

+ bool use_symbols = false;

+ bool attaching = false;

+ bool launched = false;

+ bool manual_quit = false;

+ Playground::Options options;

+ PROCESS_INFORMATION info = {0};

+ argc--; argv++;

+ while (argc > 0) {

+ if (std::string("--symbols") == argv[0]) {

+ use_symbols = true;

+ } else if (std::string("--vista") == argv[0]) {

+ options.set_vista(true);

+ } else if (std::string("--log-heap") == argv[0]) {

+ options.set_log_heap(true);

+ } else if (std::string("--log-lock") == argv[0]) {

+ options.set_log_lock(true);

+ } else if (std::string("--manual-quit") == argv[0]) {

+ manual_quit = true;

+ } else if (argc >= 2 && std::string("--unwind") == argv[0]) {

+ options.set_stack_unwind_depth(atoi(argv[1]));

+ argc--; argv++;

+ } else if (argc >= 2 && !launched && std::string("--attach") == argv[0]) {

+ attaching = true;

+ info.hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, atoi(argv[1]));

+ launched = true;

+ argc--; argv++;

+ } else if (!launched) {

+ STARTUPINFO start_info = {0};

+ start_info.cb = sizeof(start_info);

+ if (!CreateProcessA(NULL,

+ argv[0],

+ NULL,

+ FALSE,

+ CREATE_SUSPENDED,

+ NULL,

+ &start_info,

+ &info)) {

+ NOTREACHED("Failed to launch \"%s\": %d\n", argv[0], GetLastError());

+ return 1;

+ }

+ launched = true;

+ } else {

+ NOTREACHED("error parsing command line.");

+ }

+ argc--; argv++;

+ }

+ if (!launched) {

+ printf("usage: traceline.exe \"app.exe my arguments\"\n"

+ " --attach 123: buggy support for attaching to a process\n"

+ " --unwind 16: unwind the stack to the specified max depth\n"

+ " --symbols: use symbols for stacktraces\n"

+ " --log-heap: log heap operations (alloc / free).\n"

+ " --log-lock: log lock (critical section) operations.\n");

+ return 1;

+ }

+ HANDLE exiting = CreateEvent(NULL, FALSE, FALSE, NULL);

+ HANDLE exited = CreateEvent(NULL, FALSE, FALSE, NULL);

+ // The playground object is big (32MB), dynamically alloc.

+ Playground* pg = new Playground(info.hProcess, options);

+ pg->AllocateInRemote();

+ pg->Patch();

+ pg->PatchExit(exiting, exited);

+ pg->CopyToRemote();

+ RDTSCNormalizer rdn;

+ rdn.Start();

+ if (!attaching)

+ ResumeThread(info.hThread);

+ // Wait until we have been notified that it's exiting.

+ if (manual_quit) {

+ printf("Press enter when you want to collect.\n");

+ getchar();

+ } else {

+ HANDLE whs[] = {exiting, info.hProcess};

+ if (WaitForMultipleObjects(2, whs, FALSE, INFINITE) != WAIT_OBJECT_0) {

+ NOTREACHED("Failed to correctly capture process shutdown.");

+ }

+ pg->CopyFromRemote();

+ if (use_symbols) {

+ // Break in and get the symbols...

+ SymResolver res(NULL, info.hProcess);

+ pg->DumpJSON(&rdn, &res);

+ } else {

+ pg->DumpJSON(&rdn, NULL);

+ }

+ // Notify that it can exit now, we are done.

+ SetEvent(exited);

+ CloseHandle(info.hProcess);

+ CloseHandle(info.hThread);

+ delete pg;

« no previous file with comments | « tools/traceline/traceline/logging.h ('k') | tools/traceline/traceline/rdtsc.h » ('j') | no next file with comments »