Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(351)

Side by Side Diff: src/IceGlobalContext.cpp

Issue 1147023007: Subzero: Basic Block Profiler. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds Basic Block Profiling. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===// 1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file defines aspects of the compilation that persist across 10 // This file defines aspects of the compilation that persist across
11 // multiple functions. 11 // multiple functions.
12 // 12 //
13 //===----------------------------------------------------------------------===// 13 //===----------------------------------------------------------------------===//
14 14
15 #include <ctype.h> // isdigit(), isupper() 15 #include <ctype.h> // isdigit(), isupper()
16 #include <locale> // locale 16 #include <locale> // locale
17 #include <unordered_map> 17 #include <unordered_map>
18 18
19 #include "llvm/Support/Timer.h" 19 #include "llvm/Support/Timer.h"
20 20
21 #include "IceCfg.h" 21 #include "IceCfg.h"
22 #include "IceCfgNode.h"
22 #include "IceClFlags.h" 23 #include "IceClFlags.h"
23 #include "IceDefs.h" 24 #include "IceDefs.h"
24 #include "IceELFObjectWriter.h" 25 #include "IceELFObjectWriter.h"
25 #include "IceGlobalContext.h" 26 #include "IceGlobalContext.h"
26 #include "IceGlobalInits.h" 27 #include "IceGlobalInits.h"
27 #include "IceOperand.h" 28 #include "IceOperand.h"
28 #include "IceTargetLowering.h" 29 #include "IceTargetLowering.h"
29 #include "IceTimerTree.h" 30 #include "IceTimerTree.h"
30 #include "IceTypes.h" 31 #include "IceTypes.h"
31 32
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
248 switch (Flags.getOutFileType()) { 249 switch (Flags.getOutFileType()) {
249 case FT_Elf: 250 case FT_Elf:
250 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr)); 251 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
251 break; 252 break;
252 case FT_Asm: 253 case FT_Asm:
253 case FT_Iasm: 254 case FT_Iasm:
254 break; 255 break;
255 } 256 }
256 } 257 }
257 258
259 namespace {
260 void addCallToProfileSummary(Cfg *Func) {
261 auto *Context = Func->getContext();
262 auto *ProfileSummarySym = Context->getConstantExternSym("profile_summary");
263 constexpr bool HasTailCall = false;
Jim Stichnoth 2015/06/08 23:45:32 We've been using "const bool ..." instead of const
John 2015/06/09 15:36:18 constexpr is a compile time constant, and the comp
264 auto *Call =
265 InstCall::create(Func, 0, nullptr, ProfileSummarySym, HasTailCall);
266 Func->getEntryNode()->getInsts().push_front(Call);
267 }
268 } // end of anonymous namespace
Jim Stichnoth 2015/06/08 23:45:31 end of anonymous namespace
John 2015/06/09 15:36:19 Done.
269
258 void GlobalContext::translateFunctions() { 270 void GlobalContext::translateFunctions() {
259 while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) { 271 while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
260 // Install Func in TLS for Cfg-specific container allocators. 272 // Install Func in TLS for Cfg-specific container allocators.
261 Cfg::setCurrentCfg(Func.get()); 273 Cfg::setCurrentCfg(Func.get());
262 // Reset per-function stats being accumulated in TLS. 274 // Reset per-function stats being accumulated in TLS.
263 resetStats(); 275 resetStats();
264 // Set verbose level to none if the current function does NOT 276 // Set verbose level to none if the current function does NOT
265 // match the -verbose-focus command-line option. 277 // match the -verbose-focus command-line option.
266 if (!matchSymbolName(Func->getFunctionName(), 278 if (!matchSymbolName(Func->getFunctionName(),
267 getFlags().getVerboseFocusOn())) 279 getFlags().getVerboseFocusOn()))
268 Func->setVerbose(IceV_None); 280 Func->setVerbose(IceV_None);
269 // Disable translation if -notranslate is specified, or if the 281 // Disable translation if -notranslate is specified, or if the
270 // current function matches the -translate-only option. If 282 // current function matches the -translate-only option. If
271 // translation is disabled, just dump the high-level IR and 283 // translation is disabled, just dump the high-level IR and
272 // continue. 284 // continue.
273 if (getFlags().getDisableTranslation() || 285 if (getFlags().getDisableTranslation() ||
274 !matchSymbolName(Func->getFunctionName(), 286 !matchSymbolName(Func->getFunctionName(),
275 getFlags().getTranslateOnly())) { 287 getFlags().getTranslateOnly())) {
276 Func->dump(); 288 Func->dump();
277 Cfg::setCurrentCfg(nullptr); 289 Cfg::setCurrentCfg(nullptr);
278 continue; // Func goes out of scope and gets deleted 290 continue; // Func goes out of scope and gets deleted
279 } 291 }
292
293 // ProfilerInits contains the VariableDeclarations for all the
Jim Stichnoth 2015/06/08 23:45:32 Would it be reasonable to move this new code into
John 2015/06/09 15:36:20 I thought about that in the beginning, but then I
294 // profiler-related symbols. If profiling is disabled, this array
295 // will be null.
296 std::unique_ptr<VariableDeclarationList> ProfilerInits;
297 if (getFlags().getEnableBlockProfile()) {
298 ProfilerInits = Func->profileBlocks();
299 if (matchSymbolName(Func->getFunctionName(), "exit")) {
Jim Stichnoth 2015/06/08 23:45:32 Does a normal return from main() lead to a call to
John 2015/06/09 15:36:19 It does, at least in the current library:
300 addCallToProfileSummary(Func.get());
301 }
302 }
303
280 Func->translate(); 304 Func->translate();
281 EmitterWorkItem *Item = nullptr; 305 EmitterWorkItem *Item = nullptr;
282 if (Func->hasError()) { 306 if (Func->hasError()) {
283 getErrorStatus()->assign(EC_Translation); 307 getErrorStatus()->assign(EC_Translation);
284 OstreamLocker L(this); 308 OstreamLocker L(this);
285 getStrError() << "ICE translation error: " << Func->getFunctionName() 309 getStrError() << "ICE translation error: " << Func->getFunctionName()
286 << ": " << Func->getError() << "\n"; 310 << ": " << Func->getError() << "\n";
287 Item = new EmitterWorkItem(Func->getSequenceNumber()); 311 Item = new EmitterWorkItem(Func->getSequenceNumber());
288 } else { 312 } else {
289 Func->getAssembler<>()->setInternal(Func->getInternal()); 313 Func->getAssembler<>()->setInternal(Func->getInternal());
(...skipping 12 matching lines...) Expand all
302 } break; 326 } break;
303 case FT_Asm: 327 case FT_Asm:
304 // The Cfg has not been emitted yet, so stats are not ready 328 // The Cfg has not been emitted yet, so stats are not ready
305 // to be dumped. 329 // to be dumped.
306 Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release()); 330 Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release());
307 break; 331 break;
308 } 332 }
309 } 333 }
310 Cfg::setCurrentCfg(nullptr); 334 Cfg::setCurrentCfg(nullptr);
311 assert(Item); 335 assert(Item);
336 Item->setProfilerInits(std::move(ProfilerInits));
312 emitQueueBlockingPush(Item); 337 emitQueueBlockingPush(Item);
313 // The Cfg now gets deleted as Func goes out of scope. 338 // The Cfg now gets deleted as Func goes out of scope.
314 } 339 }
315 } 340 }
316 341
317 namespace { 342 namespace {
318 343
344 VariableDeclaration *blockProfileInfo(const VariableDeclarationList &Globals) {
345 auto *Var = VariableDeclaration::create();
346 Var->setAlignment(8);
347 Var->setIsConstant(true);
348 Var->setName("block_profile_info");
349 Var->setLinkage(llvm::GlobalValue::ExternalLinkage);
350 for (const auto *Global : Globals) {
351 if (Cfg::isProfileGlobal(*Global)) {
352 Var->addInitializer(new VariableDeclaration::RelocInitializer(Global, 0));
Jim Stichnoth 2015/06/08 23:45:32 Can you "document" the 0, and the 8 a few lines do
John 2015/06/09 15:36:20 Done.
353 }
354 }
355
356 Var->addInitializer(new VariableDeclaration::ZeroInitializer(8));
357
358 return Var;
359 }
360
361 void addBlockProfileInfoArrayToGlobals(VariableDeclarationList *Globals) {
362 // Purposefully create the Var temp to prevent bugs in case the compiler
Jim Stichnoth 2015/06/08 23:45:32 compilers ==> compiler
John 2015/06/09 15:36:19 Done.
363 // reorders instructions in a way that Globals is extended before the call
364 // to blockProfileInfo.
365 auto *Var = blockProfileInfo(*Globals);
366 Globals->push_back(Var);
367 }
368
319 void lowerGlobals(GlobalContext *Ctx, 369 void lowerGlobals(GlobalContext *Ctx,
320 std::unique_ptr<VariableDeclarationList> VariableDeclarations, 370 std::unique_ptr<VariableDeclarationList> VariableDeclarations,
321 TargetDataLowering *DataLowering) { 371 TargetDataLowering *DataLowering) {
322 TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx); 372 TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx);
323 const bool DumpGlobalVariables = ALLOW_DUMP && Ctx->getFlags().getVerbose() && 373 const bool DumpGlobalVariables = ALLOW_DUMP && Ctx->getFlags().getVerbose() &&
324 Ctx->getFlags().getVerboseFocusOn().empty(); 374 Ctx->getFlags().getVerboseFocusOn().empty();
325 if (DumpGlobalVariables) { 375 if (DumpGlobalVariables) {
326 OstreamLocker L(Ctx); 376 OstreamLocker L(Ctx);
327 Ostream &Stream = Ctx->getStrDump(); 377 Ostream &Stream = Ctx->getStrDump();
328 for (const Ice::VariableDeclaration *Global : *VariableDeclarations) { 378 for (const Ice::VariableDeclaration *Global : *VariableDeclarations) {
329 Global->dump(Ctx, Stream); 379 Global->dump(Ctx, Stream);
330 } 380 }
331 } 381 }
332 if (Ctx->getFlags().getDisableTranslation()) 382 if (Ctx->getFlags().getDisableTranslation())
333 return; 383 return;
384
385 // There should be no need to emit the block_profile_info array if profiling
386 // is disabled. In practice, given that szrt_profiler.o will always be embedded
Jim Stichnoth 2015/06/08 23:45:31 embedded
John 2015/06/09 15:36:18 Done.
387 // in the application, we need to add it. In a non-profiled build this array
388 // will only contain the nullptr terminator.
389 addBlockProfileInfoArrayToGlobals(VariableDeclarations.get());
390
334 DataLowering->lowerGlobals(std::move(VariableDeclarations)); 391 DataLowering->lowerGlobals(std::move(VariableDeclarations));
335 } 392 }
336 393
337 // Ensure Pending is large enough that Pending[Index] is valid. 394 // Ensure Pending is large enough that Pending[Index] is valid.
338 void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) { 395 void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
339 if (Index >= Pending.size()) 396 if (Index >= Pending.size())
340 Pending.resize(Index + 1); 397 Pending.resize(Index + 1);
341 } 398 }
342 399
400 void AddAllIfNotNull(std::unique_ptr<VariableDeclarationList> src,
Jim Stichnoth 2015/06/08 23:45:32 AddAllIfNotNull ==> addAllIfNotNull
John 2015/06/09 15:36:20 Done.
401 VariableDeclarationList *dst) {
402 if (src != nullptr) {
403 dst->insert(dst->end(), src->begin(), src->end());
404 }
405 }
406
343 } // end of anonymous namespace 407 } // end of anonymous namespace
344 408
345 void GlobalContext::emitItems() { 409 void GlobalContext::emitItems() {
346 const bool Threaded = !getFlags().isSequential(); 410 const bool Threaded = !getFlags().isSequential();
347 // Pending is a vector containing the reassembled, ordered list of 411 // Pending is a vector containing the reassembled, ordered list of
348 // work items. When we're ready for the next item, we first check 412 // work items. When we're ready for the next item, we first check
349 // whether it's in the Pending list. If not, we take an item from 413 // whether it's in the Pending list. If not, we take an item from
350 // the work queue, and if it's not the item we're waiting for, we 414 // the work queue, and if it's not the item we're waiting for, we
351 // insert it into Pending and repeat. The work item is deleted 415 // insert it into Pending and repeat. The work item is deleted
352 // after it is processed. 416 // after it is processed.
417 std::unique_ptr<VariableDeclarationList> GlobalInits(
418 new VariableDeclarationList());
353 std::vector<EmitterWorkItem *> Pending; 419 std::vector<EmitterWorkItem *> Pending;
354 uint32_t DesiredSequenceNumber = getFirstSequenceNumber(); 420 uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
355 while (true) { 421 while (true) {
356 resizePending(Pending, DesiredSequenceNumber); 422 resizePending(Pending, DesiredSequenceNumber);
357 // See if Pending contains DesiredSequenceNumber. 423 // See if Pending contains DesiredSequenceNumber.
358 EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber]; 424 EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber];
359 if (RawItem == nullptr) 425 if (RawItem == nullptr)
360 RawItem = emitQueueBlockingPop(); 426 RawItem = emitQueueBlockingPop();
361 if (RawItem == nullptr) 427 if (RawItem == nullptr)
362 return; 428 break;
363 uint32_t ItemSeq = RawItem->getSequenceNumber(); 429 uint32_t ItemSeq = RawItem->getSequenceNumber();
364 if (Threaded && ItemSeq != DesiredSequenceNumber) { 430 if (Threaded && ItemSeq != DesiredSequenceNumber) {
365 resizePending(Pending, ItemSeq); 431 resizePending(Pending, ItemSeq);
366 Pending[ItemSeq] = RawItem; 432 Pending[ItemSeq] = RawItem;
367 continue; 433 continue;
368 } 434 }
369 435
370 std::unique_ptr<EmitterWorkItem> Item(RawItem); 436 std::unique_ptr<EmitterWorkItem> Item(RawItem);
371 ++DesiredSequenceNumber; 437 ++DesiredSequenceNumber;
372 switch (Item->getKind()) { 438 switch (Item->getKind()) {
373 case EmitterWorkItem::WI_Nop: 439 case EmitterWorkItem::WI_Nop:
374 break; 440 break;
375 case EmitterWorkItem::WI_GlobalInits: { 441 case EmitterWorkItem::WI_GlobalInits: {
376 lowerGlobals(this, Item->getGlobalInits(), 442 AddAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
377 TargetDataLowering::createLowering(this).get());
378 } break; 443 } break;
379 case EmitterWorkItem::WI_Asm: { 444 case EmitterWorkItem::WI_Asm: {
445 AddAllIfNotNull(Item->getProfilerInits(), GlobalInits.get());
380 std::unique_ptr<Assembler> Asm = Item->getAsm(); 446 std::unique_ptr<Assembler> Asm = Item->getAsm();
381 Asm->alignFunction(); 447 Asm->alignFunction();
382 IceString MangledName = mangleName(Asm->getFunctionName()); 448 IceString MangledName = mangleName(Asm->getFunctionName());
383 switch (getFlags().getOutFileType()) { 449 switch (getFlags().getOutFileType()) {
384 case FT_Elf: 450 case FT_Elf:
385 getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(), 451 getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(),
386 Asm.get()); 452 Asm.get());
387 break; 453 break;
388 case FT_Iasm: { 454 case FT_Iasm: {
389 OstreamLocker L(this); 455 OstreamLocker L(this);
390 Cfg::emitTextHeader(MangledName, this, Asm.get()); 456 Cfg::emitTextHeader(MangledName, this, Asm.get());
391 Asm->emitIASBytes(this); 457 Asm->emitIASBytes(this);
392 } break; 458 } break;
393 case FT_Asm: 459 case FT_Asm:
394 llvm::report_fatal_error("Unexpected FT_Asm"); 460 llvm::report_fatal_error("Unexpected FT_Asm");
395 break; 461 break;
396 } 462 }
397 } break; 463 } break;
398 case EmitterWorkItem::WI_Cfg: { 464 case EmitterWorkItem::WI_Cfg: {
399 if (!ALLOW_DUMP) 465 if (!ALLOW_DUMP)
400 llvm::report_fatal_error("WI_Cfg work item created inappropriately"); 466 llvm::report_fatal_error("WI_Cfg work item created inappropriately");
467
468 AddAllIfNotNull(Item->getProfilerInits(), GlobalInits.get());
469
401 assert(getFlags().getOutFileType() == FT_Asm); 470 assert(getFlags().getOutFileType() == FT_Asm);
402 std::unique_ptr<Cfg> Func = Item->getCfg(); 471 std::unique_ptr<Cfg> Func = Item->getCfg();
403 // Unfortunately, we have to temporarily install the Cfg in TLS 472 // Unfortunately, we have to temporarily install the Cfg in TLS
404 // because Variable::asType() uses the allocator to create the 473 // because Variable::asType() uses the allocator to create the
405 // differently-typed copy. 474 // differently-typed copy.
406 Cfg::setCurrentCfg(Func.get()); 475 Cfg::setCurrentCfg(Func.get());
407 Func->emit(); 476 Func->emit();
408 Cfg::setCurrentCfg(nullptr); 477 Cfg::setCurrentCfg(nullptr);
409 dumpStats(Func->getFunctionName()); 478 dumpStats(Func->getFunctionName());
410 } break; 479 } break;
411 } 480 }
412 } 481 }
482
483 lowerGlobals(this, std::move(GlobalInits),
484 TargetDataLowering::createLowering(this).get());
413 } 485 }
414 486
415 // Scan a string for S[0-9A-Z]*_ patterns and replace them with 487 // Scan a string for S[0-9A-Z]*_ patterns and replace them with
416 // S<num>_ where <num> is the next base-36 value. If a type name 488 // S<num>_ where <num> is the next base-36 value. If a type name
417 // legitimately contains that pattern, then the substitution will be 489 // legitimately contains that pattern, then the substitution will be
418 // made in error and most likely the link will fail. In this case, 490 // made in error and most likely the link will fail. In this case,
419 // the test classes can be rewritten not to use that pattern, which is 491 // the test classes can be rewritten not to use that pattern, which is
420 // much simpler and more reliable than implementing a full demangling 492 // much simpler and more reliable than implementing a full demangling
421 // parser. Another substitution-in-error may occur if a type 493 // parser. Another substitution-in-error may occur if a type
422 // identifier ends with the pattern S[0-9A-Z]*, because an immediately 494 // identifier ends with the pattern S[0-9A-Z]*, because an immediately
(...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after
829 Ctx = Func->getContext(); 901 Ctx = Func->getContext();
830 Active = 902 Active =
831 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled(); 903 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled();
832 if (Active) 904 if (Active)
833 Ctx->pushTimer(ID, StackID); 905 Ctx->pushTimer(ID, StackID);
834 } 906 }
835 907
836 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS); 908 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
837 909
838 } // end of namespace Ice 910 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698