Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(485)

Side by Side Diff: src/IceGlobalContext.cpp

Issue 916653004: Subzero: Emit functions and global initializers in a separate thread. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Const change Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceGlobalContext.h ('k') | src/IceTargetLowering.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===// 1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file defines aspects of the compilation that persist across 10 // This file defines aspects of the compilation that persist across
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 Str << "\n"; 127 Str << "\n";
128 } 128 }
129 129
130 GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, 130 GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
131 ELFStreamer *ELFStr, VerboseMask Mask, 131 ELFStreamer *ELFStr, VerboseMask Mask,
132 TargetArch Arch, OptLevel Opt, 132 TargetArch Arch, OptLevel Opt,
133 IceString TestPrefix, const ClFlags &Flags) 133 IceString TestPrefix, const ClFlags &Flags)
134 : ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump), 134 : ConstPool(new ConstantPool()), ErrorStatus(), StrDump(OsDump),
135 StrEmit(OsEmit), VMask(Mask), Arch(Arch), Opt(Opt), 135 StrEmit(OsEmit), VMask(Mask), Arch(Arch), Opt(Opt),
136 TestPrefix(TestPrefix), Flags(Flags), RNG(""), ObjectWriter(), 136 TestPrefix(TestPrefix), Flags(Flags), RNG(""), ObjectWriter(),
137 CfgQ(/*MaxSize=*/Flags.getNumTranslationThreads(), 137 OptQ(/*Sequential=*/Flags.isSequential(),
138 /*Sequential=*/(Flags.getNumTranslationThreads() == 0)) { 138 /*MaxSize=*/Flags.getNumTranslationThreads()),
139 // EmitQ is allowed unlimited size.
140 EmitQ(/*Sequential=*/Flags.isSequential()) {
139 // Make sure thread_local fields are properly initialized before any 141 // Make sure thread_local fields are properly initialized before any
140 // accesses are made. Do this here instead of at the start of 142 // accesses are made. Do this here instead of at the start of
141 // main() so that all clients (e.g. unit tests) can benefit for 143 // main() so that all clients (e.g. unit tests) can benefit for
142 // free. 144 // free.
143 GlobalContext::TlsInit(); 145 GlobalContext::TlsInit();
144 Cfg::TlsInit(); 146 Cfg::TlsInit();
145 // Create a new ThreadContext for the current thread. No need to 147 // Create a new ThreadContext for the current thread. No need to
146 // lock AllThreadContexts at this point since no other threads have 148 // lock AllThreadContexts at this point since no other threads have
147 // access yet to this GlobalContext object. 149 // access yet to this GlobalContext object.
148 ThreadContext *MyTLS = new ThreadContext(); 150 ThreadContext *MyTLS = new ThreadContext();
149 AllThreadContexts.push_back(MyTLS); 151 AllThreadContexts.push_back(MyTLS);
150 ICE_TLS_SET_FIELD(TLS, MyTLS); 152 ICE_TLS_SET_FIELD(TLS, MyTLS);
151 // Pre-register built-in stack names. 153 // Pre-register built-in stack names.
152 if (ALLOW_DUMP) { 154 if (ALLOW_DUMP) {
153 // TODO(stichnot): There needs to be a strong relationship between 155 // TODO(stichnot): There needs to be a strong relationship between
154 // the newTimerStackID() return values and TSK_Default/TSK_Funcs. 156 // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
155 newTimerStackID("Total across all functions"); 157 newTimerStackID("Total across all functions");
156 newTimerStackID("Per-function summary"); 158 newTimerStackID("Per-function summary");
157 } 159 }
158 Timers.initInto(MyTLS->Timers); 160 Timers.initInto(MyTLS->Timers);
159 if (Flags.getUseELFWriter()) { 161 if (Flags.getUseELFWriter()) {
160 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr)); 162 ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
161 } 163 }
162 } 164 }
163 165
164 void GlobalContext::translateFunctions() { 166 void GlobalContext::translateFunctions() {
165 while (std::unique_ptr<Cfg> Func = cfgQueueBlockingPop()) { 167 while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
166 // Install Func in TLS for Cfg-specific container allocators. 168 // Install Func in TLS for Cfg-specific container allocators.
167 Cfg::setCurrentCfg(Func.get()); 169 Cfg::setCurrentCfg(Func.get());
168 // Reset per-function stats being accumulated in TLS. 170 // Reset per-function stats being accumulated in TLS.
169 resetStats(); 171 resetStats();
170 // Set verbose level to none if the current function does NOT 172 // Set verbose level to none if the current function does NOT
171 // match the -verbose-focus command-line option. 173 // match the -verbose-focus command-line option.
172 if (!matchSymbolName(Func->getFunctionName(), 174 if (!matchSymbolName(Func->getFunctionName(),
173 getFlags().getVerboseFocusOn())) 175 getFlags().getVerboseFocusOn()))
174 Func->setVerbose(IceV_None); 176 Func->setVerbose(IceV_None);
175 // Disable translation if -notranslate is specified, or if the 177 // Disable translation if -notranslate is specified, or if the
176 // current function does not match the -translate-only option. If 178 // current function does not match the -translate-only option. If
177 // translation is disabled, just dump the high-level IR and 179 // translation is disabled, just dump the high-level IR and
178 // continue. 180 // continue.
179 if (getFlags().getDisableTranslation() || 181 if (getFlags().getDisableTranslation() ||
180 !matchSymbolName(Func->getFunctionName(), 182 !matchSymbolName(Func->getFunctionName(),
181 getFlags().getTranslateOnly())) { 183 getFlags().getTranslateOnly())) {
182 Func->dump(); 184 Func->dump();
185 Cfg::setCurrentCfg(nullptr);
186 continue; // Func goes out of scope and gets deleted
187 }
188 Func->translate();
189 EmitterWorkItem *Item = nullptr;
190 if (Func->hasError()) {
191 getErrorStatus()->assign(EC_Translation);
192 OstreamLocker L(this);
193 getStrDump() << "ICE translation error: " << Func->getError() << "\n";
194 Item = new EmitterWorkItem(Func->getSequenceNumber());
183 } else { 195 } else {
184 Func->translate(); 196 if (getFlags().getUseIntegratedAssembler()) {
185 if (Func->hasError()) { 197 Func->emitIAS();
186 getErrorStatus()->assign(EC_Translation); 198 // The Cfg has already emitted into the assembly buffer, so
187 OstreamLocker L(this); 199 // stats have been fully collected into this thread's TLS.
188 getStrDump() << "ICE translation error: " << Func->getError() << "\n"; 200 // Dump them before TLS is reset for the next Cfg.
201 dumpStats(Func->getFunctionName());
202 Assembler *Asm = Func->releaseAssembler();
203 // Copy relevant fields into Asm before Func is deleted.
204 Asm->setFunctionName(Func->getFunctionName());
205 Asm->setInternal(Func->getInternal());
206 Item = new EmitterWorkItem(Func->getSequenceNumber(), Asm);
189 } else { 207 } else {
190 if (getFlags().getUseIntegratedAssembler()) 208 // The Cfg has not been emitted yet, so stats are not ready
191 Func->emitIAS(); 209 // to be dumped.
192 else 210 Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release());
193 Func->emit();
194 // TODO(stichnot): actually add to emit queue
195 } 211 }
196 dumpStats(Func->getFunctionName());
197 } 212 }
198 Cfg::setCurrentCfg(nullptr); 213 Cfg::setCurrentCfg(nullptr);
214 assert(Item);
215 emitQueueBlockingPush(Item);
199 // The Cfg now gets deleted as Func goes out of scope. 216 // The Cfg now gets deleted as Func goes out of scope, unless ownership was released to the emitter work item above.
200 } 217 }
201 } 218 }
202 219
220 namespace {
221
222 void lowerGlobals(GlobalContext *Ctx,
223 std::unique_ptr<VariableDeclarationList> VariableDeclarations,
224 TargetDataLowering *DataLowering) {
225 TimerMarker T(TimerStack::TT_emitGlobalInitializers, Ctx);
226 const bool DumpGlobalVariables = ALLOW_DUMP && Ctx->getVerbose() &&
227 Ctx->getFlags().getVerboseFocusOn().empty();
228 if (DumpGlobalVariables) {
229 OstreamLocker L(Ctx);
230 Ostream &Stream = Ctx->getStrDump();
231 for (const Ice::VariableDeclaration *Global : *VariableDeclarations) {
232 Global->dump(Ctx, Stream);
233 }
234 }
235 if (Ctx->getFlags().getDisableTranslation())
236 return;
237 DataLowering->lowerGlobals(std::move(VariableDeclarations));
238 }
239
240 // Ensure Pending is large enough that Pending[Index] is valid.
241 void resizePending(std::vector<EmitterWorkItem *> &Pending, uint32_t Index) {
242 if (Index >= Pending.size())
243 Pending.resize(Index + 1);
244 }
245
246 } // end of anonymous namespace
247
248 void GlobalContext::emitItems() {
249 const bool Threaded = !getFlags().isSequential();
250 // Pending is a vector containing the reassembled, ordered list of
251 // work items. When we're ready for the next item, we first check
252 // whether it's in the Pending list. If not, we take an item from
253 // the work queue, and if it's not the item we're waiting for, we
254 // insert it into Pending and repeat. The work item is deleted
255 // after it is processed.
256 std::vector<EmitterWorkItem *> Pending;
257 uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
258 while (true) {
259 resizePending(Pending, DesiredSequenceNumber);
260 // See if Pending contains DesiredSequenceNumber.
261 EmitterWorkItem *RawItem = Pending[DesiredSequenceNumber];
262 if (RawItem == nullptr)
263 RawItem = emitQueueBlockingPop();
264 if (RawItem == nullptr)
265 return;
266 uint32_t ItemSeq = RawItem->getSequenceNumber();
267 if (Threaded && ItemSeq != DesiredSequenceNumber) {
268 resizePending(Pending, ItemSeq);
269 Pending[ItemSeq] = RawItem;
270 continue;
271 }
272
273 std::unique_ptr<EmitterWorkItem> Item(RawItem);
274 ++DesiredSequenceNumber;
275 switch (Item->getKind()) {
276 case EmitterWorkItem::WI_Nop:
277 break;
278 case EmitterWorkItem::WI_GlobalInits: {
279 lowerGlobals(this, Item->getGlobalInits(),
280 TargetDataLowering::createLowering(this).get());
281 } break;
282 case EmitterWorkItem::WI_Asm: {
283 std::unique_ptr<Assembler> Asm = Item->getAsm();
284 Asm->alignFunction();
285 IceString MangledName = mangleName(Asm->getFunctionName());
286 if (getFlags().getUseELFWriter()) {
287 getObjectWriter()->writeFunctionCode(MangledName, Asm->getInternal(),
288 Asm.get());
289 } else {
290 OstreamLocker L(this);
291 Cfg::emitTextHeader(MangledName, this, Asm.get());
292 Asm->emitIASBytes(this);
293 }
294 } break;
295 case EmitterWorkItem::WI_Cfg: {
296 if (!ALLOW_DUMP)
297 llvm::report_fatal_error("WI_Cfg work item created inappropriately");
298 assert(!getFlags().getUseIntegratedAssembler());
299 std::unique_ptr<Cfg> Func = Item->getCfg();
300 // Unfortunately, we have to temporarily install the Cfg in TLS
301 // because Variable::asType() uses the allocator to create the
302 // differently-typed copy.
303 Cfg::setCurrentCfg(Func.get());
304 Func->emit();
305 Cfg::setCurrentCfg(nullptr);
306 dumpStats(Func->getFunctionName());
307 } break;
308 }
309 }
310 }
311
203 // Scan a string for S[0-9A-Z]*_ patterns and replace them with 312 // Scan a string for S[0-9A-Z]*_ patterns and replace them with
204 // S<num>_ where <num> is the next base-36 value. If a type name 313 // S<num>_ where <num> is the next base-36 value. If a type name
205 // legitimately contains that pattern, then the substitution will be 314 // legitimately contains that pattern, then the substitution will be
206 // made in error and most likely the link will fail. In this case, 315 // made in error and most likely the link will fail. In this case,
207 // the test classes can be rewritten not to use that pattern, which is 316 // the test classes can be rewritten not to use that pattern, which is
208 // much simpler and more reliable than implementing a full demangling 317 // much simpler and more reliable than implementing a full demangling
209 // parser. Another substitution-in-error may occur if a type 318 // parser. Another substitution-in-error may occur if a type
210 // identifier ends with the pattern S[0-9A-Z]*, because an immediately 319 // identifier ends with the pattern S[0-9A-Z]*, because an immediately
211 // following substitution string like "S1_" or "PS1_" may be combined 320 // following substitution string like "S1_" or "PS1_" may be combined
212 // with the previous type. 321 // with the previous type.
(...skipping 330 matching lines...) Expand 10 before | Expand all | Expand 10 after
543 Timers->at(StackID).reset(); 652 Timers->at(StackID).reset();
544 } 653 }
545 654
546 void GlobalContext::setTimerName(TimerStackIdT StackID, 655 void GlobalContext::setTimerName(TimerStackIdT StackID,
547 const IceString &NewName) { 656 const IceString &NewName) {
548 auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; 657 auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
549 assert(StackID < Timers->size()); 658 assert(StackID < Timers->size());
550 Timers->at(StackID).setName(NewName); 659 Timers->at(StackID).setName(NewName);
551 } 660 }
552 661
553 // Note: cfgQueueBlockingPush and cfgQueueBlockingPop use unique_ptr 662 // Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr
554 // at the interface to take and transfer ownership, but they 663 // at the interface to take and transfer ownership, but they
555 // internally store the raw Cfg pointer in the work queue. This 664 // internally store the raw Cfg pointer in the work queue. This
556 // allows e.g. future queue optimizations such as the use of atomics 665 // allows e.g. future queue optimizations such as the use of atomics
557 // to modify queue elements. 666 // to modify queue elements.
558 void GlobalContext::cfgQueueBlockingPush(std::unique_ptr<Cfg> Func) { 667 void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
559 CfgQ.blockingPush(Func.release()); 668 assert(Func);
669 OptQ.blockingPush(Func.release());
670 if (getFlags().isSequential())
671 translateFunctions();
560 } 672 }
561 673
562 std::unique_ptr<Cfg> GlobalContext::cfgQueueBlockingPop() { 674 std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() {
563 return std::unique_ptr<Cfg>(CfgQ.blockingPop()); 675 return std::unique_ptr<Cfg>(OptQ.blockingPop());
676 }
677
678 void GlobalContext::emitQueueBlockingPush(EmitterWorkItem *Item) {
679 assert(Item);
680 EmitQ.blockingPush(Item);
681 if (getFlags().isSequential())
682 emitItems();
683 }
684
685 EmitterWorkItem *GlobalContext::emitQueueBlockingPop() {
686 return EmitQ.blockingPop();
564 } 687 }
565 688
566 void GlobalContext::dumpStats(const IceString &Name, bool Final) { 689 void GlobalContext::dumpStats(const IceString &Name, bool Final) {
567 if (!getFlags().getDumpStats()) 690 if (!getFlags().getDumpStats())
568 return; 691 return;
569 OstreamLocker OL(this); 692 OstreamLocker OL(this);
570 if (Final) { 693 if (Final) {
571 getStatsCumulative()->dump(Name, getStrDump()); 694 getStatsCumulative()->dump(Name, getStrDump());
572 } else { 695 } else {
573 ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Name, getStrDump()); 696 ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Name, getStrDump());
(...skipping 28 matching lines...) Expand all
602 Ctx = Func->getContext(); 725 Ctx = Func->getContext();
603 Active = 726 Active =
604 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled(); 727 Func->getFocusedTiming() || Ctx->getFlags().getSubzeroTimingEnabled();
605 if (Active) 728 if (Active)
606 Ctx->pushTimer(ID, StackID); 729 Ctx->pushTimer(ID, StackID);
607 } 730 }
608 731
609 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS); 732 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
610 733
611 } // end of namespace Ice 734 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceGlobalContext.h ('k') | src/IceTargetLowering.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698