OLD | NEW |
| (Empty) |
1 //===- LowerEmAsyncify - transform asynchronous functions for Emscripten/JS --
---------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // Lu Wang <coolwanglu@gmail.com> | |
11 // | |
12 // In JS we don't have functions like sleep(), which is on the other hand very popular in C/C++ etc. | |
13 // This pass tries to convert functions calling sleep() into a valid form in JavaScript | |
14 // The basic idea is to split the callee at the place where sleep() is called, | |
15 // then the first half may schedule the second half using setTimeout. | |
16 // But we need to pay lots of attention to analyzing/saving/restoring context va
riables and return values | |
17 // | |
18 //===----------------------------------------------------------------------===// | |
19 | |
20 #include "llvm/ADT/SmallPtrSet.h" | |
21 #include "llvm/ADT/DenseMap.h" | |
22 #include "llvm/IR/Dominators.h" | |
23 #include "llvm/IR/DataLayout.h" | |
24 #include "llvm/IR/Instructions.h" | |
25 #include "llvm/IR/Function.h" | |
26 #include "llvm/IR/Module.h" | |
27 #include "llvm/IR/Value.h" | |
28 #include "llvm/IR/CallSite.h" | |
29 #include "llvm/Support/CommandLine.h" | |
30 #include "llvm/IR/InstIterator.h" | |
31 #include "llvm/Transforms/NaCl.h" | |
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |
33 #include "llvm/Transforms/Utils/Cloning.h" | |
34 #include "llvm/Transforms/Utils/Local.h" // for DemoteRegToStack, removeUnreacha
bleBlocks | |
35 #include "llvm/Transforms/Utils/PromoteMemToReg.h" // for PromoteMemToReg | |
36 #include "llvm/Transforms/Utils/ValueMapper.h" | |
37 #include "llvm/Pass.h" | |
38 | |
39 #include <vector> | |
40 | |
// This pass relies on its assert() checks for correctness even in release
// builds: when NDEBUG is defined the standard assert() compiles away, so
// replace it with a version that always runs and aborts via
// report_fatal_error with the failed condition as the message.
#ifdef NDEBUG
#undef assert
#define assert(x) { if (!(x)) report_fatal_error(#x); }
#endif
45 | |
46 using namespace llvm; | |
47 | |
// -emscripten-asyncify-functions: the seed set of async functions (e.g. sleep).
// Any function that directly or indirectly calls one of these is asyncified too.
static cl::list<std::string>
AsyncifyFunctions("emscripten-asyncify-functions",
                  cl::desc("Functions that call one of these functions, directly or indirectly, will be asyncified"),
                  cl::CommaSeparated);

// -emscripten-asyncify-whitelist: functions that must be left untouched by
// this pass even if they would otherwise be considered async.
static cl::list<std::string>
AsyncifyWhiteList("emscripten-asyncify-whitelist",
                  cl::desc("Functions that should not be asyncified"),
                  cl::CommaSeparated);
57 | |
namespace {
  // Module pass that rewrites functions which (transitively) call designated
  // async functions into a resumable form usable from JavaScript: each such
  // function is split at its async call sites and a callback function is
  // generated that resumes execution after the async call completes.
  class LowerEmAsyncify: public ModulePass {
    Module *TheModule;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit LowerEmAsyncify() : ModulePass(ID), TheModule(NULL) {
      initializeLowerEmAsyncifyPass(*PassRegistry::getPassRegistry());
    }
    virtual ~LowerEmAsyncify() { }
    bool runOnModule(Module &M);

  private:
    const DataLayout *DL; // used to size the per-call context struct

    // Frequently used types, cached by initTypesAndFunctions().
    Type *Void, *I1, *I32, *I32Ptr;
    FunctionType *VFunction, *I1Function, *I32PFunction;
    FunctionType *VI32PFunction, *I32PI32Function;
    FunctionType *CallbackFunctionType; // void(i32*): signature of generated callbacks

    // Declarations of the runtime support routines provided by Emscripten.
    Function *AllocAsyncCtxFunction, *ReallocAsyncCtxFunction, *FreeAsyncCtxFunction;
    Function *CheckAsyncFunction;
    Function *DoNotUnwindFunction, *DoNotUnwindAsyncFunction;
    Function *GetAsyncReturnValueAddrFunction;

    void initTypesAndFunctions(void);

    typedef std::vector<Instruction *> Instructions;
    typedef DenseMap<Function*, Instructions> FunctionInstructionsMap;
    typedef std::vector<Value*> Values;
    typedef SmallPtrSet<BasicBlock*, 16> BasicBlockSet;

    // all the information we want for an async call
    struct AsyncCallEntry {
      Instruction *AsyncCallInst; // calling an async function
      BasicBlock *AfterCallBlock; // the block we should continue on after getting the return value of AsyncCallInst
      CallInst *AllocAsyncCtxInst; // where we allocate the async ctx before the async call, in the original function
      Values ContextVariables; // those need to be saved and restored for the async call
      StructType *ContextStructType; // The structure containing all the context variables
      BasicBlock *SaveAsyncCtxBlock; // the block in which we save all the variables
      Function *CallbackFunc; // the callback function for this async call, which is converted from the original function
    };

    // Returns the set of blocks reachable from src (including src itself).
    BasicBlockSet FindReachableBlocksFrom(BasicBlock *src);

    // Find everything that we should save and restore for the async call
    // save them to Entry.ContextVariables
    void FindContextVariables(AsyncCallEntry & Entry);

    // The essential function
    // F is now in the sync form, transform it into an async form that is valid in JS
    void transformAsyncFunction(Function &F, Instructions const& AsyncCalls);

    // Returns true if I is a call/invoke through a function pointer.
    bool IsFunctionPointerCall(const Instruction *I);
  };
}
114 | |
char LowerEmAsyncify::ID = 0;
// Register the pass with LLVM's pass registry so it can be requested by name.
INITIALIZE_PASS(LowerEmAsyncify, "loweremasyncify",
                "Lower async functions for js/emscripten",
                false, false)
119 | |
120 bool LowerEmAsyncify::runOnModule(Module &M) { | |
121 TheModule = &M; | |
122 DL = &M.getDataLayout(); | |
123 | |
124 std::set<std::string> WhiteList(AsyncifyWhiteList.begin(), AsyncifyWhiteList.e
nd()); | |
125 | |
126 /* | |
127 * collect all the functions that should be asyncified | |
128 * any function that _might_ call an async function is also async | |
129 */ | |
130 std::vector<Function*> AsyncFunctionsPending; | |
131 for(unsigned i = 0; i < AsyncifyFunctions.size(); ++i) { | |
132 std::string const& AFName = AsyncifyFunctions[i]; | |
133 Function *F = TheModule->getFunction(AFName); | |
134 if (F && !WhiteList.count(F->getName())) { | |
135 AsyncFunctionsPending.push_back(F); | |
136 } | |
137 } | |
138 | |
139 // No function needed to transform | |
140 if (AsyncFunctionsPending.empty()) return false; | |
141 | |
142 // Walk through the call graph and find all the async functions | |
143 FunctionInstructionsMap AsyncFunctionCalls; | |
144 { | |
145 // pessimistic: consider all indirect calls as possibly async | |
146 // TODO: deduce based on function types | |
147 for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); FI !=
FE; ++FI) { | |
148 if (WhiteList.count(FI->getName())) continue; | |
149 | |
150 bool has_indirect_call = false; | |
151 for (inst_iterator I = inst_begin(FI), E = inst_end(FI); I != E; ++I) { | |
152 if (IsFunctionPointerCall(&*I)) { | |
153 has_indirect_call = true; | |
154 AsyncFunctionCalls[FI].push_back(&*I); | |
155 } | |
156 } | |
157 | |
158 if (has_indirect_call) AsyncFunctionsPending.push_back(FI); | |
159 } | |
160 | |
161 while (!AsyncFunctionsPending.empty()) { | |
162 Function *CurFunction = AsyncFunctionsPending.back(); | |
163 AsyncFunctionsPending.pop_back(); | |
164 | |
165 for (Value::user_iterator UI = CurFunction->user_begin(), E = CurFunction-
>user_end(); UI != E; ++UI) { | |
166 ImmutableCallSite ICS(*UI); | |
167 if (!ICS) continue; | |
168 // we only need those instructions calling the function | |
169 // if the function address is used for other purpose, we don't care | |
170 if (CurFunction != ICS.getCalledValue()->stripPointerCasts()) continue; | |
171 // Now I is either CallInst or InvokeInst | |
172 Instruction *I = cast<Instruction>(*UI); | |
173 Function *F = I->getParent()->getParent(); | |
174 if (AsyncFunctionCalls.count(F) == 0) { | |
175 AsyncFunctionsPending.push_back(F); | |
176 } | |
177 AsyncFunctionCalls[F].push_back(I); | |
178 } | |
179 } | |
180 } | |
181 | |
182 // exit if no async function is found at all | |
183 if (AsyncFunctionCalls.empty()) return false; | |
184 | |
185 initTypesAndFunctions(); | |
186 | |
187 for (FunctionInstructionsMap::iterator I = AsyncFunctionCalls.begin(), E = Asy
ncFunctionCalls.end(); | |
188 I != E; ++I) { | |
189 transformAsyncFunction(*(I->first), I->second); | |
190 } | |
191 | |
192 return true; | |
193 } | |
194 | |
195 void LowerEmAsyncify::initTypesAndFunctions(void) { | |
196 // Data types | |
197 Void = Type::getVoidTy(TheModule->getContext()); | |
198 I1 = Type::getInt1Ty(TheModule->getContext()); | |
199 I32 = Type::getInt32Ty(TheModule->getContext()); | |
200 I32Ptr = Type::getInt32PtrTy(TheModule->getContext()); | |
201 | |
202 // Function types | |
203 SmallVector<Type*, 2> ArgTypes; | |
204 VFunction = FunctionType::get(Void, false); | |
205 I1Function = FunctionType::get(I1, false); | |
206 I32PFunction = FunctionType::get(I32Ptr, false); | |
207 | |
208 ArgTypes.clear(); | |
209 ArgTypes.push_back(I32Ptr); | |
210 VI32PFunction = FunctionType::get(Void, ArgTypes, false); | |
211 | |
212 ArgTypes.clear(); | |
213 ArgTypes.push_back(I32); | |
214 I32PI32Function = FunctionType::get(I32Ptr, ArgTypes, false); | |
215 | |
216 CallbackFunctionType = VI32PFunction; | |
217 | |
218 // Functions | |
219 CheckAsyncFunction = Function::Create( | |
220 I1Function, | |
221 GlobalValue::ExternalLinkage, | |
222 "emscripten_check_async", | |
223 TheModule | |
224 ); | |
225 | |
226 AllocAsyncCtxFunction = Function::Create( | |
227 I32PI32Function, | |
228 GlobalValue::ExternalLinkage, | |
229 "emscripten_alloc_async_context", | |
230 TheModule | |
231 ); | |
232 | |
233 ReallocAsyncCtxFunction = Function::Create( | |
234 I32PI32Function, | |
235 GlobalValue::ExternalLinkage, | |
236 "emscripten_realloc_async_context", | |
237 TheModule | |
238 ); | |
239 | |
240 FreeAsyncCtxFunction = Function::Create( | |
241 VI32PFunction, | |
242 GlobalValue::ExternalLinkage, | |
243 "emscripten_free_async_context", | |
244 TheModule | |
245 ); | |
246 | |
247 DoNotUnwindFunction = Function::Create( | |
248 VFunction, | |
249 GlobalValue::ExternalLinkage, | |
250 "emscripten_do_not_unwind", | |
251 TheModule | |
252 ); | |
253 | |
254 DoNotUnwindAsyncFunction = Function::Create( | |
255 VFunction, | |
256 GlobalValue::ExternalLinkage, | |
257 "emscripten_do_not_unwind_async", | |
258 TheModule | |
259 ); | |
260 | |
261 GetAsyncReturnValueAddrFunction = Function::Create( | |
262 I32PFunction, | |
263 GlobalValue::ExternalLinkage, | |
264 "emscripten_get_async_return_value_addr", | |
265 TheModule | |
266 ); | |
267 } | |
268 | |
269 LowerEmAsyncify::BasicBlockSet LowerEmAsyncify::FindReachableBlocksFrom(BasicBlo
ck *src) { | |
270 BasicBlockSet ReachableBlockSet; | |
271 std::vector<BasicBlock*> pending; | |
272 ReachableBlockSet.insert(src); | |
273 pending.push_back(src); | |
274 while (!pending.empty()) { | |
275 BasicBlock *CurBlock = pending.back(); | |
276 pending.pop_back(); | |
277 for (succ_iterator SI = succ_begin(CurBlock), SE = succ_end(CurBlock); SI !=
SE; ++SI) { | |
278 if (ReachableBlockSet.count(*SI) == 0) { | |
279 ReachableBlockSet.insert(*SI); | |
280 pending.push_back(*SI); | |
281 } | |
282 } | |
283 } | |
284 return ReachableBlockSet; | |
285 } | |
286 | |
// Determine which SSA values must be saved into the async context for this
// call site: any value used after the call that would no longer dominate its
// uses if execution resumed directly at AfterCallBlock, plus all function
// arguments (which are unavailable in the callback). Results are stored in
// Entry.ContextVariables.
void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) {
  BasicBlock *AfterCallBlock = Entry.AfterCallBlock;

  Function & F = *AfterCallBlock->getParent();

  // Create a new entry block as if in the callback function
  // then check for variables that no longer properly dominate their uses
  BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock());
  BranchInst::Create(AfterCallBlock, EntryBlock);

  // Recompute dominance with the fake entry block in place.
  DominatorTreeWrapperPass DTW;
  DTW.runOnFunction(F);
  DominatorTree& DT = DTW.getDomTree();

  // These blocks may be using some values defined at or before AsyncCallBlock
  BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock);

  SmallPtrSet<Value*, 256> ContextVariables;
  Values Pending;

  // Examine the instructions, find all variables that we need to store in the context
  for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != RE; ++RI) {
    for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) {
      for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) {
        Value *O = I->getOperand(i);
        if (Instruction *Inst = dyn_cast<Instruction>(O)) {
          if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from async return value
          if (ContextVariables.count(Inst) != 0) continue; // already examined

          if (!DT.dominates(Inst, I->getOperandUse(i))) {
            // `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock
            // so we need to save `Inst` in the context
            ContextVariables.insert(Inst);
            Pending.push_back(Inst);
          }
        } else if (Argument *Arg = dyn_cast<Argument>(O)) {
          // Arguments are never live in the callback, so always save them.
          // count() should be as fast/slow as insert, so just insert here
          ContextVariables.insert(Arg);
        }
      }
    }
  }

  // restore F by removing the temporary entry block
  EntryBlock->eraseFromParent();

  Entry.ContextVariables.clear();
  Entry.ContextVariables.reserve(ContextVariables.size());
  for (SmallPtrSet<Value*, 256>::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) {
    Entry.ContextVariables.push_back(*I);
  }
}
339 | |
340 /* | |
341 * Consider that F contains a call to G, both of which are async: | |
342 * | |
343 * function F: | |
344 * ... | |
345 * %0 = G(%1, %2, ...); | |
346 * ... | |
347 * return %%; | |
348 * | |
349  * We want to convert F and generate F__async_cb | |
350 * they are similar, but with minor yet important differences | |
351 * Note those `main func only` and `callback func only` instructions | |
352 | |
353 ////////////////////////////////////////////////////////// | |
354 function F: | |
355 ... | |
356 ctx = alloc_ctx(len, sp); // main func only | |
357 // TODO | |
358 // we could also do this only after an async call | |
359 // but in that case we will need to pass ctx to the fun
ction | |
360 // since ctx is no longer in the top async stack frame | |
361 %0 = G(%1, %2, ...); | |
362 if (async) { // G was async | |
363 save context variables in ctx | |
364 register F.async_cb as the callback in frame | |
365 return without unwinding the stack frame | |
366 } else { // G was sync | |
367 // use %0 as normal | |
368 free_ctx(ctx); // main func only | |
369 // ctx is freed here, because so far F is still a sync function | |
370 // and we don't want any side effects | |
371 ... | |
372 async return value = %%; | |
373 return & normally unwind the stack frame // main func only | |
374 } | |
375 ////////////////////////////////////////////////////////// | |
376 | |
377 * And here's F.async_cb | |
378 | |
379 ////////////////////////////////////////////////////////// | |
380 function F.async_cb(ctx): | |
381 load variables from ctx // callback func only | |
382 goto resume_point; // callback func only | |
383 ... | |
384 ctx = realloc_ctx(len); // callback func only | |
385 // realloc_ctx is different from alloc_ctx | |
386 // which reused the current async stack frame | |
387 // we want to keep the saved stack pointer | |
388 %0 = G(%1, %2, ...); | |
389 if (async) { | |
390 save context variables in ctx | |
391 register F.async_cb as the callback | |
392 return without unwinding the stack frame | |
393 } else { | |
394 resume_point: | |
395 %0'= either $0 or the async return value // callback func only | |
396 ... | |
397 async return value = %% | |
398 return restore the stack pointer back to the value stored in F // callback f
unc only | |
399 // no need to free the ctx | |
400 // the scheduler will be aware of this return and handle the stack frames | |
401 } | |
402 ////////////////////////////////////////////////////////// | |
403 | |
404 */ | |
405 | |
406 void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& As
yncCalls) { | |
407 assert(!AsyncCalls.empty()); | |
408 | |
409 // Pass 0 | |
410 // collect all the return instructions from the original function | |
411 // will use later | |
412 std::vector<ReturnInst*> OrigReturns; | |
413 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { | |
414 if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) { | |
415 OrigReturns.push_back(RI); | |
416 } | |
417 } | |
418 | |
419 // Pass 1 | |
420 // Scan each async call and make the basic structure: | |
421 // All these will be cloned into the callback functions | |
422 // - allocate the async context before calling an async function | |
423 // - check async right after calling an async function, save context & return
if async, continue if not | |
424 // - retrieve the async return value and free the async context if the called
function turns out to be sync | |
425 std::vector<AsyncCallEntry> AsyncCallEntries; | |
426 AsyncCallEntries.reserve(AsyncCalls.size()); | |
427 for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end()
; I != E; ++I) { | |
428 // prepare blocks | |
429 Instruction *CurAsyncCall = *I; | |
430 | |
431 // The block containing the async call | |
432 BasicBlock *CurBlock = CurAsyncCall->getParent(); | |
433 // The block should run after the async call | |
434 BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode(
)); | |
435 // The block where we store the context and return | |
436 BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(),
"SaveAsyncCtx", &F, AfterCallBlock); | |
437 // return a dummy value at the end, to make the block valid | |
438 new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock); | |
439 | |
440 // allocate the context before making the call | |
441 // we don't know the size yet, will fix it later | |
442 // we cannot insert the instruction later because, | |
443 // we need to make sure that all the instructions and blocks are fixed befor
e we can generate DT and find context variables | |
444 // In CallHandler.h `sp` will be put as the second parameter | |
445 // such that we can take a note of the original sp | |
446 CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Consta
nt::getNullValue(I32), "AsyncCtx", CurAsyncCall); | |
447 | |
448 // Right after the call | |
449 // check async and return if so | |
450 // TODO: we can define truly async functions and partial async functions | |
451 { | |
452 // remove old terminator, which came from SplitBlock | |
453 CurBlock->getTerminator()->eraseFromParent(); | |
454 // go to SaveAsyncCtxBlock if the previous call is async | |
455 // otherwise just continue to AfterCallBlock | |
456 CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", Cur
Block); | |
457 BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock
); | |
458 } | |
459 | |
460 // take a note of this async call | |
461 AsyncCallEntry CurAsyncCallEntry; | |
462 CurAsyncCallEntry.AsyncCallInst = CurAsyncCall; | |
463 CurAsyncCallEntry.AfterCallBlock = AfterCallBlock; | |
464 CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst; | |
465 CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock; | |
466 // create an empty function for the callback, which will be constructed late
r | |
467 CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.ge
tLinkage(), F.getName() + "__async_cb", TheModule); | |
468 AsyncCallEntries.push_back(CurAsyncCallEntry); | |
469 } | |
470 | |
471 | |
472 // Pass 2 | |
473 // analyze the context variables and construct SaveAsyncCtxBlock for each asyn
c call | |
474 // also calculate the size of the context and allocate the async context accor
dingly | |
475 for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE =
AsyncCallEntries.end(); EI != EE; ++EI) { | |
476 AsyncCallEntry & CurEntry = *EI; | |
477 | |
478 // Collect everything to be saved | |
479 FindContextVariables(CurEntry); | |
480 | |
481 // Pack the variables as a struct | |
482 { | |
483 // TODO: sort them from large memeber to small ones, in order to make the
struct compact even when aligned | |
484 SmallVector<Type*, 8> Types; | |
485 Types.push_back(CallbackFunctionType->getPointerTo()); | |
486 for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntr
y.ContextVariables.end(); VI != VE; ++VI) { | |
487 Types.push_back((*VI)->getType()); | |
488 } | |
489 CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Type
s); | |
490 } | |
491 | |
492 // fix the size of allocation | |
493 CurEntry.AllocAsyncCtxInst->setOperand(0, | |
494 ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)))
; | |
495 | |
496 // construct SaveAsyncCtxBlock | |
497 { | |
498 // fill in SaveAsyncCtxBlock | |
499 // temporarily remove the terminator for convenience | |
500 CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent(); | |
501 assert(CurEntry.SaveAsyncCtxBlock->empty()); | |
502 | |
503 Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); | |
504 BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, As
yncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock); | |
505 SmallVector<Value*, 2> Indices; | |
506 // store the callback | |
507 { | |
508 Indices.push_back(ConstantInt::get(I32, 0)); | |
509 Indices.push_back(ConstantInt::get(I32, 0)); | |
510 GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr
Ty, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); | |
511 new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtx
Block); | |
512 } | |
513 // store the context variables | |
514 for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { | |
515 Indices.clear(); | |
516 Indices.push_back(ConstantInt::get(I32, 0)); | |
517 Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is t
he callback function | |
518 GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr
Ty, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); | |
519 new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveA
syncCtxBlock); | |
520 } | |
521 // to exit the block, we want to return without unwinding the stack frame | |
522 CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock); | |
523 ReturnInst::Create(TheModule->getContext(), | |
524 (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getRetur
nType())), | |
525 CurEntry.SaveAsyncCtxBlock); | |
526 } | |
527 } | |
528 | |
529 // Pass 3 | |
530 // now all the SaveAsyncCtxBlock's have been constructed | |
531 // we can clone F and construct callback functions | |
532 // we could not construct the callbacks in Pass 2 because we need _all_ those
SaveAsyncCtxBlock's appear in _each_ callback | |
533 for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE =
AsyncCallEntries.end(); EI != EE; ++EI) { | |
534 AsyncCallEntry & CurEntry = *EI; | |
535 | |
536 Function *CurCallbackFunc = CurEntry.CallbackFunc; | |
537 ValueToValueMapTy VMap; | |
538 | |
539 // Add the entry block | |
540 // load variables from the context | |
541 // also update VMap for CloneFunction | |
542 BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncC
allbackEntry", CurCallbackFunc); | |
543 std::vector<LoadInst *> LoadedAsyncVars; | |
544 { | |
545 Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); | |
546 BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(),
AsyncCtxAddrTy, "AsyncCtx", EntryBlock); | |
547 SmallVector<Value*, 2> Indices; | |
548 for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { | |
549 Indices.clear(); | |
550 Indices.push_back(ConstantInt::get(I32, 0)); | |
551 Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of A
syncCtx is the callback function | |
552 GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr
Ty, AsyncCtxAddr, Indices, "", EntryBlock); | |
553 LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock)); | |
554 // we want the argument to be replaced by the loaded value | |
555 if (isa<Argument>(CurEntry.ContextVariables[i])) | |
556 VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back(); | |
557 } | |
558 } | |
559 | |
560 // we don't need any argument, just leave dummy entries there to cheat Clone
FunctionInto | |
561 for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI !
= AE; ++AI) { | |
562 if (VMap.count(AI) == 0) | |
563 VMap[AI] = Constant::getNullValue(AI->getType()); | |
564 } | |
565 | |
566 // Clone the function | |
567 { | |
568 SmallVector<ReturnInst*, 8> Returns; | |
569 CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns); | |
570 | |
571 // return type of the callback functions is always void | |
572 // need to fix the return type | |
573 if (!F.getReturnType()->isVoidTy()) { | |
574 // for those return instructions that are from the original function | |
575 // it means we are 'truly' leaving this function | |
576 // need to store the return value right before ruturn | |
577 for (size_t i = 0; i < OrigReturns.size(); ++i) { | |
578 ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]); | |
579 // Need to store the return value into the global area | |
580 CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunc
tion, "", RI); | |
581 BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnTy
pe()->getPointerTo(), "AsyncRetValAddr", RI); | |
582 new StoreInst(RI->getOperand(0), RetValAddr, RI); | |
583 } | |
584 // we want to unwind the stack back to where it was before the original
function as called | |
585 // but we don't actually need to do this here | |
586 // at this point it must be true that no callback is pended | |
587 // so the scheduler will correct the stack pointer and pop the frame | |
588 // here we just fix the return type | |
589 for (size_t i = 0; i < Returns.size(); ++i) { | |
590 ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getConte
xt())); | |
591 } | |
592 } | |
593 } | |
594 | |
595 // the callback function does not have any return value | |
596 // so clear all the attributes for return | |
597 { | |
598 AttributeSet Attrs = CurCallbackFunc->getAttributes(); | |
599 CurCallbackFunc->setAttributes( | |
600 Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnInde
x, Attrs.getRetAttributes()) | |
601 ); | |
602 } | |
603 | |
604 // in the callback function, we never allocate a new async frame | |
605 // instead we reuse the existing one | |
606 for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE
= AsyncCallEntries.end(); EI != EE; ++EI) { | |
607 Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]); | |
608 ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOpe
rand(0), "ReallocAsyncCtx")); | |
609 } | |
610 | |
611 // mapped entry point & async call | |
612 BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]); | |
613 Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst
]); | |
614 | |
615 // To save space, for each async call in the callback function, we just igno
re the sync case, and leave it to the scheduler | |
616 // TODO need an option for this | |
617 { | |
618 for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(),
EE = AsyncCallEntries.end(); EI != EE; ++EI) { | |
619 AsyncCallEntry & CurEntry = *EI; | |
620 Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.Async
CallInst]); | |
621 BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent(); | |
622 BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterC
allBlock]); | |
623 | |
624 // for the sync case of the call, go to NewBlock (instead of MappedAfter
CallBlock) | |
625 BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", C
urCallbackFunc, MappedAfterCallBlock); | |
626 MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock); | |
627 // store the return value | |
628 if (!MappedAsyncCallInst->use_empty()) { | |
629 CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunc
tion, "", NewBlock); | |
630 BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCa
llInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock); | |
631 new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock); | |
632 } | |
633 // tell the scheduler that we want to keep the current async stack frame | |
634 CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock); | |
635 // finally we go to the SaveAsyncCtxBlock, to register the callbac, save
the local variables and leave | |
636 BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.Sav
eAsyncCtxBlock]); | |
637 BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock); | |
638 } | |
639 } | |
640 | |
641 std::vector<AllocaInst*> ToPromote; | |
642 // applying loaded variables in the entry block | |
643 { | |
644 BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock); | |
645 for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { | |
646 Value *OrigVar = CurEntry.ContextVariables[i]; | |
647 if (isa<Argument>(OrigVar)) continue; // already processed | |
648 Value *CurVar = VMap[OrigVar]; | |
649 assert(CurVar != MappedAsyncCall); | |
650 if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) { | |
651 if (ReachableBlocks.count(Inst->getParent())) { | |
652 // Inst could be either defined or loaded from the async context | |
653 // Do the dirty works in memory | |
654 // TODO: might need to check the safety first | |
655 // TODO: can we create phi directly? | |
656 AllocaInst *Addr = DemoteRegToStack(*Inst, false); | |
657 new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock); | |
658 ToPromote.push_back(Addr); | |
659 } else { | |
660 // The parent block is not reachable, which means there is no confli
ction | |
661 // it's safe to replace Inst with the loaded value | |
662 assert(Inst != LoadedAsyncVars[i]); // this should only happen when
OrigVar is an Argument | |
663 Inst->replaceAllUsesWith(LoadedAsyncVars[i]); | |
664 } | |
665 } | |
666 } | |
667 } | |
668 | |
669 // resolve the return value of the previous async function | |
670 // it could be the value just loaded from the global area | |
671 // or directly returned by the function (in its sync case) | |
672 if (!CurEntry.AsyncCallInst->use_empty()) { | |
673 // load the async return value | |
674 CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction
, "", EntryBlock); | |
675 BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->
getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock); | |
676 LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock); | |
677 | |
678 AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false); | |
679 new StoreInst(RetVal, Addr, EntryBlock); | |
680 ToPromote.push_back(Addr); | |
681 } | |
682 | |
683 // TODO remove unreachable blocks before creating phi | |
684 | |
685 // We go right to ResumeBlock from the EntryBlock | |
686 BranchInst::Create(ResumeBlock, EntryBlock); | |
687 | |
688 /* | |
689 * Creating phi's | |
690 * Normal stack frames and async stack frames are interleaving with each oth
er. | |
691 * In a callback function, if we call an async function, we might need to re
alloc the async ctx. | |
692 * at this point we don't want anything stored after the ctx, | |
693 * such that we can free and extend the ctx by simply update STACKTOP. | |
694 * Therefore we don't want any alloca's in callback functions. | |
695 * | |
696 */ | |
697 if (!ToPromote.empty()) { | |
698 DominatorTreeWrapperPass DTW; | |
699 DTW.runOnFunction(*CurCallbackFunc); | |
700 PromoteMemToReg(ToPromote, DTW.getDomTree()); | |
701 } | |
702 | |
703 removeUnreachableBlocks(*CurCallbackFunc); | |
704 } | |
705 | |
706 // Pass 4 | |
707 // Here are modifications to the original function, which we won't want to be
cloned into the callback functions | |
708 for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE =
AsyncCallEntries.end(); EI != EE; ++EI) { | |
709 AsyncCallEntry & CurEntry = *EI; | |
710 // remove the frame if no async functinon has been called | |
711 CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEn
try.AfterCallBlock->getFirstNonPHI()); | |
712 } | |
713 } | |
714 | |
715 bool LowerEmAsyncify::IsFunctionPointerCall(const Instruction *I) { | |
716 // mostly from CallHandler.h | |
717 ImmutableCallSite CS(I); | |
718 if (!CS) return false; // not call nor invoke | |
719 const Value *CV = CS.getCalledValue()->stripPointerCasts(); | |
720 return !isa<const Function>(CV); | |
721 } | |
722 | |
// Factory used by the pass manager to create a fresh instance of this pass.
ModulePass *llvm::createLowerEmAsyncifyPass() {
  return new LowerEmAsyncify();
}
OLD | NEW |