//===- LowerEmAsyncify - transform asynchronous functions for Emscripten/JS -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Lu Wang <coolwanglu@gmail.com>
//
// JavaScript has no blocking functions like sleep(), which are nevertheless
// very popular in C/C++. This pass converts functions that call sleep() into
// a form that is valid in JavaScript.
// The basic idea is to split the function at the point where sleep() is
// called, so the first half can schedule the second half using setTimeout.
// Most of the care goes into analyzing, saving, and restoring context
// variables and return values across the split.
//
//===----------------------------------------------------------------------===//
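//
// For illustration, a hypothetical C function this pass targets (a minimal
// sketch; `emscripten_sleep` stands in for whatever is listed via
// -emscripten-asyncify-functions):
//
//   int get_answer() {
//     puts("thinking...");
//     emscripten_sleep(1000); // the function is split at this call
//     return 42;              // this half later runs as a callback
//   }
//
//===----------------------------------------------------------------------===//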
| 19 | |
| 20 #include "llvm/ADT/SmallPtrSet.h" | |
| 21 #include "llvm/ADT/DenseMap.h" | |
| 22 #include "llvm/IR/Dominators.h" | |
| 23 #include "llvm/IR/DataLayout.h" | |
| 24 #include "llvm/IR/Instructions.h" | |
| 25 #include "llvm/IR/Function.h" | |
| 26 #include "llvm/IR/Module.h" | |
| 27 #include "llvm/IR/Value.h" | |
| 28 #include "llvm/IR/CallSite.h" | |
| 29 #include "llvm/Support/CommandLine.h" | |
| 30 #include "llvm/IR/InstIterator.h" | |
| 31 #include "llvm/Transforms/NaCl.h" | |
| 32 #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |
| 33 #include "llvm/Transforms/Utils/Cloning.h" | |
| 34 #include "llvm/Transforms/Utils/Local.h" // for DemoteRegToStack, removeUnreacha
bleBlocks | |
| 35 #include "llvm/Transforms/Utils/PromoteMemToReg.h" // for PromoteMemToReg | |
| 36 #include "llvm/Transforms/Utils/ValueMapper.h" | |
| 37 #include "llvm/Pass.h" | |
| 38 | |
| 39 #include <vector> | |
| 40 | |
#ifdef NDEBUG
#undef assert
#define assert(x) { if (!(x)) report_fatal_error(#x); }
#endif

using namespace llvm;

static cl::list<std::string>
AsyncifyFunctions("emscripten-asyncify-functions",
                  cl::desc("Functions that call one of these functions, directly or indirectly, will be asyncified"),
                  cl::CommaSeparated);

static cl::list<std::string>
AsyncifyWhiteList("emscripten-asyncify-whitelist",
                  cl::desc("Functions that should not be asyncified"),
                  cl::CommaSeparated);
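
// Example invocation (a sketch; exactly how the Emscripten driver wires this
// pass into its pipeline may differ, and `my_async_wait`/`pure_helper` are
// made-up names):
//
//   opt -loweremasyncify \
//       -emscripten-asyncify-functions=emscripten_sleep,my_async_wait \
//       -emscripten-asyncify-whitelist=pure_helper \
//       input.bc -o output.bc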

namespace {
  class LowerEmAsyncify: public ModulePass {
    Module *TheModule;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit LowerEmAsyncify() : ModulePass(ID), TheModule(NULL) {
      initializeLowerEmAsyncifyPass(*PassRegistry::getPassRegistry());
    }
    virtual ~LowerEmAsyncify() { }
    bool runOnModule(Module &M);

  private:
    const DataLayout *DL;

    Type *Void, *I1, *I32, *I32Ptr;
    FunctionType *VFunction, *I1Function, *I32PFunction;
    FunctionType *VI32PFunction, *I32PI32Function;
    FunctionType *CallbackFunctionType;

    Function *AllocAsyncCtxFunction, *ReallocAsyncCtxFunction, *FreeAsyncCtxFunction;
    Function *CheckAsyncFunction;
    Function *DoNotUnwindFunction, *DoNotUnwindAsyncFunction;
    Function *GetAsyncReturnValueAddrFunction;

    void initTypesAndFunctions(void);

    typedef std::vector<Instruction *> Instructions;
    typedef DenseMap<Function*, Instructions> FunctionInstructionsMap;
    typedef std::vector<Value*> Values;
    typedef SmallPtrSet<BasicBlock*, 16> BasicBlockSet;

    // all the information we want for an async call
    struct AsyncCallEntry {
      Instruction *AsyncCallInst; // calling an async function
      BasicBlock *AfterCallBlock; // the block to continue on after getting the return value of AsyncCallInst
      CallInst *AllocAsyncCtxInst; // where we allocate the async ctx before the async call, in the original function
      Values ContextVariables; // the values that need to be saved and restored for the async call
      StructType *ContextStructType; // the struct type packing all the context variables
      BasicBlock *SaveAsyncCtxBlock; // the block in which we save all the variables
      Function *CallbackFunc; // the callback function for this async call, converted from the original function
    };
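
    // For a call `%0 = G(%1, %2)` where, say, an i32 %x and a float %y are
    // live across the call, the packed context is laid out like this
    // (hypothetical variables; the field order is whatever
    // FindContextVariables produces):
    //
    //   struct Context {
    //     void (*callback)(i32 *); // always field 0: the resume callback
    //     i32   x;                 // saved context variable
    //     float y;                 // saved context variable
    //   };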

    BasicBlockSet FindReachableBlocksFrom(BasicBlock *src);

    // Find everything that we should save and restore for the async call
    // and save it to Entry.ContextVariables
    void FindContextVariables(AsyncCallEntry & Entry);

    // The essential function:
    // F is in sync form; transform it into an async form that is valid in JS
    void transformAsyncFunction(Function &F, Instructions const& AsyncCalls);

    bool IsFunctionPointerCall(const Instruction *I);
  };
}

char LowerEmAsyncify::ID = 0;
INITIALIZE_PASS(LowerEmAsyncify, "loweremasyncify",
                "Lower async functions for js/emscripten",
                false, false)

bool LowerEmAsyncify::runOnModule(Module &M) {
  TheModule = &M;
  DL = &M.getDataLayout();

  std::set<std::string> WhiteList(AsyncifyWhiteList.begin(), AsyncifyWhiteList.end());

  /*
   * Collect all the functions that should be asyncified;
   * any function that _might_ call an async function is also async.
   */
  std::vector<Function*> AsyncFunctionsPending;
  for (unsigned i = 0; i < AsyncifyFunctions.size(); ++i) {
    std::string const& AFName = AsyncifyFunctions[i];
    Function *F = TheModule->getFunction(AFName);
    if (F && !WhiteList.count(F->getName())) {
      AsyncFunctionsPending.push_back(F);
    }
  }

  // no functions need to be transformed
  if (AsyncFunctionsPending.empty()) return false;

  // Walk through the call graph and find all the async functions
  FunctionInstructionsMap AsyncFunctionCalls;
  {
    // pessimistic: consider all indirect calls as possibly async
    // TODO: deduce based on function types
    for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); FI != FE; ++FI) {
      if (WhiteList.count(FI->getName())) continue;

      bool has_indirect_call = false;
      for (inst_iterator I = inst_begin(FI), E = inst_end(FI); I != E; ++I) {
        if (IsFunctionPointerCall(&*I)) {
          has_indirect_call = true;
          AsyncFunctionCalls[FI].push_back(&*I);
        }
      }

      if (has_indirect_call) AsyncFunctionsPending.push_back(FI);
    }

    while (!AsyncFunctionsPending.empty()) {
      Function *CurFunction = AsyncFunctionsPending.back();
      AsyncFunctionsPending.pop_back();

      for (Value::user_iterator UI = CurFunction->user_begin(), E = CurFunction->user_end(); UI != E; ++UI) {
        ImmutableCallSite ICS(*UI);
        if (!ICS) continue;
        // We only need the instructions that call the function;
        // if the function address is used for other purposes, we don't care.
        if (CurFunction != ICS.getCalledValue()->stripPointerCasts()) continue;
        // Now I is either a CallInst or an InvokeInst
        Instruction *I = cast<Instruction>(*UI);
        Function *F = I->getParent()->getParent();
        if (AsyncFunctionCalls.count(F) == 0) {
          AsyncFunctionsPending.push_back(F);
        }
        AsyncFunctionCalls[F].push_back(I);
      }
    }
  }
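
  // For example, with -emscripten-asyncify-functions=emscripten_sleep and a
  // (hypothetical) call chain main -> foo -> emscripten_sleep, the worklist
  // first records foo (it calls emscripten_sleep directly), then main (it
  // calls foo), so both will be transformed below.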

  // exit if no async function was found at all
  if (AsyncFunctionCalls.empty()) return false;

  initTypesAndFunctions();

  for (FunctionInstructionsMap::iterator I = AsyncFunctionCalls.begin(), E = AsyncFunctionCalls.end();
       I != E; ++I) {
    transformAsyncFunction(*(I->first), I->second);
  }

  return true;
}

void LowerEmAsyncify::initTypesAndFunctions(void) {
  // Data types
  Void = Type::getVoidTy(TheModule->getContext());
  I1 = Type::getInt1Ty(TheModule->getContext());
  I32 = Type::getInt32Ty(TheModule->getContext());
  I32Ptr = Type::getInt32PtrTy(TheModule->getContext());

  // Function types
  SmallVector<Type*, 2> ArgTypes;
  VFunction = FunctionType::get(Void, false);
  I1Function = FunctionType::get(I1, false);
  I32PFunction = FunctionType::get(I32Ptr, false);

  ArgTypes.clear();
  ArgTypes.push_back(I32Ptr);
  VI32PFunction = FunctionType::get(Void, ArgTypes, false);

  ArgTypes.clear();
  ArgTypes.push_back(I32);
  I32PI32Function = FunctionType::get(I32Ptr, ArgTypes, false);

  CallbackFunctionType = VI32PFunction;

  // Functions
  CheckAsyncFunction = Function::Create(
    I1Function,
    GlobalValue::ExternalLinkage,
    "emscripten_check_async",
    TheModule
  );

  AllocAsyncCtxFunction = Function::Create(
    I32PI32Function,
    GlobalValue::ExternalLinkage,
    "emscripten_alloc_async_context",
    TheModule
  );

  ReallocAsyncCtxFunction = Function::Create(
    I32PI32Function,
    GlobalValue::ExternalLinkage,
    "emscripten_realloc_async_context",
    TheModule
  );

  FreeAsyncCtxFunction = Function::Create(
    VI32PFunction,
    GlobalValue::ExternalLinkage,
    "emscripten_free_async_context",
    TheModule
  );

  DoNotUnwindFunction = Function::Create(
    VFunction,
    GlobalValue::ExternalLinkage,
    "emscripten_do_not_unwind",
    TheModule
  );

  DoNotUnwindAsyncFunction = Function::Create(
    VFunction,
    GlobalValue::ExternalLinkage,
    "emscripten_do_not_unwind_async",
    TheModule
  );

  GetAsyncReturnValueAddrFunction = Function::Create(
    I32PFunction,
    GlobalValue::ExternalLinkage,
    "emscripten_get_async_return_value_addr",
    TheModule
  );
}
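
// The declarations above describe this runtime ABI (a C-style sketch read
// off the FunctionTypes; the implementations live in Emscripten's JS
// runtime, not in this pass):
//
//   i32* emscripten_alloc_async_context(i32 len);   // new async ctx frame
//   i32* emscripten_realloc_async_context(i32 len); // reuse the current frame
//   void emscripten_free_async_context(i32* ctx);
//   i1   emscripten_check_async();                  // did the callee suspend?
//   void emscripten_do_not_unwind();
//   void emscripten_do_not_unwind_async();
//   i32* emscripten_get_async_return_value_addr();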

LowerEmAsyncify::BasicBlockSet LowerEmAsyncify::FindReachableBlocksFrom(BasicBlock *src) {
  BasicBlockSet ReachableBlockSet;
  std::vector<BasicBlock*> pending;
  ReachableBlockSet.insert(src);
  pending.push_back(src);
  while (!pending.empty()) {
    BasicBlock *CurBlock = pending.back();
    pending.pop_back();
    for (succ_iterator SI = succ_begin(CurBlock), SE = succ_end(CurBlock); SI != SE; ++SI) {
      if (ReachableBlockSet.count(*SI) == 0) {
        ReachableBlockSet.insert(*SI);
        pending.push_back(*SI);
      }
    }
  }
  return ReachableBlockSet;
}

void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) {
  BasicBlock *AfterCallBlock = Entry.AfterCallBlock;

  Function & F = *AfterCallBlock->getParent();

  // Create a new entry block, as if we were in the callback function,
  // then check for variables that no longer properly dominate their uses
  BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock());
  BranchInst::Create(AfterCallBlock, EntryBlock);

  DominatorTreeWrapperPass DTW;
  DTW.runOnFunction(F);
  DominatorTree& DT = DTW.getDomTree();

  // These blocks may be using some values defined at or before AsyncCallBlock
  BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock);

  SmallPtrSet<Value*, 256> ContextVariables;
  Values Pending;

  // Examine the instructions and find all variables that we need to store in the context
  for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != RE; ++RI) {
    for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) {
      for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) {
        Value *O = I->getOperand(i);
        if (Instruction *Inst = dyn_cast<Instruction>(O)) {
          if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from the async return value
          if (ContextVariables.count(Inst) != 0) continue; // already examined

          if (!DT.dominates(Inst, I->getOperandUse(i))) {
            // `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock,
            // so we need to save `Inst` in the context
            ContextVariables.insert(Inst);
            Pending.push_back(Inst);
          }
        } else if (Argument *Arg = dyn_cast<Argument>(O)) {
          // count() should be as fast/slow as insert, so just insert here
          ContextVariables.insert(Arg);
        }
      }
    }
  }

  // restore F
  EntryBlock->eraseFromParent();

  Entry.ContextVariables.clear();
  Entry.ContextVariables.reserve(ContextVariables.size());
  for (SmallPtrSet<Value*, 256>::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) {
    Entry.ContextVariables.push_back(*I);
  }
}
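
/*
 * A minimal example of what the dominance test above catches (hypothetical
 * values):
 *
 *   %x = add i32 %a, 1  ; defined before the async call
 *   %0 = call i32 @G()  ; the async call; the function is split here
 *   %y = add i32 %x, %0 ; uses %x after the call
 *
 * With the temporary entry block branching straight to AfterCallBlock, the
 * definition of %x no longer dominates its use in %y, so %x (and the
 * argument %a, if used after the call) must be saved in the context.
 */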

/*
 * Consider that F contains a call to G, both of which are async:
 *
 * function F:
 *   ...
 *   %0 = G(%1, %2, ...);
 *   ...
 *   return %%;
 *
 * We want to convert F and generate F__async_cb.
 * They are similar, but with minor yet important differences.
 * Note the `main func only` and `callback func only` instructions below.

//////////////////////////////////////////////////////////
function F:
  ...
  ctx = alloc_ctx(len, sp); // main func only
     // TODO
     // we could also do this only after an async call,
     // but in that case we would need to pass ctx to the function,
     // since ctx is no longer in the top async stack frame
  %0 = G(%1, %2, ...);
  if (async) { // G was async
    save context variables in ctx
    register F.async_cb as the callback in the frame
    return without unwinding the stack frame
  } else { // G was sync
    // use %0 as normal
    free_ctx(ctx); // main func only
    // ctx is freed here, because so far F is still a sync function
    // and we don't want any side effects
    ...
    async return value = %%;
    return & normally unwind the stack frame // main func only
  }
//////////////////////////////////////////////////////////

 * And here's F.async_cb:

//////////////////////////////////////////////////////////
function F.async_cb(ctx):
  load variables from ctx // callback func only
  goto resume_point; // callback func only
  ...
  ctx = realloc_ctx(len); // callback func only
     // realloc_ctx differs from alloc_ctx
     // in that it reuses the current async stack frame;
     // we want to keep the saved stack pointer
  %0 = G(%1, %2, ...);
  if (async) {
    save context variables in ctx
    register F.async_cb as the callback
    return without unwinding the stack frame
  } else {
resume_point:
    %0' = either %0 or the async return value // callback func only
    ...
    async return value = %%
    return & restore the stack pointer back to the value stored in F // callback func only
    // no need to free the ctx;
    // the scheduler is aware of this return and will handle the stack frames
  }
//////////////////////////////////////////////////////////

*/

void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
  assert(!AsyncCalls.empty());

  // Pass 0
  // collect all the return instructions from the original function;
  // we will use them later
  std::vector<ReturnInst*> OrigReturns;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
      OrigReturns.push_back(RI);
    }
  }

  // Pass 1
  // Scan each async call and build the basic structure;
  // all of this will be cloned into the callback functions:
  // - allocate the async context before calling an async function
  // - check async right after calling an async function; save context & return if async, continue if not
  // - retrieve the async return value and free the async context if the called function turns out to be sync
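  //
  // The CFG around each async call ends up like this (a sketch; names are
  // illustrative, and the free is added in Pass 4):
  //
  //   CurBlock:     ...
  //                 %AsyncCtx = call i32* @emscripten_alloc_async_context(len)
  //                 %0 = call @G(...)
  //                 %IsAsync = call i1 @emscripten_check_async()
  //                 br i1 %IsAsync, label %SaveAsyncCtx, label %AfterCall
  //   SaveAsyncCtx: store the callback & the live values into %AsyncCtx
  //                 return without unwinding
  //   AfterCall:    call void @emscripten_free_async_context(%AsyncCtx)
  //                 ... the original continuation ...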
  std::vector<AsyncCallEntry> AsyncCallEntries;
  AsyncCallEntries.reserve(AsyncCalls.size());
  for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
    // prepare blocks
    Instruction *CurAsyncCall = *I;

    // The block containing the async call
    BasicBlock *CurBlock = CurAsyncCall->getParent();
    // The block that should run after the async call
    BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
    // The block where we store the context and return
    BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
    // terminate it with a dummy instruction for now, to keep the block valid
    new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);

    // Allocate the context before making the call.
    // We don't know the size yet; it will be fixed later.
    // We cannot insert this instruction later, because all the instructions
    // and blocks must be fixed before we can generate the DT and find the
    // context variables.
    // In CallHandler.h `sp` is put in as the second parameter,
    // so that we can take note of the original sp.
    CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);

    // Right after the call,
    // check whether the callee went async, and return if so.
    // TODO: we could define truly async functions and partially async functions
    {
      // remove the old terminator, which came from SplitBlock
      CurBlock->getTerminator()->eraseFromParent();
      // go to SaveAsyncCtxBlock if the preceding call was async,
      // otherwise just continue to AfterCallBlock
      CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
      BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
    }

    // take note of this async call
    AsyncCallEntry CurAsyncCallEntry;
    CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
    CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
    CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
    CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
    // create an empty function for the callback; it will be constructed later
    CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
    AsyncCallEntries.push_back(CurAsyncCallEntry);
  }


  // Pass 2
  // Analyze the context variables and construct SaveAsyncCtxBlock for each async call.
  // Also calculate the size of the context and fix the allocation accordingly.
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    // Collect everything to be saved
    FindContextVariables(CurEntry);

    // Pack the variables as a struct
    {
      // TODO: sort them from large members to small ones, in order to keep the struct compact even when aligned
      SmallVector<Type*, 8> Types;
      Types.push_back(CallbackFunctionType->getPointerTo());
      for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
        Types.push_back((*VI)->getType());
      }
      CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
    }

    // fix the size of the allocation
    CurEntry.AllocAsyncCtxInst->setOperand(0,
        ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));

    // construct SaveAsyncCtxBlock
    {
      // fill in SaveAsyncCtxBlock
      // temporarily remove the terminator for convenience
      CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
      assert(CurEntry.SaveAsyncCtxBlock->empty());

      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
      SmallVector<Value*, 2> Indices;
      // store the callback
      {
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, 0));
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // store the context variables
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // to exit the block, we return without unwinding the stack frame
      CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
      ReturnInst::Create(TheModule->getContext(),
          (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
          CurEntry.SaveAsyncCtxBlock);
    }
  }

  // Pass 3
  // Now all the SaveAsyncCtxBlocks have been constructed,
  // so we can clone F and construct the callback functions.
  // We could not construct the callbacks in Pass 2 because we need _all_ the SaveAsyncCtxBlocks to appear in _each_ callback.
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    Function *CurCallbackFunc = CurEntry.CallbackFunc;
    ValueToValueMapTy VMap;

    // Add the entry block:
    // load the variables from the context
    // and update VMap for CloneFunctionInto
    BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
    std::vector<LoadInst *> LoadedAsyncVars;
    {
      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
      SmallVector<Value*, 2> Indices;
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(CurEntry.ContextStructType, AsyncCtxAddr, Indices, "", EntryBlock);
        LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
        // we want the argument to be replaced by the loaded value
        if (isa<Argument>(CurEntry.ContextVariables[i]))
          VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
      }
    }

    // we don't need any arguments; just leave dummy entries there to cheat CloneFunctionInto
    for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
      if (VMap.count(AI) == 0)
        VMap[AI] = Constant::getNullValue(AI->getType());
    }

    // Clone the function
    {
      SmallVector<ReturnInst*, 8> Returns;
      CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);

      // The return type of the callback functions is always void,
      // so we need to fix the return type.
      if (!F.getReturnType()->isVoidTy()) {
        // For the return instructions that come from the original function,
        // we are 'truly' leaving this function
        // and need to store the return value right before returning.
        for (size_t i = 0; i < OrigReturns.size(); ++i) {
          ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
          // need to store the return value into the global area
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
          new StoreInst(RI->getOperand(0), RetValAddr, RI);
        }
        // We want to unwind the stack back to where it was before the original function was called,
        // but we don't actually need to do that here:
        // at this point no callback can be pending,
        // so the scheduler will correct the stack pointer and pop the frame.
        // Here we just fix the return type.
        for (size_t i = 0; i < Returns.size(); ++i) {
          ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
        }
      }
    }

    // The callback function does not have any return value,
    // so clear all the attributes for return.
    {
      AttributeSet Attrs = CurCallbackFunc->getAttributes();
      CurCallbackFunc->setAttributes(
        Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
      );
    }

    // In the callback function we never allocate a new async frame;
    // instead we reuse the existing one.
    for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
      Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
      ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
    }

    // mapped entry point & async call
    BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
    Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);

    // To save space, for each async call in the callback function we just ignore the sync case and leave it to the scheduler.
    // TODO: need an option for this
    {
      for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
        AsyncCallEntry & CurEntry = *EI;
        Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
        BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
        BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);

        // in the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock)
        BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
        MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
        // store the return value
        if (!MappedAsyncCallInst->use_empty()) {
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
          new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
        }
        // tell the scheduler that we want to keep the current async stack frame
        CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
        // finally we go to the SaveAsyncCtxBlock, to register the callback, save the local variables and leave
        BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
        BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
      }
    }

    std::vector<AllocaInst*> ToPromote;
    // apply the loaded variables in the entry block
    {
      BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Value *OrigVar = CurEntry.ContextVariables[i];
        if (isa<Argument>(OrigVar)) continue; // already processed
        Value *CurVar = VMap[OrigVar];
        assert(CurVar != MappedAsyncCall);
        if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
          if (ReachableBlocks.count(Inst->getParent())) {
            // Inst could be either defined or loaded from the async context.
            // Do the dirty work in memory.
            // TODO: might need to check the safety first
            // TODO: can we create the phi directly?
            AllocaInst *Addr = DemoteRegToStack(*Inst, false);
            new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
            ToPromote.push_back(Addr);
          } else {
            // The parent block is not reachable, which means there is no conflict;
            // it's safe to replace Inst with the loaded value.
            assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
            Inst->replaceAllUsesWith(LoadedAsyncVars[i]);
          }
        }
      }
    }

    // Resolve the return value of the previous async function:
    // it could be the value just loaded from the global area,
    // or directly returned by the function (in its sync case).
    if (!CurEntry.AsyncCallInst->use_empty()) {
      // load the async return value
      CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
      BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
      LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);

      AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
      new StoreInst(RetVal, Addr, EntryBlock);
      ToPromote.push_back(Addr);
    }

    // TODO: remove unreachable blocks before creating the phis

    // We go right to ResumeBlock from EntryBlock
    BranchInst::Create(ResumeBlock, EntryBlock);

    /*
     * Creating phis:
     * Normal stack frames and async stack frames are interleaved with each other.
     * In a callback function, if we call an async function, we might need to realloc the async ctx.
     * At that point we don't want anything stored after the ctx,
     * so that we can free and extend the ctx by simply updating STACKTOP.
     * Therefore we don't want any allocas in callback functions.
     */
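    // What the demote-then-promote dance achieves, in sketch form
    // (hypothetical value %v live at the resume point):
    //
    //   %addr = alloca i32            ; DemoteRegToStack: all uses of %v now
    //   store i32 %v, i32* %addr      ;   go through memory
    //   store i32 %loaded, i32* %addr ; EntryBlock stores the restored value
    //   ...
    //   %v2 = load i32, i32* %addr    ; uses reload it
    //
    // PromoteMemToReg then turns %addr back into SSA values with phis, so no
    // alloca survives in the callback.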
    if (!ToPromote.empty()) {
      DominatorTreeWrapperPass DTW;
      DTW.runOnFunction(*CurCallbackFunc);
      PromoteMemToReg(ToPromote, DTW.getDomTree());
    }

    removeUnreachableBlocks(*CurCallbackFunc);
  }

  // Pass 4
  // These are modifications to the original function that we do not want cloned into the callback functions
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;
    // free the frame if no async function has been called
    CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
  }
}

bool LowerEmAsyncify::IsFunctionPointerCall(const Instruction *I) {
  // mostly from CallHandler.h
  ImmutableCallSite CS(I);
  if (!CS) return false; // neither a call nor an invoke
  const Value *CV = CS.getCalledValue()->stripPointerCasts();
  return !isa<const Function>(CV);
}

ModulePass *llvm::createLowerEmAsyncifyPass() {
  return new LowerEmAsyncify();
}