//===-- NaClValueEnumerator.cpp ------------------------------------------===//
//    Number values and types for bitcode writer
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the NaClValueEnumerator class.
//
//===----------------------------------------------------------------------===//

#include "NaClValueEnumerator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>

using namespace llvm;

static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
  return V.first->getType()->isIntOrIntVectorTy();
}

/// NaClValueEnumerator - Enumerate module-level information.
NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
  // Create map for counting frequency of types, and set field
  // TypeCountMap accordingly. Note: Pointer field TypeCountMap is
  // used to deal with the fact that types are added through various
  // method calls in this routine. Rather than pass it as an argument,
  // we use a field. The field is a pointer so that the memory
  // footprint of count_map is reclaimed when this constructor
  // completes.
  TypeCountMapType count_map;
  TypeCountMap = &count_map;

  IntPtrType = IntegerType::get(M->getContext(), PNaClIntPtrTypeBitSize);

  // Enumerate the functions. Note: We do this before global
  // variables, so that global variable initializations can refer to
  // the functions without a forward reference.
  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
    EnumerateValue(I);
  }

  // Enumerate the global variables.
  FirstGlobalVarID = Values.size();
  for (Module::const_global_iterator I = M->global_begin(),
         E = M->global_end(); I != E; ++I)
    EnumerateValue(I);
  NumGlobalVarIDs = Values.size() - FirstGlobalVarID;

  // Enumerate the aliases.
  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
       I != E; ++I)
    EnumerateValue(I);

  // Remember the cutoff between global values and other constants.
  unsigned FirstConstant = Values.size();

  // Skip global variable initializers since they are handled within
  // WriteGlobalVars of file NaClBitcodeWriter.cpp.

  // Enumerate the aliasees.
  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
       I != E; ++I)
    EnumerateValue(I->getAliasee());

  // Insert constants that are named at module level into the slot
  // pool so that the module symbol table can refer to them...
  EnumerateValueSymbolTable(M->getValueSymbolTable());

  // Enumerate types used by function bodies and argument lists.
  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {

    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I)
      EnumerateType(I->getType());

    for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
           I != E; ++I) {
        // Don't generate types for elided pointer casts!
        if (IsElidedCast(I))
          continue;

        if (const SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
          // Handle switch instruction specially, so that we don't
          // write out unnecessary vector/array types used to model case
          // selectors.
          EnumerateOperandType(SI->getCondition());
        } else {
          for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
               OI != E; ++OI) {
            EnumerateOperandType(*OI);
          }
        }
        EnumerateType(I->getType());
      }
  }

  // Optimize type indices to put "common" expected types in with small
  // indices.
  OptimizeTypes(M);
  TypeCountMap = NULL;

  // Optimize constant ordering.
  OptimizeConstants(FirstConstant, Values.size());
}

void NaClValueEnumerator::OptimizeTypes(const Module *M) {

  // Sort types by use count so that we can index them based on
  // frequency. Use the indices of the already-built TypeMap so that
  // the order of construction is repeatable.
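  //
  // A hypothetical illustration: if i32 was counted ten times, i8 three
  // times, and i1 once, the reinsertion loop below replays them in the
  // order i32, i8, i1, so the most frequently used type gets the
  // smallest type ID.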
  std::set<unsigned> type_counts;
  typedef std::set<unsigned> TypeSetType;
  std::map<unsigned, TypeSetType> usage_count_map;
  TypeList IdType(Types);

  for (TypeCountMapType::iterator iter = TypeCountMap->begin();
       iter != TypeCountMap->end(); ++iter) {
    type_counts.insert(iter->second);
    usage_count_map[iter->second].insert(TypeMap[iter->first]-1);
  }

  // Reset type tracking maps, so that we can re-enter based
  // on frequency ordering.
  TypeCountMap = NULL;
  Types.clear();
  TypeMap.clear();

  // Reinsert types, based on frequency.
  for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();
       count_iter != type_counts.rend(); ++count_iter) {
    TypeSetType &count_types = usage_count_map[*count_iter];
    for (TypeSetType::iterator type_iter = count_types.begin();
         type_iter != count_types.end(); ++type_iter)
      EnumerateType((IdType[*type_iter]), true);
  }
}

unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {
  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
  assert(I != InstructionMap.end() && "Instruction is not mapped!");
  return I->second;
}

void NaClValueEnumerator::setInstructionID(const Instruction *I) {
  InstructionMap[I] = InstructionCount++;
}

unsigned NaClValueEnumerator::getValueID(const Value *V) const {
  ValueMapType::const_iterator I = ValueMap.find(V);
  assert(I != ValueMap.end() && "Value not in slotcalculator!");
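  // Note: IDs in ValueMap are biased by one, so that a value of zero
  // (the map's default) means "not enumerated yet"; subtract one to
  // recover the actual ID.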
  return I->second-1;
}

void NaClValueEnumerator::dump() const {
  print(dbgs(), ValueMap, "Default");
  dbgs() << '\n';
}

void NaClValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
                                const char *Name) const {

  OS << "Map Name: " << Name << "\n";
  OS << "Size: " << Map.size() << "\n";
  for (ValueMapType::const_iterator I = Map.begin(),
         E = Map.end(); I != E; ++I) {

    const Value *V = I->first;
    if (V->hasName())
      OS << "Value: " << V->getName();
    else
      OS << "Value: [null]\n";
    V->dump();

    OS << " Uses(" << std::distance(V->use_begin(), V->use_end()) << "):";
    for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
         UI != UE; ++UI) {
      if (UI != V->use_begin())
        OS << ",";
      if ((*UI)->hasName())
        OS << " " << (*UI)->getName();
      else
        OS << " [null]";

    }
    OS << "\n\n";
  }
}

// Optimize constant ordering.
namespace {
  struct CstSortPredicate {
    NaClValueEnumerator &VE;
    explicit CstSortPredicate(NaClValueEnumerator &ve) : VE(ve) {}
    bool operator()(const std::pair<const Value*, unsigned> &LHS,
                    const std::pair<const Value*, unsigned> &RHS) {
      // Sort by plane.
      if (LHS.first->getType() != RHS.first->getType())
        return VE.getTypeID(LHS.first->getType()) <
               VE.getTypeID(RHS.first->getType());
      // Then by frequency.
      return LHS.second > RHS.second;
    }
  };
}

/// OptimizeConstants - Reorder constant pool for denser encoding.
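// For example (a hypothetical ordering), all i32 constants form one type
// "plane" and are grouped together, with the most frequently referenced
// i32 constant sorted first so it gets the smallest ID within that plane.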
void NaClValueEnumerator::OptimizeConstants(unsigned CstStart,
                                            unsigned CstEnd) {
  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;

  CstSortPredicate P(*this);
  std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);

  // Ensure that integer and vector-of-integer constants are at the start
  // of the constant pool. This is important so that GEP structure indices
  // come before GEP constant exprs.
  std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
                 isIntOrIntVectorValue);

  // Rebuild the modified portion of ValueMap.
  for (; CstStart != CstEnd; ++CstStart)
    ValueMap[Values[CstStart].first] = CstStart+1;
}


/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void NaClValueEnumerator::EnumerateValueSymbolTable(
    const ValueSymbolTable &VST) {
  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
       VI != VE; ++VI)
    EnumerateValue(VI->getValue());
}

void NaClValueEnumerator::EnumerateValue(const Value *VIn) {
  // Skip over elided values.
  const Value *V = ElideCasts(VIn);
  if (V != VIn) return;

  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
  assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
         "EnumerateValue doesn't handle Metadata!");

  // Check to see if it's already in!
  unsigned &ValueID = ValueMap[V];
  if (ValueID) {
    // Increment use count.
    Values[ValueID-1].second++;
    return;
  }

  // Enumerate the type of this value. Skip global values since no
  // types are dumped for global variables.
  if (!isa<GlobalVariable>(V))
    EnumerateType(V->getType());

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (isa<GlobalValue>(C)) {
      // Initializers for globals are handled explicitly elsewhere.
    } else if (C->getNumOperands()) {
      // If a constant has operands, enumerate them too. This ensures that
      // if a constant has uses (for example, an array of const ints), its
      // operands are inserted as well.

      // We prefer to enumerate the operands before we enumerate the user
      // itself. This makes it more likely that we can avoid forward
      // references in the reader. We know that there can be no cycles in
      // the constants graph that don't go through a global variable.
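      //
      // For instance, for the (hypothetical) constant
      //   [2 x i32] [i32 1, i32 2]
      // the elements i32 1 and i32 2 receive IDs before the array
      // constant itself does.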
      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
           I != E; ++I)
        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
          EnumerateValue(*I);

      // Finally, add the value. Doing this could make the ValueID reference be
      // dangling, don't reuse it.
      Values.push_back(std::make_pair(V, 1U));
      ValueMap[V] = Values.size();
      return;
    }
  }

  // Add the value.
  Values.push_back(std::make_pair(V, 1U));
  ValueID = Values.size();
}

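// Normalize the given type for writing. Under the PNaCl ABI, pointers
// are represented as the pointer-sized integer type (IntPtrType, i.e.
// i32), so, to illustrate, an i8* parameter is treated as i32 and a
// function type such as void(i8*) normalizes to void(i32). (The example
// types here are illustrative, not taken from this file.)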
Type *NaClValueEnumerator::NormalizeType(Type *Ty) const {
  if (Ty->isPointerTy())
    return IntPtrType;
  if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
    SmallVector<Type *, 8> ArgTypes;
    for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I)
      ArgTypes.push_back(NormalizeType(FTy->getParamType(I)));
    return FunctionType::get(NormalizeType(FTy->getReturnType()),
                             ArgTypes, false);
  }
  return Ty;
}

void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) {
  // Pointer types do not need to be given type IDs.
  if (Ty->isPointerTy())
    Ty = Ty->getPointerElementType();

  Ty = NormalizeType(Ty);

  // The label type does not need to be given a type ID.
  if (Ty->isLabelTy())
    return;

  // This function is used to enumerate types referenced by the given
  // module. It is called in two phases, distinguished by the value of
  // TypeCountMap (and the InsideOptimizeTypes flag):
  //
  // (1) In this phase, InsideOptimizeTypes=false. We are collecting types
  //     and all corresponding (implicitly) referenced types. In addition,
  //     we are keeping track of the number of references to each type in
  //     TypeCountMap. These reference counts will be used by method
  //     OptimizeTypes to associate the smallest type IDs with the most
  //     referenced types.
  //
  // (2) In this phase, InsideOptimizeTypes=true. We are registering types
  //     based on frequency. To minimize type IDs for frequently used
  //     types, (unlike the other phase) we insert only the minimal
  //     (implicitly) referenced types needed for each type.
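  //
  // As a hypothetical illustration, enumerating a struct type {i32, i8}
  // in phase (1) also enumerates i32 and i8 and bumps the reference
  // counts of all three; in phase (2), OptimizeTypes replays the types
  // in decreasing count order so the most used types get the smallest
  // IDs.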
  unsigned *TypeID = &TypeMap[Ty];

  if (TypeCountMap) ++((*TypeCountMap)[Ty]);

  // We've already seen this type.
  if (*TypeID)
    return;

  // If it is a non-anonymous struct, mark the type as being visited so that we
  // don't recursively visit it. This is safe because we allow forward
  // references of these in the bitcode reader.
  if (StructType *STy = dyn_cast<StructType>(Ty))
    if (!STy->isLiteral())
      *TypeID = ~0U;

  // If in the second phase (i.e. inside optimize types), don't expand
  // pointers to structures, since we can just generate a forward
  // reference to it. This way, we don't use up unnecessary (small) ID
  // values just to define the pointer.
  bool EnumerateSubtypes = true;
  if (InsideOptimizeTypes)
    if (PointerType *PTy = dyn_cast<PointerType>(Ty))
      if (StructType *STy = dyn_cast<StructType>(PTy->getElementType()))
        if (!STy->isLiteral())
          EnumerateSubtypes = false;

  // Enumerate all of the subtypes before we enumerate this type. This ensures
  // that the type will be enumerated in an order that can be directly built.
  if (EnumerateSubtypes) {
    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
         I != E; ++I)
      EnumerateType(*I, InsideOptimizeTypes);
  }

  // Refresh the TypeID pointer in case the table rehashed.
  TypeID = &TypeMap[Ty];

  // Check to see if we got the pointer another way. This can happen when
  // enumerating recursive types that hit the base case deeper than they start.
  //
  // If this is actually a struct that we are treating as forward ref'able,
  // then emit the definition now that all of its contents are available.
  if (*TypeID && *TypeID != ~0U)
    return;

  // Add this type now that its contents are all happily enumerated.
  Types.push_back(Ty);

  *TypeID = Types.size();
}

// Enumerate the types for the specified value. If the value is a constant,
// walk through it, enumerating the types of the constant.
void NaClValueEnumerator::EnumerateOperandType(const Value *V) {
  // Note: We intentionally don't create a type id for global variables,
  // since the type is automatically generated by the reader before any
  // use of the global variable.
  if (isa<GlobalVariable>(V)) return;

  EnumerateType(V->getType());

  if (const Constant *C = dyn_cast<Constant>(V)) {
    // If this constant is already enumerated, ignore it, we know its type must
    // be enumerated.
    if (ValueMap.count(V)) return;

    // This constant may have operands, make sure to enumerate the types in
    // them.
    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
      const Value *Op = C->getOperand(i);

      // Don't enumerate basic blocks here, this happens as operands to
      // blockaddress.
      if (isa<BasicBlock>(Op)) continue;

      EnumerateOperandType(Op);
    }
  }
}

void NaClValueEnumerator::incorporateFunction(const Function &F) {
  InstructionCount = 0;
  NumModuleValues = Values.size();

  // Make sure no insertions outside of a function.
  assert(FnForwardTypeRefs.empty());

  // Adding function arguments to the value table.
  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
       I != E; ++I)
    EnumerateValue(I);

  FirstFuncConstantID = Values.size();

  // Add all function-level constants to the value table.
  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      if (const SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
        // Handle switch instruction specially, so that we don't write
        // out unnecessary vector/array constants used to model case selectors.
        if (isa<Constant>(SI->getCondition())) {
          EnumerateValue(SI->getCondition());
        }
      } else {
        for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
             OI != E; ++OI) {
          if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
              isa<InlineAsm>(*OI))
            EnumerateValue(*OI);
        }
      }
    }
    BasicBlocks.push_back(BB);
    ValueMap[BB] = BasicBlocks.size();
  }

  // Optimize the constant layout.
  OptimizeConstants(FirstFuncConstantID, Values.size());

  FirstInstID = Values.size();

  // Add all of the instructions.
  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      if (!I->getType()->isVoidTy())
        EnumerateValue(I);
    }
  }
}

void NaClValueEnumerator::purgeFunction() {
  // Remove purged values from the ValueMap.
  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
    ValueMap.erase(Values[i].first);
  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
    ValueMap.erase(BasicBlocks[i]);

  Values.resize(NumModuleValues);
  BasicBlocks.clear();
  FnForwardTypeRefs.clear();
}

// The normal form required by the PNaCl ABI verifier (documented in
// ReplacePtrsWithInts.cpp) allows us to omit the following pointer
// casts from the bitcode file.
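//
// As a hypothetical illustration, given
//   %p = inttoptr i32 %addr to i32*
//   %v = load i32* %p
// the inttoptr is elided, and the load is written as if it referenced
// %addr directly.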
const Value *NaClValueEnumerator::ElideCasts(const Value *V) const {
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    switch (I->getOpcode()) {
    default:
      break;
    case Instruction::BitCast:
      if (I->getType()->isPointerTy()) {
        V = I->getOperand(0);
      }
      break;
    case Instruction::IntToPtr:
      V = ElideCasts(I->getOperand(0));
      break;
    case Instruction::PtrToInt:
      if (IsIntPtrType(I->getType())) {
        V = I->getOperand(0);
      }
      break;
    }
  }
  return V;
}