lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp - Issue 14495008: Create type IDs based on reference counts.

Side by Side Diff: lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp

Issue 14495008: Create type IDs based on reference counts. (Closed) Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master

Patch Set: Created 7 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===-- NaClValueEnumerator.cpp ------------------------------------------===//	1 //===-- NaClValueEnumerator.cpp ------------------------------------------===//

2 // Number values and types for bitcode writer	2 // Number values and types for bitcode writer

3 //	3 //

4 // The LLVM Compiler Infrastructure	4 // The LLVM Compiler Infrastructure

5 //	5 //

6 // This file is distributed under the University of Illinois Open Source	6 // This file is distributed under the University of Illinois Open Source

7 // License. See LICENSE.TXT for details.	7 // License. See LICENSE.TXT for details.

8 //	8 //

9 //===----------------------------------------------------------------------===//	9 //===----------------------------------------------------------------------===//

10 //	10 //

11 // This file implements the NaClValueEnumerator class.	11 // This file implements the NaClValueEnumerator class.

12 //	12 //

13 //===----------------------------------------------------------------------===//	13 //===----------------------------------------------------------------------===//

14	14

15 #include "NaClValueEnumerator.h"	15 #include "NaClValueEnumerator.h"

16 #include "llvm/ADT/STLExtras.h"	16 #include "llvm/ADT/STLExtras.h"

17 #include "llvm/ADT/SmallPtrSet.h"	17 #include "llvm/ADT/SmallPtrSet.h"

18 #include "llvm/IR/Constants.h"	18 #include "llvm/IR/Constants.h"

19 #include "llvm/IR/DerivedTypes.h"	19 #include "llvm/IR/DerivedTypes.h"

20 #include "llvm/IR/Instructions.h"	20 #include "llvm/IR/Instructions.h"

21 #include "llvm/IR/Module.h"	21 #include "llvm/IR/Module.h"

22 #include "llvm/IR/ValueSymbolTable.h"	22 #include "llvm/IR/ValueSymbolTable.h"

23 #include "llvm/Support/Debug.h"	23 #include "llvm/Support/Debug.h"

24 #include "llvm/Support/raw_ostream.h"	24 #include "llvm/Support/raw_ostream.h"

25 #include <algorithm>	25 #include <algorithm>

	26 #include <set>

	27

26 using namespace llvm;	28 using namespace llvm;

27	29

28 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {	30 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {

29 return V.first->getType()->isIntOrIntVectorTy();	31 return V.first->getType()->isIntOrIntVectorTy();

30 }	32 }

31	33

32 /// NaClValueEnumerator - Enumerate module-level information.	34 /// NaClValueEnumerator - Enumerate module-level information.

33 NaClValueEnumerator::NaClValueEnumerator(const Module *M) {	35 NaClValueEnumerator::NaClValueEnumerator(const Module *M) {

	36 // Create map for counting frequency of types, and set feild

	37 // TypeCountMap accordingly. Let OptimizeTypes call (below) reset

	38 // TypeCountMap to NULL, once collected information has been used.

	39 TypeCountMapType count_map;

	40 TypeCountMap = &count_map;
	Derek Schuff 2013/05/02 15:41:45 it's weird having this extra pointer and having NU it's weird having this extra pointer and having NULL be a special meaning, when the structure itself is also local. Just use the local structure directly, and instead of having a NULL pointer, just use a boolean with a more helpful name. Karl 2013/05/15 20:12:20 The point wasn't just that it is NULL, but that it Show quoted text On 2013/05/02 15:41:45, Derek Schuff wrote: > it's weird having this extra pointer and having NULL be a special meaning, when > the structure itself is also local. Just use the local structure directly, and > instead of having a NULL pointer, just use a boolean with a more helpful name. The point wasn't just that it is NULL, but that it is referenced nonlocally through method calls. As such, it needs to be a field. Added a comment to try and explain this.
34 // Enumerate the global variables.	41 // Enumerate the global variables.

35 for (Module::const_global_iterator I = M->global_begin(),	42 for (Module::const_global_iterator I = M->global_begin(),

36 E = M->global_end(); I != E; ++I)	43 E = M->global_end(); I != E; ++I)

37 EnumerateValue(I);	44 EnumerateValue(I);

38	45

39 // Enumerate the functions.	46 // Enumerate the functions.

40 for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {	47 for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {

41 EnumerateValue(I);	48 EnumerateValue(I);

42 EnumerateAttributes(cast<Function>(I)->getAttributes());	49 EnumerateAttributes(cast<Function>(I)->getAttributes());

43 }	50 }

(...skipping 10 matching lines...) Expand all Loading...
54 for (Module::const_global_iterator I = M->global_begin(),	61 for (Module::const_global_iterator I = M->global_begin(),

55 E = M->global_end(); I != E; ++I)	62 E = M->global_end(); I != E; ++I)

56 if (I->hasInitializer())	63 if (I->hasInitializer())

57 EnumerateValue(I->getInitializer());	64 EnumerateValue(I->getInitializer());

58	65

59 // Enumerate the aliasees.	66 // Enumerate the aliasees.

60 for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();	67 for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();

61 I != E; ++I)	68 I != E; ++I)

62 EnumerateValue(I->getAliasee());	69 EnumerateValue(I->getAliasee());

63	70

64 // Insert constants and metadata that are named at module level into the slot	71 // Insert constants and metadata that are named at module level into the slot

65 // pool so that the module symbol table can refer to them...	72 // pool so that the module symbol table can refer to them...

66 EnumerateValueSymbolTable(M->getValueSymbolTable());	73 EnumerateValueSymbolTable(M->getValueSymbolTable());

67 EnumerateNamedMetadata(M);	74 EnumerateNamedMetadata(M);

68	75

69 SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;	76 SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;

70	77

71 // Enumerate types used by function bodies and argument lists.	78 // Enumerate types used by function bodies and argument lists.

72 for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {	79 for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {

73	80

74 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();	81 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();

(...skipping 24 matching lines...) Expand all Loading...
99	106

100 if (!I->getDebugLoc().isUnknown()) {	107 if (!I->getDebugLoc().isUnknown()) {

101 MDNode Scope, IA;	108 MDNode Scope, IA;

102 I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());	109 I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());

103 if (Scope) EnumerateMetadata(Scope);	110 if (Scope) EnumerateMetadata(Scope);

104 if (IA) EnumerateMetadata(IA);	111 if (IA) EnumerateMetadata(IA);

105 }	112 }

106 }	113 }

107 }	114 }

108	115

	116 // Optimized type indicies to put "common" expected types in with small

	117 // indices.

	118 OptimizeTypes(M);

	119

109 // Optimize constant ordering.	120 // Optimize constant ordering.

110 OptimizeConstants(FirstConstant, Values.size());	121 OptimizeConstants(FirstConstant, Values.size());

111 }	122 }

112	123

	124 void NaClValueEnumerator::OptimizeTypes(const Module *M) {

	125

	126 // Sort types by count, so that we can index them based on

	127 // frequency.

	128 std::set<unsigned> type_counts;

	129 typedef std::set<Type*> TypeSetType;

	130 std::map<unsigned, TypeSetType> usage_count_map;

	131

	132 for (TypeCountMapType::iterator iter = TypeCountMap->begin();

	133 iter != TypeCountMap->end(); ++ iter) {

	134 type_counts.insert(iter->second);

	135 usage_count_map[iter->second].insert(iter->first);

	136 }

	137

	138 // Reset type tracking maps, so that we can re-enter based

	139 // on fequency ordering.

	140 TypeCountMap = NULL;

	141 Types.clear();

	142 TypeMap.clear();

	143

	144 // Start by adding Common integer types.

	145 EnumerateType(Type::getInt1Ty(M->getContext()));

	146 EnumerateType(Type::getInt8Ty(M->getContext()));
	jvoung (off chromium) 2013/05/02 16:35:40 So these primitive types weren't already part of t So these primitive types weren't already part of the TypeCountMap? It would be nice if they were automatically tracked also, then we wouldn't have to worry about remembering to add the "half" type or not. It would just have a count of 0 if the program never had any "half" types. Karl 2013/05/15 20:12:20 Reasonable point. Restructured code to make sure w Show quoted text On 2013/05/02 16:35:40, jvoung (cr) wrote: > So these primitive types weren't already part of the TypeCountMap? It would be > nice if they were automatically tracked also, then we wouldn't have to worry > about remembering to add the "half" type or not. It would just have a count of > 0 if the program never had any "half" types. Reasonable point. Restructured code to make sure we only add the type if it is used. jvoung (off chromium) 2013/05/15 21:01:36 Do they not get counted in the type_counts then? Show quoted text On 2013/05/15 20:12:20, Karl wrote: > On 2013/05/02 16:35:40, jvoung (cr) wrote: > > So these primitive types weren't already part of the TypeCountMap? It would > be > > nice if they were automatically tracked also, then we wouldn't have to worry > > about remembering to add the "half" type or not. It would just have a count > of > > 0 if the program never had any "half" types. > > Reasonable point. Restructured code to make sure we only add the type if it is > used. Do they not get counted in the type_counts then? If so, I don't see why they need to be given put ahead of the other types. The sorting would fall out naturally from the type_counts map? E.g., what if i16 is actually rare compared to i32, why should i16 come first (even though, yes the order of the first N types probably doesn't matter so much)? It's more about having less special code. Karl 2013/05/20 21:48:51 The point of explicitly ordering was to (in the fu Show quoted text On 2013/05/15 21:01:36, jvoung (cr) wrote: > On 2013/05/15 20:12:20, Karl wrote: > > On 2013/05/02 16:35:40, jvoung (cr) wrote: > > > So these primitive types weren't already part of the TypeCountMap? It would > > be > > > nice if they were automatically tracked also, then we wouldn't have to worry > > > about remembering to add the "half" type or not. It would just have a count > > of > > > 0 if the program never had any "half" types. > > > > Reasonable point. Restructured code to make sure we only add the type if it is > > used. > > Do they not get counted in the type_counts then? If so, I don't see why they > need to be given put ahead of the other types. The sorting would fall out > naturally from the type_counts map? > > E.g., what if i16 is actually rare compared to i32, why should i16 come first > (even though, yes the order of the first N types probably doesn't matter so > much)? > > It's more about having less special code. The point of explicitly ordering was to (in the future) shrink the bit width needed for integer/floating point operations. I may be presuming too much here. Not to presume, I'll remove this until I'm actually ready to change binary integer/floating operations. That way, I'll have a better feel for whether this is a good assumption.
	147 EnumerateType(Type::getInt16Ty(M->getContext()));

	148 EnumerateType(Type::getInt32Ty(M->getContext()));

	149 EnumerateType(Type::getInt64Ty(M->getContext()));

	150

	151 // Add common float types.

	152 // Note: This list is specific to NaCL, and one may want to use a

	153 // more complete list if adding to LLVM bitcode format.

	154 EnumerateType(Type::getFloatTy(M->getContext()));

	155 EnumerateType(Type::getDoubleTy(M->getContext()));

	156

	157 // Insert remaining types, based on frequency.

	158 for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();

	159 count_iter != type_counts.rend(); ++count_iter) {

	160 TypeSetType& count_types = usage_count_map[*count_iter];

	161 for (TypeSetType::iterator type_iter = count_types.begin();

	162 type_iter != count_types.end(); ++type_iter) {

	163 EnumerateType(*type_iter);

	164 }

	165 }

	166 }

	167

113 unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {	168 unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {

114 InstructionMapType::const_iterator I = InstructionMap.find(Inst);	169 InstructionMapType::const_iterator I = InstructionMap.find(Inst);

115 assert(I != InstructionMap.end() && "Instruction is not mapped!");	170 assert(I != InstructionMap.end() && "Instruction is not mapped!");

116 return I->second;	171 return I->second;

117 }	172 }

118	173

119 void NaClValueEnumerator::setInstructionID(const Instruction *I) {	174 void NaClValueEnumerator::setInstructionID(const Instruction *I) {

120 InstructionMap[I] = InstructionCount++;	175 InstructionMap[I] = InstructionCount++;

121 }	176 }

122	177

(...skipping 222 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
345 }	400 }

346 }	401 }

347	402

348 // Add the value.	403 // Add the value.

349 Values.push_back(std::make_pair(V, 1U));	404 Values.push_back(std::make_pair(V, 1U));

350 ValueID = Values.size();	405 ValueID = Values.size();

351 }	406 }

352	407

353	408

354 void NaClValueEnumerator::EnumerateType(Type *Ty) {	409 void NaClValueEnumerator::EnumerateType(Type *Ty) {

	410 // This function is used to enumerate types referenced by the given

	411 // module. This function is called in two phases, based on the value

	412 // of TypeCountMap. These phases are:

	413 //

	414 // (1) In this phase, TypeCountMap!=NULL. We are collecting types

	415 // and all corresponding (implicitly) referenced types. In addition,

	416 // we are keeping track of the number of references to each type in

	417 // TypeCountMap. These reference counts will be used by method

	418 // OptimizeTypes to associate the smallest type ID's with the most

	419 // referenced types.

	420 //

	421 // (2) In this phase, TypeCountMap==NULL. We are registering types

	422 // based on frequency. To minimize type IDs for frequently used

	423 // types, (unlike the other context) we are inserting the minimal

	424 // (implicitly) referenced types needed for each type.

355 unsigned *TypeID = &TypeMap[Ty];	425 unsigned *TypeID = &TypeMap[Ty];

356	426

	427 if (TypeCountMap) ++((*TypeCountMap)[Ty]);

	428

357 // We've already seen this type.	429 // We've already seen this type.

358 if (*TypeID)	430 if (*TypeID)

359 return;	431 return;

360	432

361 // If it is a non-anonymous struct, mark the type as being visited so that we	433 // If it is a non-anonymous struct, mark the type as being visited so that we

362 // don't recursively visit it. This is safe because we allow forward	434 // don't recursively visit it. This is safe because we allow forward

363 // references of these in the bitcode reader.	435 // references of these in the bitcode reader.

364 if (StructType *STy = dyn_cast<StructType>(Ty))	436 if (StructType *STy = dyn_cast<StructType>(Ty))

365 if (!STy->isLiteral())	437 if (!STy->isLiteral())

366 *TypeID = ~0U;	438 *TypeID = ~0U;

367	439

	440 // If in the second phase, don't expand pointers to structures, since

	441 // we can just generate a forward reference to it. This way, we don't

	442 // use up unnecessary (small) ID values just to define the pointer.
	jvoung (off chromium) 2013/05/02 16:35:40 Is there a test that would demonstrate this waste Is there a test that would demonstrate this waste vs savings, to make sure that this is working? Karl 2013/05/15 20:12:20 Ok. Done. Show quoted text On 2013/05/02 16:35:40, jvoung (cr) wrote: > Is there a test that would demonstrate this waste vs savings, to make sure that > this is working? Ok. Done.
	443 bool EnumerateSubtypes = true;

	444 if (TypeCountMap == NULL)

	445 if (PointerType *PTy = dyn_cast<PointerType>(Ty))

	446 if (StructType *STy = dyn_cast<StructType>(PTy))
	jvoung (off chromium) 2013/05/02 16:35:40 Unless I'm reading this wrong, it still seems like Unless I'm reading this wrong, it still seems like it isn't possible to enter this branch and set EnumerateSubtypes to false. You're just casting Ty -> PTy -> STy, and it's all the same value, right? If you wanted the pointed-to-value's Type, you would need to cast PTy->getElementType(), instead of PTy? Karl 2013/05/15 20:12:20 Your right. I fixed it. It definitely wasn't firin Show quoted text On 2013/05/02 16:35:40, jvoung (cr) wrote: > Unless I'm reading this wrong, it still seems like it isn't possible to enter > this branch and set EnumerateSubtypes to false. > Your right. I fixed it. It definitely wasn't firing (you can easily see the savings with pnacl_translate.pexe). Show quoted text > You're just casting Ty -> PTy -> STy, and it's all the same value, right? If > you wanted the pointed-to-value's Type, you would need to cast > PTy->getElementType(), instead of PTy?
	447 if (!STy->isLiteral())

	448 EnumerateSubtypes = false;

	449

368 // Enumerate all of the subtypes before we enumerate this type. This ensures	450 // Enumerate all of the subtypes before we enumerate this type. This ensures

369 // that the type will be enumerated in an order that can be directly built.	451 // that the type will be enumerated in an order that can be directly built.

370 for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();	452 if (EnumerateSubtypes) {

371 I != E; ++I)	453 for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();

372 EnumerateType(*I);	454 I != E; ++I)

	455 EnumerateType(*I);

	456 }

373	457

374 // Refresh the TypeID pointer in case the table rehashed.	458 // Refresh the TypeID pointer in case the table rehashed.

375 TypeID = &TypeMap[Ty];	459 TypeID = &TypeMap[Ty];

376	460

377 // Check to see if we got the pointer another way. This can happen when	461 // Check to see if we got the pointer another way. This can happen when

378 // enumerating recursive types that hit the base case deeper than they start.	462 // enumerating recursive types that hit the base case deeper than they start.

379 //	463 //

380 // If this is actually a struct that we are treating as forward ref'able,	464 // If this is actually a struct that we are treating as forward ref'able,

381 // then emit the definition now that all of its contents are available.	465 // then emit the definition now that all of its contents are available.

382 if (TypeID && TypeID != ~0U)	466 if (TypeID && TypeID != ~0U)

(...skipping 148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
531 /// specified basic block. This is relatively expensive information, so it	615 /// specified basic block. This is relatively expensive information, so it

532 /// should only be used by rare constructs such as address-of-label.	616 /// should only be used by rare constructs such as address-of-label.

533 unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {	617 unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {

534 unsigned &Idx = GlobalBasicBlockIDs[BB];	618 unsigned &Idx = GlobalBasicBlockIDs[BB];

535 if (Idx != 0)	619 if (Idx != 0)

536 return Idx-1;	620 return Idx-1;

537	621

538 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);	622 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);

539 return getGlobalBasicBlockID(BB);	623 return getGlobalBasicBlockID(BB);

540 }	624 }

541

OLD	NEW

« lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp ('K') | « lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h ('k') | no next file » | no next file with comments »