Chromium Code Reviews

Side by Side Diff: lib/Target/R600/AMDILPeepholeOptimizer.cpp

Issue 183273009: Prep for merging 3.4: Undo changes from 3.3 branch (Closed) Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master
Patch Set: Retry Created 6 years, 9 months ago
(Empty)
1 //===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 //==-----------------------------------------------------------------------===//
10
11 #define DEBUG_TYPE "PeepholeOpt"
12 #ifdef DEBUG
13 #define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
14 #else
15 #define DEBUGME 0
16 #endif
17
18 #include "AMDILDevices.h"
19 #include "AMDGPUInstrInfo.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32
33 #include <sstream>
34
35 #if 0
36 STATISTIC(PointerAssignments, "Number of dynamic pointer "
37 "assigments discovered");
38 STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
39 #endif
40
41 using namespace llvm;
42 // The Peephole optimization pass is used to do simple last-minute optimizations
43 // that are required for correct code or to remove redundant functions.
44 namespace {
45
46 class OpaqueType;
47
48 class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
49 public:
50 TargetMachine &TM;
51 static char ID;
52 AMDGPUPeepholeOpt(TargetMachine &tm);
53 ~AMDGPUPeepholeOpt();
54 const char *getPassName() const;
55 bool runOnFunction(Function &F);
56 bool doInitialization(Module &M);
57 bool doFinalization(Module &M);
58 void getAnalysisUsage(AnalysisUsage &AU) const;
59 protected:
60 private:
61 // Function to initiate all of the instruction level optimizations.
62 bool instLevelOptimizations(BasicBlock::iterator *inst);
63 // Quick check to see if we need to dump all of the pointers into the
64 // arena. If so, then we set all pointers to exist in the arena. This
65 // is a workaround for aliasing of pointers in a struct/union.
66 bool dumpAllIntoArena(Function &F);
67 // Because we don't want to invalidate any pointers while in the
68 // safeNestedForEach function, we push atomic conversions to a vector and handle
69 // them later. This function does the conversions if required.
70 void doAtomicConversionIfNeeded(Function &F);
71 // Because __amdil_is_constant cannot be properly evaluated if
72 // optimizations are disabled, the calls are placed in a vector
73 // and evaluated after the __amdil_image* functions are evaluated,
74 // which should allow the __amdil_is_constant function to be
75 // evaluated correctly.
76 void doIsConstCallConversionIfNeeded();
77 bool mChanged;
78 bool mDebug;
79 bool mConvertAtomics;
80 CodeGenOpt::Level optLevel;
81 // Run a series of tests to see if we can optimize a CALL instruction.
82 bool optimizeCallInst(BasicBlock::iterator *bbb);
83 // A peephole optimization to optimize bit extract sequences.
84 bool optimizeBitExtract(Instruction *inst);
85 // A peephole optimization to optimize bit insert sequences.
86 bool optimizeBitInsert(Instruction *inst);
87 bool setupBitInsert(Instruction *base,
88 Instruction *&src,
89 Constant *&mask,
90 Constant *&shift);
91 // Expand the bit field insert instruction on versions of OpenCL that
92 // don't support it.
93 bool expandBFI(CallInst *CI);
94 // Expand the bit field mask instruction on versions of OpenCL that
95 // don't support it.
96 bool expandBFM(CallInst *CI);
97 // On 7XX and 8XX hardware we do not have 24-bit signed operations, so in
98 // that case we need to expand them. These functions check for 24-bit functions
99 // and then expand them.
100 bool isSigned24BitOps(CallInst *CI);
101 void expandSigned24BitOps(CallInst *CI);
102 // One optimization that can occur is that if the required workgroup size is
103 // specified then the result of get_local_size is known at compile time and
104 // can be returned accordingly.
105 bool isRWGLocalOpt(CallInst *CI);
106 // On Northern Islands cards, the division is slightly less accurate than on
107 // previous generations, so we need to utilize a more accurate division there,
108 // while on all other cards we can translate the accurate divide to a normal divide.
109 bool convertAccurateDivide(CallInst *CI);
110 void expandAccurateDivide(CallInst *CI);
111 // If the alignment is set incorrectly, it can produce really inefficient
112 // code. This checks for this scenario and fixes it if possible.
113 bool correctMisalignedMemOp(Instruction *inst);
114
115 // If we are in no-opt mode, then we need to make sure that
116 // local samplers are properly propagated, as constant propagation
117 // doesn't occur and we need to know the value of kernel-defined
118 // samplers at compile time.
119 bool propagateSamplerInst(CallInst *CI);
120
121 // Helper functions
122
123 // Group of functions that recursively calculate the size of a structure based
124 // on its sub-types.
125 size_t getTypeSize(Type * const T, bool dereferencePtr = false);
126 size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
127 size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
128 size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
129 size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
130 size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
131 size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
132 size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
133
134 LLVMContext *mCTX;
135 Function *mF;
136 const AMDGPUSubtarget *mSTM;
137 SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
138 SmallVector<CallInst *, 16> isConstVec;
139 }; // class AMDGPUPeepholeOpt
140 char AMDGPUPeepholeOpt::ID = 0;
141
142 // A template function that has two levels of looping before calling the
143 // function with a pointer to the current iterator.
144 template<class InputIterator, class SecondIterator, class Function>
145 Function safeNestedForEach(InputIterator First, InputIterator Last,
146 SecondIterator S, Function F) {
147 for ( ; First != Last; ++First) {
148 SecondIterator sf, sl;
149 for (sf = First->begin(), sl = First->end();
150 sf != sl; ) {
151 if (!F(&sf)) {
152 ++sf;
153 }
154 }
155 }
156 return F;
157 }
158
159 } // anonymous namespace
160
161 namespace llvm {
162 FunctionPass *
163 createAMDGPUPeepholeOpt(TargetMachine &tm) {
164 return new AMDGPUPeepholeOpt(tm);
165 }
166 } // llvm namespace
167
168 AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
169 : FunctionPass(ID), TM(tm) {
170 mDebug = DEBUGME;
171 optLevel = TM.getOptLevel();
172
173 }
174
175 AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
176 }
177
178 const char *
179 AMDGPUPeepholeOpt::getPassName() const {
180 return "AMDGPU PeepHole Optimization Pass";
181 }
182
183 bool
184 containsPointerType(Type *Ty) {
185 if (!Ty) {
186 return false;
187 }
188 switch(Ty->getTypeID()) {
189 default:
190 return false;
191 case Type::StructTyID: {
192 const StructType *ST = dyn_cast<StructType>(Ty);
193 for (StructType::element_iterator stb = ST->element_begin(),
194 ste = ST->element_end(); stb != ste; ++stb) {
195 if (!containsPointerType(*stb)) {
196 continue;
197 }
198 return true;
199 }
200 break;
201 }
202 case Type::VectorTyID:
203 case Type::ArrayTyID:
204 return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
205 case Type::PointerTyID:
206 return true;
207 };
208 return false;
209 }
210
211 bool
212 AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
213 bool dumpAll = false;
214 for (Function::const_arg_iterator cab = F.arg_begin(),
215 cae = F.arg_end(); cab != cae; ++cab) {
216 const Argument *arg = cab;
217 const PointerType *PT = dyn_cast<PointerType>(arg->getType());
218 if (!PT) {
219 continue;
220 }
221 Type *DereferencedType = PT->getElementType();
222 if (!dyn_cast<StructType>(DereferencedType)
223 ) {
224 continue;
225 }
226 if (!containsPointerType(DereferencedType)) {
227 continue;
228 }
229 // FIXME: Because a pointer inside of a struct/union may be aliased to
230 // another pointer, we need to take the conservative approach and place all
231 // pointers into the arena until more advanced detection is implemented.
232 dumpAll = true;
233 }
234 return dumpAll;
235 }
236 void
237 AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
238 if (isConstVec.empty()) {
239 return;
240 }
241 for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
242 CallInst *CI = isConstVec[x];
243 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
244 Type *aType = Type::getInt32Ty(*mCTX);
245 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
246 : ConstantInt::get(aType, 0);
247 CI->replaceAllUsesWith(Val);
248 CI->eraseFromParent();
249 }
250 isConstVec.clear();
251 }
252 void
253 AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
254 // Don't do anything if we don't have any atomic operations.
255 if (atomicFuncs.empty()) {
256 return;
257 }
258 // Change the function name for the atomic if it is required
259 uint32_t size = atomicFuncs.size();
260 for (uint32_t x = 0; x < size; ++x) {
261 atomicFuncs[x].first->setOperand(
262 atomicFuncs[x].first->getNumOperands()-1,
263 atomicFuncs[x].second);
264
265 }
266 mChanged = true;
267 if (mConvertAtomics) {
268 return;
269 }
270 }
271
272 bool
273 AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
274 mChanged = false;
275 mF = &MF;
276 mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
277 if (mDebug) {
278 MF.dump();
279 }
280 mCTX = &MF.getType()->getContext();
281 mConvertAtomics = true;
282 safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
283 std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
284 this));
285
286 doAtomicConversionIfNeeded(MF);
287 doIsConstCallConversionIfNeeded();
288
289 if (mDebug) {
290 MF.dump();
291 }
292 return mChanged;
293 }
294
295 bool
296 AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
297 Instruction *inst = (*bbb);
298 CallInst *CI = dyn_cast<CallInst>(inst);
299 if (!CI) {
300 return false;
301 }
302 if (isSigned24BitOps(CI)) {
303 expandSigned24BitOps(CI);
304 ++(*bbb);
305 CI->eraseFromParent();
306 return true;
307 }
308 if (propagateSamplerInst(CI)) {
309 return false;
310 }
311 if (expandBFI(CI) || expandBFM(CI)) {
312 ++(*bbb);
313 CI->eraseFromParent();
314 return true;
315 }
316 if (convertAccurateDivide(CI)) {
317 expandAccurateDivide(CI);
318 ++(*bbb);
319 CI->eraseFromParent();
320 return true;
321 }
322
323 StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
324 if (calleeName.startswith("__amdil_is_constant")) {
325 // If we do not have optimizations, then this
326 // cannot be properly evaluated, so we add the
327 // call instruction to a vector and process
328 // it at the end of processing, after the
329 // samplers have been correctly handled.
330 if (optLevel == CodeGenOpt::None) {
331 isConstVec.push_back(CI);
332 return false;
333 } else {
334 Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
335 Type *aType = Type::getInt32Ty(*mCTX);
336 Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
337 : ConstantInt::get(aType, 0);
338 CI->replaceAllUsesWith(Val);
339 ++(*bbb);
340 CI->eraseFromParent();
341 return true;
342 }
343 }
344
345 if (calleeName.equals("__amdil_is_asic_id_i32")) {
346 ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
347 Type *aType = Type::getInt32Ty(*mCTX);
348 Value *Val = CV;
349 if (Val) {
350 Val = ConstantInt::get(aType,
351 mSTM->device()->getDeviceFlag() & CV->getZExtValue());
352 } else {
353 Val = ConstantInt::get(aType, 0);
354 }
355 CI->replaceAllUsesWith(Val);
356 ++(*bbb);
357 CI->eraseFromParent();
358 return true;
359 }
360 Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
361 if (!F) {
362 return false;
363 }
364 if (F->getName().startswith("__atom") && !CI->getNumUses()
365 && F->getName().find("_xchg") == StringRef::npos) {
366 std::string buffer(F->getName().str() + "_noret");
367 F = dyn_cast<Function>(
368 F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
369 atomicFuncs.push_back(std::make_pair(CI, F));
370 }
371
372 if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
373 && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
374 return false;
375 }
376 if (!mConvertAtomics) {
377 return false;
378 }
379 StringRef name = F->getName();
380 if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
381 mConvertAtomics = false;
382 }
383 return false;
384 }
385
386 bool
387 AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
388 Instruction *&src,
389 Constant *&mask,
390 Constant *&shift) {
391 if (!base) {
392 if (mDebug) {
393 dbgs() << "Null pointer passed into function.\n";
394 }
395 return false;
396 }
397 bool andOp = false;
398 if (base->getOpcode() == Instruction::Shl) {
399 shift = dyn_cast<Constant>(base->getOperand(1));
400 } else if (base->getOpcode() == Instruction::And) {
401 mask = dyn_cast<Constant>(base->getOperand(1));
402 andOp = true;
403 } else {
404 if (mDebug) {
405 dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
406 }
407 // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
408 return false;
409 }
410 src = dyn_cast<Instruction>(base->getOperand(0));
411 if (!src) {
412 if (mDebug) {
413 dbgs() << "Failed setup since the base operand is not an instruction!\n";
414 }
415 return false;
416 }
417 // If we find an 'and' operation, then we don't need to
418 // find the next operation as we already know the
419 // bits that are valid at this point.
420 if (andOp) {
421 return true;
422 }
423 if (src->getOpcode() == Instruction::Shl && !shift) {
424 shift = dyn_cast<Constant>(src->getOperand(1));
425 src = dyn_cast<Instruction>(src->getOperand(0));
426 } else if (src->getOpcode() == Instruction::And && !mask) {
427 mask = dyn_cast<Constant>(src->getOperand(1));
428 }
429 if (!mask && !shift) {
430 if (mDebug) {
431 dbgs() << "Failed setup since both mask and shift are NULL!\n";
432 }
433 // Did not find a constant mask or a shift.
434 return false;
435 }
436 return true;
437 }
438 bool
439 AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
440 if (!inst) {
441 return false;
442 }
443 if (!inst->isBinaryOp()) {
444 return false;
445 }
446 if (inst->getOpcode() != Instruction::Or) {
447 return false;
448 }
449 if (optLevel == CodeGenOpt::None) {
450 return false;
451 }
452 // We want to do an optimization on a sequence of ops that in the end equals a
453 // single ISA instruction.
454 // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
455 // Some simplified versions of this pattern are as follows:
456 // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
457 // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
458 // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
459 // (A & B) | (D << F) when (1 << F) >= B
460 // (A << C) | (D & E) when (1 << C) >= E
461 if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
462 // The HD4XXX hardware doesn't support the ubit_insert instruction.
463 return false;
464 }
465 Type *aType = inst->getType();
466 bool isVector = aType->isVectorTy();
467 int numEle = 1;
468 // This optimization only works on 32bit integers.
469 if (aType->getScalarType()
470 != Type::getInt32Ty(inst->getContext())) {
471 return false;
472 }
473 if (isVector) {
474 const VectorType *VT = dyn_cast<VectorType>(aType);
475 numEle = VT->getNumElements();
476 // We currently cannot support more than 4 elements in an intrinsic and we
477 // cannot support Vec3 types.
478 if (numEle > 4 || numEle == 3) {
479 return false;
480 }
481 }
482 // TODO: Handle vectors.
483 if (isVector) {
484 if (mDebug) {
485 dbgs() << "!!! Vectors are not supported yet!\n";
486 }
487 return false;
488 }
489 Instruction *LHSSrc = NULL, *RHSSrc = NULL;
490 Constant *LHSMask = NULL, *RHSMask = NULL;
491 Constant *LHSShift = NULL, *RHSShift = NULL;
492 Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
493 Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
494 if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
495 if (mDebug) {
496 dbgs() << "Found an OR Operation that failed setup!\n";
497 inst->dump();
498 if (LHS) { LHS->dump(); }
499 if (LHSSrc) { LHSSrc->dump(); }
500 if (LHSMask) { LHSMask->dump(); }
501 if (LHSShift) { LHSShift->dump(); }
502 }
503 // There was an issue with the setup for BitInsert.
504 return false;
505 }
506 if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
507 if (mDebug) {
508 dbgs() << "Found an OR Operation that failed setup!\n";
509 inst->dump();
510 if (RHS) { RHS->dump(); }
511 if (RHSSrc) { RHSSrc->dump(); }
512 if (RHSMask) { RHSMask->dump(); }
513 if (RHSShift) { RHSShift->dump(); }
514 }
515 // There was an issue with the setup for BitInsert.
516 return false;
517 }
518 if (mDebug) {
519 dbgs() << "Found an OR operation that can possible be optimized to ubit inse rt!\n";
520 dbgs() << "Op: "; inst->dump();
521 dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\ n"; }
522 dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "( None)\n"; }
523 dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
524 dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() < < "(None)\n"; }
525 dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\ n"; }
526 dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "( None)\n"; }
527 dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
528 dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() < < "(None)\n"; }
529 }
530 Constant *offset = NULL;
531 Constant *width = NULL;
532 uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
533 uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
534 uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
535 uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
536 lhsMaskVal = (LHSMask
537 ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
538 rhsMaskVal = (RHSMask
539 ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
540 lhsShiftVal = (LHSShift
541 ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
542 rhsShiftVal = (RHSShift
543 ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
544 lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
545 rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
546 lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
547 rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
548 // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
549 if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
550 return false;
551 }
552 if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
553 offset = ConstantInt::get(aType, lhsMaskOffset, false);
554 width = ConstantInt::get(aType, lhsMaskWidth, false);
555 RHSSrc = RHS;
556 if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
557 return false;
558 }
559 if (!LHSShift) {
560 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
561 "MaskShr", LHS);
562 } else if (lhsShiftVal != lhsMaskOffset) {
563 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
564 "MaskShr", LHS);
565 }
566 if (mDebug) {
567 dbgs() << "Optimizing LHS!\n";
568 }
569 } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
570 offset = ConstantInt::get(aType, rhsMaskOffset, false);
571 width = ConstantInt::get(aType, rhsMaskWidth, false);
572 LHSSrc = RHSSrc;
573 RHSSrc = LHS;
574 if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
575 return false;
576 }
577 if (!RHSShift) {
578 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
579 "MaskShr", RHS);
580 } else if (rhsShiftVal != rhsMaskOffset) {
581 LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
582 "MaskShr", RHS);
583 }
584 if (mDebug) {
585 dbgs() << "Optimizing RHS!\n";
586 }
587 } else {
588 if (mDebug) {
589 dbgs() << "Failed constraint 3!\n";
590 }
591 return false;
592 }
593 if (mDebug) {
594 dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n" ; }
595 dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\ n"; }
596 dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\ n"; }
597 dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\ n"; }
598 }
599 if (!offset || !width) {
600 if (mDebug) {
601 dbgs() << "Either width or offset are NULL, failed detection!\n";
602 }
603 return false;
604 }
605 // Let's create the function signature.
606 std::vector<Type *> callTypes;
607 callTypes.push_back(aType);
608 callTypes.push_back(aType);
609 callTypes.push_back(aType);
610 callTypes.push_back(aType);
611 FunctionType *funcType = FunctionType::get(aType, callTypes, false);
612 std::string name = "__amdil_ubit_insert";
613 if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
614 Function *Func =
615 dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
616 getOrInsertFunction(StringRef(name), funcType));
617 Value *Operands[4] = {
618 width,
619 offset,
620 LHSSrc,
621 RHSSrc
622 };
623 CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
624 if (mDebug) {
625 dbgs() << "Old Inst: ";
626 inst->dump();
627 dbgs() << "New Inst: ";
628 CI->dump();
629 dbgs() << "\n\n";
630 }
631 CI->insertBefore(inst);
632 inst->replaceAllUsesWith(CI);
633 return true;
634 }
635
636 bool
637 AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
638 if (!inst) {
639 return false;
640 }
641 if (!inst->isBinaryOp()) {
642 return false;
643 }
644 if (inst->getOpcode() != Instruction::And) {
645 return false;
646 }
647 if (optLevel == CodeGenOpt::None) {
648 return false;
649 }
650 // We want to do some simple optimizations on Shift right/And patterns. The
651 // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
652 // value smaller than 32 and C is a mask. If C is a constant value, then the
653 // following transformation can occur. For signed integers, it turns into the
654 // function call dst = __amdil_ibit_extract(log2(C), B, A). For unsigned
655 // integers, it turns into the function call dst =
656 // __amdil_ubit_extract(log2(C), B, A). The function __amdil_[u|i]bit_extract
657 // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
658 // Evergreen hardware.
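// Illustrative example (values assumed only for exposition): for a 32-bit
// value A, the expression (A >> 8) & 0xFF extracts an 8-bit field starting
// at bit 8 and can be lowered to one bit-extract call with width 8, shift 8.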
659 if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
660 // This does not work on HD4XXX hardware.
661 return false;
662 }
663 Type *aType = inst->getType();
664 bool isVector = aType->isVectorTy();
665
666 // XXX Support vector types
667 if (isVector) {
668 return false;
669 }
670 int numEle = 1;
671 // This only works on 32bit integers
672 if (aType->getScalarType()
673 != Type::getInt32Ty(inst->getContext())) {
674 return false;
675 }
676 if (isVector) {
677 const VectorType *VT = dyn_cast<VectorType>(aType);
678 numEle = VT->getNumElements();
679 // We currently cannot support more than 4 elements in an intrinsic and we
680 // cannot support Vec3 types.
681 if (numEle > 4 || numEle == 3) {
682 return false;
683 }
684 }
685 BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
686 // If the first operand is not a shift instruction, then we can return as it
687 // doesn't match this pattern.
688 if (!ShiftInst || !ShiftInst->isShift()) {
689 return false;
690 }
691 // If it is a shift left, then we don't match this pattern.
692 if (ShiftInst->getOpcode() == Instruction::Shl) {
693 return false;
694 }
695 bool isSigned = ShiftInst->isArithmeticShift();
696 Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
697 Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
698 // Let's make sure that the shift value and the AND mask are constant integers.
699 if (!AndMask || !ShrVal) {
700 return false;
701 }
702 Constant *newMaskConst;
703 Constant *shiftValConst;
704 if (isVector) {
705 // Handle the vector case
706 std::vector<Constant *> maskVals;
707 std::vector<Constant *> shiftVals;
708 ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
709 ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
710 Type *scalarType = AndMaskVec->getType()->getScalarType();
711 assert(AndMaskVec->getNumOperands() ==
712 ShrValVec->getNumOperands() && "cannot have a "
713 "combination where the number of elements to a "
714 "shift and an and are different!");
715 for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
716 ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
717 ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
718 if (!AndCI || !ShiftIC) {
719 return false;
720 }
721 uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
722 if (!isMask_32(maskVal)) {
723 return false;
724 }
725 maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
726 uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
727 // If the mask or shiftval is greater than the bitcount, then break out.
728 if (maskVal >= 32 || shiftVal >= 32) {
729 return false;
730 }
731 // If the mask val is greater than the number of original bits left,
732 // then this optimization is invalid.
733 if (maskVal > (32 - shiftVal)) {
734 return false;
735 }
736 maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
737 shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
738 }
739 newMaskConst = ConstantVector::get(maskVals);
740 shiftValConst = ConstantVector::get(shiftVals);
741 } else {
742 // Handle the scalar case
743 uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
744 // This must be a mask value where all lower bits are set to 1 and then any
745 // bit higher is set to 0.
746 if (!isMask_32(maskVal)) {
747 return false;
748 }
749 maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
750 // Count the number of bits set in the mask; this is the width of the
751 // resulting bit set that is extracted from the source value.
752 uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
753 // If the mask or shift val is greater than the bitcount, then break out.
754 if (maskVal >= 32 || shiftVal >= 32) {
755 return false;
756 }
757 // If the mask val is greater than the number of original bits left, then
758 // this optimization is invalid.
759 if (maskVal > (32 - shiftVal)) {
760 return false;
761 }
762 newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
763 shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
764 }
765 // Let's create the function signature.
766 std::vector<Type *> callTypes;
767 callTypes.push_back(aType);
768 callTypes.push_back(aType);
769 callTypes.push_back(aType);
770 FunctionType *funcType = FunctionType::get(aType, callTypes, false);
771 std::string name = "llvm.AMDGPU.bit.extract.u32";
772 if (isVector) {
773 name += ".v" + itostr(numEle) + "i32";
774 } else {
775 name += ".";
776 }
777 // Let's create the function.
778 Function *Func =
779 dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
780 getOrInsertFunction(StringRef(name), funcType));
781 Value *Operands[3] = {
782 ShiftInst->getOperand(0),
783 shiftValConst,
784 newMaskConst
785 };
786 // Let's create the call with the operands.
787 CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
788 CI->setDoesNotAccessMemory();
789 CI->insertBefore(inst);
790 inst->replaceAllUsesWith(CI);
791 return true;
792 }
793
794 bool
795 AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
796 if (!CI) {
797 return false;
798 }
799 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
800 if (!LHS->getName().startswith("__amdil_bfi")) {
801 return false;
802 }
803 Type* type = CI->getOperand(0)->getType();
804 Constant *negOneConst = NULL;
805 if (type->isVectorTy()) {
806 std::vector<Constant *> negOneVals;
807 negOneConst = ConstantInt::get(CI->getContext(),
808 APInt(32, StringRef("-1"), 10));
809 for (size_t x = 0,
810 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
811 negOneVals.push_back(negOneConst);
812 }
813 negOneConst = ConstantVector::get(negOneVals);
814 } else {
815 negOneConst = ConstantInt::get(CI->getContext(),
816 APInt(32, StringRef("-1"), 10));
817 }
818 // __amdil_bfi => (A & B) | (~A & C)
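// Illustrative example (values assumed only for exposition):
// bfi(0x0000FFFF, x, y) selects the low 16 bits from x and the high 16 bits
// from y.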
819 BinaryOperator *lhs =
820 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
821 CI->getOperand(1), "bfi_and", CI);
822 BinaryOperator *rhs =
823 BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
824 "bfi_not", CI);
825 rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
826 "bfi_and", CI);
827 lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
828 CI->replaceAllUsesWith(lhs);
829 return true;
830 }
831
832 bool
833 AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
834 if (!CI) {
835 return false;
836 }
837 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
838 if (!LHS->getName().startswith("__amdil_bfm")) {
839 return false;
840 }
841 // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
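// Illustrative example (values assumed only for exposition): bfm(8, 16)
// produces ((1 << 8) - 1) << 16, i.e. the mask 0x00FF0000, an 8-bit field
// at bit offset 16.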
842 Constant *newMaskConst = NULL;
843 Constant *newShiftConst = NULL;
844 Type* type = CI->getOperand(0)->getType();
845 if (type->isVectorTy()) {
846 std::vector<Constant*> newMaskVals, newShiftVals;
847 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
848 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
849 for (size_t x = 0,
850 y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
851 newMaskVals.push_back(newMaskConst);
852 newShiftVals.push_back(newShiftConst);
853 }
854 newMaskConst = ConstantVector::get(newMaskVals);
855 newShiftConst = ConstantVector::get(newShiftVals);
856 } else {
857 newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
858 newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
859 }
860 BinaryOperator *lhs =
861 BinaryOperator::Create(Instruction::And, CI->getOperand(0),
862 newMaskConst, "bfm_mask", CI);
863 lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
864 lhs, "bfm_shl", CI);
865 lhs = BinaryOperator::Create(Instruction::Sub, lhs,
866 newShiftConst, "bfm_sub", CI);
867 BinaryOperator *rhs =
868 BinaryOperator::Create(Instruction::And, CI->getOperand(1),
869 newMaskConst, "bfm_mask", CI);
870 lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
871 CI->replaceAllUsesWith(lhs);
872 return true;
873 }
874
875 bool
876 AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
877 Instruction *inst = (*bbb);
878 if (optimizeCallInst(bbb)) {
879 return true;
880 }
881 if (optimizeBitExtract(inst)) {
882 return false;
883 }
884 if (optimizeBitInsert(inst)) {
885 return false;
886 }
887 if (correctMisalignedMemOp(inst)) {
888 return false;
889 }
890 return false;
891 }
892 bool
893 AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
894 LoadInst *linst = dyn_cast<LoadInst>(inst);
895 StoreInst *sinst = dyn_cast<StoreInst>(inst);
896 unsigned alignment;
897 Type* Ty = inst->getType();
898 if (linst) {
899 alignment = linst->getAlignment();
900 Ty = inst->getType();
901 } else if (sinst) {
902 alignment = sinst->getAlignment();
903 Ty = sinst->getValueOperand()->getType();
904 } else {
905 return false;
906 }
907 unsigned size = getTypeSize(Ty);
908 if (size == alignment || size < alignment) {
909 return false;
910 }
911 if (!Ty->isStructTy()) {
912 return false;
913 }
914 if (alignment < 4) {
915 if (linst) {
916 linst->setAlignment(0);
917 return true;
918 } else if (sinst) {
919 sinst->setAlignment(0);
920 return true;
921 }
922 }
923 return false;
924 }
925 bool
926 AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
927 if (!CI) {
928 return false;
929 }
930 Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
931 std::string namePrefix = LHS->getName().substr(0, 14);
932 if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
933 && namePrefix != "__amdil__imul24_high") {
934 return false;
935 }
936 if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
937 return false;
938 }
939 return true;
940 }
941
942 void
943 AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
944 assert(isSigned24BitOps(CI) && "Must be a "
945 "signed 24 bit operation to call this function!");
946 Value *LHS = CI->getOperand(CI->getNumOperands()-1);
947 // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
948 // expand them to the following:
949 // imul24 turns into 32bit imul
950 // imad24 turns into 32bit imad
951 // imul24_high turns into 32bit imulhigh
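// Illustrative example (call shapes assumed only for exposition): a scalar
// call to __amdil_imad24(a, b, c) is rewritten below as a call to
// __amdil_imad_i32(a, b, c), and __amdil_imul24(a, b) becomes a plain
// 32-bit 'mul' instruction.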
952 if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
953 Type *aType = CI->getOperand(0)->getType();
954 bool isVector = aType->isVectorTy();
955 int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
956 std::vector<Type*> callTypes;
957 callTypes.push_back(CI->getOperand(0)->getType());
958 callTypes.push_back(CI->getOperand(1)->getType());
959 callTypes.push_back(CI->getOperand(2)->getType());
960 FunctionType *funcType =
961 FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
962 std::string name = "__amdil_imad";
963 if (isVector) {
964 name += "_v" + itostr(numEle) + "i32";
965 } else {
966 name += "_i32";
967 }
968 Function *Func = dyn_cast<Function>(
969 CI->getParent()->getParent()->getParent()->
970 getOrInsertFunction(StringRef(name), funcType));
971 Value *Operands[3] = {
972 CI->getOperand(0),
973 CI->getOperand(1),
974 CI->getOperand(2)
975 };
976 CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
977 nCI->insertBefore(CI);
978 CI->replaceAllUsesWith(nCI);
979 } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
980 BinaryOperator *mulOp =
981 BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
982 CI->getOperand(1), "imul24", CI);
983 CI->replaceAllUsesWith(mulOp);
984 } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
985 Type *aType = CI->getOperand(0)->getType();
986
987 bool isVector = aType->isVectorTy();
988 int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
989 std::vector<Type*> callTypes;
990 callTypes.push_back(CI->getOperand(0)->getType());
991 callTypes.push_back(CI->getOperand(1)->getType());
992 FunctionType *funcType =
993 FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
994 std::string name = "__amdil_imul_high";
995 if (isVector) {
996 name += "_v" + itostr(numEle) + "i32";
997 } else {
998 name += "_i32";
999 }
1000 Function *Func = dyn_cast<Function>(
1001 CI->getParent()->getParent()->getParent()->
1002 getOrInsertFunction(StringRef(name), funcType));
1003 Value *Operands[2] = {
1004 CI->getOperand(0),
1005 CI->getOperand(1)
1006 };
1007 CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
1008 nCI->insertBefore(CI);
1009 CI->replaceAllUsesWith(nCI);
1010 }
1011 }
1012
1013 bool
1014 AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
1015 return (CI != NULL
1016 && CI->getOperand(CI->getNumOperands() - 1)->getName()
1017 == "__amdil_get_local_size_int");
1018 }
1019
1020 bool
1021 AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
1022 if (!CI) {
1023 return false;
1024 }
1025 if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
1026 && (mSTM->getDeviceName() == "cayman")) {
1027 return false;
1028 }
1029 return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
1030 == "__amdil_improved_div";
1031 }
1032
1033 void
1034 AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
1035 assert(convertAccurateDivide(CI)
1036 && "expanding accurate divide can only happen if it is expandable!");
1037 BinaryOperator *divOp =
1038 BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
1039 CI->getOperand(1), "fdiv32", CI);
1040 CI->replaceAllUsesWith(divOp);
1041 }
1042
1043 bool
1044 AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
1045 if (optLevel != CodeGenOpt::None) {
1046 return false;
1047 }
1048
1049 if (!CI) {
1050 return false;
1051 }
1052
1053 unsigned funcNameIdx = 0;
1054 funcNameIdx = CI->getNumOperands() - 1;
1055 StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
1056 if (calleeName != "__amdil_image2d_read_norm"
1057 && calleeName != "__amdil_image2d_read_unnorm"
1058 && calleeName != "__amdil_image3d_read_norm"
1059 && calleeName != "__amdil_image3d_read_unnorm") {
1060 return false;
1061 }
1062
1063 unsigned samplerIdx = 2;
1064 samplerIdx = 1;
1065 Value *sampler = CI->getOperand(samplerIdx);
1066 LoadInst *lInst = dyn_cast<LoadInst>(sampler);
1067 if (!lInst) {
1068 return false;
1069 }
1070
1071 if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1072 return false;
1073 }
1074
1075 GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
1076 // If we are loading from what is not a global value, then we
1077 // fail and return.
1078 if (!gv) {
1079 return false;
1080 }
1081
1082 // If we don't have an initializer or we have an initializer and
1083 // the initializer is not a 32bit integer, we fail.
1084 if (!gv->hasInitializer()
1085 || !gv->getInitializer()->getType()->isIntegerTy(32)) {
1086 return false;
1087 }
1088
1089 // Now that we have the global variable initializer, let's replace
1090 // all uses of the load instruction with the samplerVal and
1091 // reparse the __amdil_is_constant() function.
1092 Constant *samplerVal = gv->getInitializer();
1093 lInst->replaceAllUsesWith(samplerVal);
1094 return true;
1095 }
1096
1097 bool
1098 AMDGPUPeepholeOpt::doInitialization(Module &M) {
1099 return false;
1100 }
1101
1102 bool
1103 AMDGPUPeepholeOpt::doFinalization(Module &M) {
1104 return false;
1105 }
1106
1107 void
1108 AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
1109 AU.addRequired<MachineFunctionAnalysis>();
1110 FunctionPass::getAnalysisUsage(AU);
1111 AU.setPreservesAll();
1112 }
1113
1114 size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
1115 size_t size = 0;
1116 if (!T) {
1117 return size;
1118 }
1119 switch (T->getTypeID()) {
1120 case Type::X86_FP80TyID:
1121 case Type::FP128TyID:
1122 case Type::PPC_FP128TyID:
1123 case Type::LabelTyID:
1124 assert(0 && "These types are not supported by this backend");
1125 default:
1126 case Type::FloatTyID:
1127 case Type::DoubleTyID:
1128 size = T->getPrimitiveSizeInBits() >> 3;
1129 break;
1130 case Type::PointerTyID:
1131 size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
1132 break;
1133 case Type::IntegerTyID:
1134 size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
1135 break;
1136 case Type::StructTyID:
1137 size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
1138 break;
1139 case Type::ArrayTyID:
1140 size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
1141 break;
1142 case Type::FunctionTyID:
1143 size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
1144 break;
1145 case Type::VectorTyID:
1146 size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
1147 break;
1148 };
1149 return size;
1150 }
1151
1152 size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
1153 bool dereferencePtr) {
1154 size_t size = 0;
1155 if (!ST) {
1156 return size;
1157 }
1158 Type *curType;
1159 StructType::element_iterator eib;
1160 StructType::element_iterator eie;
1161 for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
1162 curType = *eib;
1163 size += getTypeSize(curType, dereferencePtr);
1164 }
1165 return size;
1166 }
1167
1168 size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
1169 bool dereferencePtr) {
1170 return IT ? (IT->getBitWidth() >> 3) : 0;
1171 }
1172
1173 size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
1174 bool dereferencePtr) {
1175 assert(0 && "Should not be able to calculate the size of an function type");
1176 return 0;
1177 }
1178
1179 size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
1180 bool dereferencePtr) {
1181 return (size_t)(AT ? (getTypeSize(AT->getElementType(),
1182 dereferencePtr) * AT->getNumElements())
1183 : 0);
1184 }
1185
1186 size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
1187 bool dereferencePtr) {
1188 return VT ? (VT->getBitWidth() >> 3) : 0;
1189 }
1190
1191 size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
1192 bool dereferencePtr) {
1193 if (!PT) {
1194 return 0;
1195 }
1196 Type *CT = PT->getElementType();
1197 if (CT->getTypeID() == Type::StructTyID &&
1198 PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
1199 return getTypeSize(dyn_cast<StructType>(CT));
1200 } else if (dereferencePtr) {
1201 size_t size = 0;
1202 for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
1203 size += getTypeSize(PT->getContainedType(x), dereferencePtr);
1204 }
1205 return size;
1206 } else {
1207 return 4;
1208 }
1209 }
1210
1211 size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
1212 bool dereferencePtr) {
1213 //assert(0 && "Should not be able to calculate the size of an opaque type");
1214 return 4;
1215 }
