| Index: lib/Target/R600/AMDILPeepholeOptimizer.cpp
|
| diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..3a28038666f716f3d43dad7c66677fac670d9289
|
| --- /dev/null
|
| +++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
|
| @@ -0,0 +1,1215 @@
|
| +//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
|
| +//
|
| +// The LLVM Compiler Infrastructure
|
| +//
|
| +// This file is distributed under the University of Illinois Open Source
|
| +// License. See LICENSE.TXT for details.
|
| +//
|
| +/// \file
|
| +//==-----------------------------------------------------------------------===//
|
| +
|
| +#define DEBUG_TYPE "PeepholeOpt"
|
| +#ifdef DEBUG
|
| +#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
|
| +#else
|
| +#define DEBUGME 0
|
| +#endif
|
| +
|
| +#include "AMDILDevices.h"
|
| +#include "AMDGPUInstrInfo.h"
|
| +#include "llvm/ADT/Statistic.h"
|
| +#include "llvm/ADT/StringExtras.h"
|
| +#include "llvm/ADT/StringRef.h"
|
| +#include "llvm/ADT/Twine.h"
|
| +#include "llvm/IR/Constants.h"
|
| +#include "llvm/CodeGen/MachineFunction.h"
|
| +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
| +#include "llvm/IR/Function.h"
|
| +#include "llvm/IR/Instructions.h"
|
| +#include "llvm/IR/Module.h"
|
| +#include "llvm/Support/Debug.h"
|
| +#include "llvm/Support/MathExtras.h"
|
| +
|
| +#include <sstream>
|
| +
|
| +#if 0
|
| +STATISTIC(PointerAssignments, "Number of dynamic pointer "
|
| + "assigments discovered");
|
| +STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
|
| +#endif
|
| +
|
| +using namespace llvm;
|
| +// The Peephole optimization pass is used to do simple last-minute optimizations
|
| +// that are required for correct code or to remove redundant functions.
|
| +namespace {
|
| +
|
| +class OpaqueType;
|
| +
|
| +class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
|
| +public:
|
| + TargetMachine &TM;
|
| + static char ID;
|
| + AMDGPUPeepholeOpt(TargetMachine &tm);
|
| + ~AMDGPUPeepholeOpt();
|
| + const char *getPassName() const;
|
| + bool runOnFunction(Function &F);
|
| + bool doInitialization(Module &M);
|
| + bool doFinalization(Module &M);
|
| + void getAnalysisUsage(AnalysisUsage &AU) const;
|
| +protected:
|
| +private:
|
| + // Function to initiate all of the instruction level optimizations.
|
| + bool instLevelOptimizations(BasicBlock::iterator *inst);
|
| + // Quick check to see if we need to dump all of the pointers into the
|
| + // arena. If so, then we set all pointers to exist in the arena. This
|
| + // is a workaround for aliasing of pointers in a struct/union.
|
| + bool dumpAllIntoArena(Function &F);
|
| + // Because we don't want to invalidate any pointers while inside
|
| + // safeNestedForEach, we push atomic conversions to a vector and handle
|
| + // them later. This function does the conversions if required.
|
| + void doAtomicConversionIfNeeded(Function &F);
|
| + // Because __amdil_is_constant cannot be properly evaluated if
|
| + // optimizations are disabled, the calls are placed in a vector
|
| + // and evaluated after the __amdil_image* functions are evaluated
|
| + // which should allow the __amdil_is_constant function to be
|
| + // evaluated correctly.
|
| + void doIsConstCallConversionIfNeeded();
|
| + bool mChanged;
|
| + bool mDebug;
|
| + bool mConvertAtomics;
|
| + CodeGenOpt::Level optLevel;
|
| + // Run a series of tests to see if we can optimize a CALL instruction.
|
| + bool optimizeCallInst(BasicBlock::iterator *bbb);
|
| + // A peephole optimization to optimize bit extract sequences.
|
| + bool optimizeBitExtract(Instruction *inst);
|
| + // A peephole optimization to optimize bit insert sequences.
|
| + bool optimizeBitInsert(Instruction *inst);
|
| + bool setupBitInsert(Instruction *base,
|
| + Instruction *&src,
|
| + Constant *&mask,
|
| + Constant *&shift);
|
| + // Expand the bit field insert instruction on versions of OpenCL that
|
| + // don't support it.
|
| + bool expandBFI(CallInst *CI);
|
| + // Expand the bit field mask instruction on versions of OpenCL that
|
| + // don't support it.
|
| + bool expandBFM(CallInst *CI);
|
| + // On 7XX and 8XX hardware, we do not have 24 bit signed operations, so in
|
| + // that case we need to expand them. These functions check for 24bit functions
|
| + // and then expand them.
|
| + bool isSigned24BitOps(CallInst *CI);
|
| + void expandSigned24BitOps(CallInst *CI);
|
| + // One optimization that can occur is that if the required workgroup size is
|
| + // specified then the result of get_local_size is known at compile time and
|
| + // can be returned accordingly.
|
| + bool isRWGLocalOpt(CallInst *CI);
|
| + // On Northern Islands cards, the division is slightly less accurate than on
|
| + // previous generations, so we need to utilize a more accurate division. On
|
| + // all other cards we can translate the accurate divide to a normal divide.
|
| + bool convertAccurateDivide(CallInst *CI);
|
| + void expandAccurateDivide(CallInst *CI);
|
| + // If the alignment is set incorrectly, it can produce really inefficient
|
| + // code. This checks for this scenario and fixes it if possible.
|
| + bool correctMisalignedMemOp(Instruction *inst);
|
| +
|
| + // If we are in no-opt mode, then we need to make sure that
|
| + // local samplers are properly propagated, as constant propagation
|
| + // doesn't occur and we need to know the value of kernel-defined
|
| + // samplers at compile time.
|
| + bool propagateSamplerInst(CallInst *CI);
|
| +
|
| + // Helper functions
|
| +
|
| + // Group of functions that recursively calculate the size of a structure based
|
| + // on its sub-types.
|
| + size_t getTypeSize(Type * const T, bool dereferencePtr = false);
|
| + size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
|
| + size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
|
| + size_t getTypeSize(FunctionType * const FT, bool dereferencePtr = false);
|
| + size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
|
| + size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
|
| + size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
|
| + size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
|
| +
|
| + LLVMContext *mCTX;
|
| + Function *mF;
|
| + const AMDGPUSubtarget *mSTM;
|
| + SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
|
| + SmallVector<CallInst *, 16> isConstVec;
|
| +}; // class AMDGPUPeepholeOpt
|
| + char AMDGPUPeepholeOpt::ID = 0;
|
| +
|
| +// A template function that has two levels of looping before calling the
|
| +// function with a pointer to the current iterator.
|
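| +// In this pass it is invoked from runOnFunction as
|
| +//   safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), callback);
|
| +// The third argument is only used to deduce the inner iterator type, and the
|
| +// callback returns true when it has already advanced (or erased) the inner
|
| +// iterator, so the loop only increments the iterator when the callback
|
| +// returns false.
|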
| +template<class InputIterator, class SecondIterator, class Function>
|
| +Function safeNestedForEach(InputIterator First, InputIterator Last,
|
| + SecondIterator S, Function F) {
|
| + for ( ; First != Last; ++First) {
|
| + SecondIterator sf, sl;
|
| + for (sf = First->begin(), sl = First->end();
|
| + sf != sl; ) {
|
| + if (!F(&sf)) {
|
| + ++sf;
|
| + }
|
| + }
|
| + }
|
| + return F;
|
| +}
|
| +
|
| +} // anonymous namespace
|
| +
|
| +namespace llvm {
|
| + FunctionPass *
|
| + createAMDGPUPeepholeOpt(TargetMachine &tm) {
|
| + return new AMDGPUPeepholeOpt(tm);
|
| + }
|
| +} // llvm namespace
|
| +
|
| +AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
|
| + : FunctionPass(ID), TM(tm) {
|
| + mDebug = DEBUGME;
|
| + optLevel = TM.getOptLevel();
|
| +
|
| +}
|
| +
|
| +AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
|
| +}
|
| +
|
| +const char *
|
| +AMDGPUPeepholeOpt::getPassName() const {
|
| + return "AMDGPU PeepHole Optimization Pass";
|
| +}
|
| +
|
| +bool
|
| +containsPointerType(Type *Ty) {
|
| + if (!Ty) {
|
| + return false;
|
| + }
|
| + switch(Ty->getTypeID()) {
|
| + default:
|
| + return false;
|
| + case Type::StructTyID: {
|
| + const StructType *ST = dyn_cast<StructType>(Ty);
|
| + for (StructType::element_iterator stb = ST->element_begin(),
|
| + ste = ST->element_end(); stb != ste; ++stb) {
|
| + if (!containsPointerType(*stb)) {
|
| + continue;
|
| + }
|
| + return true;
|
| + }
|
| + break;
|
| + }
|
| + case Type::VectorTyID:
|
| + case Type::ArrayTyID:
|
| + return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
|
| + case Type::PointerTyID:
|
| + return true;
|
| + };
|
| + return false;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
|
| + bool dumpAll = false;
|
| + for (Function::const_arg_iterator cab = F.arg_begin(),
|
| + cae = F.arg_end(); cab != cae; ++cab) {
|
| + const Argument *arg = cab;
|
| + const PointerType *PT = dyn_cast<PointerType>(arg->getType());
|
| + if (!PT) {
|
| + continue;
|
| + }
|
| + Type *DereferencedType = PT->getElementType();
|
| + if (!dyn_cast<StructType>(DereferencedType)) {
|
| + continue;
|
| + }
|
| + if (!containsPointerType(DereferencedType)) {
|
| + continue;
|
| + }
|
| + // FIXME: Because a pointer inside of a struct/union may be aliased to
|
| + // another pointer we need to take the conservative approach and place all
|
| + // pointers into the arena until more advanced detection is implemented.
|
| + dumpAll = true;
|
| + }
|
| + return dumpAll;
|
| +}
|
| +void
|
| +AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
|
| + if (isConstVec.empty()) {
|
| + return;
|
| + }
|
| + for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
|
| + CallInst *CI = isConstVec[x];
|
| + Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
|
| + Type *aType = Type::getInt32Ty(*mCTX);
|
| + Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
|
| + : ConstantInt::get(aType, 0);
|
| + CI->replaceAllUsesWith(Val);
|
| + CI->eraseFromParent();
|
| + }
|
| + isConstVec.clear();
|
| +}
|
| +void
|
| +AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
|
| + // Don't do anything if we don't have any atomic operations.
|
| + if (atomicFuncs.empty()) {
|
| + return;
|
| + }
|
| + // Change the function name for the atomic if it is required
|
| + uint32_t size = atomicFuncs.size();
|
| + for (uint32_t x = 0; x < size; ++x) {
|
| + atomicFuncs[x].first->setOperand(
|
| + atomicFuncs[x].first->getNumOperands()-1,
|
| + atomicFuncs[x].second);
|
| +
|
| + }
|
| + mChanged = true;
|
| + if (mConvertAtomics) {
|
| + return;
|
| + }
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
|
| + mChanged = false;
|
| + mF = &MF;
|
| + mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
|
| + if (mDebug) {
|
| + MF.dump();
|
| + }
|
| + mCTX = &MF.getType()->getContext();
|
| + mConvertAtomics = true;
|
| + safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
|
| + std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
|
| + this));
|
| +
|
| + doAtomicConversionIfNeeded(MF);
|
| + doIsConstCallConversionIfNeeded();
|
| +
|
| + if (mDebug) {
|
| + MF.dump();
|
| + }
|
| + return mChanged;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
|
| + Instruction *inst = (*bbb);
|
| + CallInst *CI = dyn_cast<CallInst>(inst);
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| + if (isSigned24BitOps(CI)) {
|
| + expandSigned24BitOps(CI);
|
| + ++(*bbb);
|
| + CI->eraseFromParent();
|
| + return true;
|
| + }
|
| + if (propagateSamplerInst(CI)) {
|
| + return false;
|
| + }
|
| + if (expandBFI(CI) || expandBFM(CI)) {
|
| + ++(*bbb);
|
| + CI->eraseFromParent();
|
| + return true;
|
| + }
|
| + if (convertAccurateDivide(CI)) {
|
| + expandAccurateDivide(CI);
|
| + ++(*bbb);
|
| + CI->eraseFromParent();
|
| + return true;
|
| + }
|
| +
|
| + StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
|
| + if (calleeName.startswith("__amdil_is_constant")) {
|
| + // If optimizations are disabled, then this cannot
|
| + // be properly evaluated, so we add the call
|
| + // instruction to a vector and process it at the
|
| + // end, after the samplers have been correctly
|
| + // handled.
|
| + if (optLevel == CodeGenOpt::None) {
|
| + isConstVec.push_back(CI);
|
| + return false;
|
| + } else {
|
| + Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
|
| + Type *aType = Type::getInt32Ty(*mCTX);
|
| + Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
|
| + : ConstantInt::get(aType, 0);
|
| + CI->replaceAllUsesWith(Val);
|
| + ++(*bbb);
|
| + CI->eraseFromParent();
|
| + return true;
|
| + }
|
| + }
|
| +
|
| + if (calleeName.equals("__amdil_is_asic_id_i32")) {
|
| + ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
|
| + Type *aType = Type::getInt32Ty(*mCTX);
|
| + Value *Val = CV;
|
| + if (Val) {
|
| + Val = ConstantInt::get(aType,
|
| + mSTM->device()->getDeviceFlag() & CV->getZExtValue());
|
| + } else {
|
| + Val = ConstantInt::get(aType, 0);
|
| + }
|
| + CI->replaceAllUsesWith(Val);
|
| + ++(*bbb);
|
| + CI->eraseFromParent();
|
| + return true;
|
| + }
|
| + Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
|
| + if (!F) {
|
| + return false;
|
| + }
|
| + if (F->getName().startswith("__atom") && !CI->getNumUses()
|
| + && F->getName().find("_xchg") == StringRef::npos) {
|
| + std::string buffer(F->getName().str() + "_noret");
|
| + F = dyn_cast<Function>(
|
| + F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
|
| + atomicFuncs.push_back(std::make_pair(CI, F));
|
| + }
|
| +
|
| + if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
|
| + && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
|
| + return false;
|
| + }
|
| + if (!mConvertAtomics) {
|
| + return false;
|
| + }
|
| + StringRef name = F->getName();
|
| + if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
|
| + mConvertAtomics = false;
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
|
| + Instruction *&src,
|
| + Constant *&mask,
|
| + Constant *&shift) {
|
| + if (!base) {
|
| + if (mDebug) {
|
| + dbgs() << "Null pointer passed into function.\n";
|
| + }
|
| + return false;
|
| + }
|
| + bool andOp = false;
|
| + if (base->getOpcode() == Instruction::Shl) {
|
| + shift = dyn_cast<Constant>(base->getOperand(1));
|
| + } else if (base->getOpcode() == Instruction::And) {
|
| + mask = dyn_cast<Constant>(base->getOperand(1));
|
| + andOp = true;
|
| + } else {
|
| + if (mDebug) {
|
| + dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
|
| + }
|
| + // If the base is neither a Shl nor an And, we don't fit any of the patterns above.
|
| + return false;
|
| + }
|
| + src = dyn_cast<Instruction>(base->getOperand(0));
|
| + if (!src) {
|
| + if (mDebug) {
|
| + dbgs() << "Failed setup since the base operand is not an instruction!\n";
|
| + }
|
| + return false;
|
| + }
|
| + // If we find an 'and' operation, then we don't need to
|
| + // find the next operation as we already know the
|
| + // bits that are valid at this point.
|
| + if (andOp) {
|
| + return true;
|
| + }
|
| + if (src->getOpcode() == Instruction::Shl && !shift) {
|
| + shift = dyn_cast<Constant>(src->getOperand(1));
|
| + src = dyn_cast<Instruction>(src->getOperand(0));
|
| + } else if (src->getOpcode() == Instruction::And && !mask) {
|
| + mask = dyn_cast<Constant>(src->getOperand(1));
|
| + }
|
| + if (!mask && !shift) {
|
| + if (mDebug) {
|
| + dbgs() << "Failed setup since both mask and shift are NULL!\n";
|
| + }
|
| + // Did not find a constant mask or a shift.
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +bool
|
| +AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
|
| + if (!inst) {
|
| + return false;
|
| + }
|
| + if (!inst->isBinaryOp()) {
|
| + return false;
|
| + }
|
| + if (inst->getOpcode() != Instruction::Or) {
|
| + return false;
|
| + }
|
| + if (optLevel == CodeGenOpt::None) {
|
| + return false;
|
| + }
|
| + // We want to do an optimization on a sequence of ops that in the end equals a
|
| + // single ISA instruction.
|
| + // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
|
| + // Some simplified versions of this pattern are as follows:
|
| + // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
|
| + // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
|
| + // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
|
| + // (A & B) | (D << F) when (1 << F) >= B
|
| + // (A << C) | (D & E) when (1 << C) >= E
|
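| + // For illustration (example values only): with B = 0xFF, C = 8, E = 0xFF
|
| + // and F = 0, the expression ((A & 0xFF) << 8) | (D & 0xFF) packs the low
|
| + // byte of A into bits [15:8] and the low byte of D into bits [7:0], which
|
| + // a single bit-insert instruction can express.
|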
| + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
|
| + // The HD4XXX hardware doesn't support the ubit_insert instruction.
|
| + return false;
|
| + }
|
| + Type *aType = inst->getType();
|
| + bool isVector = aType->isVectorTy();
|
| + int numEle = 1;
|
| + // This optimization only works on 32bit integers.
|
| + if (aType->getScalarType()
|
| + != Type::getInt32Ty(inst->getContext())) {
|
| + return false;
|
| + }
|
| + if (isVector) {
|
| + const VectorType *VT = dyn_cast<VectorType>(aType);
|
| + numEle = VT->getNumElements();
|
| + // We currently cannot support more than 4 elements in an intrinsic and we
|
| + // cannot support Vec3 types.
|
| + if (numEle > 4 || numEle == 3) {
|
| + return false;
|
| + }
|
| + }
|
| + // TODO: Handle vectors.
|
| + if (isVector) {
|
| + if (mDebug) {
|
| + dbgs() << "!!! Vectors are not supported yet!\n";
|
| + }
|
| + return false;
|
| + }
|
| + Instruction *LHSSrc = NULL, *RHSSrc = NULL;
|
| + Constant *LHSMask = NULL, *RHSMask = NULL;
|
| + Constant *LHSShift = NULL, *RHSShift = NULL;
|
| + Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
|
| + Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
|
| + if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
|
| + if (mDebug) {
|
| + dbgs() << "Found an OR Operation that failed setup!\n";
|
| + inst->dump();
|
| + if (LHS) { LHS->dump(); }
|
| + if (LHSSrc) { LHSSrc->dump(); }
|
| + if (LHSMask) { LHSMask->dump(); }
|
| + if (LHSShift) { LHSShift->dump(); }
|
| + }
|
| + // There was an issue with the setup for BitInsert.
|
| + return false;
|
| + }
|
| + if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
|
| + if (mDebug) {
|
| + dbgs() << "Found an OR Operation that failed setup!\n";
|
| + inst->dump();
|
| + if (RHS) { RHS->dump(); }
|
| + if (RHSSrc) { RHSSrc->dump(); }
|
| + if (RHSMask) { RHSMask->dump(); }
|
| + if (RHSShift) { RHSShift->dump(); }
|
| + }
|
| + // There was an issue with the setup for BitInsert.
|
| + return false;
|
| + }
|
| + if (mDebug) {
|
| + dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
|
| + dbgs() << "Op: "; inst->dump();
|
| + dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
|
| + dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
|
| + }
|
| + Constant *offset = NULL;
|
| + Constant *width = NULL;
|
| + uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
|
| + uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
|
| + uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
|
| + uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
|
| + lhsMaskVal = (LHSMask
|
| + ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
|
| + rhsMaskVal = (RHSMask
|
| + ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
|
| + lhsShiftVal = (LHSShift
|
| + ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
|
| + rhsShiftVal = (RHSShift
|
| + ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
|
| + lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
|
| + rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
|
| + lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
|
| + rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
|
| + // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
|
| + if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
|
| + return false;
|
| + }
|
| + if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
|
| + offset = ConstantInt::get(aType, lhsMaskOffset, false);
|
| + width = ConstantInt::get(aType, lhsMaskWidth, false);
|
| + RHSSrc = RHS;
|
| + if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
|
| + return false;
|
| + }
|
| + if (!LHSShift) {
|
| + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
|
| + "MaskShr", LHS);
|
| + } else if (lhsShiftVal != lhsMaskOffset) {
|
| + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
|
| + "MaskShr", LHS);
|
| + }
|
| + if (mDebug) {
|
| + dbgs() << "Optimizing LHS!\n";
|
| + }
|
| + } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
|
| + offset = ConstantInt::get(aType, rhsMaskOffset, false);
|
| + width = ConstantInt::get(aType, rhsMaskWidth, false);
|
| + LHSSrc = RHSSrc;
|
| + RHSSrc = LHS;
|
| + if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
|
| + return false;
|
| + }
|
| + if (!RHSShift) {
|
| + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
|
| + "MaskShr", RHS);
|
| + } else if (rhsShiftVal != rhsMaskOffset) {
|
| + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
|
| + "MaskShr", RHS);
|
| + }
|
| + if (mDebug) {
|
| + dbgs() << "Optimizing RHS!\n";
|
| + }
|
| + } else {
|
| + if (mDebug) {
|
| + dbgs() << "Failed constraint 3!\n";
|
| + }
|
| + return false;
|
| + }
|
| + if (mDebug) {
|
| + dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
|
| + dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
|
| + dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
|
| + dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
|
| + }
|
| + if (!offset || !width) {
|
| + if (mDebug) {
|
| + dbgs() << "Either width or offset are NULL, failed detection!\n";
|
| + }
|
| + return false;
|
| + }
|
| + // Let's create the function signature.
|
| + std::vector<Type *> callTypes;
|
| + callTypes.push_back(aType);
|
| + callTypes.push_back(aType);
|
| + callTypes.push_back(aType);
|
| + callTypes.push_back(aType);
|
| + FunctionType *funcType = FunctionType::get(aType, callTypes, false);
|
| + std::string name = "__amdil_ubit_insert";
|
| + if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
|
| + Function *Func =
|
| + dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
|
| + getOrInsertFunction(StringRef(name), funcType));
|
| + Value *Operands[4] = {
|
| + width,
|
| + offset,
|
| + LHSSrc,
|
| + RHSSrc
|
| + };
|
| + CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
|
| + if (mDebug) {
|
| + dbgs() << "Old Inst: ";
|
| + inst->dump();
|
| + dbgs() << "New Inst: ";
|
| + CI->dump();
|
| + dbgs() << "\n\n";
|
| + }
|
| + CI->insertBefore(inst);
|
| + inst->replaceAllUsesWith(CI);
|
| + return true;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
|
| + if (!inst) {
|
| + return false;
|
| + }
|
| + if (!inst->isBinaryOp()) {
|
| + return false;
|
| + }
|
| + if (inst->getOpcode() != Instruction::And) {
|
| + return false;
|
| + }
|
| + if (optLevel == CodeGenOpt::None) {
|
| + return false;
|
| + }
|
| + // We want to do some simple optimizations on shift-right/and patterns. The
|
| + // basic optimization is to turn (A >> B) & C, where A is a 32bit type, B is
|
| + // a value smaller than 32 and C is a mask, into a single bit-extract call.
|
| + // If C is a constant value, then the following transformation can occur.
|
| + // For signed integers, it turns into the function call
|
| + // dst = __amdil_ibit_extract(log2(C), B, A). For unsigned integers, it
|
| + // turns into the function call dst = __amdil_ubit_extract(log2(C), B, A).
|
| + // The function __amdil_[u|i]bit_extract can be found in Section 7.9 of the
|
| + // ATI IL spec of the Stream SDK for Evergreen hardware.
|
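| + // For illustration (example values only): (A >> 8) & 0xFF extracts the
|
| + // 8-bit field starting at bit 8 of A, i.e. width = 8 and offset = 8, which
|
| + // is exactly what a single bit-extract operation expresses.
|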
| + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
|
| + // This does not work on HD4XXX hardware.
|
| + return false;
|
| + }
|
| + Type *aType = inst->getType();
|
| + bool isVector = aType->isVectorTy();
|
| +
|
| + // XXX Support vector types
|
| + if (isVector) {
|
| + return false;
|
| + }
|
| + int numEle = 1;
|
| + // This only works on 32bit integers
|
| + if (aType->getScalarType()
|
| + != Type::getInt32Ty(inst->getContext())) {
|
| + return false;
|
| + }
|
| + if (isVector) {
|
| + const VectorType *VT = dyn_cast<VectorType>(aType);
|
| + numEle = VT->getNumElements();
|
| + // We currently cannot support more than 4 elements in an intrinsic and we
|
| + // cannot support Vec3 types.
|
| + if (numEle > 4 || numEle == 3) {
|
| + return false;
|
| + }
|
| + }
|
| + BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
|
| + // If the first operand is not a shift instruction, then we can return as it
|
| + // doesn't match this pattern.
|
| + if (!ShiftInst || !ShiftInst->isShift()) {
|
| + return false;
|
| + }
|
| + // If it is a shift left, then we don't match this pattern.
|
| + if (ShiftInst->getOpcode() == Instruction::Shl) {
|
| + return false;
|
| + }
|
| + bool isSigned = ShiftInst->isArithmeticShift();
|
| + Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
|
| + Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
|
| + // Let's make sure that the shift value and the and mask are constant integers.
|
| + if (!AndMask || !ShrVal) {
|
| + return false;
|
| + }
|
| + Constant *newMaskConst;
|
| + Constant *shiftValConst;
|
| + if (isVector) {
|
| + // Handle the vector case
|
| + std::vector<Constant *> maskVals;
|
| + std::vector<Constant *> shiftVals;
|
| + ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
|
| + ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
|
| + Type *scalarType = AndMaskVec->getType()->getScalarType();
|
| + assert(AndMaskVec->getNumOperands() ==
|
| + ShrValVec->getNumOperands() && "cannot have a "
|
| + "combination where the number of elements to a "
|
| + "shift and an and are different!");
|
| + for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
|
| + ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
|
| + ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
|
| + if (!AndCI || !ShiftIC) {
|
| + return false;
|
| + }
|
| + uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
|
| + if (!isMask_32(maskVal)) {
|
| + return false;
|
| + }
|
| + maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
|
| + uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
|
| + // If the mask or shiftval is greater than the bitcount, then break out.
|
| + if (maskVal >= 32 || shiftVal >= 32) {
|
| + return false;
|
| + }
|
| + // If the mask val is greater than the number of original bits left
|
| + // then this optimization is invalid.
|
| + if (maskVal > (32 - shiftVal)) {
|
| + return false;
|
| + }
|
| + maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
|
| + shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
|
| + }
|
| + newMaskConst = ConstantVector::get(maskVals);
|
| + shiftValConst = ConstantVector::get(shiftVals);
|
| + } else {
|
| + // Handle the scalar case
|
| + uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
|
| + // This must be a mask value where all lower bits are set to 1 and then any
|
| + // bit higher is set to 0.
|
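| + // For example, 0x0000FFFF qualifies, while 0x00FF00FF does not.
|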
| + if (!isMask_32(maskVal)) {
|
| + return false;
|
| + }
|
| + maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
|
| + // Count the number of bits set in the mask; this is the width of the
|
| + // resulting bit field that is extracted from the source value.
|
| + uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
|
| + // If the mask or shift val is greater than the bitcount, then break out.
|
| + if (maskVal >= 32 || shiftVal >= 32) {
|
| + return false;
|
| + }
|
| + // If the mask val is greater than the number of original bits left then
|
| + // this optimization is invalid.
|
| + if (maskVal > (32 - shiftVal)) {
|
| + return false;
|
| + }
|
| + newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
|
| + shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
|
| + }
|
| + // Let's create the function signature.
|
| + std::vector<Type *> callTypes;
|
| + callTypes.push_back(aType);
|
| + callTypes.push_back(aType);
|
| + callTypes.push_back(aType);
|
| + FunctionType *funcType = FunctionType::get(aType, callTypes, false);
|
| + std::string name = "llvm.AMDGPU.bit.extract.u32";
|
| + if (isVector) {
|
| + name += ".v" + itostr(numEle) + "i32";
|
| + } else {
|
| + name += ".";
|
| + }
|
| + // Let's create the function.
|
| + Function *Func =
|
| + dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
|
| + getOrInsertFunction(StringRef(name), funcType));
|
| + Value *Operands[3] = {
|
| + ShiftInst->getOperand(0),
|
| + shiftValConst,
|
| + newMaskConst
|
| + };
|
| + // Let's create the call with the operands.
|
| + CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
|
| + CI->setDoesNotAccessMemory();
|
| + CI->insertBefore(inst);
|
| + inst->replaceAllUsesWith(CI);
|
| + return true;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| + Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
|
| + if (!LHS->getName().startswith("__amdil_bfi")) {
|
| + return false;
|
| + }
|
| + Type* type = CI->getOperand(0)->getType();
|
| + Constant *negOneConst = NULL;
|
| + if (type->isVectorTy()) {
|
| + std::vector<Constant *> negOneVals;
|
| + negOneConst = ConstantInt::get(CI->getContext(),
|
| + APInt(32, StringRef("-1"), 10));
|
| + for (size_t x = 0,
|
| + y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
|
| + negOneVals.push_back(negOneConst);
|
| + }
|
| + negOneConst = ConstantVector::get(negOneVals);
|
| + } else {
|
| + negOneConst = ConstantInt::get(CI->getContext(),
|
| + APInt(32, StringRef("-1"), 10));
|
| + }
|
| + // __amdil_bfi => (A & B) | (~A & C)
|
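| + // (For example, with A = 0x0000FFFF the result takes the low 16 bits from
|
| + // B and the high 16 bits from C.)
|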
| + BinaryOperator *lhs =
|
| + BinaryOperator::Create(Instruction::And, CI->getOperand(0),
|
| + CI->getOperand(1), "bfi_and", CI);
|
| + BinaryOperator *rhs =
|
| + BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
|
| + "bfi_not", CI);
|
| + rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
|
| + "bfi_and", CI);
|
| + lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
|
| + CI->replaceAllUsesWith(lhs);
|
| + return true;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| + Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
|
| + if (!LHS->getName().startswith("__amdil_bfm")) {
|
| + return false;
|
| + }
|
| + // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
|
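| + // (For example, src0 = 8 and src1 = 4 give ((1 << 8) - 1) << 4 = 0xFF0,
|
| + // an 8-bit mask shifted left by 4.)
|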
| + Constant *newMaskConst = NULL;
|
| + Constant *newShiftConst = NULL;
|
| + Type* type = CI->getOperand(0)->getType();
|
| + if (type->isVectorTy()) {
|
| + std::vector<Constant*> newMaskVals, newShiftVals;
|
| + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
|
| + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
|
| + for (size_t x = 0,
|
| + y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
|
| + newMaskVals.push_back(newMaskConst);
|
| + newShiftVals.push_back(newShiftConst);
|
| + }
|
| + newMaskConst = ConstantVector::get(newMaskVals);
|
| + newShiftConst = ConstantVector::get(newShiftVals);
|
| + } else {
|
| + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
|
| + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
|
| + }
|
| + BinaryOperator *lhs =
|
| + BinaryOperator::Create(Instruction::And, CI->getOperand(0),
|
| + newMaskConst, "bfm_mask", CI);
|
| + lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
|
| + lhs, "bfm_shl", CI);
|
| + lhs = BinaryOperator::Create(Instruction::Sub, lhs,
|
| + newShiftConst, "bfm_sub", CI);
|
| + BinaryOperator *rhs =
|
| + BinaryOperator::Create(Instruction::And, CI->getOperand(1),
|
| + newMaskConst, "bfm_mask", CI);
|
| + lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
|
| + CI->replaceAllUsesWith(lhs);
|
| + return true;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
|
| + Instruction *inst = (*bbb);
|
| + if (optimizeCallInst(bbb)) {
|
| + return true;
|
| + }
|
| + if (optimizeBitExtract(inst)) {
|
| + return false;
|
| + }
|
| + if (optimizeBitInsert(inst)) {
|
| + return false;
|
| + }
|
| + if (correctMisalignedMemOp(inst)) {
|
| + return false;
|
| + }
|
| + return false;
|
| +}
|
| +bool
|
| +AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
|
| + LoadInst *linst = dyn_cast<LoadInst>(inst);
|
| + StoreInst *sinst = dyn_cast<StoreInst>(inst);
|
| + unsigned alignment;
|
| + Type* Ty = inst->getType();
|
| + if (linst) {
|
| + alignment = linst->getAlignment();
|
| + Ty = inst->getType();
|
| + } else if (sinst) {
|
| + alignment = sinst->getAlignment();
|
| + Ty = sinst->getValueOperand()->getType();
|
| + } else {
|
| + return false;
|
| + }
|
| + unsigned size = getTypeSize(Ty);
|
| + if (size == alignment || size < alignment) {
|
| + return false;
|
| + }
|
| + if (!Ty->isStructTy()) {
|
| + return false;
|
| + }
|
| + if (alignment < 4) {
|
| + if (linst) {
|
| + linst->setAlignment(0);
|
| + return true;
|
| + } else if (sinst) {
|
| + sinst->setAlignment(0);
|
| + return true;
|
| + }
|
| + }
|
| + return false;
|
| +}
|
| +bool
|
| +AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| + Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
|
| + std::string namePrefix = LHS->getName().substr(0, 14);
|
| + if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
|
| + && namePrefix != "__amdil__imul24_high") {
|
| + return false;
|
| + }
|
| + if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +void
|
| +AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
|
| + assert(isSigned24BitOps(CI) && "Must be a "
|
| + "signed 24 bit operation to call this function!");
|
| + Value *LHS = CI->getOperand(CI->getNumOperands()-1);
|
| + // On 7XX and 8XX we do not have signed 24bit, so we need to
|
| + // expand it to the following:
|
| + // imul24 turns into 32bit imul
|
| + // imad24 turns into 32bit imad
|
| + // imul24_high turns into 32bit imulhigh
|
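| + // For example (scalar case, per the code below): a call to __amdil_imad24
|
| + // is replaced by a call to __amdil_imad_i32 with the same three operands,
|
| + // and a call to __amdil_imul24 is replaced by a plain 32bit 'mul'
|
| + // instruction.
|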
| + if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
|
| + Type *aType = CI->getOperand(0)->getType();
|
| + bool isVector = aType->isVectorTy();
|
| + int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
|
| + std::vector<Type*> callTypes;
|
| + callTypes.push_back(CI->getOperand(0)->getType());
|
| + callTypes.push_back(CI->getOperand(1)->getType());
|
| + callTypes.push_back(CI->getOperand(2)->getType());
|
| + FunctionType *funcType =
|
| + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
|
| + std::string name = "__amdil_imad";
|
| + if (isVector) {
|
| + name += "_v" + itostr(numEle) + "i32";
|
| + } else {
|
| + name += "_i32";
|
| + }
|
| + Function *Func = dyn_cast<Function>(
|
| + CI->getParent()->getParent()->getParent()->
|
| + getOrInsertFunction(StringRef(name), funcType));
|
| + Value *Operands[3] = {
|
| + CI->getOperand(0),
|
| + CI->getOperand(1),
|
| + CI->getOperand(2)
|
| + };
|
| + CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
|
| + nCI->insertBefore(CI);
|
| + CI->replaceAllUsesWith(nCI);
|
| + } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
|
| + BinaryOperator *mulOp =
|
| + BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
|
| + CI->getOperand(1), "imul24", CI);
|
| + CI->replaceAllUsesWith(mulOp);
|
| + } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
|
| + Type *aType = CI->getOperand(0)->getType();
|
| +
|
| + bool isVector = aType->isVectorTy();
|
| + int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
|
| + std::vector<Type*> callTypes;
|
| + callTypes.push_back(CI->getOperand(0)->getType());
|
| + callTypes.push_back(CI->getOperand(1)->getType());
|
| + FunctionType *funcType =
|
| + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
|
| + std::string name = "__amdil_imul_high";
|
| + if (isVector) {
|
| + name += "_v" + itostr(numEle) + "i32";
|
| + } else {
|
| + name += "_i32";
|
| + }
|
| + Function *Func = dyn_cast<Function>(
|
| + CI->getParent()->getParent()->getParent()->
|
| + getOrInsertFunction(StringRef(name), funcType));
|
| + Value *Operands[2] = {
|
| + CI->getOperand(0),
|
| + CI->getOperand(1)
|
| + };
|
| + CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
|
| + nCI->insertBefore(CI);
|
| + CI->replaceAllUsesWith(nCI);
|
| + }
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
|
| + return (CI != NULL
|
| + && CI->getOperand(CI->getNumOperands() - 1)->getName()
|
| + == "__amdil_get_local_size_int");
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
|
| + && (mSTM->getDeviceName() == "cayman")) {
|
| + return false;
|
| + }
|
| + return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
|
| + == "__amdil_improved_div";
|
| +}
|
| +
|
| +void
|
| +AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
|
| + assert(convertAccurateDivide(CI)
|
| + && "expanding accurate divide can only happen if it is expandable!");
|
| + BinaryOperator *divOp =
|
| + BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
|
| + CI->getOperand(1), "fdiv32", CI);
|
| + CI->replaceAllUsesWith(divOp);
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
|
| + if (optLevel != CodeGenOpt::None) {
|
| + return false;
|
| + }
|
| +
|
| + if (!CI) {
|
| + return false;
|
| + }
|
| +
|
| + unsigned funcNameIdx = 0;
|
| + funcNameIdx = CI->getNumOperands() - 1;
|
| + StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
|
| + if (calleeName != "__amdil_image2d_read_norm"
|
| + && calleeName != "__amdil_image2d_read_unnorm"
|
| + && calleeName != "__amdil_image3d_read_norm"
|
| + && calleeName != "__amdil_image3d_read_unnorm") {
|
| + return false;
|
| + }
|
| +
|
| + unsigned samplerIdx = 2;
|
| + samplerIdx = 1;
|
| + Value *sampler = CI->getOperand(samplerIdx);
|
| + LoadInst *lInst = dyn_cast<LoadInst>(sampler);
|
| + if (!lInst) {
|
| + return false;
|
| + }
|
| +
|
| + if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
|
| + return false;
|
| + }
|
| +
|
| + GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
|
| + // If we are loading from what is not a global value, then we
|
| + // fail and return.
|
| + if (!gv) {
|
| + return false;
|
| + }
|
| +
|
| + // If we don't have an initializer or we have an initializer and
|
| + // the initializer is not a 32bit integer, we fail.
|
| + if (!gv->hasInitializer()
|
| + || !gv->getInitializer()->getType()->isIntegerTy(32)) {
|
| + return false;
|
| + }
|
| +
|
| + // Now that we have the global variable initializer, let's replace
|
| + // all uses of the load instruction with the samplerVal and
|
| + // reparse the __amdil_is_constant() function.
|
| + Constant *samplerVal = gv->getInitializer();
|
| + lInst->replaceAllUsesWith(samplerVal);
|
| + return true;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::doInitialization(Module &M) {
|
| + return false;
|
| +}
|
| +
|
| +bool
|
| +AMDGPUPeepholeOpt::doFinalization(Module &M) {
|
| + return false;
|
| +}
|
| +
|
| +void
|
| +AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
|
| + AU.addRequired<MachineFunctionAnalysis>();
|
| + FunctionPass::getAnalysisUsage(AU);
|
| + AU.setPreservesAll();
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
|
| + size_t size = 0;
|
| + if (!T) {
|
| + return size;
|
| + }
|
| + switch (T->getTypeID()) {
|
| + case Type::X86_FP80TyID:
|
| + case Type::FP128TyID:
|
| + case Type::PPC_FP128TyID:
|
| + case Type::LabelTyID:
|
| + assert(0 && "These types are not supported by this backend");
|
| + default:
|
| + case Type::FloatTyID:
|
| + case Type::DoubleTyID:
|
| + size = T->getPrimitiveSizeInBits() >> 3;
|
| + break;
|
| + case Type::PointerTyID:
|
| + size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
|
| + break;
|
| + case Type::IntegerTyID:
|
| + size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
|
| + break;
|
| + case Type::StructTyID:
|
| + size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
|
| + break;
|
| + case Type::ArrayTyID:
|
| + size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
|
| + break;
|
| + case Type::FunctionTyID:
|
| + size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
|
| + break;
|
| + case Type::VectorTyID:
|
| + size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
|
| + break;
|
| + };
|
| + return size;
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
|
| + bool dereferencePtr) {
|
| + size_t size = 0;
|
| + if (!ST) {
|
| + return size;
|
| + }
|
| + Type *curType;
|
| + StructType::element_iterator eib;
|
| + StructType::element_iterator eie;
|
| + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
|
| + curType = *eib;
|
| + size += getTypeSize(curType, dereferencePtr);
|
| + }
|
| + return size;
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
|
| + bool dereferencePtr) {
|
| + return IT ? (IT->getBitWidth() >> 3) : 0;
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
|
| + bool dereferencePtr) {
|
| + assert(0 && "Should not be able to calculate the size of an function type");
|
| + return 0;
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
|
| + bool dereferencePtr) {
|
| + return (size_t)(AT ? (getTypeSize(AT->getElementType(),
|
| + dereferencePtr) * AT->getNumElements())
|
| + : 0);
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
|
| + bool dereferencePtr) {
|
| + return VT ? (VT->getBitWidth() >> 3) : 0;
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
|
| + bool dereferencePtr) {
|
| + if (!PT) {
|
| + return 0;
|
| + }
|
| + Type *CT = PT->getElementType();
|
| + if (CT->getTypeID() == Type::StructTyID &&
|
| + PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
|
| + return getTypeSize(dyn_cast<StructType>(CT));
|
| + } else if (dereferencePtr) {
|
| + size_t size = 0;
|
| + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
|
| + size += getTypeSize(PT->getContainedType(x), dereferencePtr);
|
| + }
|
| + return size;
|
| + } else {
|
| + return 4;
|
| + }
|
| +}
|
| +
|
| +size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
|
| + bool dereferencePtr) {
|
| + //assert(0 && "Should not be able to calculate the size of an opaque type");
|
| + return 4;
|
| +}
|