| OLD | NEW |
| 1 //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// | 1 //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// |
| 2 // | 2 // |
| 3 // The LLVM Compiler Infrastructure | 3 // The LLVM Compiler Infrastructure |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops | 10 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 #include "llvm/IR/IntrinsicInst.h" | 73 #include "llvm/IR/IntrinsicInst.h" |
| 74 #include "llvm/IR/LLVMContext.h" | 74 #include "llvm/IR/LLVMContext.h" |
| 75 #include "llvm/IR/Module.h" | 75 #include "llvm/IR/Module.h" |
| 76 #include "llvm/IR/Type.h" | 76 #include "llvm/IR/Type.h" |
| 77 #include "llvm/IR/Value.h" | 77 #include "llvm/IR/Value.h" |
| 78 #include "llvm/Pass.h" | 78 #include "llvm/Pass.h" |
| 79 #include "llvm/Support/CommandLine.h" | 79 #include "llvm/Support/CommandLine.h" |
| 80 #include "llvm/Support/Debug.h" | 80 #include "llvm/Support/Debug.h" |
| 81 #include "llvm/Support/PatternMatch.h" | 81 #include "llvm/Support/PatternMatch.h" |
| 82 #include "llvm/Support/raw_ostream.h" | 82 #include "llvm/Support/raw_ostream.h" |
| 83 #include "llvm/Support/ValueHandle.h" | |
| 84 #include "llvm/Target/TargetLibraryInfo.h" | 83 #include "llvm/Target/TargetLibraryInfo.h" |
| 85 #include "llvm/Transforms/Scalar.h" | 84 #include "llvm/Transforms/Scalar.h" |
| 86 #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 85 #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 87 #include "llvm/Transforms/Utils/Local.h" | 86 #include "llvm/Transforms/Utils/Local.h" |
| 88 #include <algorithm> | 87 #include <algorithm> |
| 89 #include <map> | 88 #include <map> |
| 90 | 89 |
| 91 using namespace llvm; | 90 using namespace llvm; |
| 92 using namespace llvm::PatternMatch; | 91 using namespace llvm::PatternMatch; |
| 93 | 92 |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 210 /// Create a broadcast instruction. This method generates a broadcast | 209 /// Create a broadcast instruction. This method generates a broadcast |
| 211 /// instruction (shuffle) for loop invariant values and for the induction | 210 /// instruction (shuffle) for loop invariant values and for the induction |
| 212 /// value. If this is the induction variable then we extend it to N, N+1, ... | 211 /// value. If this is the induction variable then we extend it to N, N+1, ... |
| 213 /// this is needed because each iteration in the loop corresponds to a SIMD | 212 /// this is needed because each iteration in the loop corresponds to a SIMD |
| 214 /// element. | 213 /// element. |
| 215 Value *getBroadcastInstrs(Value *V); | 214 Value *getBroadcastInstrs(Value *V); |
| 216 | 215 |
| 217 /// This function adds 0, 1, 2 ... to each vector element, starting at zero. | 216 /// This function adds 0, 1, 2 ... to each vector element, starting at zero. |
| 218 /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). | 217 /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). |
| 219 /// The sequence starts at StartIndex. | 218 /// The sequence starts at StartIndex. |
| 220 Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); | 219 Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate); |
| 221 | 220 |
| 222 /// When we go over instructions in the basic block we rely on previous | 221 /// When we go over instructions in the basic block we rely on previous |
| 223 /// values within the current basic block or on loop invariant values. | 222 /// values within the current basic block or on loop invariant values. |
| 224 /// When we widen (vectorize) values we place them in the map. If the values | 223 /// When we widen (vectorize) values we place them in the map. If the values |
| 225 /// are not within the map, they have to be loop invariant, so we simply | 224 /// are not within the map, they have to be loop invariant, so we simply |
| 226 /// broadcast them into a vector. | 225 /// broadcast them into a vector. |
| 227 VectorParts &getVectorValue(Value *V); | 226 VectorParts &getVectorValue(Value *V); |
| 228 | 227 |
| 229 /// Generate a shuffle sequence that will reverse the vector Vec. | 228 /// Generate a shuffle sequence that will reverse the vector Vec. |
| 230 Value *reverseVector(Value *Vec); | 229 Value *reverseVector(Value *Vec); |
| (...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 376 struct ReductionDescriptor { | 375 struct ReductionDescriptor { |
| 377 ReductionDescriptor() : StartValue(0), LoopExitInstr(0), | 376 ReductionDescriptor() : StartValue(0), LoopExitInstr(0), |
| 378 Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} | 377 Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} |
| 379 | 378 |
| 380 ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, | 379 ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, |
| 381 MinMaxReductionKind MK) | 380 MinMaxReductionKind MK) |
| 382 : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} | 381 : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} |
| 383 | 382 |
| 384 // The starting value of the reduction. | 383 // The starting value of the reduction. |
| 385 // It does not have to be zero! | 384 // It does not have to be zero! |
| 386 TrackingVH<Value> StartValue; | 385 Value *StartValue; |
| 387 // The instruction who's value is used outside the loop. | 386 // The instruction who's value is used outside the loop. |
| 388 Instruction *LoopExitInstr; | 387 Instruction *LoopExitInstr; |
| 389 // The kind of the reduction. | 388 // The kind of the reduction. |
| 390 ReductionKind Kind; | 389 ReductionKind Kind; |
| 391 // If this a min/max reduction the kind of reduction. | 390 // If this a min/max reduction the kind of reduction. |
| 392 MinMaxReductionKind MinMaxKind; | 391 MinMaxReductionKind MinMaxKind; |
| 393 }; | 392 }; |
| 394 | 393 |
| 395 /// This POD struct holds information about a potential reduction operation. | 394 /// This POD struct holds information about a potential reduction operation. |
| 396 struct ReductionInstDesc { | 395 struct ReductionInstDesc { |
| (...skipping 24 matching lines...) Expand all Loading... |
| 421 Starts.clear(); | 420 Starts.clear(); |
| 422 Ends.clear(); | 421 Ends.clear(); |
| 423 } | 422 } |
| 424 | 423 |
| 425 /// Insert a pointer and calculate the start and end SCEVs. | 424 /// Insert a pointer and calculate the start and end SCEVs. |
| 426 void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); | 425 void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); |
| 427 | 426 |
| 428 /// This flag indicates if we need to add the runtime check. | 427 /// This flag indicates if we need to add the runtime check. |
| 429 bool Need; | 428 bool Need; |
| 430 /// Holds the pointers that we need to check. | 429 /// Holds the pointers that we need to check. |
| 431 SmallVector<TrackingVH<Value>, 2> Pointers; | 430 SmallVector<Value*, 2> Pointers; |
| 432 /// Holds the pointer value at the beginning of the loop. | 431 /// Holds the pointer value at the beginning of the loop. |
| 433 SmallVector<const SCEV*, 2> Starts; | 432 SmallVector<const SCEV*, 2> Starts; |
| 434 /// Holds the pointer value at the end of the loop. | 433 /// Holds the pointer value at the end of the loop. |
| 435 SmallVector<const SCEV*, 2> Ends; | 434 SmallVector<const SCEV*, 2> Ends; |
| 436 /// Holds the information if this pointer is used for writing to memory. | 435 /// Holds the information if this pointer is used for writing to memory. |
| 437 SmallVector<bool, 2> IsWritePtr; | 436 SmallVector<bool, 2> IsWritePtr; |
| 438 }; | 437 }; |
| 439 | 438 |
| 440 /// A POD for saving information about induction variables. | 439 /// A POD for saving information about induction variables. |
| 441 struct InductionInfo { | 440 struct InductionInfo { |
| 442 InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} | 441 InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} |
| 443 InductionInfo() : StartValue(0), IK(IK_NoInduction) {} | 442 InductionInfo() : StartValue(0), IK(IK_NoInduction) {} |
| 444 /// Start value. | 443 /// Start value. |
| 445 TrackingVH<Value> StartValue; | 444 Value *StartValue; |
| 446 /// Induction kind. | 445 /// Induction kind. |
| 447 InductionKind IK; | 446 InductionKind IK; |
| 448 }; | 447 }; |
| 449 | 448 |
| 450 /// ReductionList contains the reduction descriptors for all | 449 /// ReductionList contains the reduction descriptors for all |
| 451 /// of the reductions that were found in the loop. | 450 /// of the reductions that were found in the loop. |
| 452 typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; | 451 typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; |
| 453 | 452 |
| 454 /// InductionList saves induction variables and maps them to the | 453 /// InductionList saves induction variables and maps them to the |
| 455 /// induction descriptor. | 454 /// induction descriptor. |
| (...skipping 367 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 823 // Broadcast the scalar into all locations in the vector. | 822 // Broadcast the scalar into all locations in the vector. |
| 824 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); | 823 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); |
| 825 | 824 |
| 826 // Restore the builder insertion point. | 825 // Restore the builder insertion point. |
| 827 if (Invariant) | 826 if (Invariant) |
| 828 Builder.SetInsertPoint(Loc); | 827 Builder.SetInsertPoint(Loc); |
| 829 | 828 |
| 830 return Shuf; | 829 return Shuf; |
| 831 } | 830 } |
| 832 | 831 |
| 833 Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, | 832 Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, |
| 834 bool Negate) { | 833 bool Negate) { |
| 835 assert(Val->getType()->isVectorTy() && "Must be a vector"); | 834 assert(Val->getType()->isVectorTy() && "Must be a vector"); |
| 836 assert(Val->getType()->getScalarType()->isIntegerTy() && | 835 assert(Val->getType()->getScalarType()->isIntegerTy() && |
| 837 "Elem must be an integer"); | 836 "Elem must be an integer"); |
| 838 // Create the types. | 837 // Create the types. |
| 839 Type *ITy = Val->getType()->getScalarType(); | 838 Type *ITy = Val->getType()->getScalarType(); |
| 840 VectorType *Ty = cast<VectorType>(Val->getType()); | 839 VectorType *Ty = cast<VectorType>(Val->getType()); |
| 841 int VLen = Ty->getNumElements(); | 840 int VLen = Ty->getNumElements(); |
| 842 SmallVector<Constant*, 8> Indices; | 841 SmallVector<Constant*, 8> Indices; |
| 843 | 842 |
| 844 // Create a vector of consecutive numbers from zero to VF. | 843 // Create a vector of consecutive numbers from zero to VF. |
| 845 for (int i = 0; i < VLen; ++i) { | 844 for (int i = 0; i < VLen; ++i) { |
| 846 int64_t Idx = Negate ? (-i) : i; | 845 int Idx = Negate ? (-i): i; |
| 847 Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate)); | 846 Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx)); |
| 848 } | 847 } |
| 849 | 848 |
| 850 // Add the consecutive indices to the vector value. | 849 // Add the consecutive indices to the vector value. |
| 851 Constant *Cv = ConstantVector::get(Indices); | 850 Constant *Cv = ConstantVector::get(Indices); |
| 852 assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); | 851 assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); |
| 853 return Builder.CreateAdd(Val, Cv, "induction"); | 852 return Builder.CreateAdd(Val, Cv, "induction"); |
| 854 } | 853 } |
| 855 | 854 |
| 856 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { | 855 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { |
| 857 assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); | 856 assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 957 // Attempt to issue a wide load. | 956 // Attempt to issue a wide load. |
| 958 LoadInst *LI = dyn_cast<LoadInst>(Instr); | 957 LoadInst *LI = dyn_cast<LoadInst>(Instr); |
| 959 StoreInst *SI = dyn_cast<StoreInst>(Instr); | 958 StoreInst *SI = dyn_cast<StoreInst>(Instr); |
| 960 | 959 |
| 961 assert((LI || SI) && "Invalid Load/Store instruction"); | 960 assert((LI || SI) && "Invalid Load/Store instruction"); |
| 962 | 961 |
| 963 Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); | 962 Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); |
| 964 Type *DataTy = VectorType::get(ScalarDataTy, VF); | 963 Type *DataTy = VectorType::get(ScalarDataTy, VF); |
| 965 Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); | 964 Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); |
| 966 unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); | 965 unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); |
| 967 unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); | 966 |
| 968 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); | 967 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); |
| 969 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; | 968 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; |
| 970 | 969 |
| 971 if (ScalarAllocatedSize != VectorElementSize) | 970 if (ScalarAllocatedSize != VectorElementSize) |
| 972 return scalarizeInstruction(Instr); | 971 return scalarizeInstruction(Instr); |
| 973 | 972 |
| 974 // If the pointer is loop invariant or if it is non consecutive, | 973 // If the pointer is loop invariant or if it is non consecutive, |
| 975 // scalarize the load. | 974 // scalarize the load. |
| 976 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); | 975 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); |
| 977 bool Reverse = ConsecutiveStride < 0; | 976 bool Reverse = ConsecutiveStride < 0; |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1032 if (Reverse) { | 1031 if (Reverse) { |
| 1033 // If we store to reverse consecutive memory locations then we need | 1032 // If we store to reverse consecutive memory locations then we need |
| 1034 // to reverse the order of elements in the stored value. | 1033 // to reverse the order of elements in the stored value. |
| 1035 StoredVal[Part] = reverseVector(StoredVal[Part]); | 1034 StoredVal[Part] = reverseVector(StoredVal[Part]); |
| 1036 // If the address is consecutive but reversed, then the | 1035 // If the address is consecutive but reversed, then the |
| 1037 // wide store needs to start at the last vector element. | 1036 // wide store needs to start at the last vector element. |
| 1038 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); | 1037 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); |
| 1039 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); | 1038 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); |
| 1040 } | 1039 } |
| 1041 | 1040 |
| 1042 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(Addres
sSpace)); | 1041 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); |
| 1043 Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); | 1042 Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); |
| 1044 } | 1043 } |
| 1045 } | 1044 } |
| 1046 | 1045 |
| 1047 for (unsigned Part = 0; Part < UF; ++Part) { | 1046 for (unsigned Part = 0; Part < UF; ++Part) { |
| 1048 // Calculate the pointer for the specific unroll-part. | 1047 // Calculate the pointer for the specific unroll-part. |
| 1049 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); | 1048 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); |
| 1050 | 1049 |
| 1051 if (Reverse) { | 1050 if (Reverse) { |
| 1052 // If the address is consecutive but reversed, then the | 1051 // If the address is consecutive but reversed, then the |
| 1053 // wide store needs to start at the last vector element. | 1052 // wide store needs to start at the last vector element. |
| 1054 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); | 1053 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); |
| 1055 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); | 1054 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); |
| 1056 } | 1055 } |
| 1057 | 1056 |
| 1058 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressS
pace)); | 1057 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); |
| 1059 Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); | 1058 Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); |
| 1060 cast<LoadInst>(LI)->setAlignment(Alignment); | 1059 cast<LoadInst>(LI)->setAlignment(Alignment); |
| 1061 Entry[Part] = Reverse ? reverseVector(LI) : LI; | 1060 Entry[Part] = Reverse ? reverseVector(LI) : LI; |
| 1062 } | 1061 } |
| 1063 } | 1062 } |
| 1064 | 1063 |
| 1065 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { | 1064 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { |
| 1066 assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); | 1065 assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); |
| 1067 // Holds vector parameters or scalars, in case of uniform vals. | 1066 // Holds vector parameters or scalars, in case of uniform vals. |
| 1068 SmallVector<VectorParts, 4> Params; | 1067 SmallVector<VectorParts, 4> Params; |
| (...skipping 997 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2066 Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, | 2065 Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, |
| 2067 "resize.norm.idx"); | 2066 "resize.norm.idx"); |
| 2068 Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, | 2067 Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, |
| 2069 "reverse.idx"); | 2068 "reverse.idx"); |
| 2070 | 2069 |
| 2071 // This is a new value so do not hoist it out. | 2070 // This is a new value so do not hoist it out. |
| 2072 Value *Broadcasted = getBroadcastInstrs(ReverseInd); | 2071 Value *Broadcasted = getBroadcastInstrs(ReverseInd); |
| 2073 // After broadcasting the induction variable we need to make the | 2072 // After broadcasting the induction variable we need to make the |
| 2074 // vector consecutive by adding ... -3, -2, -1, 0. | 2073 // vector consecutive by adding ... -3, -2, -1, 0. |
| 2075 for (unsigned part = 0; part < UF; ++part) | 2074 for (unsigned part = 0; part < UF; ++part) |
| 2076 Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, | 2075 Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true); |
| 2077 true); | |
| 2078 continue; | 2076 continue; |
| 2079 } | 2077 } |
| 2080 | 2078 |
| 2081 // Handle the pointer induction variable case. | 2079 // Handle the pointer induction variable case. |
| 2082 assert(P->getType()->isPointerTy() && "Unexpected type."); | 2080 assert(P->getType()->isPointerTy() && "Unexpected type."); |
| 2083 | 2081 |
| 2084 // Is this a reverse induction ptr or a consecutive induction ptr. | 2082 // Is this a reverse induction ptr or a consecutive induction ptr. |
| 2085 bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == | 2083 bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == |
| 2086 II.IK); | 2084 II.IK); |
| 2087 | 2085 |
| (...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2301 // We must be able to predicate all blocks that need to be predicated. | 2299 // We must be able to predicate all blocks that need to be predicated. |
| 2302 if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) | 2300 if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) |
| 2303 return false; | 2301 return false; |
| 2304 } | 2302 } |
| 2305 | 2303 |
| 2306 // We can if-convert this loop. | 2304 // We can if-convert this loop. |
| 2307 return true; | 2305 return true; |
| 2308 } | 2306 } |
| 2309 | 2307 |
| 2310 bool LoopVectorizationLegality::canVectorize() { | 2308 bool LoopVectorizationLegality::canVectorize() { |
| 2311 // We must have a loop in canonical form. Loops with indirectbr in them cannot | 2309 assert(TheLoop->getLoopPreheader() && "No preheader!!"); |
| 2312 // be canonicalized. | |
| 2313 if (!TheLoop->getLoopPreheader()) | |
| 2314 return false; | |
| 2315 | 2310 |
| 2316 // We can only vectorize innermost loops. | 2311 // We can only vectorize innermost loops. |
| 2317 if (TheLoop->getSubLoopsVector().size()) | 2312 if (TheLoop->getSubLoopsVector().size()) |
| 2318 return false; | 2313 return false; |
| 2319 | 2314 |
| 2320 // We must have a single backedge. | 2315 // We must have a single backedge. |
| 2321 if (TheLoop->getNumBackEdges() != 1) | 2316 if (TheLoop->getNumBackEdges() != 1) |
| 2322 return false; | 2317 return false; |
| 2323 | 2318 |
| 2324 // We must have a single exiting block. | 2319 // We must have a single exiting block. |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2371 DEBUG(dbgs() << "LV: We can vectorize this loop" << | 2366 DEBUG(dbgs() << "LV: We can vectorize this loop" << |
| 2372 (PtrRtCheck.Need ? " (with a runtime bound check)" : "") | 2367 (PtrRtCheck.Need ? " (with a runtime bound check)" : "") |
| 2373 <<"!\n"); | 2368 <<"!\n"); |
| 2374 | 2369 |
| 2375 // Okay! We can vectorize. At this point we don't have any other mem analysis | 2370 // Okay! We can vectorize. At this point we don't have any other mem analysis |
| 2376 // which may limit our maximum vectorization factor, so just return true with | 2371 // which may limit our maximum vectorization factor, so just return true with |
| 2377 // no restrictions. | 2372 // no restrictions. |
| 2378 return true; | 2373 return true; |
| 2379 } | 2374 } |
| 2380 | 2375 |
| 2381 /// \brief Check that the instruction has outside loop users and is not an | |
| 2382 /// identified reduction variable. | |
| 2383 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, | |
| 2384 SmallPtrSet<Value *, 4> &Reductions) { | |
| 2385 // Reduction instructions are allowed to have exit users. All other | |
| 2386 // instructions must not have external users. | |
| 2387 if (!Reductions.count(Inst)) | |
| 2388 //Check that all of the users of the loop are inside the BB. | |
| 2389 for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); | |
| 2390 I != E; ++I) { | |
| 2391 Instruction *U = cast<Instruction>(*I); | |
| 2392 // This user may be a reduction exit value. | |
| 2393 if (!TheLoop->contains(U)) { | |
| 2394 DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); | |
| 2395 return true; | |
| 2396 } | |
| 2397 } | |
| 2398 return false; | |
| 2399 } | |
| 2400 | |
| 2401 bool LoopVectorizationLegality::canVectorizeInstrs() { | 2376 bool LoopVectorizationLegality::canVectorizeInstrs() { |
| 2402 BasicBlock *PreHeader = TheLoop->getLoopPreheader(); | 2377 BasicBlock *PreHeader = TheLoop->getLoopPreheader(); |
| 2403 BasicBlock *Header = TheLoop->getHeader(); | 2378 BasicBlock *Header = TheLoop->getHeader(); |
| 2404 | 2379 |
| 2405 // If we marked the scalar loop as "already vectorized" then no need | 2380 // If we marked the scalar loop as "already vectorized" then no need |
| 2406 // to vectorize it again. | 2381 // to vectorize it again. |
| 2407 if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { | 2382 if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { |
| 2408 DEBUG(dbgs() << "LV: This loop was vectorized before\n"); | 2383 DEBUG(dbgs() << "LV: This loop was vectorized before\n"); |
| 2409 return false; | 2384 return false; |
| 2410 } | 2385 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 2429 if (!Phi->getType()->isIntegerTy() && | 2404 if (!Phi->getType()->isIntegerTy() && |
| 2430 !Phi->getType()->isFloatingPointTy() && | 2405 !Phi->getType()->isFloatingPointTy() && |
| 2431 !Phi->getType()->isPointerTy()) { | 2406 !Phi->getType()->isPointerTy()) { |
| 2432 DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); | 2407 DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); |
| 2433 return false; | 2408 return false; |
| 2434 } | 2409 } |
| 2435 | 2410 |
| 2436 // If this PHINode is not in the header block, then we know that we | 2411 // If this PHINode is not in the header block, then we know that we |
| 2437 // can convert it to select during if-conversion. No need to check if | 2412 // can convert it to select during if-conversion. No need to check if |
| 2438 // the PHIs in this block are induction or reduction variables. | 2413 // the PHIs in this block are induction or reduction variables. |
| 2439 if (*bb != Header) { | 2414 if (*bb != Header) |
| 2440 // Check that this instruction has no outside users or is an | 2415 continue; |
| 2441 // identified reduction value with an outside user. | |
| 2442 if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) | |
| 2443 continue; | |
| 2444 return false; | |
| 2445 } | |
| 2446 | 2416 |
| 2447 // We only allow if-converted PHIs with more than two incoming values. | 2417 // We only allow if-converted PHIs with more than two incoming values. |
| 2448 if (Phi->getNumIncomingValues() != 2) { | 2418 if (Phi->getNumIncomingValues() != 2) { |
| 2449 DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); | 2419 DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); |
| 2450 return false; | 2420 return false; |
| 2451 } | 2421 } |
| 2452 | 2422 |
| 2453 // This is the value coming from the preheader. | 2423 // This is the value coming from the preheader. |
| 2454 Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); | 2424 Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); |
| 2455 // Check if this is an induction variable. | 2425 // Check if this is an induction variable. |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2528 | 2498 |
| 2529 // Check that the stored type is vectorizable. | 2499 // Check that the stored type is vectorizable. |
| 2530 if (StoreInst *ST = dyn_cast<StoreInst>(it)) { | 2500 if (StoreInst *ST = dyn_cast<StoreInst>(it)) { |
| 2531 Type *T = ST->getValueOperand()->getType(); | 2501 Type *T = ST->getValueOperand()->getType(); |
| 2532 if (!VectorType::isValidElementType(T)) | 2502 if (!VectorType::isValidElementType(T)) |
| 2533 return false; | 2503 return false; |
| 2534 } | 2504 } |
| 2535 | 2505 |
| 2536 // Reduction instructions are allowed to have exit users. | 2506 // Reduction instructions are allowed to have exit users. |
| 2537 // All other instructions must not have external users. | 2507 // All other instructions must not have external users. |
| 2538 if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) | 2508 if (!AllowedExit.count(it)) |
| 2539 return false; | 2509 //Check that all of the users of the loop are inside the BB. |
| 2540 | 2510 for (Value::use_iterator I = it->use_begin(), E = it->use_end(); |
| 2511 I != E; ++I) { |
| 2512 Instruction *U = cast<Instruction>(*I); |
| 2513 // This user may be a reduction exit value. |
| 2514 if (!TheLoop->contains(U)) { |
| 2515 DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); |
| 2516 return false; |
| 2517 } |
| 2518 } |
| 2541 } // next instr. | 2519 } // next instr. |
| 2542 | 2520 |
| 2543 } | 2521 } |
| 2544 | 2522 |
| 2545 if (!Induction) { | 2523 if (!Induction) { |
| 2546 DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); | 2524 DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); |
| 2547 assert(getInductionVars()->size() && "No induction variables"); | 2525 assert(getInductionVars()->size() && "No induction variables"); |
| 2548 } | 2526 } |
| 2549 | 2527 |
| 2550 return true; | 2528 return true; |
| (...skipping 1208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3759 // Check for a store. | 3737 // Check for a store. |
| 3760 if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) | 3738 if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) |
| 3761 return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; | 3739 return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; |
| 3762 | 3740 |
| 3763 // Check for a load. | 3741 // Check for a load. |
| 3764 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) | 3742 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) |
| 3765 return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; | 3743 return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; |
| 3766 | 3744 |
| 3767 return false; | 3745 return false; |
| 3768 } | 3746 } |
| OLD | NEW |