OLD | NEW |
1 //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// | 1 //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===// |
2 // | 2 // |
3 // The LLVM Compiler Infrastructure | 3 // The LLVM Compiler Infrastructure |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops | 10 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 #include "llvm/IR/IntrinsicInst.h" | 73 #include "llvm/IR/IntrinsicInst.h" |
74 #include "llvm/IR/LLVMContext.h" | 74 #include "llvm/IR/LLVMContext.h" |
75 #include "llvm/IR/Module.h" | 75 #include "llvm/IR/Module.h" |
76 #include "llvm/IR/Type.h" | 76 #include "llvm/IR/Type.h" |
77 #include "llvm/IR/Value.h" | 77 #include "llvm/IR/Value.h" |
78 #include "llvm/Pass.h" | 78 #include "llvm/Pass.h" |
79 #include "llvm/Support/CommandLine.h" | 79 #include "llvm/Support/CommandLine.h" |
80 #include "llvm/Support/Debug.h" | 80 #include "llvm/Support/Debug.h" |
81 #include "llvm/Support/PatternMatch.h" | 81 #include "llvm/Support/PatternMatch.h" |
82 #include "llvm/Support/raw_ostream.h" | 82 #include "llvm/Support/raw_ostream.h" |
83 #include "llvm/Support/ValueHandle.h" | |
84 #include "llvm/Target/TargetLibraryInfo.h" | 83 #include "llvm/Target/TargetLibraryInfo.h" |
85 #include "llvm/Transforms/Scalar.h" | 84 #include "llvm/Transforms/Scalar.h" |
86 #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 85 #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
87 #include "llvm/Transforms/Utils/Local.h" | 86 #include "llvm/Transforms/Utils/Local.h" |
88 #include <algorithm> | 87 #include <algorithm> |
89 #include <map> | 88 #include <map> |
90 | 89 |
91 using namespace llvm; | 90 using namespace llvm; |
92 using namespace llvm::PatternMatch; | 91 using namespace llvm::PatternMatch; |
93 | 92 |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
210 /// Create a broadcast instruction. This method generates a broadcast | 209 /// Create a broadcast instruction. This method generates a broadcast |
211 /// instruction (shuffle) for loop invariant values and for the induction | 210 /// instruction (shuffle) for loop invariant values and for the induction |
212 /// value. If this is the induction variable then we extend it to N, N+1, ... | 211 /// value. If this is the induction variable then we extend it to N, N+1, ... |
213 /// this is needed because each iteration in the loop corresponds to a SIMD | 212 /// this is needed because each iteration in the loop corresponds to a SIMD |
214 /// element. | 213 /// element. |
215 Value *getBroadcastInstrs(Value *V); | 214 Value *getBroadcastInstrs(Value *V); |
216 | 215 |
217 /// This function adds 0, 1, 2 ... to each vector element, starting at zero. | 216 /// This function adds 0, 1, 2 ... to each vector element, starting at zero. |
218 /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). | 217 /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). |
219 /// The sequence starts at StartIndex. | 218 /// The sequence starts at StartIndex. |
220 Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); | 219 Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate); |
221 | 220 |
222 /// When we go over instructions in the basic block we rely on previous | 221 /// When we go over instructions in the basic block we rely on previous |
223 /// values within the current basic block or on loop invariant values. | 222 /// values within the current basic block or on loop invariant values. |
224 /// When we widen (vectorize) values we place them in the map. If the values | 223 /// When we widen (vectorize) values we place them in the map. If the values |
225 /// are not within the map, they have to be loop invariant, so we simply | 224 /// are not within the map, they have to be loop invariant, so we simply |
226 /// broadcast them into a vector. | 225 /// broadcast them into a vector. |
227 VectorParts &getVectorValue(Value *V); | 226 VectorParts &getVectorValue(Value *V); |
228 | 227 |
229 /// Generate a shuffle sequence that will reverse the vector Vec. | 228 /// Generate a shuffle sequence that will reverse the vector Vec. |
230 Value *reverseVector(Value *Vec); | 229 Value *reverseVector(Value *Vec); |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
376 struct ReductionDescriptor { | 375 struct ReductionDescriptor { |
377 ReductionDescriptor() : StartValue(0), LoopExitInstr(0), | 376 ReductionDescriptor() : StartValue(0), LoopExitInstr(0), |
378 Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} | 377 Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} |
379 | 378 |
380 ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, | 379 ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, |
381 MinMaxReductionKind MK) | 380 MinMaxReductionKind MK) |
382 : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} | 381 : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} |
383 | 382 |
384 // The starting value of the reduction. | 383 // The starting value of the reduction. |
385 // It does not have to be zero! | 384 // It does not have to be zero! |
386 TrackingVH<Value> StartValue; | 385 Value *StartValue; |
387 // The instruction who's value is used outside the loop. | 386 // The instruction who's value is used outside the loop. |
388 Instruction *LoopExitInstr; | 387 Instruction *LoopExitInstr; |
389 // The kind of the reduction. | 388 // The kind of the reduction. |
390 ReductionKind Kind; | 389 ReductionKind Kind; |
391 // If this a min/max reduction the kind of reduction. | 390 // If this a min/max reduction the kind of reduction. |
392 MinMaxReductionKind MinMaxKind; | 391 MinMaxReductionKind MinMaxKind; |
393 }; | 392 }; |
394 | 393 |
395 /// This POD struct holds information about a potential reduction operation. | 394 /// This POD struct holds information about a potential reduction operation. |
396 struct ReductionInstDesc { | 395 struct ReductionInstDesc { |
(...skipping 24 matching lines...) Expand all Loading... |
421 Starts.clear(); | 420 Starts.clear(); |
422 Ends.clear(); | 421 Ends.clear(); |
423 } | 422 } |
424 | 423 |
425 /// Insert a pointer and calculate the start and end SCEVs. | 424 /// Insert a pointer and calculate the start and end SCEVs. |
426 void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); | 425 void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); |
427 | 426 |
428 /// This flag indicates if we need to add the runtime check. | 427 /// This flag indicates if we need to add the runtime check. |
429 bool Need; | 428 bool Need; |
430 /// Holds the pointers that we need to check. | 429 /// Holds the pointers that we need to check. |
431 SmallVector<TrackingVH<Value>, 2> Pointers; | 430 SmallVector<Value*, 2> Pointers; |
432 /// Holds the pointer value at the beginning of the loop. | 431 /// Holds the pointer value at the beginning of the loop. |
433 SmallVector<const SCEV*, 2> Starts; | 432 SmallVector<const SCEV*, 2> Starts; |
434 /// Holds the pointer value at the end of the loop. | 433 /// Holds the pointer value at the end of the loop. |
435 SmallVector<const SCEV*, 2> Ends; | 434 SmallVector<const SCEV*, 2> Ends; |
436 /// Holds the information if this pointer is used for writing to memory. | 435 /// Holds the information if this pointer is used for writing to memory. |
437 SmallVector<bool, 2> IsWritePtr; | 436 SmallVector<bool, 2> IsWritePtr; |
438 }; | 437 }; |
439 | 438 |
440 /// A POD for saving information about induction variables. | 439 /// A POD for saving information about induction variables. |
441 struct InductionInfo { | 440 struct InductionInfo { |
442 InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} | 441 InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} |
443 InductionInfo() : StartValue(0), IK(IK_NoInduction) {} | 442 InductionInfo() : StartValue(0), IK(IK_NoInduction) {} |
444 /// Start value. | 443 /// Start value. |
445 TrackingVH<Value> StartValue; | 444 Value *StartValue; |
446 /// Induction kind. | 445 /// Induction kind. |
447 InductionKind IK; | 446 InductionKind IK; |
448 }; | 447 }; |
449 | 448 |
450 /// ReductionList contains the reduction descriptors for all | 449 /// ReductionList contains the reduction descriptors for all |
451 /// of the reductions that were found in the loop. | 450 /// of the reductions that were found in the loop. |
452 typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; | 451 typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; |
453 | 452 |
454 /// InductionList saves induction variables and maps them to the | 453 /// InductionList saves induction variables and maps them to the |
455 /// induction descriptor. | 454 /// induction descriptor. |
(...skipping 367 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
823 // Broadcast the scalar into all locations in the vector. | 822 // Broadcast the scalar into all locations in the vector. |
824 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); | 823 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); |
825 | 824 |
826 // Restore the builder insertion point. | 825 // Restore the builder insertion point. |
827 if (Invariant) | 826 if (Invariant) |
828 Builder.SetInsertPoint(Loc); | 827 Builder.SetInsertPoint(Loc); |
829 | 828 |
830 return Shuf; | 829 return Shuf; |
831 } | 830 } |
832 | 831 |
833 Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, | 832 Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, |
834 bool Negate) { | 833 bool Negate) { |
835 assert(Val->getType()->isVectorTy() && "Must be a vector"); | 834 assert(Val->getType()->isVectorTy() && "Must be a vector"); |
836 assert(Val->getType()->getScalarType()->isIntegerTy() && | 835 assert(Val->getType()->getScalarType()->isIntegerTy() && |
837 "Elem must be an integer"); | 836 "Elem must be an integer"); |
838 // Create the types. | 837 // Create the types. |
839 Type *ITy = Val->getType()->getScalarType(); | 838 Type *ITy = Val->getType()->getScalarType(); |
840 VectorType *Ty = cast<VectorType>(Val->getType()); | 839 VectorType *Ty = cast<VectorType>(Val->getType()); |
841 int VLen = Ty->getNumElements(); | 840 int VLen = Ty->getNumElements(); |
842 SmallVector<Constant*, 8> Indices; | 841 SmallVector<Constant*, 8> Indices; |
843 | 842 |
844 // Create a vector of consecutive numbers from zero to VF. | 843 // Create a vector of consecutive numbers from zero to VF. |
845 for (int i = 0; i < VLen; ++i) { | 844 for (int i = 0; i < VLen; ++i) { |
846 int64_t Idx = Negate ? (-i) : i; | 845 int Idx = Negate ? (-i): i; |
847 Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate)); | 846 Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx)); |
848 } | 847 } |
849 | 848 |
850 // Add the consecutive indices to the vector value. | 849 // Add the consecutive indices to the vector value. |
851 Constant *Cv = ConstantVector::get(Indices); | 850 Constant *Cv = ConstantVector::get(Indices); |
852 assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); | 851 assert(Cv->getType() == Val->getType() && "Invalid consecutive vec"); |
853 return Builder.CreateAdd(Val, Cv, "induction"); | 852 return Builder.CreateAdd(Val, Cv, "induction"); |
854 } | 853 } |
855 | 854 |
856 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { | 855 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { |
857 assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); | 856 assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
957 // Attempt to issue a wide load. | 956 // Attempt to issue a wide load. |
958 LoadInst *LI = dyn_cast<LoadInst>(Instr); | 957 LoadInst *LI = dyn_cast<LoadInst>(Instr); |
959 StoreInst *SI = dyn_cast<StoreInst>(Instr); | 958 StoreInst *SI = dyn_cast<StoreInst>(Instr); |
960 | 959 |
961 assert((LI || SI) && "Invalid Load/Store instruction"); | 960 assert((LI || SI) && "Invalid Load/Store instruction"); |
962 | 961 |
963 Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); | 962 Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); |
964 Type *DataTy = VectorType::get(ScalarDataTy, VF); | 963 Type *DataTy = VectorType::get(ScalarDataTy, VF); |
965 Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); | 964 Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); |
966 unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); | 965 unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); |
967 unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); | 966 |
968 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); | 967 unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); |
969 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; | 968 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; |
970 | 969 |
971 if (ScalarAllocatedSize != VectorElementSize) | 970 if (ScalarAllocatedSize != VectorElementSize) |
972 return scalarizeInstruction(Instr); | 971 return scalarizeInstruction(Instr); |
973 | 972 |
974 // If the pointer is loop invariant or if it is non consecutive, | 973 // If the pointer is loop invariant or if it is non consecutive, |
975 // scalarize the load. | 974 // scalarize the load. |
976 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); | 975 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); |
977 bool Reverse = ConsecutiveStride < 0; | 976 bool Reverse = ConsecutiveStride < 0; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1032 if (Reverse) { | 1031 if (Reverse) { |
1033 // If we store to reverse consecutive memory locations then we need | 1032 // If we store to reverse consecutive memory locations then we need |
1034 // to reverse the order of elements in the stored value. | 1033 // to reverse the order of elements in the stored value. |
1035 StoredVal[Part] = reverseVector(StoredVal[Part]); | 1034 StoredVal[Part] = reverseVector(StoredVal[Part]); |
1036 // If the address is consecutive but reversed, then the | 1035 // If the address is consecutive but reversed, then the |
1037 // wide store needs to start at the last vector element. | 1036 // wide store needs to start at the last vector element. |
1038 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); | 1037 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); |
1039 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); | 1038 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); |
1040 } | 1039 } |
1041 | 1040 |
1042 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(Addres
sSpace)); | 1041 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); |
1043 Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); | 1042 Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); |
1044 } | 1043 } |
1045 } | 1044 } |
1046 | 1045 |
1047 for (unsigned Part = 0; Part < UF; ++Part) { | 1046 for (unsigned Part = 0; Part < UF; ++Part) { |
1048 // Calculate the pointer for the specific unroll-part. | 1047 // Calculate the pointer for the specific unroll-part. |
1049 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); | 1048 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); |
1050 | 1049 |
1051 if (Reverse) { | 1050 if (Reverse) { |
1052 // If the address is consecutive but reversed, then the | 1051 // If the address is consecutive but reversed, then the |
1053 // wide store needs to start at the last vector element. | 1052 // wide store needs to start at the last vector element. |
1054 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); | 1053 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); |
1055 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); | 1054 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); |
1056 } | 1055 } |
1057 | 1056 |
1058 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressS
pace)); | 1057 Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); |
1059 Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); | 1058 Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); |
1060 cast<LoadInst>(LI)->setAlignment(Alignment); | 1059 cast<LoadInst>(LI)->setAlignment(Alignment); |
1061 Entry[Part] = Reverse ? reverseVector(LI) : LI; | 1060 Entry[Part] = Reverse ? reverseVector(LI) : LI; |
1062 } | 1061 } |
1063 } | 1062 } |
1064 | 1063 |
1065 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { | 1064 void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { |
1066 assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); | 1065 assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); |
1067 // Holds vector parameters or scalars, in case of uniform vals. | 1066 // Holds vector parameters or scalars, in case of uniform vals. |
1068 SmallVector<VectorParts, 4> Params; | 1067 SmallVector<VectorParts, 4> Params; |
(...skipping 997 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2066 Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, | 2065 Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, |
2067 "resize.norm.idx"); | 2066 "resize.norm.idx"); |
2068 Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, | 2067 Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, |
2069 "reverse.idx"); | 2068 "reverse.idx"); |
2070 | 2069 |
2071 // This is a new value so do not hoist it out. | 2070 // This is a new value so do not hoist it out. |
2072 Value *Broadcasted = getBroadcastInstrs(ReverseInd); | 2071 Value *Broadcasted = getBroadcastInstrs(ReverseInd); |
2073 // After broadcasting the induction variable we need to make the | 2072 // After broadcasting the induction variable we need to make the |
2074 // vector consecutive by adding ... -3, -2, -1, 0. | 2073 // vector consecutive by adding ... -3, -2, -1, 0. |
2075 for (unsigned part = 0; part < UF; ++part) | 2074 for (unsigned part = 0; part < UF; ++part) |
2076 Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, | 2075 Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true); |
2077 true); | |
2078 continue; | 2076 continue; |
2079 } | 2077 } |
2080 | 2078 |
2081 // Handle the pointer induction variable case. | 2079 // Handle the pointer induction variable case. |
2082 assert(P->getType()->isPointerTy() && "Unexpected type."); | 2080 assert(P->getType()->isPointerTy() && "Unexpected type."); |
2083 | 2081 |
2084 // Is this a reverse induction ptr or a consecutive induction ptr. | 2082 // Is this a reverse induction ptr or a consecutive induction ptr. |
2085 bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == | 2083 bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == |
2086 II.IK); | 2084 II.IK); |
2087 | 2085 |
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2301 // We must be able to predicate all blocks that need to be predicated. | 2299 // We must be able to predicate all blocks that need to be predicated. |
2302 if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) | 2300 if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) |
2303 return false; | 2301 return false; |
2304 } | 2302 } |
2305 | 2303 |
2306 // We can if-convert this loop. | 2304 // We can if-convert this loop. |
2307 return true; | 2305 return true; |
2308 } | 2306 } |
2309 | 2307 |
2310 bool LoopVectorizationLegality::canVectorize() { | 2308 bool LoopVectorizationLegality::canVectorize() { |
2311 // We must have a loop in canonical form. Loops with indirectbr in them cannot | 2309 assert(TheLoop->getLoopPreheader() && "No preheader!!"); |
2312 // be canonicalized. | |
2313 if (!TheLoop->getLoopPreheader()) | |
2314 return false; | |
2315 | 2310 |
2316 // We can only vectorize innermost loops. | 2311 // We can only vectorize innermost loops. |
2317 if (TheLoop->getSubLoopsVector().size()) | 2312 if (TheLoop->getSubLoopsVector().size()) |
2318 return false; | 2313 return false; |
2319 | 2314 |
2320 // We must have a single backedge. | 2315 // We must have a single backedge. |
2321 if (TheLoop->getNumBackEdges() != 1) | 2316 if (TheLoop->getNumBackEdges() != 1) |
2322 return false; | 2317 return false; |
2323 | 2318 |
2324 // We must have a single exiting block. | 2319 // We must have a single exiting block. |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2371 DEBUG(dbgs() << "LV: We can vectorize this loop" << | 2366 DEBUG(dbgs() << "LV: We can vectorize this loop" << |
2372 (PtrRtCheck.Need ? " (with a runtime bound check)" : "") | 2367 (PtrRtCheck.Need ? " (with a runtime bound check)" : "") |
2373 <<"!\n"); | 2368 <<"!\n"); |
2374 | 2369 |
2375 // Okay! We can vectorize. At this point we don't have any other mem analysis | 2370 // Okay! We can vectorize. At this point we don't have any other mem analysis |
2376 // which may limit our maximum vectorization factor, so just return true with | 2371 // which may limit our maximum vectorization factor, so just return true with |
2377 // no restrictions. | 2372 // no restrictions. |
2378 return true; | 2373 return true; |
2379 } | 2374 } |
2380 | 2375 |
2381 /// \brief Check that the instruction has outside loop users and is not an | |
2382 /// identified reduction variable. | |
2383 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, | |
2384 SmallPtrSet<Value *, 4> &Reductions) { | |
2385 // Reduction instructions are allowed to have exit users. All other | |
2386 // instructions must not have external users. | |
2387 if (!Reductions.count(Inst)) | |
2388 //Check that all of the users of the loop are inside the BB. | |
2389 for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); | |
2390 I != E; ++I) { | |
2391 Instruction *U = cast<Instruction>(*I); | |
2392 // This user may be a reduction exit value. | |
2393 if (!TheLoop->contains(U)) { | |
2394 DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); | |
2395 return true; | |
2396 } | |
2397 } | |
2398 return false; | |
2399 } | |
2400 | |
2401 bool LoopVectorizationLegality::canVectorizeInstrs() { | 2376 bool LoopVectorizationLegality::canVectorizeInstrs() { |
2402 BasicBlock *PreHeader = TheLoop->getLoopPreheader(); | 2377 BasicBlock *PreHeader = TheLoop->getLoopPreheader(); |
2403 BasicBlock *Header = TheLoop->getHeader(); | 2378 BasicBlock *Header = TheLoop->getHeader(); |
2404 | 2379 |
2405 // If we marked the scalar loop as "already vectorized" then no need | 2380 // If we marked the scalar loop as "already vectorized" then no need |
2406 // to vectorize it again. | 2381 // to vectorize it again. |
2407 if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { | 2382 if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { |
2408 DEBUG(dbgs() << "LV: This loop was vectorized before\n"); | 2383 DEBUG(dbgs() << "LV: This loop was vectorized before\n"); |
2409 return false; | 2384 return false; |
2410 } | 2385 } |
(...skipping 18 matching lines...) Expand all Loading... |
2429 if (!Phi->getType()->isIntegerTy() && | 2404 if (!Phi->getType()->isIntegerTy() && |
2430 !Phi->getType()->isFloatingPointTy() && | 2405 !Phi->getType()->isFloatingPointTy() && |
2431 !Phi->getType()->isPointerTy()) { | 2406 !Phi->getType()->isPointerTy()) { |
2432 DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); | 2407 DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); |
2433 return false; | 2408 return false; |
2434 } | 2409 } |
2435 | 2410 |
2436 // If this PHINode is not in the header block, then we know that we | 2411 // If this PHINode is not in the header block, then we know that we |
2437 // can convert it to select during if-conversion. No need to check if | 2412 // can convert it to select during if-conversion. No need to check if |
2438 // the PHIs in this block are induction or reduction variables. | 2413 // the PHIs in this block are induction or reduction variables. |
2439 if (*bb != Header) { | 2414 if (*bb != Header) |
2440 // Check that this instruction has no outside users or is an | 2415 continue; |
2441 // identified reduction value with an outside user. | |
2442 if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) | |
2443 continue; | |
2444 return false; | |
2445 } | |
2446 | 2416 |
2447 // We only allow if-converted PHIs with more than two incoming values. | 2417 // We only allow if-converted PHIs with more than two incoming values. |
2448 if (Phi->getNumIncomingValues() != 2) { | 2418 if (Phi->getNumIncomingValues() != 2) { |
2449 DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); | 2419 DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); |
2450 return false; | 2420 return false; |
2451 } | 2421 } |
2452 | 2422 |
2453 // This is the value coming from the preheader. | 2423 // This is the value coming from the preheader. |
2454 Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); | 2424 Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); |
2455 // Check if this is an induction variable. | 2425 // Check if this is an induction variable. |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2528 | 2498 |
2529 // Check that the stored type is vectorizable. | 2499 // Check that the stored type is vectorizable. |
2530 if (StoreInst *ST = dyn_cast<StoreInst>(it)) { | 2500 if (StoreInst *ST = dyn_cast<StoreInst>(it)) { |
2531 Type *T = ST->getValueOperand()->getType(); | 2501 Type *T = ST->getValueOperand()->getType(); |
2532 if (!VectorType::isValidElementType(T)) | 2502 if (!VectorType::isValidElementType(T)) |
2533 return false; | 2503 return false; |
2534 } | 2504 } |
2535 | 2505 |
2536 // Reduction instructions are allowed to have exit users. | 2506 // Reduction instructions are allowed to have exit users. |
2537 // All other instructions must not have external users. | 2507 // All other instructions must not have external users. |
2538 if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) | 2508 if (!AllowedExit.count(it)) |
2539 return false; | 2509 //Check that all of the users of the loop are inside the BB. |
2540 | 2510 for (Value::use_iterator I = it->use_begin(), E = it->use_end(); |
| 2511 I != E; ++I) { |
| 2512 Instruction *U = cast<Instruction>(*I); |
| 2513 // This user may be a reduction exit value. |
| 2514 if (!TheLoop->contains(U)) { |
| 2515 DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); |
| 2516 return false; |
| 2517 } |
| 2518 } |
2541 } // next instr. | 2519 } // next instr. |
2542 | 2520 |
2543 } | 2521 } |
2544 | 2522 |
2545 if (!Induction) { | 2523 if (!Induction) { |
2546 DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); | 2524 DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); |
2547 assert(getInductionVars()->size() && "No induction variables"); | 2525 assert(getInductionVars()->size() && "No induction variables"); |
2548 } | 2526 } |
2549 | 2527 |
2550 return true; | 2528 return true; |
(...skipping 1208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3759 // Check for a store. | 3737 // Check for a store. |
3760 if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) | 3738 if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) |
3761 return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; | 3739 return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; |
3762 | 3740 |
3763 // Check for a load. | 3741 // Check for a load. |
3764 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) | 3742 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) |
3765 return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; | 3743 return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; |
3766 | 3744 |
3767 return false; | 3745 return false; |
3768 } | 3746 } |
OLD | NEW |