Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(367)

Unified Diff: test/Transforms/NaCl/vector-canonicalization-stores.ll

Issue 1423873002: PNaCl: Add a vector type legalization pass. Base URL: https://chromium.googlesource.com/native_client/pnacl-llvm.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « test/Transforms/NaCl/vector-canonicalization-shuffles.ll ('k') | tools/bugpoint/bugpoint.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: test/Transforms/NaCl/vector-canonicalization-stores.ll
diff --git a/test/Transforms/NaCl/vector-canonicalization-stores.ll b/test/Transforms/NaCl/vector-canonicalization-stores.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e6dd3738fc3b8be9547c763497f2596714c61d88
--- /dev/null
+++ b/test/Transforms/NaCl/vector-canonicalization-stores.ll
@@ -0,0 +1,8422 @@
+; RUN: opt -S -pnacl-vector-canonicalization %s | FileCheck %s
+
+; Auto-generated tests for store instructions.
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128"
+
+define i32 @store_to_2xi8_unaligned(<2 x i8>*, <2 x i8>) {
+entry:
+ store <2 x i8> %1, <2 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8_unaligned(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi16_unaligned(<2 x i16>*, <2 x i16>) {
+entry:
+ store <2 x i16> %1, <2 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi16_unaligned(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi32_unaligned(<2 x i32>*, <2 x i32>) {
+entry:
+ store <2 x i32> %1, <2 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi32_unaligned(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store i32 %3, i32* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store i32 %5, i32* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi64_unaligned(<2 x i64>*, <2 x i64>) {
+entry:
+ store <2 x i64> %1, <2 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi64_unaligned(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi8ptr_unaligned(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store <2 x i8*> %1, <2 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8ptr_unaligned(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store i8* %3, i8** %2
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store i8* %5, i8** %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xfloat_unaligned(<2 x float>*, <2 x float>) {
+entry:
+ store <2 x float> %1, <2 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xfloat_unaligned(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store float %3, float* %2
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store float %5, float* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xdouble_unaligned(<2 x double>*, <2 x double>) {
+entry:
+ store <2 x double> %1, <2 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xdouble_unaligned(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi8_unaligned(<4 x i8>*, <4 x i8>) {
+entry:
+ store <4 x i8> %1, <4 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8_unaligned(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi16_unaligned(<4 x i16>*, <4 x i16>) {
+entry:
+ store <4 x i16> %1, <4 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi16_unaligned(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi32_unaligned(<4 x i32>*, <4 x i32>) {
+entry:
+ store <4 x i32> %1, <4 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi32_unaligned(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi64_unaligned(<4 x i64>*, <4 x i64>) {
+entry:
+ store <4 x i64> %1, <4 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi64_unaligned(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %4
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi8ptr_unaligned(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store <4 x i8*> %1, <4 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8ptr_unaligned(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xfloat_unaligned(<4 x float>*, <4 x float>) {
+entry:
+ store <4 x float> %1, <4 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xfloat_unaligned(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xdouble_unaligned(<4 x double>*, <4 x double>) {
+entry:
+ store <4 x double> %1, <4 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xdouble_unaligned(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %4
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi8_unaligned(<6 x i8>*, <6 x i8>) {
+entry:
+ store <6 x i8> %1, <6 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8_unaligned(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi16_unaligned(<6 x i16>*, <6 x i16>) {
+entry:
+ store <6 x i16> %1, <6 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi16_unaligned(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store i16 %11, i16* %10
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store i16 %13, i16* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi32_unaligned(<6 x i32>*, <6 x i32>) {
+entry:
+ store <6 x i32> %1, <6 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi32_unaligned(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store i32 %6, i32* %5
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store i32 %8, i32* %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi64_unaligned(<6 x i64>*, <6 x i64>) {
+entry:
+ store <6 x i64> %1, <6 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi64_unaligned(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %5
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %7
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi8ptr_unaligned(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store <6 x i8*> %1, <6 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8ptr_unaligned(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store i8* %6, i8** %5
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store i8* %8, i8** %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xfloat_unaligned(<6 x float>*, <6 x float>) {
+entry:
+ store <6 x float> %1, <6 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xfloat_unaligned(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store float %6, float* %5
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store float %8, float* %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xdouble_unaligned(<6 x double>*, <6 x double>) {
+entry:
+ store <6 x double> %1, <6 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xdouble_unaligned(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %5
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %7
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi8_unaligned(<8 x i8>*, <8 x i8>) {
+entry:
+ store <8 x i8> %1, <8 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8_unaligned(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi16_unaligned(<8 x i16>*, <8 x i16>) {
+entry:
+ store <8 x i16> %1, <8 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi16_unaligned(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi32_unaligned(<8 x i32>*, <8 x i32>) {
+entry:
+ store <8 x i32> %1, <8 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi32_unaligned(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi64_unaligned(<8 x i64>*, <8 x i64>) {
+entry:
+ store <8 x i64> %1, <8 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi64_unaligned(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %6
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %8
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi8ptr_unaligned(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store <8 x i8*> %1, <8 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8ptr_unaligned(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xfloat_unaligned(<8 x float>*, <8 x float>) {
+entry:
+ store <8 x float> %1, <8 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xfloat_unaligned(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xdouble_unaligned(<8 x double>*, <8 x double>) {
+entry:
+ store <8 x double> %1, <8 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xdouble_unaligned(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %6
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %8
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi8_unaligned(<12 x i8>*, <12 x i8>) {
+entry:
+ store <12 x i8> %1, <12 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8_unaligned(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store i8 %19, i8* %18
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store i8 %21, i8* %20
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store i8 %23, i8* %22
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store i8 %25, i8* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi16_unaligned(<12 x i16>*, <12 x i16>) {
+entry:
+ store <12 x i16> %1, <12 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi16_unaligned(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store i16 %6, i16* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store i16 %8, i16* %7
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store i16 %10, i16* %9
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store i16 %12, i16* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi32_unaligned(<12 x i32>*, <12 x i32>) {
+entry:
+ store <12 x i32> %1, <12 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi32_unaligned(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi64_unaligned(<12 x i64>*, <12 x i64>) {
+entry:
+ store <12 x i64> %1, <12 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi64_unaligned(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %8
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %18
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi8ptr_unaligned(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store <12 x i8*> %1, <12 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8ptr_unaligned(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xfloat_unaligned(<12 x float>*, <12 x float>) {
+entry:
+ store <12 x float> %1, <12 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xfloat_unaligned(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xdouble_unaligned(<12 x double>*, <12 x double>) {
+entry:
+ store <12 x double> %1, <12 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xdouble_unaligned(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %8
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %18
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi8_unaligned(<16 x i8>*, <16 x i8>) {
+entry:
+ store <16 x i8> %1, <16 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8_unaligned(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi16_unaligned(<16 x i16>*, <16 x i16>) {
+entry:
+ store <16 x i16> %1, <16 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi16_unaligned(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi32_unaligned(<16 x i32>*, <16 x i32>) {
+entry:
+ store <16 x i32> %1, <16 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi32_unaligned(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi64_unaligned(<16 x i64>*, <16 x i64>) {
+entry:
+ store <16 x i64> %1, <16 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi64_unaligned(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %18
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %20
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %22
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi8ptr_unaligned(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store <16 x i8*> %1, <16 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8ptr_unaligned(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xfloat_unaligned(<16 x float>*, <16 x float>) {
+entry:
+ store <16 x float> %1, <16 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xfloat_unaligned(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xdouble_unaligned(<16 x double>*, <16 x double>) {
+entry:
+ store <16 x double> %1, <16 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xdouble_unaligned(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %18
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %20
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %22
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi8_unaligned(<20 x i8>*, <20 x i8>) {
+entry:
+ store <20 x i8> %1, <20 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8_unaligned(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %4
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store i8 %6, i8* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store i8 %8, i8* %7
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store i8 %10, i8* %9
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store i8 %12, i8* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi16_unaligned(<20 x i16>*, <20 x i16>) {
+entry:
+ store <20 x i16> %1, <20 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi16_unaligned(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %5
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store i16 %9, i16* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store i16 %11, i16* %10
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store i16 %13, i16* %12
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store i16 %15, i16* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi32_unaligned(<20 x i32>*, <20 x i32>) {
+entry:
+ store <20 x i32> %1, <20 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi32_unaligned(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi64_unaligned(<20 x i64>*, <20 x i64>) {
+entry:
+ store <20 x i64> %1, <20 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi64_unaligned(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %18
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %20
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %22
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %24
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %26
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %28
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %30
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi8ptr_unaligned(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store <20 x i8*> %1, <20 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8ptr_unaligned(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xfloat_unaligned(<20 x float>*, <20 x float>) {
+entry:
+ store <20 x float> %1, <20 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xfloat_unaligned(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xdouble_unaligned(<20 x double>*, <20 x double>) {
+entry:
+ store <20 x double> %1, <20 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xdouble_unaligned(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %18
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %20
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %22
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %24
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %26
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %28
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %30
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi8_align_1(<2 x i8>*, <2 x i8>) {
+entry:
+ store <2 x i8> %1, <2 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8_align_1(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi16_align_1(<2 x i16>*, <2 x i16>) {
+entry:
+ store <2 x i16> %1, <2 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi16_align_1(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi32_align_1(<2 x i32>*, <2 x i32>) {
+entry:
+ store <2 x i32> %1, <2 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi32_align_1(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store i32 %3, i32* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store i32 %5, i32* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi64_align_1(<2 x i64>*, <2 x i64>) {
+entry:
+ store <2 x i64> %1, <2 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi64_align_1(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xi8ptr_align_1(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store <2 x i8*> %1, <2 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8ptr_align_1(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store i8* %3, i8** %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store i8* %5, i8** %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xfloat_align_1(<2 x float>*, <2 x float>) {
+entry:
+ store <2 x float> %1, <2 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xfloat_align_1(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store float %3, float* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store float %5, float* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_2xdouble_align_1(<2 x double>*, <2 x double>) {
+entry:
+ store <2 x double> %1, <2 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xdouble_align_1(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi8_align_1(<4 x i8>*, <4 x i8>) {
+entry:
+ store <4 x i8> %1, <4 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8_align_1(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi16_align_1(<4 x i16>*, <4 x i16>) {
+entry:
+ store <4 x i16> %1, <4 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi16_align_1(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi32_align_1(<4 x i32>*, <4 x i32>) {
+entry:
+ store <4 x i32> %1, <4 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi32_align_1(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi64_align_1(<4 x i64>*, <4 x i64>) {
+entry:
+ store <4 x i64> %1, <4 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi64_align_1(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xi8ptr_align_1(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store <4 x i8*> %1, <4 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8ptr_align_1(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xfloat_align_1(<4 x float>*, <4 x float>) {
+entry:
+ store <4 x float> %1, <4 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xfloat_align_1(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_4xdouble_align_1(<4 x double>*, <4 x double>) {
+entry:
+ store <4 x double> %1, <4 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xdouble_align_1(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi8_align_1(<6 x i8>*, <6 x i8>) {
+entry:
+ store <6 x i8> %1, <6 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8_align_1(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi16_align_1(<6 x i16>*, <6 x i16>) {
+entry:
+ store <6 x i16> %1, <6 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi16_align_1(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store i16 %11, i16* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store i16 %13, i16* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi32_align_1(<6 x i32>*, <6 x i32>) {
+entry:
+ store <6 x i32> %1, <6 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi32_align_1(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store i32 %6, i32* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store i32 %8, i32* %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi64_align_1(<6 x i64>*, <6 x i64>) {
+entry:
+ store <6 x i64> %1, <6 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi64_align_1(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xi8ptr_align_1(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store <6 x i8*> %1, <6 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8ptr_align_1(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store i8* %6, i8** %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store i8* %8, i8** %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xfloat_align_1(<6 x float>*, <6 x float>) {
+entry:
+ store <6 x float> %1, <6 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xfloat_align_1(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store float %6, float* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store float %8, float* %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_6xdouble_align_1(<6 x double>*, <6 x double>) {
+entry:
+ store <6 x double> %1, <6 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xdouble_align_1(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi8_align_1(<8 x i8>*, <8 x i8>) {
+entry:
+ store <8 x i8> %1, <8 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8_align_1(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi16_align_1(<8 x i16>*, <8 x i16>) { ; Test input: one <8 x i16> store with align 1.
+entry:
+ store <8 x i16> %1, <8 x i16>* %0, align 1 ; Expectations that follow: signature and store are left unchanged (still a single <8 x i16> store, align 1).
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi16_align_1(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi32_align_1(<8 x i32>*, <8 x i32>) { ; Test input: one <8 x i32> store with align 1.
+entry:
+ store <8 x i32> %1, <8 x i32>* %0, align 1 ; Expectations that follow: value arrives as two <4 x i32> arguments and is written by two <4 x i32> stores at elements 0 and 4, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi32_align_1(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi64_align_1(<8 x i64>*, <8 x i64>) { ; Test input: one <8 x i64> store with align 1.
+entry:
+ store <8 x i64> %1, <8 x i64>* %0, align 1 ; Expectations that follow: value arrives as four <2 x i64> arguments and is written by four <2 x i64> stores at elements 0, 2, 4 and 6, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi64_align_1(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xi8ptr_align_1(<8 x i8*>*, <8 x i8*>) { ; Test input: one <8 x i8*> (vector of pointers) store with align 1.
+entry:
+ store <8 x i8*> %1, <8 x i8*>* %0, align 1 ; Expectations that follow: value arrives as two <4 x i8*> arguments and is written by two <4 x i8*> stores at elements 0 and 4, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8ptr_align_1(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xfloat_align_1(<8 x float>*, <8 x float>) { ; Test input: one <8 x float> store with align 1.
+entry:
+ store <8 x float> %1, <8 x float>* %0, align 1 ; Expectations that follow: value arrives as two <4 x float> arguments and is written by two <4 x float> stores at elements 0 and 4, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xfloat_align_1(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_8xdouble_align_1(<8 x double>*, <8 x double>) { ; Test input: one <8 x double> store with align 1.
+entry:
+ store <8 x double> %1, <8 x double>* %0, align 1 ; Expectations that follow: value arrives as four <2 x double> arguments and is written by four <2 x double> stores at elements 0, 2, 4 and 6, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xdouble_align_1(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi8_align_1(<12 x i8>*, <12 x i8>) { ; Test input: one <12 x i8> store with align 1.
+entry:
+ store <12 x i8> %1, <12 x i8>* %0, align 1 ; Expectations that follow: value arrives as a single <16 x i8> argument; elements 0-11 are extracted one by one and written by twelve scalar i8 stores, each align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8_align_1(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store i8 %19, i8* %18, align 1
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store i8 %23, i8* %22, align 1
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi16_align_1(<12 x i16>*, <12 x i16>) { ; Test input: one <12 x i16> store with align 1.
+entry:
+ store <12 x i16> %1, <12 x i16>* %0, align 1 ; Expectations that follow: value arrives as two <8 x i16> arguments; the first is written by one <8 x i16> store at element 0, and elements 0-3 of the second are extracted and written as scalar i16 stores to elements 8-11, all align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi16_align_1(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store i16 %6, i16* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store i16 %8, i16* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store i16 %10, i16* %9, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store i16 %12, i16* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi32_align_1(<12 x i32>*, <12 x i32>) { ; Test input: one <12 x i32> store with align 1.
+entry:
+ store <12 x i32> %1, <12 x i32>* %0, align 1 ; Expectations that follow: value arrives as three <4 x i32> arguments and is written by three <4 x i32> stores at elements 0, 4 and 8, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi32_align_1(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi64_align_1(<12 x i64>*, <12 x i64>) { ; Test input: one <12 x i64> store with align 1.
+entry:
+ store <12 x i64> %1, <12 x i64>* %0, align 1 ; Expectations that follow: value arrives as six <2 x i64> arguments and is written by six <2 x i64> stores at elements 0, 2, 4, 6, 8 and 10, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi64_align_1(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %18, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xi8ptr_align_1(<12 x i8*>*, <12 x i8*>) { ; Test input: one <12 x i8*> (vector of pointers) store with align 1.
+entry:
+ store <12 x i8*> %1, <12 x i8*>* %0, align 1 ; Expectations that follow: value arrives as three <4 x i8*> arguments and is written by three <4 x i8*> stores at elements 0, 4 and 8, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8ptr_align_1(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xfloat_align_1(<12 x float>*, <12 x float>) { ; Test input: one <12 x float> store with align 1.
+entry:
+ store <12 x float> %1, <12 x float>* %0, align 1 ; Expectations that follow: value arrives as three <4 x float> arguments and is written by three <4 x float> stores at elements 0, 4 and 8, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xfloat_align_1(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_12xdouble_align_1(<12 x double>*, <12 x double>) { ; Test input: one <12 x double> store with align 1.
+entry:
+ store <12 x double> %1, <12 x double>* %0, align 1 ; Expectations that follow: value arrives as six <2 x double> arguments and is written by six <2 x double> stores at elements 0, 2, 4, 6, 8 and 10, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xdouble_align_1(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %18, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi8_align_1(<16 x i8>*, <16 x i8>) { ; Test input: one <16 x i8> store with align 1.
+entry:
+ store <16 x i8> %1, <16 x i8>* %0, align 1 ; Expectations that follow: signature and store are left unchanged (still a single <16 x i8> store, align 1).
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8_align_1(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi16_align_1(<16 x i16>*, <16 x i16>) { ; Test input: one <16 x i16> store with align 1.
+entry:
+ store <16 x i16> %1, <16 x i16>* %0, align 1 ; Expectations that follow: value arrives as two <8 x i16> arguments and is written by two <8 x i16> stores at elements 0 and 8, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi16_align_1(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi32_align_1(<16 x i32>*, <16 x i32>) { ; Test input: one <16 x i32> store with align 1.
+entry:
+ store <16 x i32> %1, <16 x i32>* %0, align 1 ; Expectations that follow: value arrives as four <4 x i32> arguments and is written by four <4 x i32> stores at elements 0, 4, 8 and 12, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi32_align_1(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi64_align_1(<16 x i64>*, <16 x i64>) { ; Test input: one <16 x i64> store with align 1.
+entry:
+ store <16 x i64> %1, <16 x i64>* %0, align 1 ; Expectations that follow: value arrives as eight <2 x i64> arguments and is written by eight <2 x i64> stores at even elements 0 through 14, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi64_align_1(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xi8ptr_align_1(<16 x i8*>*, <16 x i8*>) { ; Test input: one <16 x i8*> (vector of pointers) store with align 1.
+entry:
+ store <16 x i8*> %1, <16 x i8*>* %0, align 1 ; Expectations that follow: value arrives as four <4 x i8*> arguments and is written by four <4 x i8*> stores at elements 0, 4, 8 and 12, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8ptr_align_1(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xfloat_align_1(<16 x float>*, <16 x float>) { ; Test input: one <16 x float> store with align 1.
+entry:
+ store <16 x float> %1, <16 x float>* %0, align 1 ; Expectations that follow: value arrives as four <4 x float> arguments and is written by four <4 x float> stores at elements 0, 4, 8 and 12, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xfloat_align_1(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_16xdouble_align_1(<16 x double>*, <16 x double>) { ; Test input: one <16 x double> store with align 1.
+entry:
+ store <16 x double> %1, <16 x double>* %0, align 1 ; Expectations that follow: value arrives as eight <2 x double> arguments and is written by eight <2 x double> stores at even elements 0 through 14, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xdouble_align_1(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi8_align_1(<20 x i8>*, <20 x i8>) { ; Test input: one <20 x i8> store with align 1.
+entry:
+ store <20 x i8> %1, <20 x i8>* %0, align 1 ; Expectations that follow: value arrives as two <16 x i8> arguments; the first is written by one <16 x i8> store at element 0, and elements 0-3 of the second are extracted and written as scalar i8 stores to elements 16-19, all align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8_align_1(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store i8 %6, i8* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store i8 %10, i8* %9, align 1
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi16_align_1(<20 x i16>*, <20 x i16>) { ; Test input: one <20 x i16> store with align 1.
+entry:
+ store <20 x i16> %1, <20 x i16>* %0, align 1 ; Expectations that follow: value arrives as three <8 x i16> arguments; the first two are written by <8 x i16> stores at elements 0 and 8, and elements 0-3 of the third are extracted and written as scalar i16 stores to elements 16-19, all align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi16_align_1(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store i16 %9, i16* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store i16 %11, i16* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store i16 %13, i16* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store i16 %15, i16* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi32_align_1(<20 x i32>*, <20 x i32>) { ; Test input: one <20 x i32> store with align 1.
+entry:
+ store <20 x i32> %1, <20 x i32>* %0, align 1 ; Expectations that follow: value arrives as five <4 x i32> arguments and is written by five <4 x i32> stores at elements 0, 4, 8, 12 and 16, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi32_align_1(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi64_align_1(<20 x i64>*, <20 x i64>) { ; Test input: one <20 x i64> store with align 1.
+entry:
+ store <20 x i64> %1, <20 x i64>* %0, align 1 ; Expectations that follow: value arrives as ten <2 x i64> arguments and is written by ten <2 x i64> stores at even elements 0 through 18, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi64_align_1(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %24, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %26, align 1
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %28, align 1
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %30, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xi8ptr_align_1(<20 x i8*>*, <20 x i8*>) { ; Test input: one <20 x i8*> (vector of pointers) store with align 1.
+entry:
+ store <20 x i8*> %1, <20 x i8*>* %0, align 1 ; Expectations that follow: value arrives as five <4 x i8*> arguments and is written by five <4 x i8*> stores at elements 0, 4, 8, 12 and 16, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8ptr_align_1(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xfloat_align_1(<20 x float>*, <20 x float>) { ; Test input: one <20 x float> store with align 1.
+entry:
+ store <20 x float> %1, <20 x float>* %0, align 1 ; Expectations that follow: value arrives as five <4 x float> arguments and is written by five <4 x float> stores at elements 0, 4, 8, 12 and 16, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xfloat_align_1(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xdouble_align_1(<20 x double>*, <20 x double>) { ; Test input: one <20 x double> store with align 1.
+entry:
+ store <20 x double> %1, <20 x double>* %0, align 1 ; Expectations that follow: value arrives as ten <2 x double> arguments and is written by ten <2 x double> stores at even elements 0 through 18, each keeping align 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xdouble_align_1(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %24, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %26, align 1
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %28, align 1
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %30, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x i8> with align 8. The expectations below require the value
+; argument to become <16 x i8> and the store to become two scalar i8 stores
+; with align 8 (element 0) and align 1 (element 1).
+define i32 @store_to_2xi8_align_8(<2 x i8>*, <2 x i8>) {
+entry:
+ store <2 x i8> %1, <2 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8_align_8(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x i16> with align 8. The expectations below require the value
+; argument to become <8 x i16> and the store to become two scalar i16 stores
+; with align 8 (element 0) and align 2 (element 1).
+define i32 @store_to_2xi16_align_8(<2 x i16>*, <2 x i16>) {
+entry:
+ store <2 x i16> %1, <2 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi16_align_8(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x i32> with align 8. The expectations below require the value
+; argument to become <4 x i32> and the store to become two scalar i32 stores
+; with align 8 (element 0) and align 4 (element 1).
+define i32 @store_to_2xi32_align_8(<2 x i32>*, <2 x i32>) {
+entry:
+ store <2 x i32> %1, <2 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi32_align_8(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store i32 %3, i32* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store i32 %5, i32* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x i64> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_2xi64_align_8(<2 x i64>*, <2 x i64>) {
+entry:
+ store <2 x i64> %1, <2 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi64_align_8(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x i8*> with align 8. The expectations below require the value
+; argument to become <4 x i8*> and the store to become two scalar i8* stores
+; with align 8 (element 0) and align 4 (element 1).
+define i32 @store_to_2xi8ptr_align_8(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store <2 x i8*> %1, <2 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8ptr_align_8(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store i8* %3, i8** %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store i8* %5, i8** %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x float> with align 8. The expectations below require the
+; value argument to become <4 x float> and the store to become two scalar
+; float stores with align 8 (element 0) and align 4 (element 1).
+define i32 @store_to_2xfloat_align_8(<2 x float>*, <2 x float>) {
+entry:
+ store <2 x float> %1, <2 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xfloat_align_8(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store float %3, float* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store float %5, float* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <2 x double> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_2xdouble_align_8(<2 x double>*, <2 x double>) {
+entry:
+ store <2 x double> %1, <2 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xdouble_align_8(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x i8> with align 8. The expectations below require the value
+; argument to become <16 x i8> and the store to become four scalar i8 stores
+; whose alignments are, in element order, 8, 1, 2, 1.
+define i32 @store_to_4xi8_align_8(<4 x i8>*, <4 x i8>) {
+entry:
+ store <4 x i8> %1, <4 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8_align_8(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x i16> with align 8. The expectations below require the value
+; argument to become <8 x i16> and the store to become four scalar i16 stores
+; whose alignments are, in element order, 8, 2, 4, 2.
+define i32 @store_to_4xi16_align_8(<4 x i16>*, <4 x i16>) {
+entry:
+ store <4 x i16> %1, <4 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi16_align_8(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x i32> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_4xi32_align_8(<4 x i32>*, <4 x i32>) {
+entry:
+ store <4 x i32> %1, <4 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi32_align_8(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x i64> with align 8. The expectations below require two
+; <2 x i64> arguments and two <2 x i64> stores at element offsets 0 and 2,
+; both with align 8.
+define i32 @store_to_4xi64_align_8(<4 x i64>*, <4 x i64>) {
+entry:
+ store <4 x i64> %1, <4 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi64_align_8(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x i8*> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_4xi8ptr_align_8(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store <4 x i8*> %1, <4 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8ptr_align_8(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x float> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_4xfloat_align_8(<4 x float>*, <4 x float>) {
+entry:
+ store <4 x float> %1, <4 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xfloat_align_8(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <4 x double> with align 8. The expectations below require two
+; <2 x double> arguments and two <2 x double> stores at element offsets 0
+; and 2, both with align 8.
+define i32 @store_to_4xdouble_align_8(<4 x double>*, <4 x double>) {
+entry:
+ store <4 x double> %1, <4 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xdouble_align_8(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x i8> with align 8. The expectations below require the value
+; argument to become <16 x i8> and the store to become six scalar i8 stores
+; whose alignments are, in element order, 8, 1, 2, 1, 4, 1.
+define i32 @store_to_6xi8_align_8(<6 x i8>*, <6 x i8>) {
+entry:
+ store <6 x i8> %1, <6 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8_align_8(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x i16> with align 8. The expectations below require the value
+; argument to become <8 x i16> and the store to become six scalar i16 stores
+; whose alignments are, in element order, 8, 2, 4, 2, 8, 2.
+define i32 @store_to_6xi16_align_8(<6 x i16>*, <6 x i16>) {
+entry:
+ store <6 x i16> %1, <6 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi16_align_8(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store i16 %11, i16* %10, align 8
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store i16 %13, i16* %12, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x i32> with align 8. The expectations below require two
+; <4 x i32> arguments: elements 0-3 are written by one <4 x i32> store with
+; align 8, and elements 4 and 5 by scalar i32 stores with align 8 and 4.
+define i32 @store_to_6xi32_align_8(<6 x i32>*, <6 x i32>) {
+entry:
+ store <6 x i32> %1, <6 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi32_align_8(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store i32 %6, i32* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store i32 %8, i32* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x i64> with align 8. The expectations below require three
+; <2 x i64> arguments and three <2 x i64> stores at element offsets 0, 2
+; and 4, each with align 8.
+define i32 @store_to_6xi64_align_8(<6 x i64>*, <6 x i64>) {
+entry:
+ store <6 x i64> %1, <6 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi64_align_8(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x i8*> with align 8. The expectations below require two
+; <4 x i8*> arguments: elements 0-3 are written by one <4 x i8*> store with
+; align 8, and elements 4 and 5 by scalar i8* stores with align 8 and 4.
+define i32 @store_to_6xi8ptr_align_8(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store <6 x i8*> %1, <6 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8ptr_align_8(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store i8* %6, i8** %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store i8* %8, i8** %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x float> with align 8. The expectations below require two
+; <4 x float> arguments: elements 0-3 are written by one <4 x float> store
+; with align 8, and elements 4 and 5 by scalar float stores with align 8 and 4.
+define i32 @store_to_6xfloat_align_8(<6 x float>*, <6 x float>) {
+entry:
+ store <6 x float> %1, <6 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xfloat_align_8(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store float %6, float* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store float %8, float* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <6 x double> with align 8. The expectations below require three
+; <2 x double> arguments and three <2 x double> stores at element offsets
+; 0, 2 and 4, each with align 8.
+define i32 @store_to_6xdouble_align_8(<6 x double>*, <6 x double>) {
+entry:
+ store <6 x double> %1, <6 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xdouble_align_8(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x i8> with align 8. The expectations below require the value
+; argument to become <16 x i8> and the store to become eight scalar i8 stores
+; whose alignments are, in element order, 8, 1, 2, 1, 4, 1, 2, 1.
+define i32 @store_to_8xi8_align_8(<8 x i8>*, <8 x i8>) {
+entry:
+ store <8 x i8> %1, <8 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8_align_8(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x i16> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_8xi16_align_8(<8 x i16>*, <8 x i16>) {
+entry:
+ store <8 x i16> %1, <8 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi16_align_8(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x i32> with align 8. The expectations below require two
+; <4 x i32> arguments and two <4 x i32> stores at element offsets 0 and 4,
+; both with align 8.
+define i32 @store_to_8xi32_align_8(<8 x i32>*, <8 x i32>) {
+entry:
+ store <8 x i32> %1, <8 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi32_align_8(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x i64> with align 8. The expectations below require four
+; <2 x i64> arguments and four <2 x i64> stores at element offsets 0, 2, 4
+; and 6, each with align 8.
+define i32 @store_to_8xi64_align_8(<8 x i64>*, <8 x i64>) {
+entry:
+ store <8 x i64> %1, <8 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi64_align_8(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x i8*> with align 8. The expectations below require two
+; <4 x i8*> arguments and two <4 x i8*> stores at element offsets 0 and 4,
+; both with align 8.
+define i32 @store_to_8xi8ptr_align_8(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store <8 x i8*> %1, <8 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8ptr_align_8(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x float> with align 8. The expectations below require two
+; <4 x float> arguments and two <4 x float> stores at element offsets 0
+; and 4, both with align 8.
+define i32 @store_to_8xfloat_align_8(<8 x float>*, <8 x float>) {
+entry:
+ store <8 x float> %1, <8 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xfloat_align_8(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of an <8 x double> with align 8. The expectations below require four
+; <2 x double> arguments and four <2 x double> stores at element offsets
+; 0, 2, 4 and 6, each with align 8.
+define i32 @store_to_8xdouble_align_8(<8 x double>*, <8 x double>) {
+entry:
+ store <8 x double> %1, <8 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xdouble_align_8(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x i8> with align 8. The expectations below require the value
+; argument to become <16 x i8> and the store to become twelve scalar i8 stores
+; whose alignments are, in element order, 8, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1.
+define i32 @store_to_12xi8_align_8(<12 x i8>*, <12 x i8>) {
+entry:
+ store <12 x i8> %1, <12 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8_align_8(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store i8 %19, i8* %18, align 8
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store i8 %23, i8* %22, align 2
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x i16> with align 8. The expectations below require two
+; <8 x i16> arguments: elements 0-7 are written by one <8 x i16> store with
+; align 8, and elements 8-11 by scalar i16 stores with align 8, 2, 4, 2.
+define i32 @store_to_12xi16_align_8(<12 x i16>*, <12 x i16>) {
+entry:
+ store <12 x i16> %1, <12 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi16_align_8(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store i16 %6, i16* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store i16 %8, i16* %7, align 2
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store i16 %10, i16* %9, align 4
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store i16 %12, i16* %11, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x i32> with align 8. The expectations below require three
+; <4 x i32> arguments and three <4 x i32> stores at element offsets 0, 4
+; and 8, each with align 8.
+define i32 @store_to_12xi32_align_8(<12 x i32>*, <12 x i32>) {
+entry:
+ store <12 x i32> %1, <12 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi32_align_8(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x i64> with align 8. The expectations below require six
+; <2 x i64> arguments and six <2 x i64> stores at the even element offsets
+; 0 through 10, each with align 8.
+define i32 @store_to_12xi64_align_8(<12 x i64>*, <12 x i64>) {
+entry:
+ store <12 x i64> %1, <12 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi64_align_8(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %18, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x i8*> with align 8. The expectations below require three
+; <4 x i8*> arguments and three <4 x i8*> stores at element offsets 0, 4
+; and 8, each with align 8.
+define i32 @store_to_12xi8ptr_align_8(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store <12 x i8*> %1, <12 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8ptr_align_8(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x float> with align 8. The expectations below require three
+; <4 x float> arguments and three <4 x float> stores at element offsets 0, 4
+; and 8, each with align 8.
+define i32 @store_to_12xfloat_align_8(<12 x float>*, <12 x float>) {
+entry:
+ store <12 x float> %1, <12 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xfloat_align_8(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <12 x double> with align 8. The expectations below require six
+; <2 x double> arguments and six <2 x double> stores at the even element
+; offsets 0 through 10, each with align 8.
+define i32 @store_to_12xdouble_align_8(<12 x double>*, <12 x double>) {
+entry:
+ store <12 x double> %1, <12 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xdouble_align_8(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %18, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Store of a <16 x i8> with align 8. The expectations below require the
+; signature and the single vector store to come out unchanged.
+define i32 @store_to_16xi8_align_8(<16 x i8>*, <16 x i8>) {
+entry:
+ store <16 x i8> %1, <16 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8_align_8(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x i16> value through %0 with align 8.
+; The expectations that follow require two <8 x i16> parameters, stored at
+; element offsets 0 and 8, both with align 8.
+define i32 @store_to_16xi16_align_8(<16 x i16>*, <16 x i16>) {
+entry:
+ store <16 x i16> %1, <16 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi16_align_8(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x i32> value through %0 with align 8.
+; The expectations that follow require four <4 x i32> parameters, stored at
+; element offsets 0, 4, 8 and 12, all with align 8.
+define i32 @store_to_16xi32_align_8(<16 x i32>*, <16 x i32>) {
+entry:
+ store <16 x i32> %1, <16 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi32_align_8(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x i64> value through %0 with align 8.
+; The expectations that follow require eight <2 x i64> parameters, stored at
+; the even element offsets 0 through 14, all with align 8.
+define i32 @store_to_16xi64_align_8(<16 x i64>*, <16 x i64>) {
+entry:
+ store <16 x i64> %1, <16 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi64_align_8(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %24, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x i8*> value through %0 with align 8.
+; The expectations that follow require four <4 x i8*> parameters, stored at
+; element offsets 0, 4, 8 and 12, all with align 8.
+define i32 @store_to_16xi8ptr_align_8(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store <16 x i8*> %1, <16 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8ptr_align_8(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x float> value through %0 with align 8.
+; The expectations that follow require four <4 x float> parameters, stored at
+; element offsets 0, 4, 8 and 12, all with align 8.
+define i32 @store_to_16xfloat_align_8(<16 x float>*, <16 x float>) {
+entry:
+ store <16 x float> %1, <16 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xfloat_align_8(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <16 x double> value through %0 with align 8.
+; The expectations that follow require eight <2 x double> parameters, stored
+; at the even element offsets 0 through 14, all with align 8.
+define i32 @store_to_16xdouble_align_8(<16 x double>*, <16 x double>) {
+entry:
+ store <16 x double> %1, <16 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xdouble_align_8(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %24, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x i8> value through %0 with align 8.
+; The expectations that follow require two <16 x i8> parameters: the first is
+; stored whole at element 0 (align 8); elements 16..19 are extracted from the
+; second and stored as scalars with align 8, 1, 2 and 1 respectively.
+define i32 @store_to_20xi8_align_8(<20 x i8>*, <20 x i8>) {
+entry:
+ store <20 x i8> %1, <20 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8_align_8(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store i8 %6, i8* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store i8 %10, i8* %9, align 2
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x i16> value through %0 with align 8.
+; The expectations that follow require three <8 x i16> parameters: the first
+; two are stored whole at elements 0 and 8 (align 8); elements 16..19 are
+; extracted from the third and stored as scalars with align 8, 2, 4 and 2.
+define i32 @store_to_20xi16_align_8(<20 x i16>*, <20 x i16>) {
+entry:
+ store <20 x i16> %1, <20 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi16_align_8(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store i16 %9, i16* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store i16 %11, i16* %10, align 2
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store i16 %13, i16* %12, align 4
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store i16 %15, i16* %14, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x i32> value through %0 with align 8.
+; The expectations that follow require five <4 x i32> parameters, stored at
+; element offsets 0, 4, 8, 12 and 16, all with align 8.
+define i32 @store_to_20xi32_align_8(<20 x i32>*, <20 x i32>) {
+entry:
+ store <20 x i32> %1, <20 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi32_align_8(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x i64> value through %0 with align 8.
+; The expectations that follow require ten <2 x i64> parameters, stored at
+; the even element offsets 0 through 18, all with align 8.
+define i32 @store_to_20xi64_align_8(<20 x i64>*, <20 x i64>) {
+entry:
+ store <20 x i64> %1, <20 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi64_align_8(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %24, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %26, align 8
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %28, align 8
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %30, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x i8*> value through %0 with align 8.
+; The expectations that follow require five <4 x i8*> parameters, stored at
+; element offsets 0, 4, 8, 12 and 16, all with align 8.
+define i32 @store_to_20xi8ptr_align_8(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store <20 x i8*> %1, <20 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8ptr_align_8(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x float> value through %0 with align 8.
+; The expectations that follow require five <4 x float> parameters, stored at
+; element offsets 0, 4, 8, 12 and 16, all with align 8.
+define i32 @store_to_20xfloat_align_8(<20 x float>*, <20 x float>) {
+entry:
+ store <20 x float> %1, <20 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xfloat_align_8(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <20 x double> value through %0 with align 8.
+; The expectations that follow require ten <2 x double> parameters, stored at
+; the even element offsets 0 through 18, all with align 8.
+define i32 @store_to_20xdouble_align_8(<20 x double>*, <20 x double>) {
+entry:
+ store <20 x double> %1, <20 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xdouble_align_8(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %24, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %26, align 8
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %28, align 8
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %30, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x i8> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <16 x i8>, with elements 0 and 1 extracted and stored as scalars using
+; align 32 and align 1 respectively.
+define i32 @store_to_2xi8_align_32(<2 x i8>*, <2 x i8>) {
+entry:
+ store <2 x i8> %1, <2 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8_align_32(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x i16> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <8 x i16>, with elements 0 and 1 extracted and stored as scalars using
+; align 32 and align 2 respectively.
+define i32 @store_to_2xi16_align_32(<2 x i16>*, <2 x i16>) {
+entry:
+ store <2 x i16> %1, <2 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi16_align_32(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x i32> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <4 x i32>, with elements 0 and 1 extracted and stored as scalars using
+; align 32 and align 4 respectively.
+define i32 @store_to_2xi32_align_32(<2 x i32>*, <2 x i32>) {
+entry:
+ store <2 x i32> %1, <2 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi32_align_32(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store i32 %3, i32* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store i32 %5, i32* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x i64> value through %0 with align 32.
+; The expectations that follow require the function to come out unchanged:
+; same signature and a single <2 x i64> store that keeps align 32.
+define i32 @store_to_2xi64_align_32(<2 x i64>*, <2 x i64>) {
+entry:
+ store <2 x i64> %1, <2 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi64_align_32(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x i8*> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <4 x i8*>, with elements 0 and 1 extracted and stored as scalars using
+; align 32 and align 4 respectively.
+define i32 @store_to_2xi8ptr_align_32(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store <2 x i8*> %1, <2 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xi8ptr_align_32(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store i8* %3, i8** %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store i8* %5, i8** %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x float> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <4 x float>, with elements 0 and 1 extracted and stored as scalars using
+; align 32 and align 4 respectively.
+define i32 @store_to_2xfloat_align_32(<2 x float>*, <2 x float>) {
+entry:
+ store <2 x float> %1, <2 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xfloat_align_32(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store float %3, float* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store float %5, float* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <2 x double> value through %0 with align 32.
+; The expectations that follow require the function to come out unchanged:
+; same signature and a single <2 x double> store that keeps align 32.
+define i32 @store_to_2xdouble_align_32(<2 x double>*, <2 x double>) {
+entry:
+ store <2 x double> %1, <2 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_2xdouble_align_32(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x i8> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <16 x i8>, with elements 0..3 extracted and stored as scalars using
+; align 32, 1, 2 and 1 respectively.
+define i32 @store_to_4xi8_align_32(<4 x i8>*, <4 x i8>) {
+entry:
+ store <4 x i8> %1, <4 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8_align_32(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x i16> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <8 x i16>, with elements 0..3 extracted and stored as scalars using
+; align 32, 2, 4 and 2 respectively.
+define i32 @store_to_4xi16_align_32(<4 x i16>*, <4 x i16>) {
+entry:
+ store <4 x i16> %1, <4 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi16_align_32(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x i32> value through %0 with align 32.
+; The expectations that follow require the function to come out unchanged:
+; same signature and a single <4 x i32> store that keeps align 32.
+define i32 @store_to_4xi32_align_32(<4 x i32>*, <4 x i32>) {
+entry:
+ store <4 x i32> %1, <4 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi32_align_32(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x i64> value through %0 with align 32.
+; The expectations that follow require two <2 x i64> parameters, stored at
+; element offsets 0 and 2 with align 32 and align 16 respectively.
+define i32 @store_to_4xi64_align_32(<4 x i64>*, <4 x i64>) {
+entry:
+ store <4 x i64> %1, <4 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi64_align_32(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x i8*> value through %0 with align 32.
+; The expectations that follow require the function to come out unchanged:
+; same signature and a single <4 x i8*> store that keeps align 32.
+define i32 @store_to_4xi8ptr_align_32(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store <4 x i8*> %1, <4 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xi8ptr_align_32(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x float> value through %0 with align 32.
+; The expectations that follow require the function to come out unchanged:
+; same signature and a single <4 x float> store that keeps align 32.
+define i32 @store_to_4xfloat_align_32(<4 x float>*, <4 x float>) {
+entry:
+ store <4 x float> %1, <4 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xfloat_align_32(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <4 x double> value through %0 with align 32.
+; The expectations that follow require two <2 x double> parameters, stored at
+; element offsets 0 and 2 with align 32 and align 16 respectively.
+define i32 @store_to_4xdouble_align_32(<4 x double>*, <4 x double>) {
+entry:
+ store <4 x double> %1, <4 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_4xdouble_align_32(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <6 x i8> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <16 x i8>, with elements 0..5 extracted and stored as scalars using
+; align 32, 1, 2, 1, 4 and 1 respectively.
+define i32 @store_to_6xi8_align_32(<6 x i8>*, <6 x i8>) {
+entry:
+ store <6 x i8> %1, <6 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8_align_32(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <6 x i16> value through %0 with align 32.
+; The expectations that follow require the value parameter to become
+; <8 x i16>, with elements 0..5 extracted and stored as scalars using
+; align 32, 2, 4, 2, 8 and 2 respectively.
+define i32 @store_to_6xi16_align_32(<6 x i16>*, <6 x i16>) {
+entry:
+ store <6 x i16> %1, <6 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi16_align_32(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store i16 %9, i16* %8, align 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store i16 %11, i16* %10, align 8
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store i16 %13, i16* %12, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <6 x i32> value through %0 with align 32.
+; The expectations that follow require two <4 x i32> parameters: the first is
+; stored whole at element 0 (align 32); elements 4 and 5 are extracted from
+; the second and stored as scalars with align 16 and align 4.
+define i32 @store_to_6xi32_align_32(<6 x i32>*, <6 x i32>) {
+entry:
+ store <6 x i32> %1, <6 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi32_align_32(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store i32 %6, i32* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store i32 %8, i32* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <6 x i64> value through %0 with align 32.
+; The expectations that follow require three <2 x i64> parameters, stored at
+; element offsets 0, 2 and 4 with align 32, 16 and 32 respectively.
+define i32 @store_to_6xi64_align_32(<6 x i64>*, <6 x i64>) {
+entry:
+ store <6 x i64> %1, <6 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi64_align_32(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: one store of a <6 x i8*> value through %0 with align 32.
+; The expectations that follow require two <4 x i8*> parameters: the first is
+; stored whole at element 0 (align 32); elements 4 and 5 are extracted from
+; the second and stored as scalars with align 16 and align 4.
+define i32 @store_to_6xi8ptr_align_32(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store <6 x i8*> %1, <6 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xi8ptr_align_32(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store i8* %6, i8** %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store i8* %8, i8** %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <6 x float> value.
+; Expected result: two <4 x float> parameters; elements 0-3 are written by one
+; <4 x float> store (align 32), elements 4 and 5 by scalar stores (align 16, 4).
+define i32 @store_to_6xfloat_align_32(<6 x float>*, <6 x float>) {
+entry:
+ store <6 x float> %1, <6 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xfloat_align_32(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store float %6, float* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store float %8, float* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <6 x double> value.
+; Expected result: three <2 x double> parameters, written by three
+; <2 x double> stores at elements 0, 2 and 4 with align 32, 16 and 32.
+define i32 @store_to_6xdouble_align_32(<6 x double>*, <6 x double>) {
+entry:
+ store <6 x double> %1, <6 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_6xdouble_align_32(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x i8> value.
+; Expected result: the parameter becomes a single <16 x i8>, and elements 0-7
+; are written by eight scalar i8 stores with align 32, 1, 2, 1, 4, 1, 2, 1.
+define i32 @store_to_8xi8_align_32(<8 x i8>*, <8 x i8>) {
+entry:
+ store <8 x i8> %1, <8 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8_align_32(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x i16> value.
+; Expected result: signature and store are left exactly as written.
+define i32 @store_to_8xi16_align_32(<8 x i16>*, <8 x i16>) {
+entry:
+ store <8 x i16> %1, <8 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi16_align_32(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x i32> value.
+; Expected result: two <4 x i32> parameters, written by two <4 x i32> stores
+; at elements 0 and 4 with align 32 and 16.
+define i32 @store_to_8xi32_align_32(<8 x i32>*, <8 x i32>) {
+entry:
+ store <8 x i32> %1, <8 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi32_align_32(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x i64> value.
+; Expected result: four <2 x i64> parameters, written by four <2 x i64> stores
+; at elements 0, 2, 4 and 6 with align 32, 16, 32 and 16.
+define i32 @store_to_8xi64_align_32(<8 x i64>*, <8 x i64>) {
+entry:
+ store <8 x i64> %1, <8 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi64_align_32(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x i8*> value.
+; Expected result: two <4 x i8*> parameters, written by two <4 x i8*> stores
+; at elements 0 and 4 with align 32 and 16.
+define i32 @store_to_8xi8ptr_align_32(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store <8 x i8*> %1, <8 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xi8ptr_align_32(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x float> value.
+; Expected result: two <4 x float> parameters, written by two <4 x float>
+; stores at elements 0 and 4 with align 32 and 16.
+define i32 @store_to_8xfloat_align_32(<8 x float>*, <8 x float>) {
+entry:
+ store <8 x float> %1, <8 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xfloat_align_32(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of an <8 x double> value.
+; Expected result: four <2 x double> parameters, written by four <2 x double>
+; stores at elements 0, 2, 4 and 6 with align 32, 16, 32 and 16.
+define i32 @store_to_8xdouble_align_32(<8 x double>*, <8 x double>) {
+entry:
+ store <8 x double> %1, <8 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_8xdouble_align_32(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x i8> value.
+; Expected result: the parameter becomes a single <16 x i8>, and elements 0-11
+; are written by twelve scalar i8 stores with align
+; 32, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1.
+define i32 @store_to_12xi8_align_32(<12 x i8>*, <12 x i8>) {
+entry:
+ store <12 x i8> %1, <12 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8_align_32(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store i8 %19, i8* %18, align 8
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store i8 %23, i8* %22, align 2
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x i16> value.
+; Expected result: two <8 x i16> parameters; elements 0-7 are written by one
+; <8 x i16> store (align 32), elements 8-11 by scalar i16 stores with align
+; 16, 2, 4 and 2.
+define i32 @store_to_12xi16_align_32(<12 x i16>*, <12 x i16>) {
+entry:
+ store <12 x i16> %1, <12 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi16_align_32(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store i16 %6, i16* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store i16 %8, i16* %7, align 2
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store i16 %10, i16* %9, align 4
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store i16 %12, i16* %11, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x i32> value.
+; Expected result: three <4 x i32> parameters, written by three <4 x i32>
+; stores at elements 0, 4 and 8 with align 32, 16 and 32.
+define i32 @store_to_12xi32_align_32(<12 x i32>*, <12 x i32>) {
+entry:
+ store <12 x i32> %1, <12 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi32_align_32(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x i64> value.
+; Expected result: six <2 x i64> parameters, written by six <2 x i64> stores
+; at elements 0, 2, 4, 6, 8 and 10 with align alternating 32, 16.
+define i32 @store_to_12xi64_align_32(<12 x i64>*, <12 x i64>) {
+entry:
+ store <12 x i64> %1, <12 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi64_align_32(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %8, align 32
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %10, align 16
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %18, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x i8*> value.
+; Expected result: three <4 x i8*> parameters, written by three <4 x i8*>
+; stores at elements 0, 4 and 8 with align 32, 16 and 32.
+define i32 @store_to_12xi8ptr_align_32(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store <12 x i8*> %1, <12 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xi8ptr_align_32(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x float> value.
+; Expected result: three <4 x float> parameters, written by three <4 x float>
+; stores at elements 0, 4 and 8 with align 32, 16 and 32.
+define i32 @store_to_12xfloat_align_32(<12 x float>*, <12 x float>) {
+entry:
+ store <12 x float> %1, <12 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xfloat_align_32(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <12 x double> value.
+; Expected result: six <2 x double> parameters, written by six <2 x double>
+; stores at elements 0, 2, 4, 6, 8 and 10 with align alternating 32, 16.
+define i32 @store_to_12xdouble_align_32(<12 x double>*, <12 x double>) {
+entry:
+ store <12 x double> %1, <12 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_12xdouble_align_32(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %8, align 32
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %10, align 16
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %18, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x i8> value.
+; Expected result: signature and store are left exactly as written.
+define i32 @store_to_16xi8_align_32(<16 x i8>*, <16 x i8>) {
+entry:
+ store <16 x i8> %1, <16 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8_align_32(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x i16> value.
+; Expected result: two <8 x i16> parameters, written by two <8 x i16> stores
+; at elements 0 and 8 with align 32 and 16.
+define i32 @store_to_16xi16_align_32(<16 x i16>*, <16 x i16>) {
+entry:
+ store <16 x i16> %1, <16 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi16_align_32(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x i32> value.
+; Expected result: four <4 x i32> parameters, written by four <4 x i32> stores
+; at elements 0, 4, 8 and 12 with align 32, 16, 32 and 16.
+define i32 @store_to_16xi32_align_32(<16 x i32>*, <16 x i32>) {
+entry:
+ store <16 x i32> %1, <16 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi32_align_32(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x i64> value.
+; Expected result: eight <2 x i64> parameters, written by eight <2 x i64>
+; stores at even elements 0 through 14 with align alternating 32, 16.
+define i32 @store_to_16xi64_align_32(<16 x i64>*, <16 x i64>) {
+entry:
+ store <16 x i64> %1, <16 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi64_align_32(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %12, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %14, align 32
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %16, align 16
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %18, align 32
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %20, align 16
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %22, align 32
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %24, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x i8*> value.
+; Expected result: four <4 x i8*> parameters, written by four <4 x i8*> stores
+; at elements 0, 4, 8 and 12 with align 32, 16, 32 and 16.
+define i32 @store_to_16xi8ptr_align_32(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store <16 x i8*> %1, <16 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xi8ptr_align_32(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x float> value.
+; Expected result: four <4 x float> parameters, written by four <4 x float>
+; stores at elements 0, 4, 8 and 12 with align 32, 16, 32 and 16.
+define i32 @store_to_16xfloat_align_32(<16 x float>*, <16 x float>) {
+entry:
+ store <16 x float> %1, <16 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xfloat_align_32(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <16 x double> value.
+; Expected result: eight <2 x double> parameters, written by eight
+; <2 x double> stores at even elements 0 through 14 with align alternating
+; 32, 16.
+define i32 @store_to_16xdouble_align_32(<16 x double>*, <16 x double>) {
+entry:
+ store <16 x double> %1, <16 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_16xdouble_align_32(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %12, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %14, align 32
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %16, align 16
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %18, align 32
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %20, align 16
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %22, align 32
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %24, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <20 x i8> value.
+; Expected result: two <16 x i8> parameters; elements 0-15 are written by one
+; <16 x i8> store (align 32), elements 16-19 by scalar i8 stores with align
+; 16, 1, 2 and 1.
+define i32 @store_to_20xi8_align_32(<20 x i8>*, <20 x i8>) {
+entry:
+ store <20 x i8> %1, <20 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8_align_32(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> %1, <16 x i8>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store i8 %6, i8* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store i8 %10, i8* %9, align 2
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <20 x i16> value.
+; Expected result: three <8 x i16> parameters; elements 0-15 are written by two
+; <8 x i16> stores (align 32, 16), elements 16-19 by scalar i16 stores with
+; align 32, 2, 4 and 2.
+define i32 @store_to_20xi16_align_32(<20 x i16>*, <20 x i16>) {
+entry:
+ store <20 x i16> %1, <20 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi16_align_32(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %1, <8 x i16>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> %2, <8 x i16>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store i16 %9, i16* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store i16 %11, i16* %10, align 2
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store i16 %13, i16* %12, align 4
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store i16 %15, i16* %14, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <20 x i32> value.
+; Expected result: five <4 x i32> parameters, written by five <4 x i32> stores
+; at elements 0, 4, 8, 12 and 16 with align 32, 16, 32, 16 and 32.
+define i32 @store_to_20xi32_align_32(<20 x i32>*, <20 x i32>) {
+entry:
+ store <20 x i32> %1, <20 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi32_align_32(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %3, <4 x i32>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %4, <4 x i32>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <20 x i64> value.
+; Expected result: ten <2 x i64> parameters, written by ten <2 x i64> stores
+; at even elements 0 through 18 with align alternating 32, 16.
+define i32 @store_to_20xi64_align_32(<20 x i64>*, <20 x i64>) {
+entry:
+ store <20 x i64> %1, <20 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi64_align_32(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %1, <2 x i64>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %2, <2 x i64>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %3, <2 x i64>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %4, <2 x i64>* %18, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %20, align 32
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %6, <2 x i64>* %22, align 16
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %24, align 32
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %26, align 16
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %28, align 32
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %30, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Test input: a 32-byte-aligned store of a <20 x i8*> value.
+; Expected result: five <4 x i8*> parameters, written by five <4 x i8*> stores
+; at elements 0, 4, 8, 12 and 16 with align 32, 16, 32, 16 and 32.
+define i32 @store_to_20xi8ptr_align_32(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store <20 x i8*> %1, <20 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xi8ptr_align_32(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %1, <4 x i8*>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %2, <4 x i8*>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %3, <4 x i8*>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %4, <4 x i8*>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xfloat_align_32(<20 x float>*, <20 x float>) {
+entry:
+ store <20 x float> %1, <20 x float>* %0, align 32 ; under test: align-32 store; expected: five <4 x float> stores with alignment alternating 32, 16
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xfloat_align_32(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %1, <4 x float>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %2, <4 x float>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %3, <4 x float>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %4, <4 x float>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @store_to_20xdouble_align_32(<20 x double>*, <20 x double>) {
+entry:
+ store <20 x double> %1, <20 x double>* %0, align 32 ; under test: align-32 store; expected: ten <2 x double> stores with alignment alternating 32, 16
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @store_to_20xdouble_align_32(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %1, <2 x double>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %2, <2 x double>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %3, <2 x double>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %4, <2 x double>* %18, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %20, align 32
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %6, <2 x double>* %22, align 16
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %24, align 32
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %26, align 16
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %28, align 32
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %30, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8_unaligned(<2 x i8>*, <2 x i8>) {
+entry:
+ store volatile <2 x i8> %1, <2 x i8>* %0 ; under test: volatile store, no explicit align; expected: two scalar volatile i8 stores taken from a <16 x i8> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8_unaligned(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi16_unaligned(<2 x i16>*, <2 x i16>) {
+entry:
+ store volatile <2 x i16> %1, <2 x i16>* %0 ; under test: volatile store, no explicit align; expected: two scalar volatile i16 stores taken from an <8 x i16> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi16_unaligned(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi32_unaligned(<2 x i32>*, <2 x i32>) {
+entry:
+ store volatile <2 x i32> %1, <2 x i32>* %0 ; under test: volatile store, no explicit align; expected: two scalar volatile i32 stores taken from a <4 x i32> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi32_unaligned(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store volatile i32 %3, i32* %2
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store volatile i32 %5, i32* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi64_unaligned(<2 x i64>*, <2 x i64>) {
+entry:
+ store volatile <2 x i64> %1, <2 x i64>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi64_unaligned(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8ptr_unaligned(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store volatile <2 x i8*> %1, <2 x i8*>* %0 ; under test: volatile store, no explicit align; expected: two scalar volatile i8* stores taken from a <4 x i8*> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8ptr_unaligned(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store volatile i8* %3, i8** %2
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store volatile i8* %5, i8** %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xfloat_unaligned(<2 x float>*, <2 x float>) {
+entry:
+ store volatile <2 x float> %1, <2 x float>* %0 ; under test: volatile store, no explicit align; expected: two scalar volatile float stores taken from a <4 x float> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xfloat_unaligned(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store volatile float %3, float* %2
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store volatile float %5, float* %4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xdouble_unaligned(<2 x double>*, <2 x double>) {
+entry:
+ store volatile <2 x double> %1, <2 x double>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xdouble_unaligned(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8_unaligned(<4 x i8>*, <4 x i8>) {
+entry:
+ store volatile <4 x i8> %1, <4 x i8>* %0 ; under test: volatile store, no explicit align; expected: four scalar volatile i8 stores taken from a <16 x i8> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8_unaligned(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi16_unaligned(<4 x i16>*, <4 x i16>) {
+entry:
+ store volatile <4 x i16> %1, <4 x i16>* %0 ; under test: volatile store, no explicit align; expected: four scalar volatile i16 stores taken from an <8 x i16> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi16_unaligned(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi32_unaligned(<4 x i32>*, <4 x i32>) {
+entry:
+ store volatile <4 x i32> %1, <4 x i32>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi32_unaligned(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi64_unaligned(<4 x i64>*, <4 x i64>) {
+entry:
+ store volatile <4 x i64> %1, <4 x i64>* %0 ; under test: volatile store, no explicit align; expected: two volatile <2 x i64> stores at element offsets 0 and 2
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi64_unaligned(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %4
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8ptr_unaligned(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store volatile <4 x i8*> %1, <4 x i8*>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8ptr_unaligned(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xfloat_unaligned(<4 x float>*, <4 x float>) {
+entry:
+ store volatile <4 x float> %1, <4 x float>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xfloat_unaligned(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xdouble_unaligned(<4 x double>*, <4 x double>) {
+entry:
+ store volatile <4 x double> %1, <4 x double>* %0 ; under test: volatile store, no explicit align; expected: two volatile <2 x double> stores at element offsets 0 and 2
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xdouble_unaligned(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %4
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8_unaligned(<6 x i8>*, <6 x i8>) {
+entry:
+ store volatile <6 x i8> %1, <6 x i8>* %0 ; under test: volatile store, no explicit align; expected: six scalar volatile i8 stores taken from a <16 x i8> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8_unaligned(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi16_unaligned(<6 x i16>*, <6 x i16>) {
+entry:
+ store volatile <6 x i16> %1, <6 x i16>* %0 ; under test: volatile store, no explicit align; expected: six scalar volatile i16 stores taken from an <8 x i16> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi16_unaligned(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store volatile i16 %11, i16* %10
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store volatile i16 %13, i16* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi32_unaligned(<6 x i32>*, <6 x i32>) {
+entry:
+ store volatile <6 x i32> %1, <6 x i32>* %0 ; under test: volatile store, no explicit align; expected: one volatile <4 x i32> store, then scalar volatile i32 stores for elements 4 and 5
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi32_unaligned(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store volatile i32 %6, i32* %5
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store volatile i32 %8, i32* %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi64_unaligned(<6 x i64>*, <6 x i64>) {
+entry:
+ store volatile <6 x i64> %1, <6 x i64>* %0 ; under test: volatile store, no explicit align; expected: three volatile <2 x i64> stores at element offsets 0, 2, 4
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi64_unaligned(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %5
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %7
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8ptr_unaligned(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store volatile <6 x i8*> %1, <6 x i8*>* %0 ; under test: volatile store, no explicit align; expected: one volatile <4 x i8*> store, then scalar volatile i8* stores for elements 4 and 5
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8ptr_unaligned(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store volatile i8* %6, i8** %5
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store volatile i8* %8, i8** %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xfloat_unaligned(<6 x float>*, <6 x float>) {
+entry:
+ store volatile <6 x float> %1, <6 x float>* %0 ; under test: volatile store, no explicit align; expected: one volatile <4 x float> store, then scalar volatile float stores for elements 4 and 5
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xfloat_unaligned(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store volatile float %6, float* %5
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store volatile float %8, float* %7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xdouble_unaligned(<6 x double>*, <6 x double>) {
+entry:
+ store volatile <6 x double> %1, <6 x double>* %0 ; under test: volatile store, no explicit align; expected: three volatile <2 x double> stores at element offsets 0, 2, 4
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xdouble_unaligned(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %5
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %7
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8_unaligned(<8 x i8>*, <8 x i8>) {
+entry:
+ store volatile <8 x i8> %1, <8 x i8>* %0 ; under test: volatile store, no explicit align; expected: eight scalar volatile i8 stores taken from a <16 x i8> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8_unaligned(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi16_unaligned(<8 x i16>*, <8 x i16>) {
+entry:
+ store volatile <8 x i16> %1, <8 x i16>* %0 ; under test: volatile store, no explicit align; expected: signature and store left unchanged
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi16_unaligned(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi32_unaligned(<8 x i32>*, <8 x i32>) {
+entry:
+ store volatile <8 x i32> %1, <8 x i32>* %0 ; under test: volatile store, no explicit align; expected: two volatile <4 x i32> stores at element offsets 0 and 4
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi32_unaligned(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi64_unaligned(<8 x i64>*, <8 x i64>) {
+entry:
+ store volatile <8 x i64> %1, <8 x i64>* %0 ; under test: volatile store, no explicit align; expected: four volatile <2 x i64> stores at element offsets 0, 2, 4, 6
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi64_unaligned(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %6
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %8
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8ptr_unaligned(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store volatile <8 x i8*> %1, <8 x i8*>* %0 ; under test: volatile store, no explicit align; expected: two volatile <4 x i8*> stores at element offsets 0 and 4
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8ptr_unaligned(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xfloat_unaligned(<8 x float>*, <8 x float>) {
+entry:
+ store volatile <8 x float> %1, <8 x float>* %0 ; under test: volatile store, no explicit align; expected: two volatile <4 x float> stores at element offsets 0 and 4
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xfloat_unaligned(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xdouble_unaligned(<8 x double>*, <8 x double>) {
+entry:
+ store volatile <8 x double> %1, <8 x double>* %0 ; under test: volatile store, no explicit align; expected: four volatile <2 x double> stores at element offsets 0, 2, 4, 6
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xdouble_unaligned(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %6
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %8
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8_unaligned(<12 x i8>*, <12 x i8>) {
+entry:
+ store volatile <12 x i8> %1, <12 x i8>* %0 ; under test: volatile store, no explicit align; expected: twelve scalar volatile i8 stores taken from a <16 x i8> argument
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8_unaligned(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store volatile i8 %19, i8* %18
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store volatile i8 %21, i8* %20
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store volatile i8 %23, i8* %22
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store volatile i8 %25, i8* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi16_unaligned(<12 x i16>*, <12 x i16>) {
+entry:
+ store volatile <12 x i16> %1, <12 x i16>* %0 ; under test: volatile store, no explicit align; expected: one volatile <8 x i16> store, then scalar volatile i16 stores for elements 8 through 11
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi16_unaligned(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store volatile i16 %6, i16* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store volatile i16 %8, i16* %7
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store volatile i16 %10, i16* %9
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store volatile i16 %12, i16* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi32_unaligned(<12 x i32>*, <12 x i32>) {
+entry:
+ store volatile <12 x i32> %1, <12 x i32>* %0 ; under test: volatile store, no explicit align; expected: three volatile <4 x i32> stores at element offsets 0, 4, 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi32_unaligned(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi64_unaligned(<12 x i64>*, <12 x i64>) {
+entry:
+ store volatile <12 x i64> %1, <12 x i64>* %0 ; under test: volatile store, no explicit align; expected: six volatile <2 x i64> stores at element offsets 0, 2, 4, 6, 8, 10
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi64_unaligned(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %8
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %18
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8ptr_unaligned(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store volatile <12 x i8*> %1, <12 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8ptr_unaligned(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xfloat_unaligned(<12 x float>*, <12 x float>) {
+entry:
+ store volatile <12 x float> %1, <12 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xfloat_unaligned(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %5
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %7
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %9
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xdouble_unaligned(<12 x double>*, <12 x double>) {
+entry:
+ store volatile <12 x double> %1, <12 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xdouble_unaligned(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %8
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %18
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8_unaligned(<16 x i8>*, <16 x i8>) {
+entry:
+ store volatile <16 x i8> %1, <16 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8_unaligned(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi16_unaligned(<16 x i16>*, <16 x i16>) {
+entry:
+ store volatile <16 x i16> %1, <16 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi16_unaligned(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %6
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi32_unaligned(<16 x i32>*, <16 x i32>) {
+entry:
+ store volatile <16 x i32> %1, <16 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi32_unaligned(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi64_unaligned(<16 x i64>*, <16 x i64>) {
+entry:
+ store volatile <16 x i64> %1, <16 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi64_unaligned(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %18
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %20
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %22
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8ptr_unaligned(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store volatile <16 x i8*> %1, <16 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8ptr_unaligned(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xfloat_unaligned(<16 x float>*, <16 x float>) {
+entry:
+ store volatile <16 x float> %1, <16 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xfloat_unaligned(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %6
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %8
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %12
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xdouble_unaligned(<16 x double>*, <16 x double>) {
+entry:
+ store volatile <16 x double> %1, <16 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xdouble_unaligned(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %10
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %18
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %20
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %22
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %24
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8_unaligned(<20 x i8>*, <20 x i8>) {
+entry:
+ store volatile <20 x i8> %1, <20 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8_unaligned(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %4
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store volatile i8 %6, i8* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store volatile i8 %8, i8* %7
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store volatile i8 %10, i8* %9
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store volatile i8 %12, i8* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi16_unaligned(<20 x i16>*, <20 x i16>) {
+entry:
+ store volatile <20 x i16> %1, <20 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi16_unaligned(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %5
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store volatile i16 %9, i16* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store volatile i16 %11, i16* %10
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store volatile i16 %13, i16* %12
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store volatile i16 %15, i16* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi32_unaligned(<20 x i32>*, <20 x i32>) {
+entry:
+ store volatile <20 x i32> %1, <20 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi32_unaligned(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %5, <4 x i32>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi64_unaligned(<20 x i64>*, <20 x i64>) {
+entry:
+ store volatile <20 x i64> %1, <20 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi64_unaligned(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %12
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %14
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %16
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %18
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %20
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %22
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %24
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %26
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %9, <2 x i64>* %28
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %10, <2 x i64>* %30
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8ptr_unaligned(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store volatile <20 x i8*> %1, <20 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8ptr_unaligned(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %5, <4 x i8*>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xfloat_unaligned(<20 x float>*, <20 x float>) {
+entry:
+ store volatile <20 x float> %1, <20 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xfloat_unaligned(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %7
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %9
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %11
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %13
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %5, <4 x float>* %15
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xdouble_unaligned(<20 x double>*, <20 x double>) {
+entry:
+ store volatile <20 x double> %1, <20 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xdouble_unaligned(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %12
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %14
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %16
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %18
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %20
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %22
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %24
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %26
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %9, <2 x double>* %28
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %10, <2 x double>* %30
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8_align_1(<2 x i8>*, <2 x i8>) {
+entry:
+ store volatile <2 x i8> %1, <2 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8_align_1(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi16_align_1(<2 x i16>*, <2 x i16>) {
+entry:
+ store volatile <2 x i16> %1, <2 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi16_align_1(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi32_align_1(<2 x i32>*, <2 x i32>) {
+entry:
+ store volatile <2 x i32> %1, <2 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi32_align_1(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store volatile i32 %3, i32* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store volatile i32 %5, i32* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi64_align_1(<2 x i64>*, <2 x i64>) {
+entry:
+ store volatile <2 x i64> %1, <2 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi64_align_1(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8ptr_align_1(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store volatile <2 x i8*> %1, <2 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8ptr_align_1(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store volatile i8* %3, i8** %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store volatile i8* %5, i8** %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xfloat_align_1(<2 x float>*, <2 x float>) {
+entry:
+ store volatile <2 x float> %1, <2 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xfloat_align_1(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store volatile float %3, float* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store volatile float %5, float* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xdouble_align_1(<2 x double>*, <2 x double>) {
+entry:
+ store volatile <2 x double> %1, <2 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xdouble_align_1(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8_align_1(<4 x i8>*, <4 x i8>) {
+entry:
+ store volatile <4 x i8> %1, <4 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8_align_1(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi16_align_1(<4 x i16>*, <4 x i16>) {
+entry:
+ store volatile <4 x i16> %1, <4 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi16_align_1(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi32_align_1(<4 x i32>*, <4 x i32>) {
+entry:
+ store volatile <4 x i32> %1, <4 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi32_align_1(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi64_align_1(<4 x i64>*, <4 x i64>) {
+entry:
+ store volatile <4 x i64> %1, <4 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi64_align_1(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8ptr_align_1(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store volatile <4 x i8*> %1, <4 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8ptr_align_1(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xfloat_align_1(<4 x float>*, <4 x float>) {
+entry:
+ store volatile <4 x float> %1, <4 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xfloat_align_1(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xdouble_align_1(<4 x double>*, <4 x double>) {
+entry:
+ store volatile <4 x double> %1, <4 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xdouble_align_1(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8_align_1(<6 x i8>*, <6 x i8>) {
+entry:
+ store volatile <6 x i8> %1, <6 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8_align_1(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi16_align_1(<6 x i16>*, <6 x i16>) {
+entry:
+ store volatile <6 x i16> %1, <6 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi16_align_1(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi32_align_1(<6 x i32>*, <6 x i32>) {
+entry:
+ store volatile <6 x i32> %1, <6 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi32_align_1(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store volatile i32 %6, i32* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store volatile i32 %8, i32* %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi64_align_1(<6 x i64>*, <6 x i64>) {
+entry:
+ store volatile <6 x i64> %1, <6 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi64_align_1(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8ptr_align_1(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store volatile <6 x i8*> %1, <6 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8ptr_align_1(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store volatile i8* %6, i8** %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store volatile i8* %8, i8** %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xfloat_align_1(<6 x float>*, <6 x float>) {
+entry:
+ store volatile <6 x float> %1, <6 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xfloat_align_1(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store volatile float %6, float* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store volatile float %8, float* %7, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xdouble_align_1(<6 x double>*, <6 x double>) {
+entry:
+ store volatile <6 x double> %1, <6 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xdouble_align_1(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8_align_1(<8 x i8>*, <8 x i8>) {
+entry:
+ store volatile <8 x i8> %1, <8 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8_align_1(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi16_align_1(<8 x i16>*, <8 x i16>) {
+entry:
+ store volatile <8 x i16> %1, <8 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi16_align_1(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi32_align_1(<8 x i32>*, <8 x i32>) {
+entry:
+ store volatile <8 x i32> %1, <8 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi32_align_1(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi64_align_1(<8 x i64>*, <8 x i64>) {
+entry:
+ store volatile <8 x i64> %1, <8 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi64_align_1(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8ptr_align_1(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store volatile <8 x i8*> %1, <8 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8ptr_align_1(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xfloat_align_1(<8 x float>*, <8 x float>) {
+entry:
+ store volatile <8 x float> %1, <8 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xfloat_align_1(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xdouble_align_1(<8 x double>*, <8 x double>) {
+entry:
+ store volatile <8 x double> %1, <8 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xdouble_align_1(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8_align_1(<12 x i8>*, <12 x i8>) {
+entry:
+ store volatile <12 x i8> %1, <12 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8_align_1(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store volatile i8 %19, i8* %18, align 1
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store volatile i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store volatile i8 %23, i8* %22, align 1
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store volatile i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi16_align_1(<12 x i16>*, <12 x i16>) {
+entry:
+ store volatile <12 x i16> %1, <12 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi16_align_1(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store volatile i16 %6, i16* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store volatile i16 %8, i16* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store volatile i16 %10, i16* %9, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store volatile i16 %12, i16* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi32_align_1(<12 x i32>*, <12 x i32>) {
+entry:
+ store volatile <12 x i32> %1, <12 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi32_align_1(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi64_align_1(<12 x i64>*, <12 x i64>) {
+entry:
+ store volatile <12 x i64> %1, <12 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi64_align_1(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %18, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8ptr_align_1(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store volatile <12 x i8*> %1, <12 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8ptr_align_1(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xfloat_align_1(<12 x float>*, <12 x float>) {
+entry:
+ store volatile <12 x float> %1, <12 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xfloat_align_1(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %9, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xdouble_align_1(<12 x double>*, <12 x double>) {
+entry:
+ store volatile <12 x double> %1, <12 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xdouble_align_1(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %18, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8_align_1(<16 x i8>*, <16 x i8>) {
+entry:
+ store volatile <16 x i8> %1, <16 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8_align_1(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi16_align_1(<16 x i16>*, <16 x i16>) {
+entry:
+ store volatile <16 x i16> %1, <16 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi16_align_1(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %6, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi32_align_1(<16 x i32>*, <16 x i32>) {
+entry:
+ store volatile <16 x i32> %1, <16 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi32_align_1(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi64_align_1(<16 x i64>*, <16 x i64>) {
+entry:
+ store volatile <16 x i64> %1, <16 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi64_align_1(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8ptr_align_1(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store volatile <16 x i8*> %1, <16 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8ptr_align_1(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xfloat_align_1(<16 x float>*, <16 x float>) {
+entry:
+ store volatile <16 x float> %1, <16 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xfloat_align_1(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %6, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %8, align 1
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xdouble_align_1(<16 x double>*, <16 x double>) {
+entry:
+ store volatile <16 x double> %1, <16 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xdouble_align_1(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %10, align 1
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x i8>. Expected output: the value arrives as
+; two <16 x i8> arguments; the first is stored whole through a bitcast pointer,
+; and elements 16-19 are extracted from the second and stored one at a time.
+; Every resulting store stays volatile with align 1.
+define i32 @volatile_store_to_20xi8_align_1(<20 x i8>*, <20 x i8>) {
+entry:
+ store volatile <20 x i8> %1, <20 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8_align_1(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %4, align 1
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store volatile i8 %6, i8* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store volatile i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store volatile i8 %10, i8* %9, align 1
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store volatile i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x i16>. Expected output: three <8 x i16>
+; arguments; the first two are stored whole at element offsets 0 and 8, and
+; elements 16-19 are extracted from the third and stored individually.
+; Every resulting store stays volatile with align 1.
+define i32 @volatile_store_to_20xi16_align_1(<20 x i16>*, <20 x i16>) {
+entry:
+ store volatile <20 x i16> %1, <20 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi16_align_1(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %5, align 1
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store volatile i16 %15, i16* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x i32>. Expected output: five <4 x i32>
+; arguments, each stored whole through a bitcast pointer at element offsets
+; 0, 4, 8, 12 and 16. There is no scalar remainder; all stores are volatile
+; with align 1.
+define i32 @volatile_store_to_20xi32_align_1(<20 x i32>*, <20 x i32>) {
+entry:
+ store volatile <20 x i32> %1, <20 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi32_align_1(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %5, <4 x i32>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x i64>. Expected output: ten <2 x i64>
+; arguments, each stored whole through a bitcast pointer at the even element
+; offsets 0 through 18. All stores are volatile with align 1.
+define i32 @volatile_store_to_20xi64_align_1(<20 x i64>*, <20 x i64>) {
+entry:
+ store volatile <20 x i64> %1, <20 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi64_align_1(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %24, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %26, align 1
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %9, <2 x i64>* %28, align 1
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %10, <2 x i64>* %30, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x i8*>. Expected output: five <4 x i8*>
+; arguments, each stored whole through a bitcast pointer at element offsets
+; 0, 4, 8, 12 and 16. All stores are volatile with align 1.
+define i32 @volatile_store_to_20xi8ptr_align_1(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store volatile <20 x i8*> %1, <20 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8ptr_align_1(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %5, <4 x i8*>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x float>. Expected output: five <4 x float>
+; arguments, each stored whole through a bitcast pointer at element offsets
+; 0, 4, 8, 12 and 16. All stores are volatile with align 1.
+define i32 @volatile_store_to_20xfloat_align_1(<20 x float>*, <20 x float>) {
+entry:
+ store volatile <20 x float> %1, <20 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xfloat_align_1(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %7, align 1
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %9, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %11, align 1
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %13, align 1
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %5, <4 x float>* %15, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-1 store of <20 x double>. Expected output: ten <2 x double>
+; arguments, each stored whole through a bitcast pointer at the even element
+; offsets 0 through 18. All stores are volatile with align 1.
+define i32 @volatile_store_to_20xdouble_align_1(<20 x double>*, <20 x double>) {
+entry:
+ store volatile <20 x double> %1, <20 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xdouble_align_1(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %12, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %14, align 1
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %16, align 1
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %18, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %20, align 1
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %22, align 1
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %24, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %26, align 1
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %9, <2 x double>* %28, align 1
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %10, <2 x double>* %30, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x i8>. Expected output: the value arrives as a
+; single <16 x i8> argument and is written as two scalar volatile stores;
+; element 0 keeps align 8 and element 1 is expected with align 1.
+define i32 @volatile_store_to_2xi8_align_8(<2 x i8>*, <2 x i8>) {
+entry:
+ store volatile <2 x i8> %1, <2 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8_align_8(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x i16>. Expected output: the value arrives as a
+; single <8 x i16> argument and is written as two scalar volatile stores;
+; element 0 keeps align 8 and element 1 is expected with align 2.
+define i32 @volatile_store_to_2xi16_align_8(<2 x i16>*, <2 x i16>) {
+entry:
+ store volatile <2 x i16> %1, <2 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi16_align_8(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x i32>. Expected output: the value arrives as a
+; single <4 x i32> argument and is written as two scalar volatile stores;
+; element 0 keeps align 8 and element 1 is expected with align 4.
+define i32 @volatile_store_to_2xi32_align_8(<2 x i32>*, <2 x i32>) {
+entry:
+ store volatile <2 x i32> %1, <2 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi32_align_8(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store volatile i32 %3, i32* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store volatile i32 %5, i32* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x i64>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_2xi64_align_8(<2 x i64>*, <2 x i64>) {
+entry:
+ store volatile <2 x i64> %1, <2 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi64_align_8(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x i8*>. Expected output: the value arrives as a
+; single <4 x i8*> argument and is written as two scalar volatile stores;
+; element 0 keeps align 8 and element 1 is expected with align 4.
+define i32 @volatile_store_to_2xi8ptr_align_8(<2 x i8*>*, <2 x i8*>) {
+entry:
+ store volatile <2 x i8*> %1, <2 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8ptr_align_8(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store volatile i8* %3, i8** %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store volatile i8* %5, i8** %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x float>. Expected output: the value arrives as
+; a single <4 x float> argument and is written as two scalar volatile stores;
+; element 0 keeps align 8 and element 1 is expected with align 4.
+define i32 @volatile_store_to_2xfloat_align_8(<2 x float>*, <2 x float>) {
+entry:
+ store volatile <2 x float> %1, <2 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xfloat_align_8(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store volatile float %3, float* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store volatile float %5, float* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <2 x double>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_2xdouble_align_8(<2 x double>*, <2 x double>) {
+entry:
+ store volatile <2 x double> %1, <2 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xdouble_align_8(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x i8>. Expected output: the value arrives as a
+; single <16 x i8> argument and is written as four scalar volatile stores with
+; alignments 8, 1, 2, 1 for elements 0-3 respectively.
+define i32 @volatile_store_to_4xi8_align_8(<4 x i8>*, <4 x i8>) {
+entry:
+ store volatile <4 x i8> %1, <4 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8_align_8(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x i16>. Expected output: the value arrives as a
+; single <8 x i16> argument and is written as four scalar volatile stores with
+; alignments 8, 2, 4, 2 for elements 0-3 respectively.
+define i32 @volatile_store_to_4xi16_align_8(<4 x i16>*, <4 x i16>) {
+entry:
+ store volatile <4 x i16> %1, <4 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi16_align_8(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x i32>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_4xi32_align_8(<4 x i32>*, <4 x i32>) {
+entry:
+ store volatile <4 x i32> %1, <4 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi32_align_8(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x i64>. Expected output: two <2 x i64>
+; arguments, stored whole through bitcast pointers at element offsets 0 and 2,
+; both volatile with align 8.
+define i32 @volatile_store_to_4xi64_align_8(<4 x i64>*, <4 x i64>) {
+entry:
+ store volatile <4 x i64> %1, <4 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi64_align_8(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x i8*>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_4xi8ptr_align_8(<4 x i8*>*, <4 x i8*>) {
+entry:
+ store volatile <4 x i8*> %1, <4 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8ptr_align_8(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x float>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_4xfloat_align_8(<4 x float>*, <4 x float>) {
+entry:
+ store volatile <4 x float> %1, <4 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xfloat_align_8(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <4 x double>. Expected output: two <2 x double>
+; arguments, stored whole through bitcast pointers at element offsets 0 and 2,
+; both volatile with align 8.
+define i32 @volatile_store_to_4xdouble_align_8(<4 x double>*, <4 x double>) {
+entry:
+ store volatile <4 x double> %1, <4 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xdouble_align_8(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x i8>. Expected output: the value arrives as a
+; single <16 x i8> argument and is written as six scalar volatile stores with
+; alignments 8, 1, 2, 1, 4, 1 for elements 0-5 respectively.
+define i32 @volatile_store_to_6xi8_align_8(<6 x i8>*, <6 x i8>) {
+entry:
+ store volatile <6 x i8> %1, <6 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8_align_8(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x i16>. Expected output: the value arrives as a
+; single <8 x i16> argument and is written as six scalar volatile stores with
+; alignments 8, 2, 4, 2, 8, 2 for elements 0-5 respectively.
+define i32 @volatile_store_to_6xi16_align_8(<6 x i16>*, <6 x i16>) {
+entry:
+ store volatile <6 x i16> %1, <6 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi16_align_8(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 8
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x i32>. Expected output: two <4 x i32>
+; arguments; the first is stored whole with align 8, then elements 4 and 5 are
+; extracted from the second and stored as scalars with align 8 and align 4.
+define i32 @volatile_store_to_6xi32_align_8(<6 x i32>*, <6 x i32>) {
+entry:
+ store volatile <6 x i32> %1, <6 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi32_align_8(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store volatile i32 %6, i32* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store volatile i32 %8, i32* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x i64>. Expected output: three <2 x i64>
+; arguments, stored whole through bitcast pointers at element offsets 0, 2
+; and 4, all volatile with align 8.
+define i32 @volatile_store_to_6xi64_align_8(<6 x i64>*, <6 x i64>) {
+entry:
+ store volatile <6 x i64> %1, <6 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi64_align_8(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x i8*>. Expected output: two <4 x i8*>
+; arguments; the first is stored whole with align 8, then elements 4 and 5 are
+; extracted from the second and stored as scalars with align 8 and align 4.
+define i32 @volatile_store_to_6xi8ptr_align_8(<6 x i8*>*, <6 x i8*>) {
+entry:
+ store volatile <6 x i8*> %1, <6 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8ptr_align_8(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store volatile i8* %6, i8** %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store volatile i8* %8, i8** %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x float>. Expected output: two <4 x float>
+; arguments; the first is stored whole with align 8, then elements 4 and 5 are
+; extracted from the second and stored as scalars with align 8 and align 4.
+define i32 @volatile_store_to_6xfloat_align_8(<6 x float>*, <6 x float>) {
+entry:
+ store volatile <6 x float> %1, <6 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xfloat_align_8(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store volatile float %6, float* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store volatile float %8, float* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <6 x double>. Expected output: three <2 x double>
+; arguments, stored whole through bitcast pointers at element offsets 0, 2
+; and 4, all volatile with align 8.
+define i32 @volatile_store_to_6xdouble_align_8(<6 x double>*, <6 x double>) {
+entry:
+ store volatile <6 x double> %1, <6 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xdouble_align_8(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x i8>. Expected output: the value arrives as a
+; single <16 x i8> argument and is written as eight scalar volatile stores with
+; alignments 8, 1, 2, 1, 4, 1, 2, 1 for elements 0-7 respectively.
+define i32 @volatile_store_to_8xi8_align_8(<8 x i8>*, <8 x i8>) {
+entry:
+ store volatile <8 x i8> %1, <8 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8_align_8(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x i16>. Expected output: signature and store
+; are left exactly as written (same type, still volatile, still align 8).
+define i32 @volatile_store_to_8xi16_align_8(<8 x i16>*, <8 x i16>) {
+entry:
+ store volatile <8 x i16> %1, <8 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi16_align_8(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x i32>. Expected output: two <4 x i32>
+; arguments, stored whole through bitcast pointers at element offsets 0 and 4,
+; both volatile with align 8.
+define i32 @volatile_store_to_8xi32_align_8(<8 x i32>*, <8 x i32>) {
+entry:
+ store volatile <8 x i32> %1, <8 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi32_align_8(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x i64>. Expected output: four <2 x i64>
+; arguments, stored whole through bitcast pointers at element offsets 0, 2, 4
+; and 6, all volatile with align 8.
+define i32 @volatile_store_to_8xi64_align_8(<8 x i64>*, <8 x i64>) {
+entry:
+ store volatile <8 x i64> %1, <8 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi64_align_8(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x i8*>. Expected output: two <4 x i8*>
+; arguments, stored whole through bitcast pointers at element offsets 0 and 4,
+; both volatile with align 8.
+define i32 @volatile_store_to_8xi8ptr_align_8(<8 x i8*>*, <8 x i8*>) {
+entry:
+ store volatile <8 x i8*> %1, <8 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8ptr_align_8(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+; Volatile, align-8 store of <8 x float>. Expected output: two <4 x float>
+; arguments, stored whole through bitcast pointers at element offsets 0 and 4,
+; both volatile with align 8.
+define i32 @volatile_store_to_8xfloat_align_8(<8 x float>*, <8 x float>) {
+entry:
+ store volatile <8 x float> %1, <8 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xfloat_align_8(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xdouble_align_8(<8 x double>*, <8 x double>) {
+entry:
+ store volatile <8 x double> %1, <8 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xdouble_align_8(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8_align_8(<12 x i8>*, <12 x i8>) {
+entry:
+ store volatile <12 x i8> %1, <12 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8_align_8(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store volatile i8 %19, i8* %18, align 8
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store volatile i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store volatile i8 %23, i8* %22, align 2
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store volatile i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi16_align_8(<12 x i16>*, <12 x i16>) {
+entry:
+ store volatile <12 x i16> %1, <12 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi16_align_8(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store volatile i16 %6, i16* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store volatile i16 %8, i16* %7, align 2
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store volatile i16 %10, i16* %9, align 4
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store volatile i16 %12, i16* %11, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi32_align_8(<12 x i32>*, <12 x i32>) {
+entry:
+ store volatile <12 x i32> %1, <12 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi32_align_8(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi64_align_8(<12 x i64>*, <12 x i64>) {
+entry:
+ store volatile <12 x i64> %1, <12 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi64_align_8(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %18, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8ptr_align_8(<12 x i8*>*, <12 x i8*>) {
+entry:
+ store volatile <12 x i8*> %1, <12 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8ptr_align_8(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xfloat_align_8(<12 x float>*, <12 x float>) {
+entry:
+ store volatile <12 x float> %1, <12 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xfloat_align_8(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %9, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xdouble_align_8(<12 x double>*, <12 x double>) {
+entry:
+ store volatile <12 x double> %1, <12 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xdouble_align_8(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %18, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8_align_8(<16 x i8>*, <16 x i8>) {
+entry:
+ store volatile <16 x i8> %1, <16 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8_align_8(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi16_align_8(<16 x i16>*, <16 x i16>) {
+entry:
+ store volatile <16 x i16> %1, <16 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi16_align_8(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %6, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi32_align_8(<16 x i32>*, <16 x i32>) {
+entry:
+ store volatile <16 x i32> %1, <16 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi32_align_8(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi64_align_8(<16 x i64>*, <16 x i64>) {
+entry:
+ store volatile <16 x i64> %1, <16 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi64_align_8(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %24, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8ptr_align_8(<16 x i8*>*, <16 x i8*>) {
+entry:
+ store volatile <16 x i8*> %1, <16 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8ptr_align_8(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xfloat_align_8(<16 x float>*, <16 x float>) {
+entry:
+ store volatile <16 x float> %1, <16 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xfloat_align_8(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %6, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %8, align 8
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %12, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xdouble_align_8(<16 x double>*, <16 x double>) {
+entry:
+ store volatile <16 x double> %1, <16 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xdouble_align_8(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %10, align 8
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %24, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8_align_8(<20 x i8>*, <20 x i8>) {
+entry:
+ store volatile <20 x i8> %1, <20 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8_align_8(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %4, align 8
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store volatile i8 %6, i8* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store volatile i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store volatile i8 %10, i8* %9, align 2
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store volatile i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi16_align_8(<20 x i16>*, <20 x i16>) {
+entry:
+ store volatile <20 x i16> %1, <20 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi16_align_8(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %5, align 8
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 2
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 4
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store volatile i16 %15, i16* %14, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi32_align_8(<20 x i32>*, <20 x i32>) {
+entry:
+ store volatile <20 x i32> %1, <20 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi32_align_8(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %5, <4 x i32>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi64_align_8(<20 x i64>*, <20 x i64>) {
+entry:
+ store volatile <20 x i64> %1, <20 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi64_align_8(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %24, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %26, align 8
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %9, <2 x i64>* %28, align 8
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %10, <2 x i64>* %30, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8ptr_align_8(<20 x i8*>*, <20 x i8*>) {
+entry:
+ store volatile <20 x i8*> %1, <20 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8ptr_align_8(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %5, <4 x i8*>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xfloat_align_8(<20 x float>*, <20 x float>) {
+entry:
+ store volatile <20 x float> %1, <20 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xfloat_align_8(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %7, align 8
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %9, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %11, align 8
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %13, align 8
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %5, <4 x float>* %15, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xdouble_align_8(<20 x double>*, <20 x double>) {
+entry:
+ store volatile <20 x double> %1, <20 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xdouble_align_8(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %12, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %14, align 8
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %16, align 8
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %18, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %20, align 8
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %22, align 8
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %24, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %26, align 8
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %9, <2 x double>* %28, align 8
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %10, <2 x double>* %30, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8_align_32(<2 x i8>*, <2 x i8>) {
+entry:
+ store volatile <2 x i8> %1, <2 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8_align_32(<2 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi16_align_32(<2 x i16>*, <2 x i16>) { ; test input: volatile <2 x i16> store with explicit align 32 (vector is 32 bits wide)
+entry:
+ store volatile <2 x i16> %1, <2 x i16>* %0, align 32 ; expectations below: value arrives as <8 x i16>; one volatile i16 store per element (align 32, then 2)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi16_align_32(<2 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi32_align_32(<2 x i32>*, <2 x i32>) { ; test input: volatile <2 x i32> store with explicit align 32 (vector is 64 bits wide)
+entry:
+ store volatile <2 x i32> %1, <2 x i32>* %0, align 32 ; expectations below: value arrives as <4 x i32>; one volatile i32 store per element (align 32, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi32_align_32(<2 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: store volatile i32 %3, i32* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: store volatile i32 %5, i32* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi64_align_32(<2 x i64>*, <2 x i64>) { ; test input: volatile <2 x i64> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <2 x i64> %1, <2 x i64>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi64_align_32(<2 x i64>*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xi8ptr_align_32(<2 x i8*>*, <2 x i8*>) { ; test input: volatile <2 x i8*> store with explicit align 32 (64 bits wide with the file's 32-bit pointers)
+entry:
+ store volatile <2 x i8*> %1, <2 x i8*>* %0, align 32 ; expectations below: value arrives as <4 x i8*>; one volatile i8* store per element (align 32, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xi8ptr_align_32(<2 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x i8*> %1, i32 0
+; CHECK-NEXT: store volatile i8* %3, i8** %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x i8*> %1, i32 1
+; CHECK-NEXT: store volatile i8* %5, i8** %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xfloat_align_32(<2 x float>*, <2 x float>) { ; test input: volatile <2 x float> store with explicit align 32 (vector is 64 bits wide)
+entry:
+ store volatile <2 x float> %1, <2 x float>* %0, align 32 ; expectations below: value arrives as <4 x float>; one volatile float store per element (align 32, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xfloat_align_32(<2 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: store volatile float %3, float* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: store volatile float %5, float* %4, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_2xdouble_align_32(<2 x double>*, <2 x double>) { ; test input: volatile <2 x double> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <2 x double> %1, <2 x double>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_2xdouble_align_32(<2 x double>*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8_align_32(<4 x i8>*, <4 x i8>) { ; test input: volatile <4 x i8> store with explicit align 32 (vector is 32 bits wide)
+entry:
+ store volatile <4 x i8> %1, <4 x i8>* %0, align 32 ; expectations below: value arrives as <16 x i8>; four volatile i8 stores with aligns 32, 1, 2, 1 (matching byte offsets 0..3)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8_align_32(<4 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi16_align_32(<4 x i16>*, <4 x i16>) { ; test input: volatile <4 x i16> store with explicit align 32 (vector is 64 bits wide)
+entry:
+ store volatile <4 x i16> %1, <4 x i16>* %0, align 32 ; expectations below: value arrives as <8 x i16>; four volatile i16 stores with aligns 32, 2, 4, 2 (byte offsets 0, 2, 4, 6)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi16_align_32(<4 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi32_align_32(<4 x i32>*, <4 x i32>) { ; test input: volatile <4 x i32> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <4 x i32> %1, <4 x i32>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi32_align_32(<4 x i32>*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi64_align_32(<4 x i64>*, <4 x i64>) { ; test input: volatile <4 x i64> store with explicit align 32 (vector is 256 bits wide)
+entry:
+ store volatile <4 x i64> %1, <4 x i64>* %0, align 32 ; expectations below: value arrives as two <2 x i64> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi64_align_32(<4 x i64>*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i64* %3 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xi8ptr_align_32(<4 x i8*>*, <4 x i8*>) { ; test input: volatile <4 x i8*> store with explicit align 32 (128 bits wide with the file's 32-bit pointers)
+entry:
+ store volatile <4 x i8*> %1, <4 x i8*>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xi8ptr_align_32(<4 x i8*>*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xfloat_align_32(<4 x float>*, <4 x float>) { ; test input: volatile <4 x float> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <4 x float> %1, <4 x float>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xfloat_align_32(<4 x float>*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_4xdouble_align_32(<4 x double>*, <4 x double>) { ; test input: volatile <4 x double> store with explicit align 32 (vector is 256 bits wide)
+entry:
+ store volatile <4 x double> %1, <4 x double>* %0, align 32 ; expectations below: value arrives as two <2 x double> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_4xdouble_align_32(<4 x double>*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast double* %3 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8_align_32(<6 x i8>*, <6 x i8>) { ; test input: volatile <6 x i8> store with explicit align 32 (vector is 48 bits wide)
+entry:
+ store volatile <6 x i8> %1, <6 x i8>* %0, align 32 ; expectations below: value arrives as <16 x i8>; six volatile i8 stores with aligns 32, 1, 2, 1, 4, 1 (byte offsets 0..5)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8_align_32(<6 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi16_align_32(<6 x i16>*, <6 x i16>) { ; test input: volatile <6 x i16> store with explicit align 32 (vector is 96 bits wide)
+entry:
+ store volatile <6 x i16> %1, <6 x i16>* %0, align 32 ; expectations below: value arrives as <8 x i16>; six volatile i16 stores with aligns 32, 2, 4, 2, 8, 2 (byte offsets 0, 2, ..., 10)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi16_align_32(<6 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: store volatile i16 %3, i16* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: store volatile i16 %5, i16* %4, align 2
+; CHECK-NEXT: %6 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: store volatile i16 %7, i16* %6, align 4
+; CHECK-NEXT: %8 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 8
+; CHECK-NEXT: %12 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi32_align_32(<6 x i32>*, <6 x i32>) { ; test input: volatile <6 x i32> store with explicit align 32 (vector is 192 bits wide, not a multiple of 128)
+entry:
+ store volatile <6 x i32> %1, <6 x i32>* %0, align 32 ; expectations below: two <4 x i32> arguments; first stored whole (align 32), elements 4 and 5 stored as volatile i32 scalars (align 16, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi32_align_32(<6 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: store volatile i32 %6, i32* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: store volatile i32 %8, i32* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi64_align_32(<6 x i64>*, <6 x i64>) { ; test input: volatile <6 x i64> store with explicit align 32 (vector is 384 bits wide)
+entry:
+ store volatile <6 x i64> %1, <6 x i64>* %0, align 32 ; expectations below: value arrives as three <2 x i64> arguments, each stored as a volatile vector (aligns 32, 16, 32)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi64_align_32(<6 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast i64* %6 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i64* %8 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xi8ptr_align_32(<6 x i8*>*, <6 x i8*>) { ; test input: volatile <6 x i8*> store with explicit align 32 (192 bits wide with the file's 32-bit pointers)
+entry:
+ store volatile <6 x i8*> %1, <6 x i8*>* %0, align 32 ; expectations below: two <4 x i8*> arguments; first stored whole (align 32), elements 4 and 5 stored as volatile i8* scalars (align 16, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xi8ptr_align_32(<6 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x i8*> %2, i32 0
+; CHECK-NEXT: store volatile i8* %6, i8** %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x i8*> %2, i32 1
+; CHECK-NEXT: store volatile i8* %8, i8** %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xfloat_align_32(<6 x float>*, <6 x float>) { ; test input: volatile <6 x float> store with explicit align 32 (vector is 192 bits wide, not a multiple of 128)
+entry:
+ store volatile <6 x float> %1, <6 x float>* %0, align 32 ; expectations below: two <4 x float> arguments; first stored whole (align 32), elements 4 and 5 stored as volatile float scalars (align 16, then 4)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xfloat_align_32(<6 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: store volatile float %6, float* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: store volatile float %8, float* %7, align 4
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_6xdouble_align_32(<6 x double>*, <6 x double>) { ; test input: volatile <6 x double> store with explicit align 32 (vector is 384 bits wide)
+entry:
+ store volatile <6 x double> %1, <6 x double>* %0, align 32 ; expectations below: value arrives as three <2 x double> arguments, each stored as a volatile vector (aligns 32, 16, 32)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_6xdouble_align_32(<6 x double>*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = bitcast double* %6 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast double* %8 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8_align_32(<8 x i8>*, <8 x i8>) { ; test input: volatile <8 x i8> store with explicit align 32 (vector is 64 bits wide)
+entry:
+ store volatile <8 x i8> %1, <8 x i8>* %0, align 32 ; expectations below: value arrives as <16 x i8>; eight volatile i8 stores with aligns 32, 1, 2, 1, 4, 1, 2, 1 (byte offsets 0..7)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8_align_32(<8 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi16_align_32(<8 x i16>*, <8 x i16>) { ; test input: volatile <8 x i16> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <8 x i16> %1, <8 x i16>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi16_align_32(<8 x i16>*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi32_align_32(<8 x i32>*, <8 x i32>) { ; test input: volatile <8 x i32> store with explicit align 32 (vector is 256 bits wide)
+entry:
+ store volatile <8 x i32> %1, <8 x i32>* %0, align 32 ; expectations below: value arrives as two <4 x i32> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi32_align_32(<8 x i32>*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i32* %3 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi64_align_32(<8 x i64>*, <8 x i64>) { ; test input: volatile <8 x i64> store with explicit align 32 (vector is 512 bits wide)
+entry:
+ store volatile <8 x i64> %1, <8 x i64>* %0, align 32 ; expectations below: value arrives as four <2 x i64> arguments, each stored as a volatile vector (aligns 32, 16, 32, 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi64_align_32(<8 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i64* %5 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xi8ptr_align_32(<8 x i8*>*, <8 x i8*>) { ; test input: volatile <8 x i8*> store with explicit align 32 (256 bits wide with the file's 32-bit pointers)
+entry:
+ store volatile <8 x i8*> %1, <8 x i8*>* %0, align 32 ; expectations below: value arrives as two <4 x i8*> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xi8ptr_align_32(<8 x i8*>*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8** %3 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xfloat_align_32(<8 x float>*, <8 x float>) { ; test input: volatile <8 x float> store with explicit align 32 (vector is 256 bits wide)
+entry:
+ store volatile <8 x float> %1, <8 x float>* %0, align 32 ; expectations below: value arrives as two <4 x float> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xfloat_align_32(<8 x float>*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast float* %3 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_8xdouble_align_32(<8 x double>*, <8 x double>) { ; test input: volatile <8 x double> store with explicit align 32 (vector is 512 bits wide)
+entry:
+ store volatile <8 x double> %1, <8 x double>* %0, align 32 ; expectations below: value arrives as four <2 x double> arguments, each stored as a volatile vector (aligns 32, 16, 32, 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_8xdouble_align_32(<8 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast double* %5 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8_align_32(<12 x i8>*, <12 x i8>) { ; test input: volatile <12 x i8> store with explicit align 32 (vector is 96 bits wide)
+entry:
+ store volatile <12 x i8> %1, <12 x i8>* %0, align 32 ; expectations below: value arrives as <16 x i8>; twelve volatile i8 stores with aligns 32, 1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1 (byte offsets 0..11)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8_align_32(<12 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: store volatile i8 %3, i8* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: store volatile i8 %5, i8* %4, align 1
+; CHECK-NEXT: %6 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %7 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: store volatile i8 %7, i8* %6, align 2
+; CHECK-NEXT: %8 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: store volatile i8 %9, i8* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %11 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: store volatile i8 %11, i8* %10, align 4
+; CHECK-NEXT: %12 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %13 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: store volatile i8 %13, i8* %12, align 1
+; CHECK-NEXT: %14 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: store volatile i8 %15, i8* %14, align 2
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %17 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: store volatile i8 %17, i8* %16, align 1
+; CHECK-NEXT: %18 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %19 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: store volatile i8 %19, i8* %18, align 8
+; CHECK-NEXT: %20 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: store volatile i8 %21, i8* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %23 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: store volatile i8 %23, i8* %22, align 2
+; CHECK-NEXT: %24 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %25 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: store volatile i8 %25, i8* %24, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi16_align_32(<12 x i16>*, <12 x i16>) { ; test input: volatile <12 x i16> store with explicit align 32 (vector is 192 bits wide, not a multiple of 128)
+entry:
+ store volatile <12 x i16> %1, <12 x i16>* %0, align 32 ; expectations below: two <8 x i16> arguments; first stored whole (align 32), elements 8..11 stored as volatile i16 scalars (aligns 16, 2, 4, 2)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi16_align_32(<12 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: store volatile i16 %6, i16* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: store volatile i16 %8, i16* %7, align 2
+; CHECK-NEXT: %9 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: store volatile i16 %10, i16* %9, align 4
+; CHECK-NEXT: %11 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %12 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: store volatile i16 %12, i16* %11, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi32_align_32(<12 x i32>*, <12 x i32>) { ; test input: volatile <12 x i32> store with explicit align 32 (vector is 384 bits wide)
+entry:
+ store volatile <12 x i32> %1, <12 x i32>* %0, align 32 ; expectations below: value arrives as three <4 x i32> arguments, each stored as a volatile vector (aligns 32, 16, 32)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi32_align_32(<12 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi64_align_32(<12 x i64>*, <12 x i64>) { ; test input: volatile <12 x i64> store with explicit align 32 (vector is 768 bits wide)
+entry:
+ store volatile <12 x i64> %1, <12 x i64>* %0, align 32 ; expectations below: value arrives as six <2 x i64> arguments, each stored as a volatile vector (aligns alternate 32, 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi64_align_32(<12 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %8, align 32
+; CHECK-NEXT: %9 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %10, align 16
+; CHECK-NEXT: %11 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %18, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xi8ptr_align_32(<12 x i8*>*, <12 x i8*>) { ; test input: volatile <12 x i8*> store with explicit align 32 (384 bits wide with the file's 32-bit pointers)
+entry:
+ store volatile <12 x i8*> %1, <12 x i8*>* %0, align 32 ; expectations below: value arrives as three <4 x i8*> arguments, each stored as a volatile vector (aligns 32, 16, 32)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xi8ptr_align_32(<12 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xfloat_align_32(<12 x float>*, <12 x float>) { ; test input: volatile <12 x float> store with explicit align 32 (vector is 384 bits wide)
+entry:
+ store volatile <12 x float> %1, <12 x float>* %0, align 32 ; expectations below: value arrives as three <4 x float> arguments, each stored as a volatile vector (aligns 32, 16, 32)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xfloat_align_32(<12 x float>*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %9, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_12xdouble_align_32(<12 x double>*, <12 x double>) { ; test input: volatile <12 x double> store with explicit align 32 (vector is 768 bits wide)
+entry:
+ store volatile <12 x double> %1, <12 x double>* %0, align 32 ; expectations below: value arrives as six <2 x double> arguments, each stored as a volatile vector (aligns alternate 32, 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_12xdouble_align_32(<12 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %8, align 32
+; CHECK-NEXT: %9 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %10, align 16
+; CHECK-NEXT: %11 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %18, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8_align_32(<16 x i8>*, <16 x i8>) { ; test input: volatile <16 x i8> store with explicit align 32 (vector is 128 bits wide)
+entry:
+ store volatile <16 x i8> %1, <16 x i8>* %0, align 32 ; expectations below: signature and store must come out unchanged, align 32 kept
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8_align_32(<16 x i8>*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi16_align_32(<16 x i16>*, <16 x i16>) { ; test input: volatile <16 x i16> store with explicit align 32 (vector is 256 bits wide)
+entry:
+ store volatile <16 x i16> %1, <16 x i16>* %0, align 32 ; expectations below: value arrives as two <8 x i16> arguments, each stored as a volatile vector (align 32, then 16)
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi16_align_32(<16 x i16>*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i16* %3 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %6 = bitcast i16* %5 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %6, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi32_align_32(<16 x i32>*, <16 x i32>) { ; Case: volatile store of a <16 x i32> value with align 32.
+entry:
+ store volatile <16 x i32> %1, <16 x i32>* %0, align 32 ; Expected: four <4 x i32> arguments stored volatile at elements 0, 4, 8, 12; alignment alternates 32, 16, 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi32_align_32(<16 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i32* %5 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i32* %9 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i32* %11 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi64_align_32(<16 x i64>*, <16 x i64>) { ; Case: volatile store of a <16 x i64> value with align 32.
+entry:
+ store volatile <16 x i64> %1, <16 x i64>* %0, align 32 ; Expected: eight <2 x i64> arguments stored volatile at even elements 0 through 14; alignment alternates 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi64_align_32(<16 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast i64* %9 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %12, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %14, align 32
+; CHECK-NEXT: %15 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %16, align 16
+; CHECK-NEXT: %17 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %18, align 32
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %20, align 16
+; CHECK-NEXT: %21 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %22, align 32
+; CHECK-NEXT: %23 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %24, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xi8ptr_align_32(<16 x i8*>*, <16 x i8*>) { ; Case: volatile store of a <16 x i8*> value with align 32 (the file's datalayout declares 32-bit pointers).
+entry:
+ store volatile <16 x i8*> %1, <16 x i8*>* %0, align 32 ; Expected: four <4 x i8*> arguments stored volatile at elements 0, 4, 8, 12; alignment alternates 32, 16, 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xi8ptr_align_32(<16 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast i8** %5 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast i8** %9 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast i8** %11 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xfloat_align_32(<16 x float>*, <16 x float>) { ; Case: volatile store of a <16 x float> value with align 32.
+entry:
+ store volatile <16 x float> %1, <16 x float>* %0, align 32 ; Expected: four <4 x float> arguments stored volatile at elements 0, 4, 8, 12; alignment alternates 32, 16, 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xfloat_align_32(<16 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %6 = bitcast float* %5 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %6, align 32
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %8, align 16
+; CHECK-NEXT: %9 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %10 = bitcast float* %9 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %12 = bitcast float* %11 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %12, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_16xdouble_align_32(<16 x double>*, <16 x double>) { ; Case: volatile store of a <16 x double> value with align 32.
+entry:
+ store volatile <16 x double> %1, <16 x double>* %0, align 32 ; Expected: eight <2 x double> arguments stored volatile at even elements 0 through 14; alignment alternates 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_16xdouble_align_32(<16 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %10 = bitcast double* %9 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %10, align 32
+; CHECK-NEXT: %11 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %12, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %14, align 32
+; CHECK-NEXT: %15 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %16, align 16
+; CHECK-NEXT: %17 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %18, align 32
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %20, align 16
+; CHECK-NEXT: %21 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %22, align 32
+; CHECK-NEXT: %23 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %24, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8_align_32(<20 x i8>*, <20 x i8>) { ; Case: volatile store of a <20 x i8> value with align 32; 20 is not a multiple of 16, so a partial tail remains.
+entry:
+ store volatile <20 x i8> %1, <20 x i8>* %0, align 32 ; Expected: first <16 x i8> argument stored whole at element 0 (align 32); elements 16..19 are extracted from lanes 0..3 of the second argument and stored as volatile scalars with align 16, 1, 2, 1.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8_align_32(<20 x i8>*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %4 = bitcast i8* %3 to <16 x i8>*
+; CHECK-NEXT: store volatile <16 x i8> %1, <16 x i8>* %4, align 32
+; CHECK-NEXT: %5 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %6 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: store volatile i8 %6, i8* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: store volatile i8 %8, i8* %7, align 1
+; CHECK-NEXT: %9 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: store volatile i8 %10, i8* %9, align 2
+; CHECK-NEXT: %11 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %12 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: store volatile i8 %12, i8* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi16_align_32(<20 x i16>*, <20 x i16>) { ; Case: volatile store of a <20 x i16> value with align 32; 20 is not a multiple of 8, so a partial tail remains.
+entry:
+ store volatile <20 x i16> %1, <20 x i16>* %0, align 32 ; Expected: two whole <8 x i16> stores at elements 0 and 8 (align 32, 16); elements 16..19 are extracted from lanes 0..3 of the third argument and stored as volatile scalars with align 32, 2, 4, 2.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi16_align_32(<20 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %1, <8 x i16>* %5, align 32
+; CHECK-NEXT: %6 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %7 = bitcast i16* %6 to <8 x i16>*
+; CHECK-NEXT: store volatile <8 x i16> %2, <8 x i16>* %7, align 16
+; CHECK-NEXT: %8 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: store volatile i16 %9, i16* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: store volatile i16 %11, i16* %10, align 2
+; CHECK-NEXT: %12 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %13 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: store volatile i16 %13, i16* %12, align 4
+; CHECK-NEXT: %14 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: store volatile i16 %15, i16* %14, align 2
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi32_align_32(<20 x i32>*, <20 x i32>) { ; Case: volatile store of a <20 x i32> value with align 32; 20 is a multiple of 4, so no scalar tail is expected.
+entry:
+ store volatile <20 x i32> %1, <20 x i32>* %0, align 32 ; Expected: five <4 x i32> arguments stored volatile at elements 0, 4, 8, 12, 16; alignment goes 32, 16, 32, 16, 32.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi32_align_32(<20 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i32* %6 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %1, <4 x i32>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i32* %8 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %2, <4 x i32>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %3, <4 x i32>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i32* %12 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %4, <4 x i32>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i32* %14 to <4 x i32>*
+; CHECK-NEXT: store volatile <4 x i32> %5, <4 x i32>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi64_align_32(<20 x i64>*, <20 x i64>) { ; Case: volatile store of a <20 x i64> value with align 32.
+entry:
+ store volatile <20 x i64> %1, <20 x i64>* %0, align 32 ; Expected: ten <2 x i64> arguments stored volatile at even elements 0 through 18; alignment alternates 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi64_align_32(<20 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast i64* %11 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %1, <2 x i64>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %2, <2 x i64>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast i64* %15 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %3, <2 x i64>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast i64* %17 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %4, <2 x i64>* %18, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %5, <2 x i64>* %20, align 32
+; CHECK-NEXT: %21 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast i64* %21 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %6, <2 x i64>* %22, align 16
+; CHECK-NEXT: %23 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast i64* %23 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %7, <2 x i64>* %24, align 32
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %8, <2 x i64>* %26, align 16
+; CHECK-NEXT: %27 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast i64* %27 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %9, <2 x i64>* %28, align 32
+; CHECK-NEXT: %29 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast i64* %29 to <2 x i64>*
+; CHECK-NEXT: store volatile <2 x i64> %10, <2 x i64>* %30, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xi8ptr_align_32(<20 x i8*>*, <20 x i8*>) { ; Case: volatile store of a <20 x i8*> value with align 32 (the file's datalayout declares 32-bit pointers).
+entry:
+ store volatile <20 x i8*> %1, <20 x i8*>* %0, align 32 ; Expected: five <4 x i8*> arguments stored volatile at elements 0, 4, 8, 12, 16; alignment goes 32, 16, 32, 16, 32.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xi8ptr_align_32(<20 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast i8** %6 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %1, <4 x i8*>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast i8** %8 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %2, <4 x i8*>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %3, <4 x i8*>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast i8** %12 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %4, <4 x i8*>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast i8** %14 to <4 x i8*>*
+; CHECK-NEXT: store volatile <4 x i8*> %5, <4 x i8*>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xfloat_align_32(<20 x float>*, <20 x float>) { ; Case: volatile store of a <20 x float> value with align 32; 20 is a multiple of 4, so no scalar tail is expected.
+entry:
+ store volatile <20 x float> %1, <20 x float>* %0, align 32 ; Expected: five <4 x float> arguments stored volatile at elements 0, 4, 8, 12, 16; alignment goes 32, 16, 32, 16, 32.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xfloat_align_32(<20 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %7 = bitcast float* %6 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %1, <4 x float>* %7, align 32
+; CHECK-NEXT: %8 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %9 = bitcast float* %8 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %2, <4 x float>* %9, align 16
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %3, <4 x float>* %11, align 32
+; CHECK-NEXT: %12 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %13 = bitcast float* %12 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %4, <4 x float>* %13, align 16
+; CHECK-NEXT: %14 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %15 = bitcast float* %14 to <4 x float>*
+; CHECK-NEXT: store volatile <4 x float> %5, <4 x float>* %15, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_store_to_20xdouble_align_32(<20 x double>*, <20 x double>) { ; Case: volatile store of a <20 x double> value with align 32.
+entry:
+ store volatile <20 x double> %1, <20 x double>* %0, align 32 ; Expected: ten <2 x double> arguments stored volatile at even elements 0 through 18; alignment alternates 32, 16.
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_store_to_20xdouble_align_32(<20 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %12 = bitcast double* %11 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %1, <2 x double>* %12, align 32
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %2, <2 x double>* %14, align 16
+; CHECK-NEXT: %15 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %16 = bitcast double* %15 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %3, <2 x double>* %16, align 32
+; CHECK-NEXT: %17 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %18 = bitcast double* %17 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %4, <2 x double>* %18, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %5, <2 x double>* %20, align 32
+; CHECK-NEXT: %21 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %22 = bitcast double* %21 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %6, <2 x double>* %22, align 16
+; CHECK-NEXT: %23 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %24 = bitcast double* %23 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %7, <2 x double>* %24, align 32
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %8, <2 x double>* %26, align 16
+; CHECK-NEXT: %27 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %28 = bitcast double* %27 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %9, <2 x double>* %28, align 32
+; CHECK-NEXT: %29 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %30 = bitcast double* %29 to <2 x double>*
+; CHECK-NEXT: store volatile <2 x double> %10, <2 x double>* %30, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
« no previous file with comments | « test/Transforms/NaCl/vector-canonicalization-shuffles.ll ('k') | tools/bugpoint/bugpoint.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698