Chromium Code Reviews

Unified Diff: test/Transforms/NaCl/vector-canonicalization-loads.ll

Issue 1423873002: PNaCl: Add a vector type legalization pass.
Base URL: https://chromium.googlesource.com/native_client/pnacl-llvm.git@master
Patch Set: Created 5 years, 2 months ago
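
For context, the CHECK lines in this test exercise the load-legalization half of the pass: a vector load whose type is not one of the 128-bit canonical types (<16 x i8>, <8 x i16>, <4 x i32>, <2 x i64>, <4 x float>, <2 x double>, <4 x i8*>) is rewritten either into per-element scalar loads reassembled with insertelement into the canonical type, or, for vectors wider than 128 bits, into canonical-width sub-loads through getelementptr plus bitcast. A minimal sketch of the per-element pattern follows; the value names are illustrative and do not appear in the test itself.

; Before the pass: a narrow vector load.
define i32 @example(<2 x i8>* %p) {
entry:
  %v = load <2 x i8>, <2 x i8>* %p
  ret i32 0
}

; After -pnacl-vector-canonicalization (as checked below), the load becomes
; scalar loads whose results are inserted into the canonical <16 x i8> type:
;   %a0 = getelementptr <2 x i8>, <2 x i8>* %p, i32 0, i32 0
;   %e0 = load i8, i8* %a0
;   %w0 = insertelement <16 x i8> undef, i8 %e0, i32 0
;   ...and likewise for element 1 at insert index 1.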
Index: test/Transforms/NaCl/vector-canonicalization-loads.ll
diff --git a/test/Transforms/NaCl/vector-canonicalization-loads.ll b/test/Transforms/NaCl/vector-canonicalization-loads.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f78f27ff0798566158de894f1505da5fe3531ede
--- /dev/null
+++ b/test/Transforms/NaCl/vector-canonicalization-loads.ll
@@ -0,0 +1,8422 @@
+; RUN: opt -S -pnacl-vector-canonicalization %s | FileCheck %s
+
+; Auto-generated tests for load instructions.
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128"
+
+define i32 @load_from_2xi8_unaligned(<2 x i8>*) {
+entry:
+ %1 = load <2 x i8>, <2 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8_unaligned(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi16_unaligned(<2 x i16>*) {
+entry:
+ %1 = load <2 x i16>, <2 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi16_unaligned(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi32_unaligned(<2 x i32>*) {
+entry:
+ %1 = load <2 x i32>, <2 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi32_unaligned(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i32, i32* %1
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i32, i32* %4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi64_unaligned(<2 x i64>*) {
+entry:
+ %1 = load <2 x i64>, <2 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi64_unaligned(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi8ptr_unaligned(<2 x i8*>*) {
+entry:
+ %1 = load <2 x i8*>, <2 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8ptr_unaligned(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8*, i8** %1
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8*, i8** %4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xfloat_unaligned(<2 x float>*) {
+entry:
+ %1 = load <2 x float>, <2 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xfloat_unaligned(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load float, float* %1
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load float, float* %4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xdouble_unaligned(<2 x double>*) {
+entry:
+ %1 = load <2 x double>, <2 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xdouble_unaligned(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x double>, <2 x double>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8_unaligned(<4 x i8>*) {
+entry:
+ %1 = load <4 x i8>, <4 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8_unaligned(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi16_unaligned(<4 x i16>*) {
+entry:
+ %1 = load <4 x i16>, <4 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi16_unaligned(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi32_unaligned(<4 x i32>*) {
+entry:
+ %1 = load <4 x i32>, <4 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi32_unaligned(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi64_unaligned(<4 x i64>*) {
+entry:
+ %1 = load <4 x i64>, <4 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi64_unaligned(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8ptr_unaligned(<4 x i8*>*) {
+entry:
+ %1 = load <4 x i8*>, <4 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8ptr_unaligned(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i8*>, <4 x i8*>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xfloat_unaligned(<4 x float>*) {
+entry:
+ %1 = load <4 x float>, <4 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xfloat_unaligned(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x float>, <4 x float>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xdouble_unaligned(<4 x double>*) {
+entry:
+ %1 = load <4 x double>, <4 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xdouble_unaligned(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8_unaligned(<6 x i8>*) {
+entry:
+ %1 = load <6 x i8>, <6 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8_unaligned(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi16_unaligned(<6 x i16>*) {
+entry:
+ %1 = load <6 x i16>, <6 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi16_unaligned(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i16, i16* %16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi32_unaligned(<6 x i32>*) {
+entry:
+ %1 = load <6 x i32>, <6 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi32_unaligned(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i32, i32* %4
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i32, i32* %7
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi64_unaligned(<6 x i64>*) {
+entry:
+ %1 = load <6 x i64>, <6 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi64_unaligned(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8ptr_unaligned(<6 x i8*>*) {
+entry:
+ %1 = load <6 x i8*>, <6 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8ptr_unaligned(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i8*, i8** %4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i8*, i8** %7
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xfloat_unaligned(<6 x float>*) {
+entry:
+ %1 = load <6 x float>, <6 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xfloat_unaligned(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load float, float* %4
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load float, float* %7
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xdouble_unaligned(<6 x double>*) {
+entry:
+ %1 = load <6 x double>, <6 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xdouble_unaligned(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8_unaligned(<8 x i8>*) {
+entry:
+ %1 = load <8 x i8>, <8 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8_unaligned(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi16_unaligned(<8 x i16>*) {
+entry:
+ %1 = load <8 x i16>, <8 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi16_unaligned(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi32_unaligned(<8 x i32>*) {
+entry:
+ %1 = load <8 x i32>, <8 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi32_unaligned(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi64_unaligned(<8 x i64>*) {
+entry:
+ %1 = load <8 x i64>, <8 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi64_unaligned(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8ptr_unaligned(<8 x i8*>*) {
+entry:
+ %1 = load <8 x i8*>, <8 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8ptr_unaligned(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xfloat_unaligned(<8 x float>*) {
+entry:
+ %1 = load <8 x float>, <8 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xfloat_unaligned(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xdouble_unaligned(<8 x double>*) {
+entry:
+ %1 = load <8 x double>, <8 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xdouble_unaligned(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8_unaligned(<12 x i8>*) {
+entry:
+ %1 = load <12 x i8>, <12 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8_unaligned(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load i8, i8* %25
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load i8, i8* %28
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load i8, i8* %31
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load i8, i8* %34
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi16_unaligned(<12 x i16>*) {
+entry:
+ %1 = load <12 x i16>, <12 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi16_unaligned(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi32_unaligned(<12 x i32>*) {
+entry:
+ %1 = load <12 x i32>, <12 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi32_unaligned(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi64_unaligned(<12 x i64>*) {
+entry:
+ %1 = load <12 x i64>, <12 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi64_unaligned(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8ptr_unaligned(<12 x i8*>*) {
+entry:
+ %1 = load <12 x i8*>, <12 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8ptr_unaligned(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xfloat_unaligned(<12 x float>*) {
+entry:
+ %1 = load <12 x float>, <12 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xfloat_unaligned(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xdouble_unaligned(<12 x double>*) {
+entry:
+ %1 = load <12 x double>, <12 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xdouble_unaligned(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8_unaligned(<16 x i8>*) {
+entry:
+ %1 = load <16 x i8>, <16 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8_unaligned(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi16_unaligned(<16 x i16>*) {
+entry:
+ %1 = load <16 x i16>, <16 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi16_unaligned(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi32_unaligned(<16 x i32>*) {
+entry:
+ %1 = load <16 x i32>, <16 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi32_unaligned(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi64_unaligned(<16 x i64>*) {
+entry:
+ %1 = load <16 x i64>, <16 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi64_unaligned(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8ptr_unaligned(<16 x i8*>*) {
+entry:
+ %1 = load <16 x i8*>, <16 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8ptr_unaligned(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xfloat_unaligned(<16 x float>*) {
+entry:
+ %1 = load <16 x float>, <16 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xfloat_unaligned(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xdouble_unaligned(<16 x double>*) {
+entry:
+ %1 = load <16 x double>, <16 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xdouble_unaligned(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8_unaligned(<20 x i8>*) {
+entry:
+ %1 = load <20 x i8>, <20 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8_unaligned(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load <16 x i8>, <16 x i8>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi16_unaligned(<20 x i16>*) {
+entry:
+ %1 = load <20 x i16>, <20 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi16_unaligned(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load i16, i16* %16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi32_unaligned(<20 x i32>*) {
+entry:
+ %1 = load <20 x i32>, <20 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi32_unaligned(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi64_unaligned(<20 x i64>*) {
+entry:
+ %1 = load <20 x i64>, <20 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi64_unaligned(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %26
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %29
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8ptr_unaligned(<20 x i8*>*) {
+entry:
+ %1 = load <20 x i8*>, <20 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8ptr_unaligned(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xfloat_unaligned(<20 x float>*) {
+entry:
+ %1 = load <20 x float>, <20 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xfloat_unaligned(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xdouble_unaligned(<20 x double>*) {
+entry:
+ %1 = load <20 x double>, <20 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xdouble_unaligned(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %26
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %29
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi8_align_1(<2 x i8>*) {
+entry:
+ %1 = load <2 x i8>, <2 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8_align_1(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi16_align_1(<2 x i16>*) {
+entry:
+ %1 = load <2 x i16>, <2 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi16_align_1(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi32_align_1(<2 x i32>*) {
+entry:
+ %1 = load <2 x i32>, <2 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi32_align_1(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i32, i32* %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i32, i32* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi64_align_1(<2 x i64>*) {
+entry:
+ %1 = load <2 x i64>, <2 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi64_align_1(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi8ptr_align_1(<2 x i8*>*) {
+entry:
+ %1 = load <2 x i8*>, <2 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8ptr_align_1(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8*, i8** %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xfloat_align_1(<2 x float>*) {
+entry:
+ %1 = load <2 x float>, <2 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xfloat_align_1(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load float, float* %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load float, float* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xdouble_align_1(<2 x double>*) {
+entry:
+ %1 = load <2 x double>, <2 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xdouble_align_1(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x double>, <2 x double>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8_align_1(<4 x i8>*) {
+entry:
+ %1 = load <4 x i8>, <4 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8_align_1(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi16_align_1(<4 x i16>*) {
+entry:
+ %1 = load <4 x i16>, <4 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi16_align_1(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi32_align_1(<4 x i32>*) {
+entry:
+ %1 = load <4 x i32>, <4 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi32_align_1(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi64_align_1(<4 x i64>*) {
+entry:
+ %1 = load <4 x i64>, <4 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi64_align_1(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8ptr_align_1(<4 x i8*>*) {
+entry:
+ %1 = load <4 x i8*>, <4 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8ptr_align_1(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i8*>, <4 x i8*>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xfloat_align_1(<4 x float>*) {
+entry:
+ %1 = load <4 x float>, <4 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xfloat_align_1(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x float>, <4 x float>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xdouble_align_1(<4 x double>*) {
+entry:
+ %1 = load <4 x double>, <4 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xdouble_align_1(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8_align_1(<6 x i8>*) {
+entry:
+ %1 = load <6 x i8>, <6 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8_align_1(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi16_align_1(<6 x i16>*) {
+entry:
+ %1 = load <6 x i16>, <6 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi16_align_1(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i16, i16* %16, align 1
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi32_align_1(<6 x i32>*) {
+entry:
+ %1 = load <6 x i32>, <6 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi32_align_1(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i32, i32* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i32, i32* %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi64_align_1(<6 x i64>*) {
+entry:
+ %1 = load <6 x i64>, <6 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi64_align_1(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8ptr_align_1(<6 x i8*>*) {
+entry:
+ %1 = load <6 x i8*>, <6 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8ptr_align_1(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i8*, i8** %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xfloat_align_1(<6 x float>*) {
+entry:
+ %1 = load <6 x float>, <6 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xfloat_align_1(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load float, float* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load float, float* %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xdouble_align_1(<6 x double>*) {
+entry:
+ %1 = load <6 x double>, <6 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xdouble_align_1(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8_align_1(<8 x i8>*) {
+entry:
+ %1 = load <8 x i8>, <8 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8_align_1(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 1
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi16_align_1(<8 x i16>*) {
+entry:
+ %1 = load <8 x i16>, <8 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi16_align_1(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi32_align_1(<8 x i32>*) {
+entry:
+ %1 = load <8 x i32>, <8 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi32_align_1(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi64_align_1(<8 x i64>*) {
+entry:
+ %1 = load <8 x i64>, <8 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi64_align_1(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8ptr_align_1(<8 x i8*>*) {
+entry:
+ %1 = load <8 x i8*>, <8 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8ptr_align_1(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xfloat_align_1(<8 x float>*) {
+entry:
+ %1 = load <8 x float>, <8 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xfloat_align_1(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xdouble_align_1(<8 x double>*) {
+entry:
+ %1 = load <8 x double>, <8 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xdouble_align_1(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8_align_1(<12 x i8>*) {
+entry:
+ %1 = load <12 x i8>, <12 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8_align_1(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 1
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load i8, i8* %25, align 1
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load i8, i8* %31, align 1
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi16_align_1(<12 x i16>*) {
+entry:
+ %1 = load <12 x i16>, <12 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi16_align_1(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi32_align_1(<12 x i32>*) {
+entry:
+ %1 = load <12 x i32>, <12 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi32_align_1(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi64_align_1(<12 x i64>*) {
+entry:
+ %1 = load <12 x i64>, <12 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi64_align_1(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8ptr_align_1(<12 x i8*>*) {
+entry:
+ %1 = load <12 x i8*>, <12 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8ptr_align_1(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xfloat_align_1(<12 x float>*) {
+entry:
+ %1 = load <12 x float>, <12 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xfloat_align_1(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xdouble_align_1(<12 x double>*) {
+entry:
+ %1 = load <12 x double>, <12 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xdouble_align_1(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8_align_1(<16 x i8>*) {
+entry:
+ %1 = load <16 x i8>, <16 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8_align_1(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi16_align_1(<16 x i16>*) {
+entry:
+ %1 = load <16 x i16>, <16 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi16_align_1(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi32_align_1(<16 x i32>*) {
+entry:
+ %1 = load <16 x i32>, <16 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi32_align_1(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi64_align_1(<16 x i64>*) {
+entry:
+ %1 = load <16 x i64>, <16 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi64_align_1(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8ptr_align_1(<16 x i8*>*) {
+entry:
+ %1 = load <16 x i8*>, <16 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8ptr_align_1(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xfloat_align_1(<16 x float>*) {
+entry:
+ %1 = load <16 x float>, <16 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xfloat_align_1(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xdouble_align_1(<16 x double>*) {
+entry:
+ %1 = load <16 x double>, <16 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xdouble_align_1(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8_align_1(<20 x i8>*) {
+entry:
+ %1 = load <20 x i8>, <20 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8_align_1(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load <16 x i8>, <16 x i8>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi16_align_1(<20 x i16>*) {
+entry:
+ %1 = load <20 x i16>, <20 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi16_align_1(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load i16, i16* %16, align 1
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi32_align_1(<20 x i32>*) {
+entry:
+ %1 = load <20 x i32>, <20 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi32_align_1(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi64_align_1(<20 x i64>*) {
+entry:
+ %1 = load <20 x i64>, <20 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi64_align_1(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %26, align 1
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %29, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8ptr_align_1(<20 x i8*>*) {
+entry:
+ %1 = load <20 x i8*>, <20 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8ptr_align_1(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xfloat_align_1(<20 x float>*) {
+entry:
+ %1 = load <20 x float>, <20 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xfloat_align_1(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xdouble_align_1(<20 x double>*) {
+entry:
+ %1 = load <20 x double>, <20 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xdouble_align_1(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %26, align 1
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %29, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
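+; The tests below repeat the same vector shapes with the source pointer
+; aligned to 8 bytes. The CHECK lines expect chunked partial loads to keep
+; that alignment, while per-element loads expect an alignment derived from
+; the base alignment and each element's byte offset.
+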
+define i32 @load_from_2xi8_align_8(<2 x i8>*) {
+entry:
+ %1 = load <2 x i8>, <2 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8_align_8(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi16_align_8(<2 x i16>*) {
+entry:
+ %1 = load <2 x i16>, <2 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi16_align_8(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi32_align_8(<2 x i32>*) {
+entry:
+ %1 = load <2 x i32>, <2 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi32_align_8(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i32, i32* %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i32, i32* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi64_align_8(<2 x i64>*) {
+entry:
+ %1 = load <2 x i64>, <2 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi64_align_8(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi8ptr_align_8(<2 x i8*>*) {
+entry:
+ %1 = load <2 x i8*>, <2 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8ptr_align_8(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8*, i8** %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xfloat_align_8(<2 x float>*) {
+entry:
+ %1 = load <2 x float>, <2 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xfloat_align_8(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load float, float* %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load float, float* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xdouble_align_8(<2 x double>*) {
+entry:
+ %1 = load <2 x double>, <2 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xdouble_align_8(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x double>, <2 x double>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8_align_8(<4 x i8>*) {
+entry:
+ %1 = load <4 x i8>, <4 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8_align_8(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi16_align_8(<4 x i16>*) {
+entry:
+ %1 = load <4 x i16>, <4 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi16_align_8(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi32_align_8(<4 x i32>*) {
+entry:
+ %1 = load <4 x i32>, <4 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi32_align_8(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi64_align_8(<4 x i64>*) {
+entry:
+ %1 = load <4 x i64>, <4 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi64_align_8(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8ptr_align_8(<4 x i8*>*) {
+entry:
+ %1 = load <4 x i8*>, <4 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8ptr_align_8(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i8*>, <4 x i8*>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xfloat_align_8(<4 x float>*) {
+entry:
+ %1 = load <4 x float>, <4 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xfloat_align_8(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x float>, <4 x float>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xdouble_align_8(<4 x double>*) {
+entry:
+ %1 = load <4 x double>, <4 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xdouble_align_8(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8_align_8(<6 x i8>*) {
+entry:
+ %1 = load <6 x i8>, <6 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8_align_8(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi16_align_8(<6 x i16>*) {
+entry:
+ %1 = load <6 x i16>, <6 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi16_align_8(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i16, i16* %13, align 8
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi32_align_8(<6 x i32>*) {
+entry:
+ %1 = load <6 x i32>, <6 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi32_align_8(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i32, i32* %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i32, i32* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi64_align_8(<6 x i64>*) {
+entry:
+ %1 = load <6 x i64>, <6 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi64_align_8(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8ptr_align_8(<6 x i8*>*) {
+entry:
+ %1 = load <6 x i8*>, <6 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8ptr_align_8(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i8*, i8** %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xfloat_align_8(<6 x float>*) {
+entry:
+ %1 = load <6 x float>, <6 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xfloat_align_8(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load float, float* %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load float, float* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xdouble_align_8(<6 x double>*) {
+entry:
+ %1 = load <6 x double>, <6 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xdouble_align_8(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8_align_8(<8 x i8>*) {
+entry:
+ %1 = load <8 x i8>, <8 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8_align_8(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi16_align_8(<8 x i16>*) {
+entry:
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi16_align_8(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi32_align_8(<8 x i32>*) {
+entry:
+ %1 = load <8 x i32>, <8 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi32_align_8(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi64_align_8(<8 x i64>*) {
+entry:
+ %1 = load <8 x i64>, <8 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi64_align_8(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8ptr_align_8(<8 x i8*>*) {
+entry:
+ %1 = load <8 x i8*>, <8 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8ptr_align_8(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xfloat_align_8(<8 x float>*) {
+entry:
+ %1 = load <8 x float>, <8 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xfloat_align_8(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xdouble_align_8(<8 x double>*) {
+entry:
+ %1 = load <8 x double>, <8 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xdouble_align_8(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8_align_8(<12 x i8>*) {
+entry:
+ %1 = load <12 x i8>, <12 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8_align_8(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load i8, i8* %25, align 8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load i8, i8* %31, align 2
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi16_align_8(<12 x i16>*) {
+entry:
+ %1 = load <12 x i16>, <12 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi16_align_8(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load i16, i16* %4, align 8
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load i16, i16* %7, align 2
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load i16, i16* %10, align 4
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load i16, i16* %13, align 2
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi32_align_8(<12 x i32>*) {
+entry:
+ %1 = load <12 x i32>, <12 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi32_align_8(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi64_align_8(<12 x i64>*) {
+entry:
+ %1 = load <12 x i64>, <12 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi64_align_8(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8ptr_align_8(<12 x i8*>*) {
+entry:
+ %1 = load <12 x i8*>, <12 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8ptr_align_8(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xfloat_align_8(<12 x float>*) {
+entry:
+ %1 = load <12 x float>, <12 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xfloat_align_8(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xdouble_align_8(<12 x double>*) {
+entry:
+ %1 = load <12 x double>, <12 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xdouble_align_8(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8_align_8(<16 x i8>*) {
+entry:
+ %1 = load <16 x i8>, <16 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8_align_8(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi16_align_8(<16 x i16>*) {
+entry:
+ %1 = load <16 x i16>, <16 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi16_align_8(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi32_align_8(<16 x i32>*) {
+entry:
+ %1 = load <16 x i32>, <16 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi32_align_8(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi64_align_8(<16 x i64>*) {
+entry:
+ %1 = load <16 x i64>, <16 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi64_align_8(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8ptr_align_8(<16 x i8*>*) {
+entry:
+ %1 = load <16 x i8*>, <16 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8ptr_align_8(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xfloat_align_8(<16 x float>*) {
+entry:
+ %1 = load <16 x float>, <16 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xfloat_align_8(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xdouble_align_8(<16 x double>*) {
+entry:
+ %1 = load <16 x double>, <16 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xdouble_align_8(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8_align_8(<20 x i8>*) {
+entry:
+ %1 = load <20 x i8>, <20 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8_align_8(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load <16 x i8>, <16 x i8>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load i8, i8* %4, align 8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load i8, i8* %10, align 2
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi16_align_8(<20 x i16>*) {
+entry:
+ %1 = load <20 x i16>, <20 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi16_align_8(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load i16, i16* %7, align 8
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load i16, i16* %13, align 4
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi32_align_8(<20 x i32>*) {
+entry:
+ %1 = load <20 x i32>, <20 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi32_align_8(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi64_align_8(<20 x i64>*) {
+entry:
+ %1 = load <20 x i64>, <20 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi64_align_8(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %26, align 8
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %29, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8ptr_align_8(<20 x i8*>*) {
+entry:
+ %1 = load <20 x i8*>, <20 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8ptr_align_8(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xfloat_align_8(<20 x float>*) {
+entry:
+ %1 = load <20 x float>, <20 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xfloat_align_8(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xdouble_align_8(<20 x double>*) {
+entry:
+ %1 = load <20 x double>, <20 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xdouble_align_8(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %26, align 8
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %29, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
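+; The align-32 cases below mirror the align-8 cases above; the only
+; difference is the alignment carried by each piece, which the checks show
+; to be the largest power of two dividing the piece's byte offset, capped
+; by the original 32-byte alignment.
+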
+define i32 @load_from_2xi8_align_32(<2 x i8>*) {
+entry:
+ %1 = load <2 x i8>, <2 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8_align_32(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi16_align_32(<2 x i16>*) {
+entry:
+ %1 = load <2 x i16>, <2 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi16_align_32(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi32_align_32(<2 x i32>*) {
+entry:
+ %1 = load <2 x i32>, <2 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi32_align_32(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i32, i32* %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i32, i32* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi64_align_32(<2 x i64>*) {
+entry:
+ %1 = load <2 x i64>, <2 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi64_align_32(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x i64>, <2 x i64>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xi8ptr_align_32(<2 x i8*>*) {
+entry:
+ %1 = load <2 x i8*>, <2 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xi8ptr_align_32(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8*, i8** %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xfloat_align_32(<2 x float>*) {
+entry:
+ %1 = load <2 x float>, <2 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xfloat_align_32(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load float, float* %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load float, float* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_2xdouble_align_32(<2 x double>*) {
+entry:
+ %1 = load <2 x double>, <2 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_2xdouble_align_32(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <2 x double>, <2 x double>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8_align_32(<4 x i8>*) {
+entry:
+ %1 = load <4 x i8>, <4 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8_align_32(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi16_align_32(<4 x i16>*) {
+entry:
+ %1 = load <4 x i16>, <4 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi16_align_32(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi32_align_32(<4 x i32>*) {
+entry:
+ %1 = load <4 x i32>, <4 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi32_align_32(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i32>, <4 x i32>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi64_align_32(<4 x i64>*) {
+entry:
+ %1 = load <4 x i64>, <4 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi64_align_32(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xi8ptr_align_32(<4 x i8*>*) {
+entry:
+ %1 = load <4 x i8*>, <4 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xi8ptr_align_32(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x i8*>, <4 x i8*>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xfloat_align_32(<4 x float>*) {
+entry:
+ %1 = load <4 x float>, <4 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xfloat_align_32(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <4 x float>, <4 x float>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_4xdouble_align_32(<4 x double>*) {
+entry:
+ %1 = load <4 x double>, <4 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_4xdouble_align_32(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8_align_32(<6 x i8>*) {
+entry:
+ %1 = load <6 x i8>, <6 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8_align_32(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi16_align_32(<6 x i16>*) {
+entry:
+ %1 = load <6 x i16>, <6 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi16_align_32(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i16, i16* %13, align 8
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi32_align_32(<6 x i32>*) {
+entry:
+ %1 = load <6 x i32>, <6 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi32_align_32(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i32, i32* %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i32, i32* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi64_align_32(<6 x i64>*) {
+entry:
+ %1 = load <6 x i64>, <6 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi64_align_32(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xi8ptr_align_32(<6 x i8*>*) {
+entry:
+ %1 = load <6 x i8*>, <6 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xi8ptr_align_32(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load i8*, i8** %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load i8*, i8** %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xfloat_align_32(<6 x float>*) {
+entry:
+ %1 = load <6 x float>, <6 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xfloat_align_32(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load float, float* %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load float, float* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_6xdouble_align_32(<6 x double>*) {
+entry:
+ %1 = load <6 x double>, <6 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_6xdouble_align_32(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8_align_32(<8 x i8>*) {
+entry:
+ %1 = load <8 x i8>, <8 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8_align_32(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi16_align_32(<8 x i16>*) {
+entry:
+ %1 = load <8 x i16>, <8 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi16_align_32(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <8 x i16>, <8 x i16>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi32_align_32(<8 x i32>*) {
+entry:
+ %1 = load <8 x i32>, <8 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi32_align_32(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi64_align_32(<8 x i64>*) {
+entry:
+ %1 = load <8 x i64>, <8 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi64_align_32(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xi8ptr_align_32(<8 x i8*>*) {
+entry:
+ %1 = load <8 x i8*>, <8 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xi8ptr_align_32(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xfloat_align_32(<8 x float>*) {
+entry:
+ %1 = load <8 x float>, <8 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xfloat_align_32(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_8xdouble_align_32(<8 x double>*) {
+entry:
+ %1 = load <8 x double>, <8 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_8xdouble_align_32(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8_align_32(<12 x i8>*) {
+entry:
+ %1 = load <12 x i8>, <12 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8_align_32(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load i8, i8* %25, align 8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load i8, i8* %31, align 2
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi16_align_32(<12 x i16>*) {
+entry:
+ %1 = load <12 x i16>, <12 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi16_align_32(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load i16, i16* %4, align 16
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load i16, i16* %7, align 2
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load i16, i16* %10, align 4
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load i16, i16* %13, align 2
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi32_align_32(<12 x i32>*) {
+entry:
+ %1 = load <12 x i32>, <12 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi32_align_32(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi64_align_32(<12 x i64>*) {
+entry:
+ %1 = load <12 x i64>, <12 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi64_align_32(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xi8ptr_align_32(<12 x i8*>*) {
+entry:
+ %1 = load <12 x i8*>, <12 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xi8ptr_align_32(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xfloat_align_32(<12 x float>*) {
+entry:
+ %1 = load <12 x float>, <12 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xfloat_align_32(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_12xdouble_align_32(<12 x double>*) {
+entry:
+ %1 = load <12 x double>, <12 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_12xdouble_align_32(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
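+; The 16-element, align-32 tests below: a <16 x i8> load already fits a single
+; 128-bit register and is expected to be left untouched, while wider element
+; types are expected to split into 128-bit chunks whose alignment alternates
+; between 32 (at 32-byte offsets) and 16 (at 16-byte offsets).
+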
+define i32 @load_from_16xi8_align_32(<16 x i8>*) {
+entry:
+ %1 = load <16 x i8>, <16 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8_align_32(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load <16 x i8>, <16 x i8>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi16_align_32(<16 x i16>*) {
+entry:
+ %1 = load <16 x i16>, <16 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi16_align_32(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi32_align_32(<16 x i32>*) {
+entry:
+ %1 = load <16 x i32>, <16 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi32_align_32(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi64_align_32(<16 x i64>*) {
+entry:
+ %1 = load <16 x i64>, <16 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi64_align_32(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xi8ptr_align_32(<16 x i8*>*) {
+entry:
+ %1 = load <16 x i8*>, <16 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xi8ptr_align_32(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xfloat_align_32(<16 x float>*) {
+entry:
+ %1 = load <16 x float>, <16 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xfloat_align_32(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_16xdouble_align_32(<16 x double>*) {
+entry:
+ %1 = load <16 x double>, <16 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_16xdouble_align_32(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
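+; The 20-element, align-32 tests below: the vector splits into full 128-bit
+; chunks; for i8 and i16 the 4-element tail that does not fill a chunk is
+; expected to be loaded element-wise, each scalar load keeping the largest
+; alignment implied by its byte offset from the align-32 base.
+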
+define i32 @load_from_20xi8_align_32(<20 x i8>*) {
+entry:
+ %1 = load <20 x i8>, <20 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8_align_32(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load <16 x i8>, <16 x i8>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load i8, i8* %4, align 16
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load i8, i8* %10, align 2
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi16_align_32(<20 x i16>*) {
+entry:
+ %1 = load <20 x i16>, <20 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi16_align_32(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load <8 x i16>, <8 x i16>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load i16, i16* %7, align 32
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load i16, i16* %13, align 4
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi32_align_32(<20 x i32>*) {
+entry:
+ %1 = load <20 x i32>, <20 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi32_align_32(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi64_align_32(<20 x i64>*) {
+entry:
+ %1 = load <20 x i64>, <20 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi64_align_32(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %26, align 32
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %29, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xi8ptr_align_32(<20 x i8*>*) {
+entry:
+ %1 = load <20 x i8*>, <20 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xi8ptr_align_32(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xfloat_align_32(<20 x float>*) {
+entry:
+ %1 = load <20 x float>, <20 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xfloat_align_32(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @load_from_20xdouble_align_32(<20 x double>*) {
+entry:
+ %1 = load <20 x double>, <20 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @load_from_20xdouble_align_32(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %26, align 32
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %29, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
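+; The volatile tests below expect the same splitting as their non-volatile
+; counterparts, with every resulting scalar or sub-vector load carrying the
+; volatile flag.
+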
+define i32 @volatile_load_from_2xi8_unaligned(<2 x i8>*) {
+entry:
+ %1 = load volatile <2 x i8>, <2 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8_unaligned(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi16_unaligned(<2 x i16>*) {
+entry:
+ %1 = load volatile <2 x i16>, <2 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi16_unaligned(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi32_unaligned(<2 x i32>*) {
+entry:
+ %1 = load volatile <2 x i32>, <2 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi32_unaligned(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i32, i32* %1
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i32, i32* %4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi64_unaligned(<2 x i64>*) {
+entry:
+ %1 = load volatile <2 x i64>, <2 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi64_unaligned(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x i64>, <2 x i64>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi8ptr_unaligned(<2 x i8*>*) {
+entry:
+ %1 = load volatile <2 x i8*>, <2 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8ptr_unaligned(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8*, i8** %1
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xfloat_unaligned(<2 x float>*) {
+entry:
+ %1 = load volatile <2 x float>, <2 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xfloat_unaligned(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile float, float* %1
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile float, float* %4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xdouble_unaligned(<2 x double>*) {
+entry:
+ %1 = load volatile <2 x double>, <2 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xdouble_unaligned(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x double>, <2 x double>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8_unaligned(<4 x i8>*) {
+entry:
+ %1 = load volatile <4 x i8>, <4 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8_unaligned(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi16_unaligned(<4 x i16>*) {
+entry:
+ %1 = load volatile <4 x i16>, <4 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi16_unaligned(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi32_unaligned(<4 x i32>*) {
+entry:
+ %1 = load volatile <4 x i32>, <4 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi32_unaligned(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i32>, <4 x i32>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi64_unaligned(<4 x i64>*) {
+entry:
+ %1 = load volatile <4 x i64>, <4 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi64_unaligned(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8ptr_unaligned(<4 x i8*>*) {
+entry:
+ %1 = load volatile <4 x i8*>, <4 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8ptr_unaligned(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i8*>, <4 x i8*>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xfloat_unaligned(<4 x float>*) {
+entry:
+ %1 = load volatile <4 x float>, <4 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xfloat_unaligned(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x float>, <4 x float>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xdouble_unaligned(<4 x double>*) {
+entry:
+ %1 = load volatile <4 x double>, <4 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xdouble_unaligned(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8_unaligned(<6 x i8>*) {
+entry:
+ %1 = load volatile <6 x i8>, <6 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8_unaligned(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi16_unaligned(<6 x i16>*) {
+entry:
+ %1 = load volatile <6 x i16>, <6 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi16_unaligned(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i16, i16* %16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi32_unaligned(<6 x i32>*) {
+entry:
+ %1 = load volatile <6 x i32>, <6 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi32_unaligned(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i32, i32* %4
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i32, i32* %7
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi64_unaligned(<6 x i64>*) {
+entry:
+ %1 = load volatile <6 x i64>, <6 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi64_unaligned(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8ptr_unaligned(<6 x i8*>*) {
+entry:
+ %1 = load volatile <6 x i8*>, <6 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8ptr_unaligned(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i8*, i8** %7
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xfloat_unaligned(<6 x float>*) {
+entry:
+ %1 = load volatile <6 x float>, <6 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xfloat_unaligned(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile float, float* %4
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile float, float* %7
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xdouble_unaligned(<6 x double>*) {
+entry:
+ %1 = load volatile <6 x double>, <6 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xdouble_unaligned(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8_unaligned(<8 x i8>*) {
+entry:
+ %1 = load volatile <8 x i8>, <8 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8_unaligned(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi16_unaligned(<8 x i16>*) {
+entry:
+ %1 = load volatile <8 x i16>, <8 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi16_unaligned(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <8 x i16>, <8 x i16>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi32_unaligned(<8 x i32>*) {
+entry:
+ %1 = load volatile <8 x i32>, <8 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi32_unaligned(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi64_unaligned(<8 x i64>*) {
+entry:
+ %1 = load volatile <8 x i64>, <8 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi64_unaligned(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8ptr_unaligned(<8 x i8*>*) {
+entry:
+ %1 = load volatile <8 x i8*>, <8 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8ptr_unaligned(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xfloat_unaligned(<8 x float>*) {
+entry:
+ %1 = load volatile <8 x float>, <8 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xfloat_unaligned(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xdouble_unaligned(<8 x double>*) {
+entry:
+ %1 = load volatile <8 x double>, <8 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xdouble_unaligned(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8_unaligned(<12 x i8>*) {
+entry:
+ %1 = load volatile <12 x i8>, <12 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8_unaligned(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load volatile i8, i8* %25
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load volatile i8, i8* %28
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load volatile i8, i8* %31
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load volatile i8, i8* %34
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi16_unaligned(<12 x i16>*) {
+entry:
+ %1 = load volatile <12 x i16>, <12 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi16_unaligned(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load volatile i16, i16* %4
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load volatile i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load volatile i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load volatile i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi32_unaligned(<12 x i32>*) {
+entry:
+ %1 = load volatile <12 x i32>, <12 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi32_unaligned(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi64_unaligned(<12 x i64>*) {
+entry:
+ %1 = load volatile <12 x i64>, <12 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi64_unaligned(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8ptr_unaligned(<12 x i8*>*) {
+entry:
+ %1 = load volatile <12 x i8*>, <12 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8ptr_unaligned(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xfloat_unaligned(<12 x float>*) {
+entry:
+ %1 = load volatile <12 x float>, <12 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xfloat_unaligned(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xdouble_unaligned(<12 x double>*) {
+entry:
+ %1 = load volatile <12 x double>, <12 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xdouble_unaligned(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8_unaligned(<16 x i8>*) {
+entry:
+ %1 = load volatile <16 x i8>, <16 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8_unaligned(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <16 x i8>, <16 x i8>* %0
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi16_unaligned(<16 x i16>*) {
+entry:
+ %1 = load volatile <16 x i16>, <16 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi16_unaligned(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi32_unaligned(<16 x i32>*) {
+entry:
+ %1 = load volatile <16 x i32>, <16 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi32_unaligned(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi64_unaligned(<16 x i64>*) {
+entry:
+ %1 = load volatile <16 x i64>, <16 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi64_unaligned(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8ptr_unaligned(<16 x i8*>*) {
+entry:
+ %1 = load volatile <16 x i8*>, <16 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8ptr_unaligned(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xfloat_unaligned(<16 x float>*) {
+entry:
+ %1 = load volatile <16 x float>, <16 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xfloat_unaligned(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xdouble_unaligned(<16 x double>*) {
+entry:
+ %1 = load volatile <16 x double>, <16 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xdouble_unaligned(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8_unaligned(<20 x i8>*) {
+entry:
+ %1 = load volatile <20 x i8>, <20 x i8>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8_unaligned(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load volatile <16 x i8>, <16 x i8>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load volatile i8, i8* %4
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load volatile i8, i8* %7
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load volatile i8, i8* %10
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load volatile i8, i8* %13
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi16_unaligned(<20 x i16>*) {
+entry:
+ %1 = load volatile <20 x i16>, <20 x i16>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi16_unaligned(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load volatile i16, i16* %7
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load volatile i16, i16* %10
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load volatile i16, i16* %13
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load volatile i16, i16* %16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi32_unaligned(<20 x i32>*) {
+entry:
+ %1 = load volatile <20 x i32>, <20 x i32>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi32_unaligned(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load volatile <4 x i32>, <4 x i32>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi64_unaligned(<20 x i64>*) {
+entry:
+ %1 = load volatile <20 x i64>, <20 x i64>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi64_unaligned(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load volatile <2 x i64>, <2 x i64>* %26
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load volatile <2 x i64>, <2 x i64>* %29
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8ptr_unaligned(<20 x i8*>*) {
+entry:
+ %1 = load volatile <20 x i8*>, <20 x i8*>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8ptr_unaligned(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load volatile <4 x i8*>, <4 x i8*>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xfloat_unaligned(<20 x float>*) {
+entry:
+ %1 = load volatile <20 x float>, <20 x float>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xfloat_unaligned(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load volatile <4 x float>, <4 x float>* %14
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xdouble_unaligned(<20 x double>*) {
+entry:
+ %1 = load volatile <20 x double>, <20 x double>* %0
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xdouble_unaligned(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load volatile <2 x double>, <2 x double>* %26
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load volatile <2 x double>, <2 x double>* %29
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
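+; The tests below repeat the volatile-load expansions with an explicit align 1;
+; the expected output keeps the volatile flag and the align 1 attribute on every
+; split load produced by the pass.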
+define i32 @volatile_load_from_2xi8_align_1(<2 x i8>*) {
+entry:
+ %1 = load volatile <2 x i8>, <2 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8_align_1(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi16_align_1(<2 x i16>*) {
+entry:
+ %1 = load volatile <2 x i16>, <2 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi16_align_1(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi32_align_1(<2 x i32>*) {
+entry:
+ %1 = load volatile <2 x i32>, <2 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi32_align_1(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i32, i32* %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi64_align_1(<2 x i64>*) {
+entry:
+ %1 = load volatile <2 x i64>, <2 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi64_align_1(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x i64>, <2 x i64>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi8ptr_align_1(<2 x i8*>*) {
+entry:
+ %1 = load volatile <2 x i8*>, <2 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8ptr_align_1(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8*, i8** %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xfloat_align_1(<2 x float>*) {
+entry:
+ %1 = load volatile <2 x float>, <2 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xfloat_align_1(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile float, float* %1, align 1
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xdouble_align_1(<2 x double>*) {
+entry:
+ %1 = load volatile <2 x double>, <2 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xdouble_align_1(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x double>, <2 x double>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8_align_1(<4 x i8>*) {
+entry:
+ %1 = load volatile <4 x i8>, <4 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8_align_1(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi16_align_1(<4 x i16>*) {
+entry:
+ %1 = load volatile <4 x i16>, <4 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi16_align_1(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi32_align_1(<4 x i32>*) {
+entry:
+ %1 = load volatile <4 x i32>, <4 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi32_align_1(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i32>, <4 x i32>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi64_align_1(<4 x i64>*) {
+entry:
+ %1 = load volatile <4 x i64>, <4 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi64_align_1(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8ptr_align_1(<4 x i8*>*) {
+entry:
+ %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8ptr_align_1(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xfloat_align_1(<4 x float>*) {
+entry:
+ %1 = load volatile <4 x float>, <4 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xfloat_align_1(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x float>, <4 x float>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xdouble_align_1(<4 x double>*) {
+entry:
+ %1 = load volatile <4 x double>, <4 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xdouble_align_1(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8_align_1(<6 x i8>*) {
+entry:
+ %1 = load volatile <6 x i8>, <6 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8_align_1(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi16_align_1(<6 x i16>*) {
+entry:
+ %1 = load volatile <6 x i16>, <6 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi16_align_1(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 1
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 1
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi32_align_1(<6 x i32>*) {
+entry:
+ %1 = load volatile <6 x i32>, <6 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi32_align_1(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i32, i32* %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi64_align_1(<6 x i64>*) {
+entry:
+ %1 = load volatile <6 x i64>, <6 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi64_align_1(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8ptr_align_1(<6 x i8*>*) {
+entry:
+ %1 = load volatile <6 x i8*>, <6 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8ptr_align_1(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i8*, i8** %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xfloat_align_1(<6 x float>*) {
+entry:
+ %1 = load volatile <6 x float>, <6 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xfloat_align_1(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 1
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile float, float* %7, align 1
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xdouble_align_1(<6 x double>*) {
+entry:
+ %1 = load volatile <6 x double>, <6 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xdouble_align_1(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8_align_1(<8 x i8>*) {
+entry:
+ %1 = load volatile <8 x i8>, <8 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8_align_1(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 1
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi16_align_1(<8 x i16>*) {
+entry:
+ %1 = load volatile <8 x i16>, <8 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi16_align_1(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <8 x i16>, <8 x i16>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi32_align_1(<8 x i32>*) {
+entry:
+ %1 = load volatile <8 x i32>, <8 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi32_align_1(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi64_align_1(<8 x i64>*) {
+entry:
+ %1 = load volatile <8 x i64>, <8 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi64_align_1(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8ptr_align_1(<8 x i8*>*) {
+entry:
+ %1 = load volatile <8 x i8*>, <8 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8ptr_align_1(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xfloat_align_1(<8 x float>*) {
+entry:
+ %1 = load volatile <8 x float>, <8 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xfloat_align_1(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xdouble_align_1(<8 x double>*) {
+entry:
+ %1 = load volatile <8 x double>, <8 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xdouble_align_1(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8_align_1(<12 x i8>*) {
+entry:
+ %1 = load volatile <12 x i8>, <12 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8_align_1(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 1
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 1
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load volatile i8, i8* %25, align 1
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load volatile i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load volatile i8, i8* %31, align 1
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load volatile i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi16_align_1(<12 x i16>*) {
+entry:
+ %1 = load volatile <12 x i16>, <12 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi16_align_1(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 1
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi32_align_1(<12 x i32>*) {
+entry:
+ %1 = load volatile <12 x i32>, <12 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi32_align_1(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi64_align_1(<12 x i64>*) {
+entry:
+ %1 = load volatile <12 x i64>, <12 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi64_align_1(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8ptr_align_1(<12 x i8*>*) {
+entry:
+ %1 = load volatile <12 x i8*>, <12 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8ptr_align_1(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xfloat_align_1(<12 x float>*) {
+entry:
+ %1 = load volatile <12 x float>, <12 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xfloat_align_1(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xdouble_align_1(<12 x double>*) {
+entry:
+ %1 = load volatile <12 x double>, <12 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xdouble_align_1(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8_align_1(<16 x i8>*) {
+entry:
+ %1 = load volatile <16 x i8>, <16 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8_align_1(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <16 x i8>, <16 x i8>* %0, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi16_align_1(<16 x i16>*) {
+entry:
+ %1 = load volatile <16 x i16>, <16 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi16_align_1(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi32_align_1(<16 x i32>*) {
+entry:
+ %1 = load volatile <16 x i32>, <16 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi32_align_1(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi64_align_1(<16 x i64>*) {
+entry:
+ %1 = load volatile <16 x i64>, <16 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi64_align_1(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8ptr_align_1(<16 x i8*>*) {
+entry:
+ %1 = load volatile <16 x i8*>, <16 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8ptr_align_1(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xfloat_align_1(<16 x float>*) {
+entry:
+ %1 = load volatile <16 x float>, <16 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xfloat_align_1(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xdouble_align_1(<16 x double>*) {
+entry:
+ %1 = load volatile <16 x double>, <16 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xdouble_align_1(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8_align_1(<20 x i8>*) {
+entry:
+ %1 = load volatile <20 x i8>, <20 x i8>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8_align_1(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load volatile <16 x i8>, <16 x i8>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi16_align_1(<20 x i16>*) {
+entry:
+ %1 = load volatile <20 x i16>, <20 x i16>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi16_align_1(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 1
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 1
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 1
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 1
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi32_align_1(<20 x i32>*) {
+entry:
+ %1 = load volatile <20 x i32>, <20 x i32>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi32_align_1(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load volatile <4 x i32>, <4 x i32>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi64_align_1(<20 x i64>*) {
+entry:
+ %1 = load volatile <20 x i64>, <20 x i64>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi64_align_1(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load volatile <2 x i64>, <2 x i64>* %26, align 1
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load volatile <2 x i64>, <2 x i64>* %29, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8ptr_align_1(<20 x i8*>*) {
+entry:
+ %1 = load volatile <20 x i8*>, <20 x i8*>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8ptr_align_1(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load volatile <4 x i8*>, <4 x i8*>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xfloat_align_1(<20 x float>*) {
+entry:
+ %1 = load volatile <20 x float>, <20 x float>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xfloat_align_1(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load volatile <4 x float>, <4 x float>* %14, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xdouble_align_1(<20 x double>*) {
+entry:
+ %1 = load volatile <20 x double>, <20 x double>* %0, align 1
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xdouble_align_1(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 1
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 1
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 1
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 1
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 1
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 1
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 1
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 1
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load volatile <2 x double>, <2 x double>* %26, align 1
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load volatile <2 x double>, <2 x double>* %29, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
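+; Volatile loads with alignment 8. Per the CHECK lines that follow, pieces that
+; already form a 128-bit vector keep the 8-byte alignment, while scalarized
+; element loads carry the alignment implied by each element's byte offset from
+; the 8-byte-aligned base.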
+define i32 @volatile_load_from_2xi8_align_8(<2 x i8>*) {
+entry:
+ %1 = load volatile <2 x i8>, <2 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8_align_8(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi16_align_8(<2 x i16>*) {
+entry:
+ %1 = load volatile <2 x i16>, <2 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi16_align_8(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi32_align_8(<2 x i32>*) {
+entry:
+ %1 = load volatile <2 x i32>, <2 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi32_align_8(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i32, i32* %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi64_align_8(<2 x i64>*) {
+entry:
+ %1 = load volatile <2 x i64>, <2 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi64_align_8(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x i64>, <2 x i64>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi8ptr_align_8(<2 x i8*>*) {
+entry:
+ %1 = load volatile <2 x i8*>, <2 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8ptr_align_8(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8*, i8** %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xfloat_align_8(<2 x float>*) {
+entry:
+ %1 = load volatile <2 x float>, <2 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xfloat_align_8(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile float, float* %1, align 8
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xdouble_align_8(<2 x double>*) {
+entry:
+ %1 = load volatile <2 x double>, <2 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xdouble_align_8(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x double>, <2 x double>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8_align_8(<4 x i8>*) {
+entry:
+ %1 = load volatile <4 x i8>, <4 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8_align_8(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi16_align_8(<4 x i16>*) {
+entry:
+ %1 = load volatile <4 x i16>, <4 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi16_align_8(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi32_align_8(<4 x i32>*) {
+entry:
+ %1 = load volatile <4 x i32>, <4 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi32_align_8(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i32>, <4 x i32>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi64_align_8(<4 x i64>*) {
+entry:
+ %1 = load volatile <4 x i64>, <4 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi64_align_8(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8ptr_align_8(<4 x i8*>*) {
+entry:
+ %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8ptr_align_8(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xfloat_align_8(<4 x float>*) {
+entry:
+ %1 = load volatile <4 x float>, <4 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xfloat_align_8(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x float>, <4 x float>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xdouble_align_8(<4 x double>*) {
+entry:
+ %1 = load volatile <4 x double>, <4 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xdouble_align_8(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8_align_8(<6 x i8>*) {
+entry:
+ %1 = load volatile <6 x i8>, <6 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8_align_8(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi16_align_8(<6 x i16>*) {
+entry:
+ %1 = load volatile <6 x i16>, <6 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi16_align_8(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 8
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 8
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi32_align_8(<6 x i32>*) {
+entry:
+ %1 = load volatile <6 x i32>, <6 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi32_align_8(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i32, i32* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi64_align_8(<6 x i64>*) {
+entry:
+ %1 = load volatile <6 x i64>, <6 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi64_align_8(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8ptr_align_8(<6 x i8*>*) {
+entry:
+ %1 = load volatile <6 x i8*>, <6 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8ptr_align_8(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i8*, i8** %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xfloat_align_8(<6 x float>*) {
+entry:
+ %1 = load volatile <6 x float>, <6 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xfloat_align_8(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 8
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile float, float* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xdouble_align_8(<6 x double>*) {
+entry:
+ %1 = load volatile <6 x double>, <6 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xdouble_align_8(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8_align_8(<8 x i8>*) {
+entry:
+ %1 = load volatile <8 x i8>, <8 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8_align_8(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi16_align_8(<8 x i16>*) {
+entry:
+ %1 = load volatile <8 x i16>, <8 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi16_align_8(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <8 x i16>, <8 x i16>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi32_align_8(<8 x i32>*) {
+entry:
+ %1 = load volatile <8 x i32>, <8 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi32_align_8(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi64_align_8(<8 x i64>*) {
+entry:
+ %1 = load volatile <8 x i64>, <8 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi64_align_8(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8ptr_align_8(<8 x i8*>*) {
+entry:
+ %1 = load volatile <8 x i8*>, <8 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8ptr_align_8(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xfloat_align_8(<8 x float>*) {
+entry:
+ %1 = load volatile <8 x float>, <8 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xfloat_align_8(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xdouble_align_8(<8 x double>*) {
+entry:
+ %1 = load volatile <8 x double>, <8 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xdouble_align_8(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8_align_8(<12 x i8>*) {
+entry:
+ %1 = load volatile <12 x i8>, <12 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8_align_8(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 8
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load volatile i8, i8* %25, align 8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load volatile i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load volatile i8, i8* %31, align 2
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load volatile i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi16_align_8(<12 x i16>*) {
+entry:
+ %1 = load volatile <12 x i16>, <12 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi16_align_8(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 8
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 2
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 4
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 2
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi32_align_8(<12 x i32>*) {
+entry:
+ %1 = load volatile <12 x i32>, <12 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi32_align_8(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi64_align_8(<12 x i64>*) {
+entry:
+ %1 = load volatile <12 x i64>, <12 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi64_align_8(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8ptr_align_8(<12 x i8*>*) {
+entry:
+ %1 = load volatile <12 x i8*>, <12 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8ptr_align_8(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xfloat_align_8(<12 x float>*) {
+entry:
+ %1 = load volatile <12 x float>, <12 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xfloat_align_8(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xdouble_align_8(<12 x double>*) {
+entry:
+ %1 = load volatile <12 x double>, <12 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xdouble_align_8(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8_align_8(<16 x i8>*) {
+entry:
+ %1 = load volatile <16 x i8>, <16 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8_align_8(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <16 x i8>, <16 x i8>* %0, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi16_align_8(<16 x i16>*) {
+entry:
+ %1 = load volatile <16 x i16>, <16 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi16_align_8(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi32_align_8(<16 x i32>*) {
+entry:
+ %1 = load volatile <16 x i32>, <16 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi32_align_8(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi64_align_8(<16 x i64>*) {
+entry:
+ %1 = load volatile <16 x i64>, <16 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi64_align_8(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8ptr_align_8(<16 x i8*>*) {
+entry:
+ %1 = load volatile <16 x i8*>, <16 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8ptr_align_8(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xfloat_align_8(<16 x float>*) {
+entry:
+ %1 = load volatile <16 x float>, <16 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xfloat_align_8(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xdouble_align_8(<16 x double>*) {
+entry:
+ %1 = load volatile <16 x double>, <16 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xdouble_align_8(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8_align_8(<20 x i8>*) {
+entry:
+ %1 = load volatile <20 x i8>, <20 x i8>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8_align_8(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load volatile <16 x i8>, <16 x i8>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 2
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi16_align_8(<20 x i16>*) {
+entry:
+ %1 = load volatile <20 x i16>, <20 x i16>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi16_align_8(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 8
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 4
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi32_align_8(<20 x i32>*) {
+entry:
+ %1 = load volatile <20 x i32>, <20 x i32>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi32_align_8(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load volatile <4 x i32>, <4 x i32>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi64_align_8(<20 x i64>*) {
+entry:
+ %1 = load volatile <20 x i64>, <20 x i64>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi64_align_8(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load volatile <2 x i64>, <2 x i64>* %26, align 8
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load volatile <2 x i64>, <2 x i64>* %29, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8ptr_align_8(<20 x i8*>*) {
+entry:
+ %1 = load volatile <20 x i8*>, <20 x i8*>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8ptr_align_8(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load volatile <4 x i8*>, <4 x i8*>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xfloat_align_8(<20 x float>*) {
+entry:
+ %1 = load volatile <20 x float>, <20 x float>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xfloat_align_8(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load volatile <4 x float>, <4 x float>* %14, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xdouble_align_8(<20 x double>*) {
+entry:
+ %1 = load volatile <20 x double>, <20 x double>* %0, align 8
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xdouble_align_8(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 8
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 8
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 8
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 8
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 8
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 8
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 8
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 8
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load volatile <2 x double>, <2 x double>* %26, align 8
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load volatile <2 x double>, <2 x double>* %29, align 8
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi8_align_32(<2 x i8>*) {
+entry:
+ %1 = load volatile <2 x i8>, <2 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8_align_32(<2 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8>, <2 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
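+; Note: <2 x i8> is narrower than the 128-bit vector types the expected output
+; uses, so the checks above expect the load to be scalarized into per-element
+; i8 loads inserted into a widened <16 x i8> value; element 0 keeps the
+; original align 32, while element 1, at byte offset 1, only gets align 1.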
+
+define i32 @volatile_load_from_2xi16_align_32(<2 x i16>*) {
+entry:
+ %1 = load volatile <2 x i16>, <2 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi16_align_32(<2 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i16>, <2 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi32_align_32(<2 x i32>*) {
+entry:
+ %1 = load volatile <2 x i32>, <2 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi32_align_32(<2 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i32, i32* %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i32>, <2 x i32>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i32> %3, i32 %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xi64_align_32(<2 x i64>*) {
+entry:
+ %1 = load volatile <2 x i64>, <2 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi64_align_32(<2 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x i64>, <2 x i64>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
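+; Note: <2 x i64> is already a legal 128-bit vector type, so the checks above
+; expect the volatile load to pass through unchanged, keeping its align 32.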
+
+define i32 @volatile_load_from_2xi8ptr_align_32(<2 x i8*>*) {
+entry:
+ %1 = load volatile <2 x i8*>, <2 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xi8ptr_align_32(<2 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8*, i8** %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x i8*> undef, i8* %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x i8*>, <2 x i8*>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x i8*> %3, i8* %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xfloat_align_32(<2 x float>*) {
+entry:
+ %1 = load volatile <2 x float>, <2 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xfloat_align_32(<2 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile float, float* %1, align 32
+; CHECK-NEXT: %3 = insertelement <4 x float> undef, float %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <2 x float>, <2 x float>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 4
+; CHECK-NEXT: %6 = insertelement <4 x float> %3, float %5, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_2xdouble_align_32(<2 x double>*) {
+entry:
+ %1 = load volatile <2 x double>, <2 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_2xdouble_align_32(<2 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <2 x double>, <2 x double>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8_align_32(<4 x i8>*) {
+entry:
+ %1 = load volatile <4 x i8>, <4 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8_align_32(<4 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i8>, <4 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi16_align_32(<4 x i16>*) {
+entry:
+ %1 = load volatile <4 x i16>, <4 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi16_align_32(<4 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <4 x i16>, <4 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi32_align_32(<4 x i32>*) {
+entry:
+ %1 = load volatile <4 x i32>, <4 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi32_align_32(<4 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i32>, <4 x i32>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi64_align_32(<4 x i64>*) {
+entry:
+ %1 = load volatile <4 x i64>, <4 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi64_align_32(<4 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x i64>, <4 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xi8ptr_align_32(<4 x i8*>*) {
+entry:
+ %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xi8ptr_align_32(<4 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x i8*>, <4 x i8*>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xfloat_align_32(<4 x float>*) {
+entry:
+ %1 = load volatile <4 x float>, <4 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xfloat_align_32(<4 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <4 x float>, <4 x float>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_4xdouble_align_32(<4 x double>*) {
+entry:
+ %1 = load volatile <4 x double>, <4 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_4xdouble_align_32(<4 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <4 x double>, <4 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8_align_32(<6 x i8>*) {
+entry:
+ %1 = load volatile <6 x i8>, <6 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8_align_32(<6 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i8>, <6 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi16_align_32(<6 x i16>*) {
+entry:
+ %1 = load volatile <6 x i16>, <6 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi16_align_32(<6 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i16, i16* %1, align 32
+; CHECK-NEXT: %3 = insertelement <8 x i16> undef, i16 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %6 = insertelement <8 x i16> %3, i16 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 4
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 8
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <6 x i16>, <6 x i16>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 5
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi32_align_32(<6 x i32>*) {
+entry:
+ %1 = load volatile <6 x i32>, <6 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi32_align_32(<6 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i32, i32* %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i32>, <6 x i32>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i32, i32* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
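+; Note: <6 x i32> is 192 bits, so the checks above expect one legal <4 x i32>
+; chunk load for the first four elements, followed by scalar i32 loads for the
+; remaining two elements that are inserted into a second, partly-undef
+; <4 x i32> value; the scalar loads at byte offsets 16 and 20 get align 16 and
+; align 4 respectively.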
+
+define i32 @volatile_load_from_6xi64_align_32(<6 x i64>*) {
+entry:
+ %1 = load volatile <6 x i64>, <6 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi64_align_32(<6 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x i64>, <6 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xi8ptr_align_32(<6 x i8*>*) {
+entry:
+ %1 = load volatile <6 x i8*>, <6 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xi8ptr_align_32(<6 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile i8*, i8** %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x i8*> undef, i8* %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x i8*>, <6 x i8*>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile i8*, i8** %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x i8*> %6, i8* %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xfloat_align_32(<6 x float>*) {
+entry:
+ %1 = load volatile <6 x float>, <6 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xfloat_align_32(<6 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = load volatile float, float* %4, align 16
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <6 x float>, <6 x float>* %0, i32 0, i32 5
+; CHECK-NEXT: %8 = load volatile float, float* %7, align 4
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_6xdouble_align_32(<6 x double>*) {
+entry:
+ %1 = load volatile <6 x double>, <6 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_6xdouble_align_32(<6 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <6 x double>, <6 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8_align_32(<8 x i8>*) {
+entry:
+ %1 = load volatile <8 x i8>, <8 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8_align_32(<8 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <8 x i8>, <8 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi16_align_32(<8 x i16>*) {
+entry:
+ %1 = load volatile <8 x i16>, <8 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi16_align_32(<8 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <8 x i16>, <8 x i16>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi32_align_32(<8 x i32>*) {
+entry:
+ %1 = load volatile <8 x i32>, <8 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi32_align_32(<8 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i32>, <8 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi64_align_32(<8 x i64>*) {
+entry:
+ %1 = load volatile <8 x i64>, <8 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi64_align_32(<8 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <8 x i64>, <8 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xi8ptr_align_32(<8 x i8*>*) {
+entry:
+ %1 = load volatile <8 x i8*>, <8 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xi8ptr_align_32(<8 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x i8*>, <8 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xfloat_align_32(<8 x float>*) {
+entry:
+ %1 = load volatile <8 x float>, <8 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xfloat_align_32(<8 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x float>, <8 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_8xdouble_align_32(<8 x double>*) {
+entry:
+ %1 = load volatile <8 x double>, <8 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_8xdouble_align_32(<8 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <8 x double>, <8 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8_align_32(<12 x i8>*) {
+entry:
+ %1 = load volatile <12 x i8>, <12 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8_align_32(<12 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = load volatile i8, i8* %1, align 32
+; CHECK-NEXT: %3 = insertelement <16 x i8> undef, i8 %2, i32 0
+; CHECK-NEXT: %4 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 1
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 1
+; CHECK-NEXT: %6 = insertelement <16 x i8> %3, i8 %5, i32 1
+; CHECK-NEXT: %7 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 2
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 2
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 2
+; CHECK-NEXT: %10 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 3
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 1
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 3
+; CHECK-NEXT: %13 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 4
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 4
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 4
+; CHECK-NEXT: %16 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 5
+; CHECK-NEXT: %17 = load volatile i8, i8* %16, align 1
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 5
+; CHECK-NEXT: %19 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 6
+; CHECK-NEXT: %20 = load volatile i8, i8* %19, align 2
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 6
+; CHECK-NEXT: %22 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 7
+; CHECK-NEXT: %23 = load volatile i8, i8* %22, align 1
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 7
+; CHECK-NEXT: %25 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 8
+; CHECK-NEXT: %26 = load volatile i8, i8* %25, align 8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 8
+; CHECK-NEXT: %28 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 9
+; CHECK-NEXT: %29 = load volatile i8, i8* %28, align 1
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 9
+; CHECK-NEXT: %31 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 10
+; CHECK-NEXT: %32 = load volatile i8, i8* %31, align 2
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 10
+; CHECK-NEXT: %34 = getelementptr <12 x i8>, <12 x i8>* %0, i32 0, i32 11
+; CHECK-NEXT: %35 = load volatile i8, i8* %34, align 1
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 11
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi16_align_32(<12 x i16>*) {
+entry:
+ %1 = load volatile <12 x i16>, <12 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi16_align_32(<12 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 16
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 9
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 2
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 10
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 4
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <12 x i16>, <12 x i16>* %0, i32 0, i32 11
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 2
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi32_align_32(<12 x i32>*) {
+entry:
+ %1 = load volatile <12 x i32>, <12 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi32_align_32(<12 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i32>, <12 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi64_align_32(<12 x i64>*) {
+entry:
+ %1 = load volatile <12 x i64>, <12 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi64_align_32(<12 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <12 x i64>, <12 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xi8ptr_align_32(<12 x i8*>*) {
+entry:
+ %1 = load volatile <12 x i8*>, <12 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xi8ptr_align_32(<12 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x i8*>, <12 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xfloat_align_32(<12 x float>*) {
+entry:
+ %1 = load volatile <12 x float>, <12 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xfloat_align_32(<12 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x float>, <12 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_12xdouble_align_32(<12 x double>*) {
+entry:
+ %1 = load volatile <12 x double>, <12 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_12xdouble_align_32(<12 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <12 x double>, <12 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8_align_32(<16 x i8>*) {
+entry:
+ %1 = load volatile <16 x i8>, <16 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8_align_32(<16 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = load volatile <16 x i8>, <16 x i8>* %0, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi16_align_32(<16 x i16>*) {
+entry:
+ %1 = load volatile <16 x i16>, <16 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi16_align_32(<16 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i16>, <16 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi32_align_32(<16 x i32>*) {
+entry:
+ %1 = load volatile <16 x i32>, <16 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi32_align_32(<16 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i32>, <16 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi64_align_32(<16 x i64>*) {
+entry:
+ %1 = load volatile <16 x i64>, <16 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi64_align_32(<16 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <16 x i64>, <16 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xi8ptr_align_32(<16 x i8*>*) {
+entry:
+ %1 = load volatile <16 x i8*>, <16 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xi8ptr_align_32(<16 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x i8*>, <16 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xfloat_align_32(<16 x float>*) {
+entry:
+ %1 = load volatile <16 x float>, <16 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xfloat_align_32(<16 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x float>, <16 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_16xdouble_align_32(<16 x double>*) {
+entry:
+ %1 = load volatile <16 x double>, <16 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_16xdouble_align_32(<16 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <16 x double>, <16 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8_align_32(<20 x i8>*) {
+entry:
+ %1 = load volatile <20 x i8>, <20 x i8>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8_align_32(<20 x i8>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8* %1 to <16 x i8>*
+; CHECK-NEXT: %3 = load volatile <16 x i8>, <16 x i8>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 16
+; CHECK-NEXT: %5 = load volatile i8, i8* %4, align 16
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 17
+; CHECK-NEXT: %8 = load volatile i8, i8* %7, align 1
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 18
+; CHECK-NEXT: %11 = load volatile i8, i8* %10, align 2
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = getelementptr <20 x i8>, <20 x i8>* %0, i32 0, i32 19
+; CHECK-NEXT: %14 = load volatile i8, i8* %13, align 1
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi16_align_32(<20 x i16>*) {
+entry:
+ %1 = load volatile <20 x i16>, <20 x i16>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi16_align_32(<20 x i16>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i16* %1 to <8 x i16>*
+; CHECK-NEXT: %3 = load volatile <8 x i16>, <8 x i16>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 8
+; CHECK-NEXT: %5 = bitcast i16* %4 to <8 x i16>*
+; CHECK-NEXT: %6 = load volatile <8 x i16>, <8 x i16>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 16
+; CHECK-NEXT: %8 = load volatile i16, i16* %7, align 32
+; CHECK-NEXT: %9 = insertelement <8 x i16> undef, i16 %8, i32 0
+; CHECK-NEXT: %10 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 17
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 1
+; CHECK-NEXT: %13 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 18
+; CHECK-NEXT: %14 = load volatile i16, i16* %13, align 4
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 2
+; CHECK-NEXT: %16 = getelementptr <20 x i16>, <20 x i16>* %0, i32 0, i32 19
+; CHECK-NEXT: %17 = load volatile i16, i16* %16, align 2
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 3
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi32_align_32(<20 x i32>*) {
+entry:
+ %1 = load volatile <20 x i32>, <20 x i32>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi32_align_32(<20 x i32>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i32* %1 to <4 x i32>*
+; CHECK-NEXT: %3 = load volatile <4 x i32>, <4 x i32>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i32* %4 to <4 x i32>*
+; CHECK-NEXT: %6 = load volatile <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i32* %7 to <4 x i32>*
+; CHECK-NEXT: %9 = load volatile <4 x i32>, <4 x i32>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i32* %10 to <4 x i32>*
+; CHECK-NEXT: %12 = load volatile <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i32>, <20 x i32>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i32* %13 to <4 x i32>*
+; CHECK-NEXT: %15 = load volatile <4 x i32>, <4 x i32>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi64_align_32(<20 x i64>*) {
+entry:
+ %1 = load volatile <20 x i64>, <20 x i64>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi64_align_32(<20 x i64>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i64* %1 to <2 x i64>*
+; CHECK-NEXT: %3 = load volatile <2 x i64>, <2 x i64>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast i64* %4 to <2 x i64>*
+; CHECK-NEXT: %6 = load volatile <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast i64* %7 to <2 x i64>*
+; CHECK-NEXT: %9 = load volatile <2 x i64>, <2 x i64>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast i64* %10 to <2 x i64>*
+; CHECK-NEXT: %12 = load volatile <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast i64* %13 to <2 x i64>*
+; CHECK-NEXT: %15 = load volatile <2 x i64>, <2 x i64>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast i64* %16 to <2 x i64>*
+; CHECK-NEXT: %18 = load volatile <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast i64* %19 to <2 x i64>*
+; CHECK-NEXT: %21 = load volatile <2 x i64>, <2 x i64>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast i64* %22 to <2 x i64>*
+; CHECK-NEXT: %24 = load volatile <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: %25 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast i64* %25 to <2 x i64>*
+; CHECK-NEXT: %27 = load volatile <2 x i64>, <2 x i64>* %26, align 32
+; CHECK-NEXT: %28 = getelementptr <20 x i64>, <20 x i64>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast i64* %28 to <2 x i64>*
+; CHECK-NEXT: %30 = load volatile <2 x i64>, <2 x i64>* %29, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xi8ptr_align_32(<20 x i8*>*) {
+entry:
+ %1 = load volatile <20 x i8*>, <20 x i8*>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xi8ptr_align_32(<20 x i8*>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast i8** %1 to <4 x i8*>*
+; CHECK-NEXT: %3 = load volatile <4 x i8*>, <4 x i8*>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast i8** %4 to <4 x i8*>*
+; CHECK-NEXT: %6 = load volatile <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast i8** %7 to <4 x i8*>*
+; CHECK-NEXT: %9 = load volatile <4 x i8*>, <4 x i8*>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast i8** %10 to <4 x i8*>*
+; CHECK-NEXT: %12 = load volatile <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x i8*>, <20 x i8*>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast i8** %13 to <4 x i8*>*
+; CHECK-NEXT: %15 = load volatile <4 x i8*>, <4 x i8*>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xfloat_align_32(<20 x float>*) {
+entry:
+ %1 = load volatile <20 x float>, <20 x float>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xfloat_align_32(<20 x float>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast float* %1 to <4 x float>*
+; CHECK-NEXT: %3 = load volatile <4 x float>, <4 x float>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 4
+; CHECK-NEXT: %5 = bitcast float* %4 to <4 x float>*
+; CHECK-NEXT: %6 = load volatile <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 8
+; CHECK-NEXT: %8 = bitcast float* %7 to <4 x float>*
+; CHECK-NEXT: %9 = load volatile <4 x float>, <4 x float>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 12
+; CHECK-NEXT: %11 = bitcast float* %10 to <4 x float>*
+; CHECK-NEXT: %12 = load volatile <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x float>, <20 x float>* %0, i32 0, i32 16
+; CHECK-NEXT: %14 = bitcast float* %13 to <4 x float>*
+; CHECK-NEXT: %15 = load volatile <4 x float>, <4 x float>* %14, align 32
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+
+define i32 @volatile_load_from_20xdouble_align_32(<20 x double>*) {
+entry:
+ %1 = load volatile <20 x double>, <20 x double>* %0, align 32
+ ret i32 0
+}
+; CHECK-LABEL: define i32 @volatile_load_from_20xdouble_align_32(<20 x double>*) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 0
+; CHECK-NEXT: %2 = bitcast double* %1 to <2 x double>*
+; CHECK-NEXT: %3 = load volatile <2 x double>, <2 x double>* %2, align 32
+; CHECK-NEXT: %4 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 2
+; CHECK-NEXT: %5 = bitcast double* %4 to <2 x double>*
+; CHECK-NEXT: %6 = load volatile <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %7 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 4
+; CHECK-NEXT: %8 = bitcast double* %7 to <2 x double>*
+; CHECK-NEXT: %9 = load volatile <2 x double>, <2 x double>* %8, align 32
+; CHECK-NEXT: %10 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 6
+; CHECK-NEXT: %11 = bitcast double* %10 to <2 x double>*
+; CHECK-NEXT: %12 = load volatile <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %13 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 8
+; CHECK-NEXT: %14 = bitcast double* %13 to <2 x double>*
+; CHECK-NEXT: %15 = load volatile <2 x double>, <2 x double>* %14, align 32
+; CHECK-NEXT: %16 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 10
+; CHECK-NEXT: %17 = bitcast double* %16 to <2 x double>*
+; CHECK-NEXT: %18 = load volatile <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %19 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 12
+; CHECK-NEXT: %20 = bitcast double* %19 to <2 x double>*
+; CHECK-NEXT: %21 = load volatile <2 x double>, <2 x double>* %20, align 32
+; CHECK-NEXT: %22 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 14
+; CHECK-NEXT: %23 = bitcast double* %22 to <2 x double>*
+; CHECK-NEXT: %24 = load volatile <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: %25 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 16
+; CHECK-NEXT: %26 = bitcast double* %25 to <2 x double>*
+; CHECK-NEXT: %27 = load volatile <2 x double>, <2 x double>* %26, align 32
+; CHECK-NEXT: %28 = getelementptr <20 x double>, <20 x double>* %0, i32 0, i32 18
+; CHECK-NEXT: %29 = bitcast double* %28 to <2 x double>*
+; CHECK-NEXT: %30 = load volatile <2 x double>, <2 x double>* %29, align 16
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
+