Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1298)

Unified Diff: test/Transforms/NaCl/vector-canonicalization-shuffles.ll

Issue 1423873002: PNaCl: Add a vector type legalization pass. Base URL: https://chromium.googlesource.com/native_client/pnacl-llvm.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: test/Transforms/NaCl/vector-canonicalization-shuffles.ll
diff --git a/test/Transforms/NaCl/vector-canonicalization-shuffles.ll b/test/Transforms/NaCl/vector-canonicalization-shuffles.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bc68d1bf377862e942a33ac6baab8572d58dfebe
--- /dev/null
+++ b/test/Transforms/NaCl/vector-canonicalization-shuffles.ll
@@ -0,0 +1,6390 @@
+; RUN: opt -S -pnacl-vector-canonicalization %s | FileCheck %s
+
+; Auto-generated tests for shuffle operations.
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128"
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a1 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_0(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 2, i32 1>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): b0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_1(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 -- the mask selects %0 unchanged
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_2(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> %0
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_3(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a1 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_4(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 2, i32 0>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): b0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_5(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 2>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a1 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_6(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 2>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a1 b0 -- same mask as @shuffle_2xi32_to_2xi32_6
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_7(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 2, i32 1>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 -- same mask as @shuffle_2xi32_to_2xi32_1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_2xi32_to_2xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %3 ; lanes (a=%0, b=%1): a0 b0 -- same mask as @shuffle_2xi32_to_2xi32_3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_2xi32_9(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 2, i32 0, i32 1, i32 0>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_0(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 a1 a1
+} ; NOTE(review): the expectation that follows returns %0 untouched, so lanes 2-3 come from the widened argument rather than an explicit a1 splat -- confirm this is intended
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_1(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> %0
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 b0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_2(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 2, i32 1, i32 0, i32 1>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_3(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 2, i32 1, i32 0, i32 1>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 a0 a1 -- same mask as @shuffle_2xi32_to_4xi32_3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_4(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 0, i32 2>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_5(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 0, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 2, i32 2, i32 2, i32 0>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): b0 b0 b0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_6(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 0, i32 0, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 0, i32 0>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): a0 b0 a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_7(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 1, i32 0, i32 1, i32 2>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): a1 a0 a1 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 0, i32 1, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_2xi32_to_4xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 2, i32 0, i32 1, i32 1>
+ ret <4 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_4xi32_9(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 0, i32 1, i32 0, i32 0, i32 1, i32 1>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 a0 a0 | a1 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 2, i32 0, i32 0, i32 0, i32 1, i32 0>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a0 a0 | a1 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 2, i32 1, i32 0, i32 1, i32 0, i32 0>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 a0 a1 | a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 4, i32 5>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 2, i32 0, i32 2, i32 0, i32 0, i32 2>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 b0 a0 | a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 0, i32 0, i32 2, i32 1, i32 2, i32 1>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): a0 a0 b0 a1 | b0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 4, i32 1>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 0, i32 0>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): b0 b0 b0 b0 | a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 1, i32 0, i32 1, i32 1, i32 0, i32 2>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): a1 a0 a1 a1 | a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 1>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 1, i32 0, i32 2, i32 2, i32 2, i32 0>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): a1 a0 b0 b0 | b0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 2, i32 1, i32 1, i32 2, i32 0, i32 1>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 a1 b0 | a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 0>
+; CHECK-NEXT: store <4 x i32> %1, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_2xi32_to_6xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <6 x i32> <i32 0, i32 0, i32 1, i32 0, i32 2, i32 0>
+ ret <6 x i32> %3 ; lanes (a=%0, b=%1): a0 a0 a1 a0 | b0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 1, i32 2, i32 0, i32 2, i32 0, i32 2, i32 2, i32 2>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a1 b0 a0 b0 | a0 b0 b0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 2, i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 b0 | b0 b0 b0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 5, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 2, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 2>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 a0 | a1 a0 a1 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 0, i32 1>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a1 a1 b0 b0 | b0 b0 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 1, i32 0, i32 1, i32 2, i32 2, i32 1, i32 1, i32 0>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a1 a0 a1 b0 | b0 a1 a1 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 0, i32 2, i32 0, i32 0, i32 2, i32 0, i32 0, i32 2>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a0 b0 a0 a0 | b0 a0 a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 2, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 a0 | a1 a1 a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 2, i32 0, i32 0>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 a1 a0 | a0 b0 a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 1, i32 2, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): a1 b0 a1 a0 | a1 a1 a0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 1, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_2xi32_to_8xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1>
+ ret <8 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a0 a0 | a1 a1 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 2, i32 1, i32 2, i32 1, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 b0 a1 | a0 b0 a0 a0 | a0 a1 a1 a1
+} ; NOTE(review): the expectation that follows stores %2 (the widened first argument) unshuffled for lanes 8-11 (a0 a1 a1 a1) -- confirm lanes 10-11 really hold a1
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %2, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 2, i32 0, i32 1, i32 1, i32 2, i32 1, i32 2, i32 2, i32 2, i32 2, i32 0, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a1 a1 | b0 a1 b0 b0 | b0 b0 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2, i32 1, i32 2, i32 2>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): a0 a0 a0 a0 | b0 a0 a0 a0 | b0 a1 b0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 2, i32 1, i32 2, i32 1, i32 0, i32 0, i32 1, i32 2, i32 1, i32 1, i32 2, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): b0 a1 b0 a1 | a0 a0 a1 b0 | a1 a1 b0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 1, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 2, i32 2, i32 1, i32 2, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2, i32 0, i32 2>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): b0 b0 a1 b0 | a0 a0 b0 b0 | b0 b0 a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 5, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 2, i32 0, i32 0, i32 2, i32 1, i32 0, i32 1, i32 2, i32 2, i32 0, i32 0, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a0 b0 | a1 a0 a1 b0 | b0 a0 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 5>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 1, i32 2, i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): a1 b0 a0 a0 | a1 a0 a0 a1 | a0 a1 b0 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 0, i32 1, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 0, i32 2, i32 0, i32 2>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): a0 a1 a0 b0 | a1 a0 a1 a1 | a0 b0 a0 b0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 0, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, i32 1, i32 2, i32 1, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): a1 a1 a1 a1 | a0 a0 a0 a1 | a1 b0 a1 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_2xi32_to_12xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <12 x i32> <i32 0, i32 0, i32 1, i32 2, i32 1, i32 2, i32 1, i32 0, i32 0, i32 2, i32 1, i32 1>
+ ret <12 x i32> %3 ; lanes (a=%0, b=%1): a0 a0 a1 b0 | a1 b0 a1 a0 | a0 b0 a1 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 1, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 0, i32 0, i32 2, i32 1, i32 1, i32 0, i32 1, i32 0>
+ ret <16 x i32> %3 ; lanes (a=%0, b=%1): b0 a0 a0 a0 | a1 a1 b0 b0 | a0 a0 b0 a1 | a1 a0 a1 a0
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 0, i32 2, i32 1, i32 2, i32 0, i32 0, i32 2, i32 2, i32 0, i32 1, i32 1, i32 0, i32 1, i32 2, i32 0, i32 1>
+ ret <16 x i32> %3 ; lanes (a=%0, b=%1): a0 b0 a1 b0 | a0 a0 b0 b0 | a0 a1 a1 a0 | a1 b0 a0 a1
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 1, i32 2, i32 0, i32 1, i32 2, i32 2, i32 1, i32 2, i32 0, i32 0, i32 1, i32 1, i32 2, i32 1, i32 2, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 0, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 5, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 0, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 2, i32 0, i32 2, i32 0, i32 1, i32 1, i32 0, i32 0, i32 2, i32 2, i32 2, i32 0, i32 0, i32 0, i32 0, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 0, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 2, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 1, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 1, i32 2, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2, i32 2, i32 0, i32 1, i32 1, i32 0, i32 2, i32 2, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 0, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 0, i32 2, i32 0, i32 2, i32 2, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, i32 2, i32 1, i32 0, i32 0, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 0, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 2, i32 0, i32 1, i32 1, i32 2, i32 1, i32 0, i32 2, i32 2, i32 0, i32 1, i32 1, i32 2, i32 1, i32 1, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 4, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_2xi32_to_16xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <16 x i32> <i32 0, i32 2, i32 1, i32 1, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 2, i32 0, i32 0, i32 1, i32 1, i32 2>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 1, i32 4, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_0(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 0, i32 1, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 2, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 1, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_1(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 1, i32 2, i32 0, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 2, i32 0, i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 0, i32 1, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 4, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_2(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 1, i32 2, i32 2, i32 1, i32 1, i32 1, i32 2, i32 2, i32 0, i32 2, i32 0, i32 0, i32 1, i32 1, i32 0, i32 2, i32 2, i32 1, i32 1, i32 2>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 1, i32 0, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_3(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 1, i32 1, i32 1, i32 0, i32 1, i32 0, i32 2, i32 1, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0, i32 2, i32 1, i32 1, i32 1, i32 2, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 4, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 0, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 4, i32 1>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 1, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_4(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 2, i32 1, i32 0, i32 1, i32 1, i32 0, i32 2, i32 1, i32 0, i32 2, i32 1, i32 2, i32 0, i32 2, i32 0, i32 2, i32 1, i32 1, i32 1, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 4, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 4, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 0, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_5(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0, i32 1, i32 0, i32 1, i32 2, i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 2, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 4, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_6(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 2, i32 0, i32 1, i32 0, i32 2, i32 2, i32 0, i32 1, i32 1, i32 2, i32 2, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_7(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 1, i32 2, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 0, i32 1, i32 2, i32 2, i32 1, i32 1, i32 1, i32 0, i32 2, i32 2, i32 0, i32 2>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 0, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_8(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 1, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 2, i32 1, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, i32 2, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 4, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 1, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <20 x i32> @shuffle_2xi32_to_20xi32_9(<2 x i32>, <2 x i32>) {
+ %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <20 x i32> <i32 0, i32 1, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, i32 2, i32 1, i32 1, i32 2, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_2xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 5, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 6, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_0(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 0, i32 5>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_1(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 6, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_2(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 5, i32 5>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_3(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 3, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_4(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 2, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_5(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 1, i32 6>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_6(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_7(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> %0
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 0, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <2 x i32> @shuffle_4xi32_to_2xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 2, i32 5>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_2xi32_9(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 6, i32 6, i32 1>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_0(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 6, i32 6, i32 1>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_1(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 1, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_2(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 1, i32 4>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 3, i32 5>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_3(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 3, i32 5>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 6, i32 1, i32 5, i32 0>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_4(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 6, i32 1, i32 5, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 5, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_5(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 5, i32 6>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 6, i32 0, i32 0, i32 2>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_6(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 6, i32 0, i32 0, i32 2>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_7(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 5, i32 3, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 5, i32 3, i32 6>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <4 x i32> @shuffle_4xi32_to_4xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 0, i32 0>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_4xi32_9(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> %3
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 0, i32 6, i32 2, i32 2, i32 4, i32 2>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 6, i32 2, i32 2>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 6, i32 2, i32 6, i32 3, i32 4, i32 0>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 2, i32 7>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 6, i32 3, i32 0, i32 6, i32 6, i32 0>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 7, i32 4, i32 2>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 1, i32 2, i32 0, i32 0, i32 4, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 5, i32 6, i32 2, i32 0, i32 0, i32 3>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 2, i32 6, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 6, i32 0, i32 5, i32 4, i32 0, i32 2>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 1, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 0, i32 3, i32 3, i32 1, i32 5, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 6, i32 0, i32 2, i32 2, i32 2, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 2, i32 5, i32 2, i32 6, i32 3, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 2, i32 6>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i32> @shuffle_4xi32_to_6xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <6 x i32> <i32 1, i32 0, i32 3, i32 5, i32 3, i32 1>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 3, i32 5>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 5, i32 2, i32 4, i32 1, i32 5, i32 3, i32 1, i32 2>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 0, i32 5>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 7, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 1, i32 0, i32 6, i32 1, i32 4, i32 4, i32 6, i32 4>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 6, i32 1>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 2, i32 1, i32 1, i32 5>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 1, i32 1, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 6, i32 6, i32 0, i32 0, i32 4, i32 6, i32 3, i32 6>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 2, i32 4, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 2, i32 7, i32 2>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 5, i32 0, i32 5, i32 5, i32 5, i32 5, i32 0, i32 1>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 4, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 5, i32 5, i32 4, i32 0, i32 2, i32 5, i32 3, i32 0>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 0, i32 4>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 3, i32 0>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 3, i32 5, i32 0, i32 6, i32 5, i32 6, i32 5, i32 3>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 0, i32 6>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 2, i32 1, i32 7>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 3, i32 5, i32 1, i32 2, i32 5, i32 1, i32 5, i32 0>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 1, i32 2>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 6, i32 5, i32 1, i32 6, i32 3, i32 5, i32 2, i32 5>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 1, i32 5, i32 2>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <8 x i32> @shuffle_4xi32_to_8xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 3, i32 6, i32 3, i32 3, i32 5, i32 1, i32 5, i32 2>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 6, i32 3, i32 3>
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 1, i32 6>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 5, i32 5, i32 4, i32 3, i32 0, i32 0, i32 3, i32 0, i32 6, i32 4, i32 0, i32 3>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 0, i32 7>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 0, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 3, i32 1, i32 5, i32 4, i32 1, i32 5, i32 6, i32 2, i32 4, i32 5, i32 6, i32 5>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 1, i32 5, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 4, i32 4, i32 1, i32 0, i32 3, i32 5, i32 0, i32 4, i32 1, i32 3, i32 3, i32 4>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 5, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 5, i32 0, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 3, i32 3, i32 4>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 5, i32 2, i32 3, i32 4, i32 2, i32 6, i32 3, i32 6, i32 5, i32 2, i32 5, i32 0>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 7, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 3, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 1, i32 6, i32 5, i32 6, i32 2, i32 4, i32 3, i32 1, i32 2, i32 5, i32 5, i32 6>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 5, i32 6>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 3, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 1, i32 1, i32 2, i32 1, i32 5, i32 3, i32 2, i32 6, i32 3, i32 2, i32 2, i32 1>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 1>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 6, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 1>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 1, i32 2, i32 6, i32 0, i32 2, i32 1, i32 1, i32 3, i32 0, i32 5, i32 2, i32 6>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 2, i32 6, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 3>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 3, i32 2, i32 1, i32 4, i32 1, i32 6, i32 4, i32 6, i32 5, i32 0, i32 3, i32 6>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 4, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 7, i32 2>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 0, i32 3, i32 1, i32 1, i32 4, i32 1, i32 2, i32 3, i32 0, i32 5, i32 5, i32 3>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 1>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 5, i32 3>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <12 x i32> @shuffle_4xi32_to_12xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <12 x i32> <i32 4, i32 0, i32 4, i32 2, i32 3, i32 0, i32 2, i32 2, i32 0, i32 4, i32 3, i32 4>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
+; CHECK-NEXT: store <4 x i32> %6, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 0, i32 6, i32 5, i32 3, i32 0, i32 2, i32 4, i32 2, i32 3, i32 2, i32 2, i32 6, i32 5, i32 3, i32 2, i32 4>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 5, i32 3>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 4, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 2, i32 2, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 6, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 4, i32 0, i32 6, i32 2, i32 5, i32 6, i32 6, i32 0, i32 5, i32 3, i32 1, i32 1, i32 6, i32 0, i32 0, i32 4>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 2, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 1, i32 3, i32 4, i32 4, i32 4, i32 4, i32 1, i32 3, i32 2, i32 3, i32 5, i32 5, i32 2, i32 5, i32 4, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 5, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 1, i32 2, i32 1, i32 5, i32 2, i32 2, i32 5, i32 0, i32 0, i32 0, i32 4, i32 2, i32 0, i32 1, i32 4, i32 2>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 2, i32 1, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 2, i32 5, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 5, i32 3, i32 3, i32 4, i32 6, i32 3, i32 6, i32 1, i32 0, i32 6, i32 0, i32 6, i32 0, i32 1, i32 0, i32 3>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 7, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 7, i32 2, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 0, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 6, i32 6, i32 6, i32 0, i32 0, i32 2, i32 5, i32 5, i32 5, i32 4, i32 0, i32 2, i32 1, i32 5, i32 2, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 2, i32 2, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 0, i32 4, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 2, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 4, i32 2, i32 6, i32 2, i32 1, i32 3, i32 0, i32 4, i32 6, i32 4, i32 2, i32 6, i32 6, i32 5, i32 1, i32 2>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 2, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 3, i32 0, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 0, i32 6, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 1, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 4, i32 6, i32 6, i32 6, i32 0, i32 1, i32 4, i32 3, i32 1, i32 3, i32 2, i32 1, i32 1, i32 2, i32 2, i32 2>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 0, i32 6, i32 1, i32 5, i32 0, i32 2, i32 1, i32 5, i32 0, i32 4, i32 2, i32 1, i32 4, i32 6, i32 6, i32 2>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 1, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 1, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 2, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 2, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <16 x i32> @shuffle_4xi32_to_16xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <16 x i32> <i32 5, i32 6, i32 0, i32 3, i32 6, i32 5, i32 6, i32 0, i32 6, i32 0, i32 5, i32 5, i32 2, i32 0, i32 3, i32 6>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <20 x i32> @shuffle_4xi32_to_20xi32_0(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 6, i32 1, i32 4, i32 6, i32 4, i32 2, i32 2, i32 3, i32 0, i32 4, i32 2, i32 1, i32 4, i32 4, i32 3, i32 4, i32 3, i32 1, i32 2, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 0, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 2, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 7, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 1>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 1: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_1(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 1, i32 4, i32 6, i32 1, i32 2, i32 1, i32 5, i32 2, i32 5, i32 4, i32 1, i32 6, i32 4, i32 3, i32 4, i32 3, i32 6, i32 6, i32 6, i32 3>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 6, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 1, i32 5, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 5, i32 2>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 0, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 2, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 2: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; The third (2, 0, 3, 2) and fifth (1, 0, 3, 3) groups of mask entries read
+; only %0, so those two expected shuffles have an undef second operand.
+define <20 x i32> @shuffle_4xi32_to_20xi32_2(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 6, i32 2, i32 2, i32 5, i32 6, i32 2, i32 3, i32 3, i32 2, i32 0, i32 3, i32 2, i32 0, i32 2, i32 4, i32 5, i32 1, i32 0, i32 3, i32 3>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 6, i32 6, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 6, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 2>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 2, i32 4, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 3: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_3(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 0, i32 0, i32 0, i32 5, i32 6, i32 2, i32 0, i32 3, i32 0, i32 4, i32 2, i32 4, i32 0, i32 0, i32 3, i32 6, i32 4, i32 6, i32 5, i32 1>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 0, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 6, i32 4, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 3, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 1, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 4: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; The third group of mask entries (2, 1, 2, 1) reads only %0, so that expected
+; shuffle has an undef second operand.
+define <20 x i32> @shuffle_4xi32_to_20xi32_4(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 2, i32 5, i32 4, i32 3, i32 1, i32 6, i32 4, i32 3, i32 2, i32 1, i32 2, i32 1, i32 0, i32 5, i32 6, i32 3, i32 1, i32 0, i32 6, i32 3>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 4, i32 3>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 4, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 6, i32 3>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 5: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_5(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 3, i32 2, i32 4, i32 5, i32 5, i32 4, i32 3, i32 2, i32 4, i32 2, i32 2, i32 2, i32 0, i32 0, i32 0, i32 4, i32 4, i32 2, i32 1, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 4, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 0, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 6: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; The third group of mask entries (0, 2, 1, 2) reads only %0, so that expected
+; shuffle has an undef second operand.
+define <20 x i32> @shuffle_4xi32_to_20xi32_6(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 5, i32 5, i32 3, i32 0, i32 6, i32 5, i32 2, i32 1, i32 0, i32 2, i32 1, i32 2, i32 5, i32 2, i32 2, i32 4, i32 4, i32 6, i32 1, i32 3>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 1, i32 7, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 1, i32 6, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 2>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 6, i32 6, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 5, i32 7>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 7: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_7(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 3, i32 6, i32 1, i32 4, i32 1, i32 3, i32 5, i32 0, i32 6, i32 6, i32 2, i32 0, i32 3, i32 0, i32 6, i32 5, i32 1, i32 0, i32 1, i32 6>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 6, i32 1, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 3, i32 5, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 2, i32 6, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 0, i32 6, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 1, i32 6>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 8: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_8(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 6, i32 3, i32 5, i32 6, i32 5, i32 1, i32 6, i32 6, i32 2, i32 0, i32 6, i32 3, i32 2, i32 1, i32 0, i32 6, i32 0, i32 0, i32 6, i32 3>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 1, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 2, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 0, i32 6, i32 3>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 1, i32 0, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 9: widens two <4 x i32> operands into a <20 x i32> result (mask indices
+; 0-3 read %0, 4-7 read %1). The expectations below split the result into five
+; <4 x i32> pieces: one returned, four stored via prepended pointer parameters.
+; Every group of four mask entries draws from both operands.
+define <20 x i32> @shuffle_4xi32_to_20xi32_9(<4 x i32>, <4 x i32>) {
+ %3 = shufflevector <4 x i32> %0, <4 x i32> %1, <20 x i32> <i32 2, i32 5, i32 1, i32 2, i32 0, i32 3, i32 5, i32 4, i32 4, i32 2, i32 3, i32 2, i32 5, i32 0, i32 0, i32 6, i32 4, i32 2, i32 3, i32 4>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_4xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 1, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 3, i32 5, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 7, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 2>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 7, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 0: mask <7, 3> picks lane 1 of %1 and lane 3 of %0 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 7, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 1: mask <6, 4> picks lane 0 of %1 and lane 4 of %0 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 6, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 2: mask <9, 3> picks lane 3 of %1 and lane 3 of %0 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 9, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 3: mask <7, 4> picks lane 1 of %1 and lane 4 of %0 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 7, i32 4>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 4: mask <5, 9> picks lane 5 of %0 and lane 3 of %1 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 5, i32 9>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 5: mask <0, 3> picks lanes 0 and 3 of %0 only; %1 is never read, so the
+; expected shuffle below uses undef as its second operand. Each <6 x i32>
+; argument is expected as two <4 x i32> values and the result as <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 0, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 6: mask <3, 7> picks lane 3 of %0 and lane 1 of %1 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 3, i32 7>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 7: mask <4, 1> picks lanes 4 and 1 of %0; %1 is never read. The two
+; lanes fall in different <4 x i32> halves of the split %0, so the expected
+; shuffle below still takes two inputs. The result is widened to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 4, i32 1>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 8: mask <6, 3> picks lane 0 of %1 and lane 3 of %0 (indices 0-5 read
+; %0, 6-11 read %1). The expectations below pass each <6 x i32> argument as two
+; <4 x i32> values and widen the <2 x i32> result to <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 6, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 9: mask <7, 9> picks lanes 1 and 3 of %1 only; %0 is never read, so the
+; expected shuffle below uses undef as its second operand. Each <6 x i32>
+; argument is expected as two <4 x i32> values and the result as <4 x i32>.
+define <2 x i32> @shuffle_6xi32_to_2xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <2 x i32> <i32 7, i32 9>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_2xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 0: mask <9, 3, 5, 2> picks %1 lane 3, then %0 lanes 3, 5 and 2 (indices
+; 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as two
+; <4 x i32> values; the expectations below build the result with a chain of
+; three shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 9, i32 3, i32 5, i32 2>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 1: mask <9, 0, 6, 4> picks %1 lane 3, %0 lane 0, %1 lane 0, %0 lane 4
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of two shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 9, i32 0, i32 6, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 3, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 2: mask <6, 3, 8, 0> picks %1 lane 0, %0 lane 3, %1 lane 2, %0 lane 0
+; (indices 0-5 read %0, 6-11 read %1). All selected lanes sit in the low four
+; lanes of their operand, and the expectations below need a single shuffle.
+define <4 x i32> @shuffle_6xi32_to_4xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 6, i32 3, i32 8, i32 0>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 3: mask <8, 10, 4, 6> picks %1 lane 2, %1 lane 4, %0 lane 4, %1 lane 0
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of three shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 8, i32 10, i32 4, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 4: mask <1, 3, 0, 6> picks %0 lanes 1, 3 and 0, then %1 lane 0 (indices
+; 0-5 read %0, 6-11 read %1). All selected lanes sit in the low four lanes of
+; their operand, and the expectations below need a single shuffle.
+define <4 x i32> @shuffle_6xi32_to_4xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 1, i32 3, i32 0, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 0, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 5: mask <1, 10, 9, 3> picks %0 lane 1, %1 lane 4, %1 lane 3, %0 lane 3
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of three shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 1, i32 10, i32 9, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 6: mask <6, 10, 0, 1> picks %1 lanes 0 and 4, then %0 lanes 0 and 1
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of two shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 6, i32 10, i32 0, i32 1>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 7: mask <7, 5, 6, 2> picks %1 lane 1, %0 lane 5, %1 lane 0, %0 lane 2
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of two shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 7, i32 5, i32 6, i32 2>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 8: mask <8, 6, 4, 5> picks %1 lanes 2 and 0, then %0 lanes 4 and 5
+; (indices 0-5 read %0, 6-11 read %1). Only two of the four <4 x i32> pieces
+; are touched, and the expectations below need a single shuffle.
+define <4 x i32> @shuffle_6xi32_to_4xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 8, i32 6, i32 4, i32 5>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 0, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+; Case 9: mask <1, 7, 0, 5> picks %0 lane 1, %1 lane 1, %0 lane 0, %0 lane 5
+; (indices 0-5 read %0, 6-11 read %1). Each <6 x i32> argument is expected as
+; two <4 x i32> values; the expectations below use a chain of two shuffles.
+define <4 x i32> @shuffle_6xi32_to_4xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <4 x i32> <i32 1, i32 7, i32 0, i32 5>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_4xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 0: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 2, i32 0, i32 6, i32 10, i32 5, i32 7>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 2, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 1: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 7, i32 7, i32 3, i32 8, i32 8, i32 10>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 7, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 2: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 1, i32 5, i32 4, i32 0, i32 7, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 4, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 3: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 8, i32 2, i32 0, i32 3, i32 7, i32 0>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 4, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 4: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 3, i32 5, i32 1, i32 4, i32 7, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 1, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 5: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned
+; (built by a chain of three shuffles) and the last two are stored through a
+; prepended pointer parameter. Those last two entries (3, 2) both read the low
+; lanes of %0, so the stored piece is a single-input shuffle with undef.
+define <6 x i32> @shuffle_6xi32_to_6xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 8, i32 4, i32 3, i32 7, i32 3, i32 2>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+; Case 6: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 4, i32 3, i32 0, i32 0, i32 8, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 7: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 0, i32 2, i32 4, i32 0, i32 6, i32 2>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 8: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned as a
+; <4 x i32> and the last two are stored, padded to four lanes, through a
+; prepended pointer parameter; each <6 x i32> argument becomes two <4 x i32>.
+define <6 x i32> @shuffle_6xi32_to_6xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 9, i32 1, i32 0, i32 10, i32 1, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 3, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 9: <6 x i32> -> <6 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four result lanes are returned and
+; the last two are stored through a prepended pointer parameter. Those last two
+; entries are both 3 (%0 lane 3), so the stored piece is a single-input splat
+; with undef as the second operand.
+define <6 x i32> @shuffle_6xi32_to_6xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <6 x i32> <i32 6, i32 8, i32 3, i32 10, i32 3, i32 3>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 0, i32 2, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 0: <6 x i32> -> <8 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the result is two <4 x i32> pieces: the first
+; four lanes are returned and the last four are stored through a prepended
+; pointer parameter; each <6 x i32> argument becomes two <4 x i32> values.
+define <8 x i32> @shuffle_6xi32_to_8xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 0, i32 5, i32 7, i32 9, i32 10, i32 3, i32 6, i32 9>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 1: <6 x i32> -> <8 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the result is two <4 x i32> pieces: the first
+; four lanes are returned and the last four are stored through a prepended
+; pointer parameter; each <6 x i32> argument becomes two <4 x i32> values.
+define <8 x i32> @shuffle_6xi32_to_8xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 0, i32 7, i32 9, i32 2, i32 3, i32 5, i32 5, i32 9>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 7, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Case 2: <6 x i32> -> <8 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the result is two <4 x i32> pieces: the first
+; four lanes are returned and the last four are stored through a prepended
+; pointer parameter; each <6 x i32> argument becomes two <4 x i32> values.
+define <8 x i32> @shuffle_6xi32_to_8xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 4, i32 1, i32 10, i32 1, i32 1, i32 5, i32 3, i32 4>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+; Case 3: <6 x i32> -> <8 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the result is two <4 x i32> pieces: the first
+; four lanes are returned and the last four are stored through a prepended
+; pointer parameter; each <6 x i32> argument becomes two <4 x i32> values.
+define <8 x i32> @shuffle_6xi32_to_8xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 0, i32 8, i32 6, i32 4, i32 5, i32 2, i32 6, i32 9>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Case 4: <6 x i32> -> <8 x i32> shuffle (mask indices 0-5 read %0, 6-11 read
+; %1). In the expectations below the first four lanes are returned and the last
+; four are stored through a prepended pointer parameter. The first four mask
+; entries (1, 3, 0, 2) read only the low lanes of %0, so the returned piece is
+; a single-input shuffle with undef as the second operand.
+define <8 x i32> @shuffle_6xi32_to_8xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 1, i32 3, i32 0, i32 2, i32 9, i32 10, i32 1, i32 5>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_6xi32_to_8xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 7, i32 6, i32 8, i32 6, i32 7, i32 4, i32 5, i32 8>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 5, i32 2>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_6xi32_to_8xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 1, i32 10, i32 0, i32 1, i32 7, i32 7, i32 5, i32 1>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 0, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_6xi32_to_8xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 1, i32 3, i32 8, i32 6, i32 8, i32 10, i32 6, i32 8>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 1, i32 3, i32 6, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 0, i32 2>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_6xi32_to_8xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 9, i32 2, i32 6, i32 2, i32 8, i32 9, i32 6, i32 3>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 3, i32 6, i32 0, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 7>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_6xi32_to_8xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <8 x i32> <i32 10, i32 3, i32 0, i32 1, i32 10, i32 3, i32 3, i32 10>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 4, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 7, i32 0>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 9, i32 9, i32 6, i32 2, i32 1, i32 6, i32 1, i32 5, i32 6, i32 1, i32 7, i32 3>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 3, i32 0, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 3, i32 1, i32 0, i32 1, i32 2, i32 0, i32 7, i32 10, i32 9, i32 4, i32 5, i32 5>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 7, i32 6, i32 9, i32 4, i32 4, i32 8, i32 8, i32 0, i32 9, i32 3, i32 9, i32 4>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 7, i32 10, i32 8, i32 4, i32 6, i32 9, i32 10, i32 10, i32 5, i32 1, i32 3, i32 0>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 2, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 7, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 7, i32 1, i32 10, i32 3, i32 4, i32 4, i32 2, i32 9, i32 6, i32 4, i32 1, i32 8>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 4, i32 9, i32 10, i32 10, i32 5, i32 1, i32 1, i32 2, i32 5, i32 2, i32 0, i32 2>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 5, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 4, i32 6>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 1, i32 7, i32 0, i32 7, i32 7, i32 6, i32 0, i32 0, i32 1, i32 10, i32 2, i32 0>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 0, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 2, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 1, i32 4, i32 7, i32 1, i32 6, i32 2, i32 3, i32 2, i32 0, i32 5, i32 3, i32 5>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 6, i32 7, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 3, i32 5>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 5, i32 10, i32 5, i32 2, i32 10, i32 7, i32 7, i32 0, i32 0, i32 0, i32 6, i32 9>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_6xi32_to_12xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <12 x i32> <i32 6, i32 6, i32 9, i32 1, i32 8, i32 3, i32 9, i32 10, i32 10, i32 5, i32 6, i32 10>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 3, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 2, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 7, i32 3, i32 9, i32 5, i32 8, i32 5, i32 2, i32 1, i32 9, i32 5, i32 10, i32 7, i32 5, i32 7, i32 0, i32 3>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 6, i32 7, i32 10, i32 3, i32 10, i32 4, i32 2, i32 2, i32 3, i32 7, i32 6, i32 6, i32 9, i32 7, i32 0, i32 6>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 3, i32 1, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 4, i32 1, i32 4, i32 0, i32 7, i32 3, i32 6, i32 4, i32 3, i32 7, i32 7, i32 8, i32 9, i32 4, i32 2, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 0, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 0, i32 0>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 7, i32 9, i32 8, i32 3, i32 6, i32 8, i32 0, i32 8, i32 7, i32 10, i32 1, i32 0, i32 4, i32 4, i32 3, i32 9>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 2, i32 4, i32 2>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 4, i32 8, i32 8, i32 5, i32 2, i32 8, i32 8, i32 5, i32 10, i32 7, i32 9, i32 9, i32 5, i32 6, i32 1, i32 4>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 6, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 0, i32 8, i32 2, i32 7, i32 6, i32 2, i32 9, i32 6, i32 1, i32 6, i32 6, i32 9, i32 7, i32 2, i32 5, i32 4>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 2, i32 4, i32 10, i32 0, i32 2, i32 4, i32 8, i32 4, i32 2, i32 6, i32 4, i32 8, i32 4, i32 5, i32 7, i32 10>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 0, i32 8, i32 3, i32 0, i32 10, i32 10, i32 6, i32 6, i32 6, i32 8, i32 8, i32 10, i32 3, i32 4, i32 6, i32 3>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 2, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 6, i32 0, i32 1, i32 3, i32 0, i32 2, i32 2, i32 7, i32 8, i32 10, i32 2, i32 2, i32 0, i32 10, i32 8, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 5, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 2, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_6xi32_to_16xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <16 x i32> <i32 4, i32 9, i32 8, i32 0, i32 3, i32 7, i32 5, i32 2, i32 4, i32 2, i32 5, i32 9, i32 8, i32 0, i32 6, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 7, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 1, i32 1>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 0, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_0(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 7, i32 4, i32 8, i32 9, i32 7, i32 5, i32 7, i32 5, i32 3, i32 2, i32 1, i32 4, i32 9, i32 4, i32 8, i32 4, i32 8, i32 6, i32 1, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 2, i32 3>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 1, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 4, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 0, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_1(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 8, i32 8, i32 1, i32 0, i32 0, i32 7, i32 10, i32 8, i32 1, i32 6, i32 0, i32 8, i32 3, i32 2, i32 4, i32 7, i32 6, i32 6, i32 7, i32 4>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 2, i32 5, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 0, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_2(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 0, i32 5, i32 9, i32 9, i32 6, i32 5, i32 0, i32 3, i32 10, i32 1, i32 1, i32 10, i32 10, i32 10, i32 4, i32 5, i32 4, i32 4, i32 8, i32 7>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 0>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 0, i32 6, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_3(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 0, i32 3, i32 1, i32 5, i32 9, i32 7, i32 1, i32 1, i32 5, i32 3, i32 3, i32 10, i32 3, i32 7, i32 3, i32 2, i32 1, i32 2, i32 8, i32 6>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 3, i32 1, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 3, i32 2>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 2, i32 6, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_4(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 10, i32 10, i32 2, i32 2, i32 5, i32 4, i32 10, i32 7, i32 6, i32 1, i32 0, i32 9, i32 6, i32 0, i32 10, i32 6, i32 3, i32 2, i32 3, i32 9>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 1, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 4, i32 3>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 2, i32 3, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_5(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 7, i32 5, i32 9, i32 10, i32 10, i32 3, i32 1, i32 4, i32 4, i32 10, i32 7, i32 4, i32 6, i32 10, i32 10, i32 1, i32 0, i32 1, i32 1, i32 2>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_6(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 5, i32 1, i32 5, i32 6, i32 3, i32 8, i32 5, i32 5, i32 0, i32 8, i32 7, i32 5, i32 9, i32 6, i32 10, i32 10, i32 5, i32 10, i32 8, i32 4>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 1, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 6, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 3, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_7(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 8, i32 5, i32 7, i32 8, i32 1, i32 6, i32 1, i32 7, i32 2, i32 10, i32 2, i32 3, i32 8, i32 6, i32 0, i32 1, i32 3, i32 9, i32 4, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 1, i32 2>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 1, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 4, i32 2, i32 3>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 0, i32 4, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_8(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 1, i32 5, i32 10, i32 3, i32 10, i32 3, i32 10, i32 6, i32 8, i32 4, i32 0, i32 5, i32 7, i32 9, i32 9, i32 1, i32 3, i32 9, i32 1, i32 9>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 3, i32 3, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 1, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <20 x i32> @shuffle_6xi32_to_20xi32_9(<6 x i32>, <6 x i32>) {
+ %3 = shufflevector <6 x i32> %0, <6 x i32> %1, <20 x i32> <i32 0, i32 9, i32 2, i32 7, i32 1, i32 2, i32 5, i32 9, i32 1, i32 6, i32 3, i32 4, i32 8, i32 7, i32 9, i32 2, i32 8, i32 7, i32 9, i32 9>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_6xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 1, i32 3, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_0(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 5, i32 6>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_1(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 5, i32 8>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_2(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 3, i32 5>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_3(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 3, i32 12>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_4(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 11, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_5(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 2, i32 10>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_6(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 10, i32 9>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_7(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 4, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_8(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 10, i32 3>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <2 x i32> @shuffle_8xi32_to_2xi32_9(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <2 x i32> <i32 12, i32 5>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_2xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_0(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 10, i32 4, i32 7, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 7, i32 7>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_1(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 4, i32 5, i32 4, i32 13>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 0, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_2(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 8, i32 9, i32 2, i32 12>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_3(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 3, i32 12, i32 14, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_4(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 10, i32 9, i32 8, i32 13>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 1, i32 0, i32 5>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_5(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 2, i32 10, i32 2, i32 7>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 2, i32 2>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_6(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 3, i32 8, i32 13, i32 11>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %2, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_7(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 11, i32 11, i32 11, i32 9>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_8(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 10, i32 10, i32 11, i32 9>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <4 x i32> @shuffle_8xi32_to_4xi32_9(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <4 x i32> <i32 2, i32 5, i32 4, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_4xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 2, i32 5, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %5
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_0(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 5, i32 2, i32 14, i32 4, i32 11, i32 7>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_1(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 3, i32 3, i32 4, i32 1, i32 9, i32 3>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 3, i32 3, i32 4, i32 1>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_2(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 14, i32 13, i32 2, i32 6, i32 2, i32 0>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 2, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_3(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 11, i32 11, i32 6, i32 6, i32 7, i32 11>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 3, i32 3, i32 6, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_4(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 12, i32 1, i32 7, i32 8, i32 4, i32 6>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_5(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 6, i32 11, i32 0, i32 13, i32 0, i32 10>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_6(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 2, i32 5, i32 13, i32 13, i32 6, i32 9>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_7(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 14, i32 14, i32 3, i32 4, i32 10, i32 6>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 2, i32 2, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_8(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 1, i32 6, i32 12, i32 5, i32 1, i32 8>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_8xi32_to_6xi32_9(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <6 x i32> <i32 0, i32 8, i32 1, i32 8, i32 7, i32 9>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_0(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 13, i32 12, i32 1, i32 9, i32 5, i32 7, i32 0, i32 4>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 1, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 4, i32 0>
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_1(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 1, i32 13, i32 14, i32 2, i32 0, i32 1, i32 14, i32 12>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 4>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_2(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 6, i32 13, i32 11, i32 13, i32 0, i32 11, i32 13, i32 14> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %2[2] %4[1] %3[3] %4[1]. Stored via %0: %1[0] %3[3] %4[1] %4[2].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_3(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 1, i32 11, i32 12, i32 11, i32 4, i32 3, i32 0, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %1[1] %3[3] %4[0] %3[3]. Stored via %0: %2[0] %1[3] %1[0] %2[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 4, i32 3>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_4(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 10, i32 10, i32 4, i32 6, i32 9, i32 7, i32 0, i32 11> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %3[2] %3[2] %2[0] %2[2]. Stored via %0: %3[1] %2[3] %1[0] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 4, i32 6>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_5(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 10, i32 1, i32 0, i32 12, i32 3, i32 14, i32 7, i32 0> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %3[2] %1[1] %1[0] %4[0]. Stored via %0: %1[3] %4[2] %2[3] %1[0].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 2, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_6(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 6, i32 12, i32 0, i32 9, i32 12, i32 11, i32 0, i32 5> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %2[2] %4[0] %1[0] %3[1]. Stored via %0: %4[0] %3[3] %1[0] %2[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_7(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 7, i32 11, i32 1, i32 7, i32 3, i32 2, i32 13, i32 3> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %2[3] %3[3] %1[1] %2[3]. Stored via %0: %1[3] %1[2] %4[1] %1[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 3, i32 2, i32 5, i32 3>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_8(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 0, i32 4, i32 8, i32 5, i32 1, i32 13, i32 4, i32 8> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %1[0] %2[0] %3[0] %2[1]. Stored via %0: %1[1] %4[1] %2[0] %3[0].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_8xi32_to_8xi32_9(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into an <8 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 8, i32 0, i32 6, i32 10, i32 9, i32 2, i32 8, i32 3> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <8 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7 stored via pointer argument %0
+} ; Expected-output numbering: %1/%2 = low/high half of the first operand, %3/%4 = of the second. Returned: %3[0] %1[0] %2[2] %3[2]. Stored via %0: %3[1] %1[2] %3[0] %1[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 0, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_0(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 9, i32 4, i32 2, i32 9, i32 8, i32 8, i32 9, i32 4, i32 6, i32 0, i32 14, i32 1> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %4[1] %3[0] %2[2] %4[1]. Stored via %0: %4[0] %4[0] %4[1] %3[0]. Stored via %1: %3[2] %2[0] %5[2] %2[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 0, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_1(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 3, i32 9, i32 6, i32 12, i32 0, i32 7, i32 14, i32 4, i32 12, i32 8, i32 8, i32 3> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %2[3] %4[1] %3[2] %5[0]. Stored via %0: %2[0] %3[3] %5[2] %3[0]. Stored via %1: %5[0] %4[0] %4[0] %2[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_2(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 2, i32 9, i32 3, i32 10, i32 9, i32 6, i32 4, i32 12, i32 1, i32 7, i32 3, i32 14> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %2[2] %4[1] %2[3] %4[2]. Stored via %0: %4[1] %3[2] %3[0] %5[0]. Stored via %1: %2[1] %3[3] %2[3] %5[2].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 3, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_3(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 0, i32 13, i32 4, i32 7, i32 4, i32 2, i32 13, i32 5, i32 11, i32 6, i32 13, i32 1> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %2[0] %5[1] %3[0] %3[3]. Stored via %0: %3[0] %2[2] %5[1] %3[1]. Stored via %1: %4[3] %3[2] %5[1] %2[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_4(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 3, i32 7, i32 5, i32 0, i32 2, i32 9, i32 13, i32 6, i32 0, i32 5, i32 4, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %2[3] %3[3] %3[1] %2[0]. Stored via %0: %2[2] %4[1] %5[1] %3[2]. Stored via %1: %2[0] %3[1] %3[0] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 3, i32 7, i32 5, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_5(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 9, i32 4, i32 4, i32 9, i32 11, i32 10, i32 3, i32 7, i32 10, i32 13, i32 1, i32 11> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %4[1] %3[0] %3[0] %4[1]. Stored via %0: %4[3] %4[2] %2[3] %3[3]. Stored via %1: %4[2] %5[1] %2[1] %4[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 2, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_6(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 10, i32 3, i32 7, i32 0, i32 7, i32 6, i32 0, i32 8, i32 10, i32 13, i32 0, i32 1> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %4[2] %2[3] %3[3] %2[0]. Stored via %0: %3[3] %3[2] %2[0] %4[0]. Stored via %1: %4[2] %5[1] %2[0] %2[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_7(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 9, i32 12, i32 8, i32 5, i32 0, i32 11, i32 8, i32 5, i32 10, i32 12, i32 6, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %4[1] %5[0] %4[0] %3[1]. Stored via %0: %2[0] %4[3] %4[0] %3[1]. Stored via %1: %4[2] %5[0] %3[2] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_8(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 13, i32 1, i32 14, i32 7, i32 9, i32 12, i32 1, i32 10, i32 12, i32 14, i32 4, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %5[1] %2[1] %5[2] %3[3]. Stored via %0: %4[1] %5[0] %2[1] %4[2]. Stored via %1: %5[0] %5[2] %3[0] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 2, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <12 x i32> @shuffle_8xi32_to_12xi32_9(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <12 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <12 x i32> <i32 3, i32 10, i32 9, i32 1, i32 9, i32 0, i32 11, i32 5, i32 12, i32 10, i32 5, i32 5> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <12 x i32> %3 ; the directives below expect result lanes 0-3 returned, lanes 4-7 stored via pointer %0 and lanes 8-11 via pointer %1
+} ; Expected-output numbering: %2/%3 = low/high half of the first operand, %4/%5 = of the second. Returned: %2[3] %4[2] %4[1] %2[1]. Stored via %0: %4[1] %2[0] %4[3] %3[1]. Stored via %1: %5[0] %4[2] %3[1] %3[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 5, i32 1>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_0(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 4, i32 12, i32 5, i32 7, i32 3, i32 8, i32 14, i32 8, i32 6, i32 7, i32 7, i32 11, i32 11, i32 7, i32 11, i32 8> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %4[0] %6[0] %4[1] %4[3]. Stored via %0: %3[3] %5[0] %6[2] %5[0]. Stored via %1: %4[2] %4[3] %4[3] %5[3]. Stored via %2: %5[3] %4[3] %5[3] %5[0].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 3, i32 3, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 7, i32 3, i32 0>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_1(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 0, i32 9, i32 10, i32 2, i32 8, i32 6, i32 11, i32 4, i32 7, i32 6, i32 9, i32 1, i32 9, i32 7, i32 7, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %3[0] %5[1] %5[2] %3[2]. Stored via %0: %5[0] %4[2] %5[3] %4[0]. Stored via %1: %4[3] %4[2] %5[1] %3[1]. Stored via %2: %5[1] %4[3] %4[3] %4[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 6, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 3, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_2(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 11, i32 7, i32 9, i32 11, i32 9, i32 2, i32 5, i32 5, i32 8, i32 5, i32 4, i32 5, i32 8, i32 7, i32 9, i32 11> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %5[3] %4[3] %5[1] %5[3]. Stored via %0: %5[1] %3[2] %4[1] %4[1]. Stored via %1: %5[0] %4[1] %4[0] %4[1]. Stored via %2: %5[0] %4[3] %5[1] %5[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 7, i32 1, i32 3>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 4, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 1, i32 3>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_3(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 14, i32 0, i32 13, i32 13, i32 14, i32 0, i32 10, i32 14, i32 12, i32 4, i32 1, i32 8, i32 1, i32 11, i32 1, i32 1> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %6[2] %3[0] %6[1] %6[1]. Stored via %0: %6[2] %3[0] %5[2] %6[2]. Stored via %1: %6[0] %4[0] %3[1] %5[0]. Stored via %2: %3[1] %5[3] %3[1] %3[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_4(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 14, i32 2, i32 9, i32 11, i32 7, i32 13, i32 10, i32 0, i32 4, i32 9, i32 7, i32 7, i32 13, i32 12, i32 7, i32 13> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %6[2] %3[2] %5[1] %5[3]. Stored via %0: %4[3] %6[1] %5[2] %3[0]. Stored via %1: %4[0] %5[1] %4[3] %4[3]. Stored via %2: %6[1] %6[0] %4[3] %6[1].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 3, i32 3>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 7, i32 1>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_5(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 12, i32 9, i32 0, i32 10, i32 12, i32 9, i32 12, i32 4, i32 0, i32 12, i32 6, i32 14, i32 3, i32 5, i32 0, i32 3> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %6[0] %5[1] %3[0] %5[2]. Stored via %0: %6[0] %5[1] %6[0] %4[0]. Stored via %1: %3[0] %6[0] %4[2] %6[2]. Stored via %2: %3[3] %4[1] %3[0] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 0, i32 3>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_6(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 3, i32 9, i32 5, i32 4, i32 4, i32 4, i32 4, i32 7, i32 3, i32 3, i32 10, i32 2, i32 11, i32 2, i32 2, i32 3> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %3[3] %5[1] %4[1] %4[0]. Stored via %0: %4[0] %4[0] %4[0] %4[3] (single source, so the second shuffle operand is undef). Stored via %1: %3[3] %3[3] %5[2] %3[2]. Stored via %2: %5[3] %3[2] %3[2] %3[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 3, i32 6, i32 2>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 3, i32 6, i32 6, i32 7>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_7(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 1, i32 13, i32 7, i32 2, i32 12, i32 7, i32 14, i32 2, i32 5, i32 10, i32 9, i32 9, i32 7, i32 13, i32 11, i32 7> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %3[1] %6[1] %4[3] %3[2]. Stored via %0: %6[0] %4[3] %6[2] %3[2]. Stored via %1: %4[1] %5[2] %5[1] %5[1]. Stored via %2: %4[3] %6[1] %5[3] %4[3].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 2, i32 2>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_8(<8 x i32>, <8 x i32>) { ; Input: shuffle of two <8 x i32> operands into a wider <16 x i32> result.
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 14, i32 13, i32 2, i32 14, i32 13, i32 4, i32 7, i32 6, i32 1, i32 11, i32 3, i32 11, i32 2, i32 9, i32 9, i32 8> ; mask values 0-7 pick lanes of the first operand, 8-15 lanes of the second
+ ret <16 x i32> %3 ; the directives below expect result lanes 0-3 returned and lanes 4-7, 8-11, 12-15 stored via pointers %0, %1, %2
+} ; Expected-output numbering: %3/%4 = low/high half of the first operand, %5/%6 = of the second. Returned: %6[2] %6[1] %3[2] %6[2]. Stored via %0: %6[1] %4[0] %4[3] %4[2]. Stored via %1: %3[1] %5[3] %3[3] %5[3]. Stored via %2: %3[2] %5[1] %5[1] %5[0].
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 1, i32 6, i32 2>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 7, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 3, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 4>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i32> @shuffle_8xi32_to_16xi32_9(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <16 x i32> <i32 1, i32 11, i32 6, i32 2, i32 3, i32 7, i32 11, i32 5, i32 3, i32 12, i32 1, i32 13, i32 13, i32 3, i32 4, i32 1>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 3, i32 4, i32 1, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_0(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 4, i32 10, i32 14, i32 11, i32 10, i32 0, i32 4, i32 12, i32 2, i32 8, i32 0, i32 4, i32 8, i32 4, i32 11, i32 0, i32 11, i32 8, i32 1, i32 14>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_1(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 1, i32 3, i32 8, i32 5, i32 2, i32 2, i32 11, i32 10, i32 11, i32 10, i32 10, i32 6, i32 9, i32 7, i32 8, i32 6, i32 5, i32 14, i32 12, i32 14>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 2, i32 7, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 0, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 1, i32 6, i32 4, i32 6>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_2(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 3, i32 8, i32 11, i32 10, i32 11, i32 8, i32 10, i32 1, i32 6, i32 6, i32 6, i32 13, i32 3, i32 9, i32 12, i32 2, i32 3, i32 3, i32 7, i32 10>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 4, i32 7, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 0, i32 2, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 2, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_3(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 2, i32 7, i32 11, i32 3, i32 11, i32 13, i32 13, i32 0, i32 11, i32 8, i32 1, i32 11, i32 2, i32 7, i32 1, i32 3, i32 2, i32 3, i32 0, i32 8>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 0, i32 5, i32 3>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 1, i32 3>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 3, i32 0, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_4(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 1, i32 2, i32 1, i32 0, i32 5, i32 9, i32 14, i32 2, i32 12, i32 0, i32 11, i32 2, i32 14, i32 6, i32 9, i32 14, i32 0, i32 3, i32 0, i32 10>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 3, i32 0, i32 6>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_5(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 14, i32 4, i32 10, i32 1, i32 3, i32 14, i32 6, i32 7, i32 14, i32 3, i32 8, i32 8, i32 8, i32 8, i32 1, i32 12, i32 10, i32 14, i32 10, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 2, i32 6, i32 2, i32 2>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_6(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 14, i32 12, i32 9, i32 7, i32 5, i32 1, i32 10, i32 2, i32 6, i32 1, i32 8, i32 11, i32 3, i32 5, i32 5, i32 0, i32 2, i32 6, i32 5, i32 14>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 2, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 0>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_7(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 4, i32 1, i32 13, i32 13, i32 10, i32 6, i32 7, i32 7, i32 8, i32 6, i32 7, i32 0, i32 0, i32 11, i32 7, i32 3, i32 14, i32 2, i32 0, i32 0>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 2, i32 6, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_8(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 5, i32 4, i32 3, i32 1, i32 10, i32 0, i32 6, i32 3, i32 3, i32 13, i32 11, i32 2, i32 2, i32 12, i32 8, i32 13, i32 6, i32 5, i32 13, i32 13>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 7, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 1, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <20 x i32> @shuffle_8xi32_to_20xi32_9(<8 x i32>, <8 x i32>) {
+ %3 = shufflevector <8 x i32> %0, <8 x i32> %1, <20 x i32> <i32 3, i32 13, i32 0, i32 3, i32 0, i32 1, i32 9, i32 3, i32 4, i32 4, i32 5, i32 12, i32 4, i32 14, i32 10, i32 11, i32 1, i32 7, i32 4, i32 9>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_8xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 3, i32 5, i32 0, i32 3>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 0, i32 0, i32 1, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> %0
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 15, i32 16>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 2, i32 13>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_3(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> %0
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_4(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 1, i32 6>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_5(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 19, i32 17>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_6(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 22, i32 13>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_7(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 3, i32 12>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_8(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 6, i32 14>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i32> @shuffle_12xi32_to_2xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <2 x i32> <i32 2, i32 15>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_2xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 14, i32 9, i32 11, i32 14>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 7, i32 2>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 16, i32 0, i32 7, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %0, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 7, i32 6>
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 0, i32 17, i32 2, i32 8>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 2>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_3(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 7, i32 17, i32 18, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 6, i32 0>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_4(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 7, i32 13, i32 12, i32 5>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 3, i32 5, i32 4, i32 1>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_5(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 11, i32 21, i32 11, i32 22>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 3, i32 6>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_6(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 21, i32 14, i32 20, i32 15>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 0, i32 7>
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_7(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 3, i32 12, i32 14, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_8(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 8, i32 16, i32 0, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <4 x i32> @shuffle_12xi32_to_4xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <4 x i32> <i32 18, i32 19, i32 8, i32 5>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_4xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 11, i32 13, i32 6, i32 20, i32 12, i32 7>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 17, i32 4, i32 15, i32 7, i32 19, i32 16>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 6, i32 2, i32 21, i32 8, i32 22, i32 12>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_3(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 11, i32 6, i32 0, i32 11, i32 16, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_4(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 21, i32 5, i32 11, i32 8, i32 21, i32 12>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_5(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 0, i32 15, i32 3, i32 14, i32 11, i32 18>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 3, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_6(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 9, i32 12, i32 8, i32 19, i32 5, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_7(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 17, i32 3, i32 18, i32 16, i32 9, i32 18>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 1, i32 7, i32 2, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_8(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 11, i32 14, i32 6, i32 20, i32 15, i32 0>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_12xi32_to_6xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <6 x i32> <i32 12, i32 20, i32 10, i32 10, i32 14, i32 14>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 7, i32 14, i32 14, i32 18, i32 18, i32 20, i32 22, i32 15>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 5, i32 7, i32 16, i32 22, i32 16, i32 2, i32 16, i32 20>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 1, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 3, i32 3, i32 1, i32 0, i32 5, i32 22, i32 5, i32 9>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 1, i32 1>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_3(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 5, i32 1, i32 18, i32 1, i32 13, i32 6, i32 7, i32 14>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 7, i32 2>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_4(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 21, i32 21, i32 7, i32 11, i32 17, i32 19, i32 3, i32 16>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 1, i32 3, i32 7, i32 0>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_5(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 14, i32 9, i32 22, i32 12, i32 13, i32 1, i32 3, i32 0>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 7, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_6(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 15, i32 7, i32 0, i32 6, i32 2, i32 3, i32 7, i32 12>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 3, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_7(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 22, i32 20, i32 9, i32 8, i32 5, i32 5, i32 22, i32 9>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 0, i32 5, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 1, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_8(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 5, i32 20, i32 19, i32 3, i32 7, i32 18, i32 3, i32 1>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 7, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_12xi32_to_8xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <8 x i32> <i32 19, i32 21, i32 15, i32 11, i32 16, i32 1, i32 10, i32 3>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 18, i32 14, i32 14, i32 4, i32 15, i32 11, i32 12, i32 14, i32 17, i32 8, i32 0, i32 3>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 3, i32 2, i32 10, i32 19, i32 1, i32 11, i32 21, i32 0, i32 14, i32 14, i32 13, i32 0>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 3, i32 2, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 1, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 7, i32 13, i32 1, i32 10, i32 9, i32 18, i32 2, i32 17, i32 8, i32 11, i32 2, i32 18>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 3, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_3(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 5, i32 20, i32 1, i32 3, i32 14, i32 18, i32 10, i32 1, i32 6, i32 16, i32 21, i32 22>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_4(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 7, i32 9, i32 16, i32 5, i32 13, i32 19, i32 6, i32 9, i32 6, i32 15, i32 8, i32 0>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_5(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 2, i32 11, i32 19, i32 2, i32 14, i32 21, i32 17, i32 3, i32 1, i32 4, i32 3, i32 11>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_6(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 20, i32 6, i32 9, i32 13, i32 20, i32 19, i32 17, i32 21, i32 12, i32 8, i32 8, i32 8>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_7(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 10, i32 15, i32 5, i32 15, i32 15, i32 21, i32 3, i32 16, i32 10, i32 4, i32 0, i32 17>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_8(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 12, i32 7, i32 3, i32 10, i32 11, i32 4, i32 11, i32 20, i32 15, i32 19, i32 20, i32 17>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_12xi32_to_12xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <12 x i32> <i32 5, i32 14, i32 9, i32 5, i32 15, i32 18, i32 11, i32 18, i32 19, i32 3, i32 7, i32 15>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_0(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 15, i32 11, i32 14, i32 12, i32 2, i32 14, i32 18, i32 2, i32 14, i32 0, i32 10, i32 5, i32 9, i32 9, i32 21, i32 0>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 2, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_1(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 1, i32 9, i32 12, i32 2, i32 15, i32 8, i32 0, i32 20, i32 10, i32 16, i32 18, i32 17, i32 9, i32 17, i32 0, i32 18>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 4, i32 6, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_2(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 18, i32 5, i32 6, i32 3, i32 7, i32 10, i32 22, i32 18, i32 15, i32 14, i32 14, i32 10, i32 6, i32 14, i32 9, i32 6>
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 2, i32 5, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 2, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_3(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 17, i32 11, i32 6, i32 13, i32 6, i32 15, i32 22, i32 13, i32 12, i32 18, i32 15, i32 13, i32 9, i32 21, i32 22, i32 3> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 9 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 6, i32 3, i32 1>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 1, i32 5, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_4(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 9, i32 6, i32 16, i32 18, i32 6, i32 8, i32 8, i32 15, i32 10, i32 18, i32 16, i32 18, i32 6, i32 1, i32 22, i32 2> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 8 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 6, i32 4, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_5(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 15, i32 3, i32 8, i32 18, i32 18, i32 9, i32 14, i32 4, i32 11, i32 5, i32 2, i32 6, i32 6, i32 19, i32 0, i32 19> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 12 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_6(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 6, i32 7, i32 18, i32 5, i32 1, i32 16, i32 21, i32 11, i32 2, i32 2, i32 0, i32 19, i32 10, i32 10, i32 21, i32 19> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 7 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 3, i32 6, i32 1>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 2, i32 2, i32 0, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 2, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_7(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 19, i32 21, i32 22, i32 12, i32 18, i32 11, i32 20, i32 5, i32 12, i32 5, i32 22, i32 16, i32 6, i32 9, i32 13, i32 17> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 11 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_8(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 0, i32 17, i32 16, i32 3, i32 12, i32 3, i32 20, i32 22, i32 8, i32 9, i32 0, i32 20, i32 11, i32 20, i32 13, i32 6> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 8 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 4, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <16 x i32> @shuffle_12xi32_to_16xi32_9(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <16 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <16 x i32> <i32 0, i32 6, i32 10, i32 3, i32 9, i32 2, i32 22, i32 22, i32 5, i32 9, i32 21, i32 17, i32 6, i32 2, i32 12, i32 20> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <16 x i32> %3 ; expectations below: 11 <4 x i32> shuffles; one chunk is returned by value, three are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_0(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 13, i32 15, i32 10, i32 6, i32 14, i32 0, i32 3, i32 9, i32 14, i32 4, i32 8, i32 20, i32 19, i32 1, i32 16, i32 20, i32 7, i32 7, i32 13, i32 20> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 11 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 1, i32 3, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 3, i32 3, i32 5, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_1(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 4, i32 2, i32 9, i32 14, i32 22, i32 17, i32 12, i32 10, i32 6, i32 13, i32 21, i32 14, i32 2, i32 19, i32 17, i32 12, i32 15, i32 17, i32 9, i32 12> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 14 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %9, <4 x i32> %8, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 5, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_2(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 3, i32 1, i32 10, i32 4, i32 17, i32 18, i32 9, i32 17, i32 9, i32 13, i32 1, i32 20, i32 11, i32 0, i32 15, i32 15, i32 16, i32 3, i32 19, i32 12> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 10 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_3(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 2, i32 4, i32 0, i32 22, i32 17, i32 18, i32 15, i32 7, i32 4, i32 9, i32 2, i32 9, i32 10, i32 6, i32 19, i32 6, i32 15, i32 7, i32 5, i32 5> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 11 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 1, i32 2, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_4(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 4, i32 20, i32 1, i32 15, i32 17, i32 13, i32 7, i32 8, i32 10, i32 22, i32 7, i32 8, i32 11, i32 14, i32 0, i32 12, i32 12, i32 14, i32 19, i32 3> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 14 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 0, i32 2, i32 7, i32 7>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_5(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 20, i32 21, i32 13, i32 7, i32 13, i32 6, i32 20, i32 17, i32 1, i32 9, i32 16, i32 8, i32 21, i32 21, i32 7, i32 10, i32 5, i32 10, i32 8, i32 18> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 12 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 1, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_6(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 14, i32 19, i32 14, i32 3, i32 6, i32 9, i32 17, i32 21, i32 8, i32 16, i32 16, i32 7, i32 13, i32 20, i32 4, i32 11, i32 10, i32 4, i32 4, i32 19> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 12 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 2, i32 7, i32 2, i32 2>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %7, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_7(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 22, i32 5, i32 5, i32 6, i32 9, i32 16, i32 19, i32 16, i32 14, i32 9, i32 10, i32 5, i32 3, i32 10, i32 20, i32 18, i32 18, i32 11, i32 17, i32 7> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 9 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 1, i32 4, i32 7, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 2, i32 5, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_8(<12 x i32>, <12 x i32>) { ; test input: shuffle two <12 x i32> operands into one <20 x i32> result
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 18, i32 21, i32 0, i32 1, i32 4, i32 4, i32 11, i32 10, i32 9, i32 21, i32 16, i32 6, i32 7, i32 1, i32 9, i32 14, i32 11, i32 1, i32 18, i32 18> ; mask values 0-11 select from %0, 12-23 from %1
+ ret <20 x i32> %3 ; expectations below: 11 <4 x i32> shuffles; one chunk is returned by value, four are stored through the leading <4 x i32>* arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 0, i32 7, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <20 x i32> @shuffle_12xi32_to_20xi32_9(<12 x i32>, <12 x i32>) {
+ %3 = shufflevector <12 x i32> %0, <12 x i32> %1, <20 x i32> <i32 21, i32 10, i32 18, i32 0, i32 7, i32 16, i32 4, i32 15, i32 5, i32 20, i32 8, i32 19, i32 4, i32 20, i32 20, i32 7, i32 13, i32 7, i32 9, i32 13>
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_12xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 3, i32 4, i32 0, i32 0>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_0(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 3, i32 12>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %0, <4 x i32> %3, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_1(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 1, i32 2>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_2(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 14, i32 1>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %0, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_3(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 21, i32 25>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_4(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 18, i32 6>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_5(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 13, i32 20>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_6(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 10, i32 0>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %0, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_7(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 12, i32 24>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_8(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 22, i32 21>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <2 x i32> @shuffle_16xi32_to_2xi32_9(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 27, i32 20>
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_2xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_0(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 22, i32 6, i32 29, i32 29>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_1(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 28, i32 29, i32 14, i32 17>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_2(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 18, i32 5, i32 4, i32 10>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 2, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_3(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 9, i32 19, i32 22, i32 22>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_4(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 28, i32 13, i32 0, i32 26>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_5(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 24, i32 1, i32 25, i32 27>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %6, <4 x i32> %0, <4 x i32> <i32 0, i32 5, i32 1, i32 3>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_6(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 12, i32 6, i32 19, i32 11>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_7(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 12, i32 14, i32 19, i32 17>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 2, i32 7, i32 5>
+; CHECK-NEXT: ret <4 x i32> %9
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_8(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 12, i32 11, i32 24, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <4 x i32> @shuffle_16xi32_to_4xi32_9(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 2, i32 0, i32 20, i32 28>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_4xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = shufflevector <4 x i32> %0, <4 x i32> %5, <4 x i32> <i32 2, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_0(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 21, i32 11, i32 20, i32 26, i32 25, i32 5>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 1, i32 7, i32 0, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_1(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 14, i32 12, i32 17, i32 30, i32 28, i32 15>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_2(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 25, i32 3, i32 21, i32 5, i32 8, i32 30>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %8, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_3(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 8, i32 16, i32 28, i32 18, i32 7, i32 19>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_4(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 13, i32 7, i32 8, i32 5, i32 13, i32 19>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_5(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 26, i32 0, i32 0, i32 2, i32 16, i32 9>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 4, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %3, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_6(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 20, i32 30, i32 11, i32 0, i32 20, i32 3>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_7(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 10, i32 3, i32 7, i32 6, i32 2, i32 11>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_8(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 15, i32 16, i32 19, i32 12, i32 18, i32 29>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 3, i32 4, i32 7, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <6 x i32> @shuffle_16xi32_to_6xi32_9(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <6 x i32> <i32 15, i32 4, i32 4, i32 15, i32 0, i32 16>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 4, i32 4, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %1, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_0(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 17, i32 20, i32 29, i32 9, i32 9, i32 7, i32 17, i32 22>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_1(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 1, i32 12, i32 18, i32 23, i32 15, i32 6, i32 0, i32 28>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_2(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 21, i32 6, i32 11, i32 22, i32 23, i32 30, i32 1, i32 2>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %6, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_3(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 12, i32 29, i32 3, i32 24, i32 29, i32 4, i32 8, i32 2>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_4(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 26, i32 30, i32 25, i32 27, i32 9, i32 24, i32 4, i32 1>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 2, i32 6, i32 1, i32 3>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_5(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 0, i32 5, i32 0, i32 17, i32 30, i32 12, i32 23, i32 1>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_6(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 13, i32 1, i32 0, i32 6, i32 24, i32 16, i32 23, i32 14>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_7(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 4, i32 21, i32 17, i32 2, i32 15, i32 30, i32 10, i32 14>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_8(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 18, i32 1, i32 1, i32 24, i32 2, i32 19, i32 21, i32 6>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %1, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <8 x i32> @shuffle_16xi32_to_8xi32_9(<16 x i32>, <16 x i32>) {
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <8 x i32> <i32 4, i32 14, i32 14, i32 4, i32 30, i32 23, i32 23, i32 11>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 0>
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_0(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 3, i32 24, i32 25, i32 17, i32 11, i32 21, i32 30, i32 0, i32 22, i32 19, i32 0, i32 23>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %8, <4 x i32> <i32 3, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_1(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 21, i32 3, i32 9, i32 20, i32 21, i32 22, i32 14, i32 25, i32 5, i32 28, i32 29, i32 1>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 2, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %3, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_2(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 20, i32 1, i32 8, i32 28, i32 9, i32 28, i32 14, i32 29, i32 2, i32 14, i32 29, i32 6>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %2, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_3(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 25, i32 12, i32 6, i32 23, i32 21, i32 16, i32 19, i32 7, i32 8, i32 24, i32 27, i32 5>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 0, i32 4, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_4(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 26, i32 25, i32 30, i32 27, i32 4, i32 28, i32 16, i32 5, i32 25, i32 28, i32 23, i32 15>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 2, i32 1, i32 6, i32 3>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_5(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 4, i32 21, i32 11, i32 5, i32 2, i32 19, i32 10, i32 1, i32 27, i32 17, i32 8, i32 24>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_6(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 9, i32 29, i32 28, i32 9, i32 24, i32 3, i32 3, i32 27, i32 4, i32 9, i32 8, i32 21>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %9, <4 x i32> <i32 1, i32 5, i32 4, i32 1>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 0, i32 7, i32 7, i32 3>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_7(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 1, i32 1, i32 19, i32 8, i32 4, i32 13, i32 22, i32 17, i32 7, i32 10, i32 26, i32 5>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 1, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_8(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 8, i32 18, i32 30, i32 9, i32 14, i32 12, i32 2, i32 1, i32 19, i32 7, i32 16, i32 17>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 2, i32 0, i32 6, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 3, i32 7, i32 0, i32 1>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_16xi32_to_12xi32_9(<16 x i32>, <16 x i32>) { ; picks 12 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <12 x i32> <i32 29, i32 30, i32 1, i32 18, i32 18, i32 22, i32 5, i32 22, i32 1, i32 26, i32 5, i32 13>
+ ret <12 x i32> %3 ; expectations below: one <4 x i32> returned, two more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %2, <4 x i32> <i32 1, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %2, <4 x i32> %8, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_0(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 0, i32 25, i32 0, i32 24, i32 6, i32 17, i32 23, i32 21, i32 15, i32 16, i32 24, i32 17, i32 26, i32 26, i32 24, i32 21>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %9, <4 x i32> <i32 0, i32 5, i32 0, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %9, <4 x i32> %8, <4 x i32> <i32 2, i32 2, i32 0, i32 5>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_1(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 13, i32 4, i32 10, i32 19, i32 21, i32 13, i32 27, i32 21, i32 21, i32 14, i32 5, i32 2, i32 11, i32 15, i32 15, i32 16>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_2(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 19, i32 15, i32 12, i32 18, i32 24, i32 27, i32 0, i32 29, i32 15, i32 11, i32 11, i32 25, i32 13, i32 30, i32 12, i32 19>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 4, i32 2>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %10, <4 x i32> <i32 1, i32 6, i32 0, i32 0>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_3(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 4, i32 28, i32 27, i32 23, i32 6, i32 29, i32 17, i32 23, i32 20, i32 12, i32 12, i32 18, i32 30, i32 28, i32 20, i32 8>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %10, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %10, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %10, <4 x i32> %8, <4 x i32> <i32 2, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_4(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 20, i32 6, i32 14, i32 26, i32 20, i32 30, i32 18, i32 25, i32 1, i32 21, i32 3, i32 12, i32 8, i32 29, i32 26, i32 17>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %10, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %3, <4 x i32> %8, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %5, <4 x i32> %10, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_5(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 27, i32 4, i32 0, i32 12, i32 0, i32 11, i32 14, i32 15, i32 17, i32 9, i32 27, i32 23, i32 17, i32 7, i32 30, i32 22>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_6(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 16, i32 0, i32 7, i32 26, i32 3, i32 28, i32 21, i32 21, i32 30, i32 17, i32 28, i32 0, i32 8, i32 24, i32 5, i32 7>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %10, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 0, i32 0>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_7(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 13, i32 21, i32 0, i32 29, i32 6, i32 2, i32 15, i32 5, i32 7, i32 18, i32 14, i32 13, i32 2, i32 1, i32 14, i32 11>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %3, <4 x i32> %6, <4 x i32> <i32 2, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_8(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 0, i32 1, i32 19, i32 24, i32 6, i32 21, i32 24, i32 16, i32 19, i32 11, i32 3, i32 1, i32 5, i32 17, i32 27, i32 28>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <16 x i32> @shuffle_16xi32_to_16xi32_9(<16 x i32>, <16 x i32>) { ; picks 16 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 14, i32 0, i32 13, i32 20, i32 29, i32 0, i32 24, i32 10, i32 4, i32 10, i32 16, i32 15, i32 18, i32 22, i32 22, i32 28>
+ ret <16 x i32> %3 ; expectations below: one <4 x i32> returned, three more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_0(<16 x i32>, <16 x i32>) { ; picks 20 result lanes from %0 (mask 0-15) and %1 (mask 16-31)
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 18, i32 4, i32 4, i32 25, i32 5, i32 18, i32 0, i32 8, i32 28, i32 5, i32 17, i32 13, i32 0, i32 25, i32 7, i32 13, i32 9, i32 0, i32 15, i32 16>
+ ret <20 x i32> %3 ; expectations below: one <4 x i32> returned, four more stored through the pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %4, <4 x i32> %10, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_1(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 2, i32 0, i32 25, i32 16, i32 29, i32 8, i32 9, i32 11, i32 11, i32 30, i32 29, i32 22, i32 1, i32 9, i32 19, i32 18, i32 11, i32 14, i32 1, i32 9>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (10 shufflevectors in total; lanes 4-7 need only one, %15).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %10, <4 x i32> <i32 2, i32 0, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 1, i32 4, i32 5, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %11, <4 x i32> <i32 3, i32 6, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_2(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 18, i32 15, i32 28, i32 26, i32 17, i32 6, i32 8, i32 25, i32 13, i32 23, i32 30, i32 9, i32 10, i32 29, i32 6, i32 28, i32 4, i32 9, i32 13, i32 2>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (15 shufflevectors in total, three per 4-lane group).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %7, <4 x i32> %9, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %6, <4 x i32> %11, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_3(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 21, i32 10, i32 10, i32 16, i32 20, i32 15, i32 7, i32 4, i32 13, i32 15, i32 27, i32 8, i32 19, i32 16, i32 14, i32 25, i32 4, i32 19, i32 19, i32 29>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (10 shufflevectors in total, two per 4-lane group).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %7, <4 x i32> %10, <4 x i32> <i32 1, i32 3, i32 7, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 3, i32 0, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_4(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 17, i32 10, i32 24, i32 1, i32 13, i32 9, i32 6, i32 10, i32 15, i32 27, i32 13, i32 7, i32 0, i32 16, i32 30, i32 22, i32 29, i32 16, i32 23, i32 3>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (14 shufflevectors in total).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %7, <4 x i32> %10, <4 x i32> <i32 3, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_5(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 2, i32 2, i32 1, i32 14, i32 20, i32 0, i32 15, i32 30, i32 19, i32 19, i32 18, i32 20, i32 20, i32 13, i32 21, i32 15, i32 7, i32 11, i32 29, i32 4>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (9 shufflevectors in total; lanes 0-3, 8-11 and 12-15 each need only one).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 2, i32 2, i32 1, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 3, i32 3, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_6(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 24, i32 13, i32 9, i32 13, i32 25, i32 20, i32 8, i32 29, i32 10, i32 3, i32 14, i32 29, i32 3, i32 29, i32 14, i32 14, i32 7, i32 9, i32 9, i32 19>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (13 shufflevectors in total).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %10, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %4, <4 x i32> %11, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_7(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 15, i32 9, i32 28, i32 26, i32 15, i32 23, i32 5, i32 1, i32 18, i32 12, i32 12, i32 13, i32 26, i32 15, i32 25, i32 19, i32 7, i32 28, i32 2, i32 12>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (12 shufflevectors in total; lanes 8-11 need only one, %19).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %9, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 2, i32 4, i32 4, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %5, <4 x i32> %11, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_8(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 28, i32 9, i32 16, i32 26, i32 8, i32 21, i32 5, i32 10, i32 11, i32 20, i32 14, i32 16, i32 16, i32 11, i32 29, i32 2, i32 23, i32 16, i32 8, i32 21>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (15 shufflevectors in total, three per 4-lane group).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %9, <4 x i32> %8, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_16xi32_to_20xi32_9(<16 x i32>, <16 x i32>) { ; Shuffles two <16 x i32> operands into a <20 x i32> result using the constant mask below.
+ %3 = shufflevector <16 x i32> %0, <16 x i32> %1, <20 x i32> <i32 5, i32 7, i32 24, i32 3, i32 0, i32 11, i32 19, i32 10, i32 26, i32 3, i32 25, i32 16, i32 27, i32 23, i32 9, i32 26, i32 27, i32 7, i32 29, i32 23>
+ ret <20 x i32> %3
+} ; Expected output below: operand pieces are %4-%7 (first) and %8-%11 (second); result lanes 0-3 are returned, lanes 4-19 are stored four at a time through pointers %0-%3 (13 shufflevectors in total).
+; CHECK-LABEL: define <4 x i32> @shuffle_16xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %10, <4 x i32> <i32 1, i32 3, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %10, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %10, <4 x i32> %9, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_0(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 4, i32 2>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 4 is element 0 of %1 and index 2 is element 2 of %0; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_1(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 2, i32 4>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 2 is element 2 of %0 and index 4 is element 0 of %1; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_2(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 34, i32 16>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 34 is element 2 of %8 and index 16 is element 0 of %4; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_3(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands; both picks fall in the same 4-lane piece.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 27, i32 25>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask indices 27 and 25 are elements 3 and 1 of %6, so the second shuffle operand is undef; the second pick is repeated in lanes 2-3.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_4(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 2, i32 29>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 2 is element 2 of %0 and index 29 is element 1 of %7; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %0, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_5(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 18, i32 0>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 18 is element 2 of %4 and index 0 is element 0 of %0; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %0, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_6(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 37, i32 3>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 37 is element 1 of %9 and index 3 is element 3 of %0; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %0, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_7(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 3, i32 33>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 3 is element 3 of %0 and index 33 is element 1 of %8; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %0, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_8(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 28, i32 17>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 28 is element 0 of %7 and index 17 is element 1 of %4; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <2 x i32> @shuffle_20xi32_to_2xi32_9(<20 x i32>, <20 x i32>) { ; Picks two elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <2 x i32> <i32 16, i32 35>
+ ret <2 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); mask index 16 is element 0 of %4 and index 35 is element 3 of %8; the second pick is repeated in lanes 2-3 of the <4 x i32> result.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_2xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: ret <4 x i32> %11
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_0(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 38, i32 30, i32 38, i32 13>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %9, %7 and %3 and is built by 2 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 2, i32 6, i32 2, i32 2>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_1(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 38, i32 14, i32 25, i32 14>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %9, %3 and %6 and is built by 3 chained shufflevectors (%3 is consulted twice).
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_1(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_2(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 24, i32 7, i32 3, i32 32>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %6, %1, %0 and %8 and is built by 3 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_2(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_3(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 18, i32 20, i32 35, i32 34>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %4, %5 and %8 and is built by 2 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_3(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 6>
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_4(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 22, i32 26, i32 4, i32 9>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %5, %6, %1 and %2 and is built by 3 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_4(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_5(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 29, i32 6, i32 31, i32 34>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %7, %1 and %8 and is built by 2 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_5(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 1, i32 6, i32 3, i32 3>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_6(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands; every mask index is below 20, so only the first operand is read.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 9, i32 18, i32 15, i32 8>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %2, %4 and %3 and is built by 3 chained shufflevectors (%2 is consulted twice).
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_6(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_7(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 6, i32 38, i32 10, i32 8>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %1, %9 and %2 and is built by 2 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_7(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %1, <4 x i32> %9, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 4>
+; CHECK-NEXT: ret <4 x i32> %12
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_8(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 35, i32 2, i32 15, i32 21>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %8, %0, %3 and %5 and is built by 3 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %8, <4 x i32> %0, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <4 x i32> @shuffle_20xi32_to_4xi32_9(<20 x i32>, <20 x i32>) { ; Picks four elements out of a pair of <20 x i32> operands.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <4 x i32> <i32 21, i32 19, i32 27, i32 37>
+ ret <4 x i32> %3
+} ; Expected output below: operand pieces are %0-%4 (first) and %5-%9 (second); the result draws on pieces %5, %4, %6 and %9 and is built by 3 chained shufflevectors.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_4xi32_9(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %11 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_0(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 11, i32 6, i32 8, i32 2, i32 24, i32 3>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 3, i32 6, i32 0, i32 0>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_1(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 33, i32 24, i32 30, i32 20, i32 37, i32 31>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %10, <4 x i32> %8, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_2(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 35, i32 38, i32 32, i32 2, i32 2, i32 2>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %9, <4 x i32> %10, <4 x i32> <i32 3, i32 6, i32 0, i32 0>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_3(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 34, i32 21, i32 21, i32 24, i32 8, i32 4>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %9, <4 x i32> %6, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_4(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 18, i32 0, i32 33, i32 30, i32 22, i32 25>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_5(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 1, i32 31, i32 14, i32 4, i32 25, i32 36>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %1, <4 x i32> %8, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %7, <4 x i32> %10, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_6(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 9, i32 17, i32 0, i32 27, i32 9, i32 35>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %9, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_7(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 28, i32 26, i32 24, i32 19, i32 9, i32 6>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %7, <4 x i32> <i32 0, i32 6, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_8(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 38, i32 9, i32 19, i32 6, i32 0, i32 13>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %10, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %1, <4 x i32> %4, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <6 x i32> @shuffle_20xi32_to_6xi32_9(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <6 x i32> <i32 30, i32 8, i32 19, i32 23, i32 28, i32 32>
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_6xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %3, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_0(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 9, i32 3, i32 37, i32 8, i32 15, i32 2, i32 19, i32 28>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_1(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 24, i32 28, i32 10, i32 37, i32 30, i32 5, i32 13, i32 11>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %2, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_2(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 12, i32 27, i32 35, i32 3, i32 10, i32 12, i32 38, i32 30>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %4, <4 x i32> %7, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %4, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_3(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 9, i32 9, i32 30, i32 13, i32 4, i32 17, i32 18, i32 27>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %3, <4 x i32> %8, <4 x i32> <i32 1, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> <i32 0, i32 5, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_4(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 17, i32 35, i32 22, i32 26, i32 16, i32 2, i32 5, i32 15>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_5(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 21, i32 22, i32 18, i32 11, i32 28, i32 36, i32 5, i32 35>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 1, i32 2, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %8, <4 x i32> %10, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_6(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 4, i32 24, i32 15, i32 21, i32 34, i32 2, i32 5, i32 7>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %2, <4 x i32> %7, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %9, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_7(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 30, i32 2, i32 30, i32 11, i32 21, i32 9, i32 9, i32 34>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %8, <4 x i32> %1, <4 x i32> <i32 2, i32 6, i32 2, i32 2>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_8(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 3, i32 9, i32 24, i32 37, i32 26, i32 38, i32 31, i32 27>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %1, <4 x i32> %3, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %7, <4 x i32> %10, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i32> @shuffle_20xi32_to_8xi32_9(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <8 x i32> <i32 4, i32 7, i32 2, i32 34, i32 21, i32 0, i32 29, i32 13>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_8xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = shufflevector <4 x i32> %2, <4 x i32> %1, <4 x i32> <i32 0, i32 3, i32 6, i32 6>
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %12, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_0(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 34, i32 35, i32 2, i32 2, i32 3, i32 18, i32 33, i32 2, i32 23, i32 13, i32 3, i32 22>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 2, i32 3, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_1(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 15, i32 25, i32 21, i32 38, i32 35, i32 2, i32 30, i32 36, i32 30, i32 23, i32 29, i32 18>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %10, <4 x i32> %2, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_2(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 6, i32 0, i32 30, i32 25, i32 3, i32 28, i32 33, i32 4, i32 2, i32 25, i32 16, i32 25>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %3, <4 x i32> %2, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %2, <4 x i32> %9, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %2, <4 x i32> %8, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_3(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 11, i32 18, i32 22, i32 16, i32 21, i32 36, i32 7, i32 16, i32 38, i32 31, i32 23, i32 38>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %11, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %11, <4 x i32> %9, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_4(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 10, i32 19, i32 38, i32 13, i32 21, i32 25, i32 13, i32 9, i32 0, i32 19, i32 17, i32 4>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %2, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_5(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 24, i32 38, i32 8, i32 29, i32 2, i32 26, i32 0, i32 19, i32 17, i32 5, i32 24, i32 38>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %11, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %2, <4 x i32> %8, <4 x i32> <i32 2, i32 6, i32 0, i32 0>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %6, <4 x i32> %3, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_6(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 36, i32 20, i32 2, i32 22, i32 26, i32 24, i32 24, i32 14, i32 10, i32 15, i32 32, i32 9>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 2, i32 0, i32 0, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %4, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_7(<20 x i32>, <20 x i32>) {
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 1, i32 32, i32 3, i32 38, i32 8, i32 19, i32 35, i32 31, i32 7, i32 6, i32 25, i32 30>
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %2, <4 x i32> %10, <4 x i32> <i32 1, i32 4, i32 3, i32 3>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %4, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %3, <4 x i32> %8, <4 x i32> <i32 3, i32 2, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_8(<20 x i32>, <20 x i32>) { ; test input: 12-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 25, i32 27, i32 9, i32 4, i32 28, i32 7, i32 32, i32 28, i32 22, i32 14, i32 25, i32 20> ; mask indices 0-19 pick from %0, 20-39 from %1: 4 lanes from %0, 8 from %1
+ ret <12 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 2 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 1, i32 3, i32 5, i32 5>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <12 x i32> @shuffle_20xi32_to_12xi32_9(<20 x i32>, <20 x i32>) { ; test input: 12-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <12 x i32> <i32 8, i32 3, i32 21, i32 12, i32 25, i32 28, i32 37, i32 36, i32 15, i32 15, i32 2, i32 2> ; mask indices 0-19 pick from %0, 20-39 from %1: 7 lanes from %0, 5 from %1
+ ret <12 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 2 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_12xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %13 = shufflevector <4 x i32> %4, <4 x i32> %2, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %13, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %5, <4 x i32> %2, <4 x i32> <i32 3, i32 3, i32 6, i32 6>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_0(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 23, i32 33, i32 2, i32 18, i32 9, i32 7, i32 36, i32 23, i32 23, i32 7, i32 10, i32 10, i32 5, i32 23, i32 7, i32 21> ; mask indices 0-19 pick from %0, 20-39 from %1: 9 lanes from %0, 7 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %8, <4 x i32> %11, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %8, <4 x i32> %4, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 1, i32 7, i32 3, i32 5>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_1(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 7, i32 21, i32 5, i32 10, i32 13, i32 28, i32 32, i32 22, i32 3, i32 21, i32 37, i32 38, i32 36, i32 6, i32 7, i32 7> ; mask indices 0-19 pick from %0, 20-39 from %1: 8 lanes from %0, 8 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %4, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 1, i32 1>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %6, <4 x i32> %10, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %3, <4 x i32> %8, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_2(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 26, i32 1, i32 9, i32 38, i32 37, i32 4, i32 1, i32 20, i32 1, i32 19, i32 20, i32 12, i32 1, i32 32, i32 12, i32 3> ; mask indices 0-19 pick from %0, 20-39 from %1: 10 lanes from %0, 6 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %3, <4 x i32> %7, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %3, <4 x i32> %11, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_3(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 1, i32 11, i32 24, i32 21, i32 24, i32 8, i32 10, i32 36, i32 38, i32 31, i32 37, i32 12, i32 10, i32 28, i32 36, i32 25> ; mask indices 0-19 pick from %0, 20-39 from %1: 6 lanes from %0, 10 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %9, <4 x i32> %5, <4 x i32> <i32 0, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %12, <4 x i32> %10, <4 x i32> <i32 2, i32 7, i32 1, i32 1>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %5, <4 x i32> %10, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_4(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 29, i32 10, i32 26, i32 20, i32 38, i32 16, i32 37, i32 25, i32 34, i32 2, i32 0, i32 38, i32 19, i32 4, i32 31, i32 28> ; mask indices 0-19 pick from %0, 20-39 from %1: 6 lanes from %0, 10 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %12, <4 x i32> %7, <4 x i32> <i32 2, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %11, <4 x i32> %3, <4 x i32> <i32 2, i32 6, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %7, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 7, i32 4>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_5(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 36, i32 30, i32 26, i32 8, i32 25, i32 2, i32 28, i32 37, i32 17, i32 8, i32 29, i32 16, i32 12, i32 12, i32 8, i32 35> ; mask indices 0-19 pick from %0, 20-39 from %1: 8 lanes from %0, 8 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %12, <4 x i32> %10, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_6(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 15, i32 11, i32 15, i32 15, i32 11, i32 15, i32 5, i32 1, i32 27, i32 2, i32 29, i32 32, i32 24, i32 35, i32 35, i32 13> ; mask indices 0-19 pick from %0, 20-39 from %1: 10 lanes from %0, 6 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %6, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 3, i32 3>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %9, <4 x i32> %3, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %9, <4 x i32> %11, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_7(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 28, i32 10, i32 9, i32 29, i32 3, i32 11, i32 37, i32 6, i32 34, i32 6, i32 9, i32 9, i32 32, i32 12, i32 20, i32 27> ; mask indices 0-19 pick from %0, 20-39 from %1: 9 lanes from %0, 7 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %10, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 5, i32 1>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %3, <4 x i32> %5, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %11, <4 x i32> %4, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_8(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 38, i32 9, i32 26, i32 34, i32 15, i32 20, i32 21, i32 37, i32 13, i32 30, i32 23, i32 37, i32 2, i32 38, i32 5, i32 15> ; mask indices 0-19 pick from %0, 20-39 from %1: 6 lanes from %0, 10 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %12, <4 x i32> %5, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 3, i32 4, i32 5, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %6, <4 x i32> %10, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %3, <4 x i32> %12, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i32> @shuffle_20xi32_to_16xi32_9(<20 x i32>, <20 x i32>) { ; test input: 16-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <16 x i32> <i32 16, i32 0, i32 7, i32 4, i32 26, i32 18, i32 22, i32 14, i32 15, i32 6, i32 23, i32 6, i32 24, i32 7, i32 16, i32 1> ; mask indices 0-19 pick from %0, 20-39 from %1: 12 lanes from %0, 4 from %1
+ ret <16 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 3 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_16xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %14 = shufflevector <4 x i32> %7, <4 x i32> %3, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %14, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 7, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %9, <4 x i32> %7, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %6, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_0(<20 x i32>, <20 x i32>) { ; test input: 20-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 33, i32 9, i32 31, i32 14, i32 24, i32 15, i32 38, i32 7, i32 35, i32 32, i32 2, i32 29, i32 16, i32 6, i32 34, i32 2, i32 12, i32 32, i32 30, i32 16> ; mask indices 0-19 pick from %0, 20-39 from %1: 10 lanes from %0, 10 from %1
+ ret <20 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 4 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_0(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %12, <4 x i32> %4, <4 x i32> <i32 3, i32 0, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %7, <4 x i32> %12, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_1(<20 x i32>, <20 x i32>) { ; test input: 20-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 27, i32 11, i32 23, i32 33, i32 29, i32 28, i32 6, i32 17, i32 26, i32 23, i32 9, i32 5, i32 14, i32 28, i32 18, i32 14, i32 14, i32 7, i32 36, i32 11> ; mask indices 0-19 pick from %0, 20-39 from %1: 11 lanes from %0, 9 from %1
+ ret <20 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 4 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_1(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %11, <4 x i32> %5, <4 x i32> <i32 1, i32 0, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %10, <4 x i32> %9, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %7, <4 x i32> %11, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %7, <4 x i32> %5, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_2(<20 x i32>, <20 x i32>) { ; test input: 20-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 19, i32 19, i32 25, i32 17, i32 14, i32 16, i32 0, i32 12, i32 29, i32 17, i32 34, i32 18, i32 17, i32 10, i32 27, i32 7, i32 8, i32 20, i32 13, i32 18> ; mask indices 0-19 pick from %0, 20-39 from %1: 15 lanes from %0, 5 from %1
+ ret <20 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 4 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_2(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %8, <4 x i32> %10, <4 x i32> <i32 3, i32 3, i32 5, i32 1>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %7, <4 x i32> %8, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %11, <4 x i32> %8, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %8, <4 x i32> %6, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_3(<20 x i32>, <20 x i32>) { ; test input: 20-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 7, i32 0, i32 35, i32 30, i32 11, i32 32, i32 10, i32 0, i32 27, i32 10, i32 30, i32 32, i32 2, i32 26, i32 16, i32 2, i32 38, i32 16, i32 21, i32 21> ; mask indices 0-19 pick from %0, 20-39 from %1: 10 lanes from %0, 10 from %1
+ ret <20 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 4 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_3(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %6, <4 x i32> %12, <4 x i32> <i32 3, i32 4, i32 2, i32 2>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %10, <4 x i32> %6, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %4, <4 x i32> %10, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_4(<20 x i32>, <20 x i32>) { ; test input: 20-lane shuffle of two unnamed 20-lane args (%0, %1)
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 6, i32 10, i32 14, i32 28, i32 21, i32 3, i32 16, i32 4, i32 22, i32 38, i32 4, i32 29, i32 20, i32 27, i32 32, i32 16, i32 8, i32 20, i32 9, i32 29> ; mask indices 0-19 pick from %0, 20-39 from %1: 10 lanes from %0, 10 from %1
+ ret <20 x i32> %3 ; checked output below returns one <4 x i32> chunk and takes 4 leading <4 x i32>* params
+}
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_4(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %9, <4 x i32> %4, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %9, <4 x i32> %13, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %9, <4 x i32> %10, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %6, <4 x i32> %9, <4 x i32> <i32 0, i32 4, i32 1, i32 1>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_5(<20 x i32>, <20 x i32>) { ; Test 5: two-operand shuffle of <20 x i32>; mask values 0-19 select lanes of %0 and 20-39 select lanes of %1.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 6, i32 18, i32 22, i32 15, i32 7, i32 2, i32 14, i32 32, i32 10, i32 36, i32 7, i32 38, i32 21, i32 33, i32 19, i32 23, i32 30, i32 15, i32 2, i32 6>
+ ret <20 x i32> %3
+} ; Expected output below: args %0-%3 are out-pointers that receive result chunks 1-4, %4-%8 / %9-%13 are the <4 x i32> pieces of the first / second operand, and chunk 0 is returned directly (%17). 15 shuffles (%15-%29) are expected, three per 4-lane result chunk.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_5(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %8, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %6, <4 x i32> %13, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %9, <4 x i32> %12, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %11, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %29 = shufflevector <4 x i32> %28, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_6(<20 x i32>, <20 x i32>) { ; Test 6: two-operand shuffle of <20 x i32>; mask values 0-19 select lanes of %0 and 20-39 select lanes of %1.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 8, i32 27, i32 21, i32 37, i32 5, i32 23, i32 38, i32 23, i32 4, i32 25, i32 12, i32 29, i32 30, i32 9, i32 21, i32 4, i32 10, i32 16, i32 18, i32 25>
+ ret <20 x i32> %3
+} ; Expected output below: args %0-%3 are out-pointers that receive result chunks 1-4, %4-%8 / %9-%13 are the <4 x i32> pieces of the first / second operand, and chunk 0 is returned directly (%17). 14 shuffles (%15-%28) are expected; chunk 4 needs only two because its first three mask elements (10, 16, 18) come from just two pieces, %6 and %8.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_6(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %6, <4 x i32> %10, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %5, <4 x i32> %10, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %22, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %11, <4 x i32> %6, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %25, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %6, <4 x i32> %8, <4 x i32> <i32 2, i32 4, i32 6, i32 6>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_7(<20 x i32>, <20 x i32>) { ; Test 7: two-operand shuffle of <20 x i32>; mask values 0-19 select lanes of %0 and 20-39 select lanes of %1.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 12, i32 37, i32 23, i32 16, i32 7, i32 6, i32 8, i32 21, i32 36, i32 19, i32 6, i32 2, i32 32, i32 10, i32 18, i32 15, i32 7, i32 5, i32 6, i32 21>
+ ret <20 x i32> %3
+} ; Expected output below: args %0-%3 are out-pointers that receive result chunks 1-4, %4-%8 / %9-%13 are the <4 x i32> pieces of the first / second operand, and chunk 0 is returned directly (%17). 12 shuffles (%15-%26) are expected; chunk 1 needs two (mask 7, 6, 8 all come from pieces %5 and %6) and chunk 4 needs one (mask 7, 5, 6, 21 all come from pieces %5 and %9).
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_7(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %7, <4 x i32> %13, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %13, <4 x i32> %8, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %12, <4 x i32> %6, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %8, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %5, <4 x i32> %9, <4 x i32> <i32 3, i32 1, i32 2, i32 5>
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_8(<20 x i32>, <20 x i32>) { ; Test 8: two-operand shuffle of <20 x i32>; mask values 0-19 select lanes of %0 and 20-39 select lanes of %1.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 14, i32 11, i32 7, i32 3, i32 24, i32 36, i32 11, i32 28, i32 14, i32 11, i32 11, i32 7, i32 26, i32 15, i32 20, i32 36, i32 6, i32 3, i32 12, i32 26>
+ ret <20 x i32> %3
+} ; Expected output below: args %0-%3 are out-pointers that receive result chunks 1-4, %4-%8 / %9-%13 are the <4 x i32> pieces of the first / second operand, and chunk 0 is returned directly (%17). 14 shuffles (%15-%28) are expected; chunk 2 needs only two (mask 14, 11, 11 all come from pieces %7 and %6). Note that %21 is textually identical to %15: the expected output keeps both, i.e. no common-subexpression elimination.
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_8(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %16, <4 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %10, <4 x i32> %13, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %18, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %7, <4 x i32> %6, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %21, <4 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %10, <4 x i32> %7, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: %26 = shufflevector <4 x i32> %5, <4 x i32> %4, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %27 = shufflevector <4 x i32> %26, <4 x i32> %7, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %28 = shufflevector <4 x i32> %27, <4 x i32> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <20 x i32> @shuffle_20xi32_to_20xi32_9(<20 x i32>, <20 x i32>) { ; Test 9: two-operand shuffle of <20 x i32>; mask values 0-19 select lanes of %0 and 20-39 select lanes of %1.
+ %3 = shufflevector <20 x i32> %0, <20 x i32> %1, <20 x i32> <i32 4, i32 11, i32 35, i32 32, i32 6, i32 24, i32 11, i32 9, i32 16, i32 7, i32 28, i32 10, i32 19, i32 22, i32 17, i32 23, i32 13, i32 30, i32 22, i32 38>
+ ret <20 x i32> %3
+} ; Expected output below: args %0-%3 are out-pointers that receive result chunks 1-4, %4-%8 / %9-%13 are the <4 x i32> pieces of the first / second operand, and chunk 0 is returned directly (%16 here, since chunk 0 needs only two shuffles). 11 shuffles (%15-%25) are expected: chunks 0 and 1 need two each (their last two lanes share one piece, %12 and %6 respectively) and chunk 3 needs one (mask 19, 22, 17, 23 all come from pieces %8 and %9).
+; CHECK-LABEL: define <4 x i32> @shuffle_20xi32_to_20xi32_9(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %16 = shufflevector <4 x i32> %15, <4 x i32> %12, <4 x i32> <i32 0, i32 1, i32 7, i32 4>
+; CHECK-NEXT: %17 = shufflevector <4 x i32> %5, <4 x i32> %10, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+; CHECK-NEXT: %18 = shufflevector <4 x i32> %17, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 7, i32 5>
+; CHECK-NEXT: %19 = shufflevector <4 x i32> %8, <4 x i32> %5, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
+; CHECK-NEXT: %20 = shufflevector <4 x i32> %19, <4 x i32> %11, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
+; CHECK-NEXT: %21 = shufflevector <4 x i32> %20, <4 x i32> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: %22 = shufflevector <4 x i32> %8, <4 x i32> %9, <4 x i32> <i32 3, i32 6, i32 1, i32 7>
+; CHECK-NEXT: %23 = shufflevector <4 x i32> %7, <4 x i32> %11, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+; CHECK-NEXT: %24 = shufflevector <4 x i32> %23, <4 x i32> %9, <4 x i32> <i32 0, i32 1, i32 6, i32 6>
+; CHECK-NEXT: %25 = shufflevector <4 x i32> %24, <4 x i32> %13, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %25, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
« no previous file with comments | « test/Transforms/NaCl/vector-canonicalization-phis.ll ('k') | test/Transforms/NaCl/vector-canonicalization-stores.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698