Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(504)

Unified Diff: test/Transforms/NaCl/vector-canonicalization-calls.ll

Issue 1423873002: PNaCl: Add a vector type legalization pass. Base URL: https://chromium.googlesource.com/native_client/pnacl-llvm.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: test/Transforms/NaCl/vector-canonicalization-calls.ll
diff --git a/test/Transforms/NaCl/vector-canonicalization-calls.ll b/test/Transforms/NaCl/vector-canonicalization-calls.ll
new file mode 100644
index 0000000000000000000000000000000000000000..54cbe98881425e60db8efcfa15972523b19a3ebc
--- /dev/null
+++ b/test/Transforms/NaCl/vector-canonicalization-calls.ll
@@ -0,0 +1,1868 @@
+; RUN: opt -S -pnacl-vector-canonicalization %s | FileCheck %s
+
+; Auto-generated tests for call instructions.
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128"
+
+define <2 x i8> @fn_0_2xi8(<2 x i8>) {
+entry:
+ %1 = call <2 x i8> @fn_0_2xi8(<2 x i8> %0)
+ ret <2 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_0_2xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_0_2xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <2 x i16> @fn_1_2xi16(<2 x i16>) {
+entry:
+ %1 = call <2 x i16> @fn_1_2xi16(<2 x i16> %0)
+ ret <2 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_1_2xi16(<8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <8 x i16> @fn_1_2xi16(<8 x i16> %0)
+; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-NEXT: }
+
+define <2 x i32> @fn_2_2xi32(<2 x i32>) {
+entry:
+ %1 = call <2 x i32> @fn_2_2xi32(<2 x i32> %0)
+ ret <2 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_2_2xi32(<4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x i32> @fn_2_2xi32(<4 x i32> %0)
+; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-NEXT: }
+
+define <2 x i64> @fn_3_2xi64(<2 x i64>) {
+entry:
+ %1 = call <2 x i64> @fn_3_2xi64(<2 x i64> %0)
+ ret <2 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_3_2xi64(<2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <2 x i64> @fn_3_2xi64(<2 x i64> %0)
+; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT: }
+
+define <2 x i8*> @fn_4_2xi8ptr(<2 x i8*>) {
+entry:
+ %1 = call <2 x i8*> @fn_4_2xi8ptr(<2 x i8*> %0)
+ ret <2 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_4_2xi8ptr(<4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x i8*> @fn_4_2xi8ptr(<4 x i8*> %0)
+; CHECK-NEXT: ret <4 x i8*> %1
+; CHECK-NEXT: }
+
+define <2 x float> @fn_5_2xfloat(<2 x float>) {
+entry:
+ %1 = call <2 x float> @fn_5_2xfloat(<2 x float> %0)
+ ret <2 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_5_2xfloat(<4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x float> @fn_5_2xfloat(<4 x float> %0)
+; CHECK-NEXT: ret <4 x float> %1
+; CHECK-NEXT: }
+
+define <2 x double> @fn_6_2xdouble(<2 x double>) {
+entry:
+ %1 = call <2 x double> @fn_6_2xdouble(<2 x double> %0)
+ ret <2 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_6_2xdouble(<2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <2 x double> @fn_6_2xdouble(<2 x double> %0)
+; CHECK-NEXT: ret <2 x double> %1
+; CHECK-NEXT: }
+
+define <4 x i8> @fn_7_4xi8(<4 x i8>) {
+entry:
+ %1 = call <4 x i8> @fn_7_4xi8(<4 x i8> %0)
+ ret <4 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_7_4xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_7_4xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <4 x i16> @fn_8_4xi16(<4 x i16>) {
+entry:
+ %1 = call <4 x i16> @fn_8_4xi16(<4 x i16> %0)
+ ret <4 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_8_4xi16(<8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <8 x i16> @fn_8_4xi16(<8 x i16> %0)
+; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-NEXT: }
+
+define <4 x i32> @fn_9_4xi32(<4 x i32>) {
+entry:
+ %1 = call <4 x i32> @fn_9_4xi32(<4 x i32> %0)
+ ret <4 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_9_4xi32(<4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x i32> @fn_9_4xi32(<4 x i32> %0)
+; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-NEXT: }
+
+define <4 x i64> @fn_10_4xi64(<4 x i64>) {
+entry:
+ %1 = call <4 x i64> @fn_10_4xi64(<4 x i64> %0)
+ ret <4 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_10_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %4 = call <2 x i64> @fn_10_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %3, <2 x i64> %1, <2 x i64> %2)
+; CHECK-NEXT: %5 = load <2 x i64>, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %4
+; CHECK-NEXT: }
+
+define <4 x i8*> @fn_11_4xi8ptr(<4 x i8*>) {
+entry:
+ %1 = call <4 x i8*> @fn_11_4xi8ptr(<4 x i8*> %0)
+ ret <4 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_11_4xi8ptr(<4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x i8*> @fn_11_4xi8ptr(<4 x i8*> %0)
+; CHECK-NEXT: ret <4 x i8*> %1
+; CHECK-NEXT: }
+
+define <4 x float> @fn_12_4xfloat(<4 x float>) {
+entry:
+ %1 = call <4 x float> @fn_12_4xfloat(<4 x float> %0)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_12_4xfloat(<4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <4 x float> @fn_12_4xfloat(<4 x float> %0)
+; CHECK-NEXT: ret <4 x float> %1
+; CHECK-NEXT: }
+
+define <4 x double> @fn_13_4xdouble(<4 x double>) {
+entry:
+ %1 = call <4 x double> @fn_13_4xdouble(<4 x double> %0)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_13_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <2 x double>, align 16
+; CHECK-NEXT: %4 = call <2 x double> @fn_13_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %3, <2 x double> %1, <2 x double> %2)
+; CHECK-NEXT: %5 = load <2 x double>, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %4
+; CHECK-NEXT: }
+
+define <6 x i8> @fn_14_6xi8(<6 x i8>) {
+entry:
+ %1 = call <6 x i8> @fn_14_6xi8(<6 x i8> %0)
+ ret <6 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_14_6xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_14_6xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <6 x i16> @fn_15_6xi16(<6 x i16>) {
+entry:
+ %1 = call <6 x i16> @fn_15_6xi16(<6 x i16> %0)
+ ret <6 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_15_6xi16(<8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <8 x i16> @fn_15_6xi16(<8 x i16> %0)
+; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-NEXT: }
+
+define <6 x i32> @fn_16_6xi32(<6 x i32>) {
+entry:
+ %1 = call <6 x i32> @fn_16_6xi32(<6 x i32> %0)
+ ret <6 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_16_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %4 = call <4 x i32> @fn_16_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16) %3, <4 x i32> %1, <4 x i32> %2)
+; CHECK-NEXT: %5 = load <4 x i32>, <4 x i32>* %3, align 16
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+; CHECK-NEXT: }
+
+define <6 x i64> @fn_17_6xi64(<6 x i64>) {
+entry:
+ %1 = call <6 x i64> @fn_17_6xi64(<6 x i64> %0)
+ ret <6 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_17_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %6 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %7 = call <2 x i64> @fn_17_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %5, <2 x i64>* nocapture nonnull dereferenceable(16) %6, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4)
+; CHECK-NEXT: %8 = load <2 x i64>, <2 x i64>* %5, align 16
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %7
+; CHECK-NEXT: }
+
+define <6 x i8*> @fn_18_6xi8ptr(<6 x i8*>) {
+entry:
+ %1 = call <6 x i8*> @fn_18_6xi8ptr(<6 x i8*> %0)
+ ret <6 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_18_6xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %4 = call <4 x i8*> @fn_18_6xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16) %3, <4 x i8*> %1, <4 x i8*> %2)
+; CHECK-NEXT: %5 = load <4 x i8*>, <4 x i8*>* %3, align 16
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %4
+; CHECK-NEXT: }
+
+define <6 x float> @fn_19_6xfloat(<6 x float>) {
+entry:
+ %1 = call <6 x float> @fn_19_6xfloat(<6 x float> %0)
+ ret <6 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_19_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x float>, align 16
+; CHECK-NEXT: %4 = call <4 x float> @fn_19_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16) %3, <4 x float> %1, <4 x float> %2)
+; CHECK-NEXT: %5 = load <4 x float>, <4 x float>* %3, align 16
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+; CHECK-NEXT: }
+
+define <6 x double> @fn_20_6xdouble(<6 x double>) {
+entry:
+ %1 = call <6 x double> @fn_20_6xdouble(<6 x double> %0)
+ ret <6 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_20_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <2 x double>, align 16
+; CHECK-NEXT: %6 = alloca <2 x double>, align 16
+; CHECK-NEXT: %7 = call <2 x double> @fn_20_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %5, <2 x double>* nocapture nonnull dereferenceable(16) %6, <2 x double> %2, <2 x double> %3, <2 x double> %4)
+; CHECK-NEXT: %8 = load <2 x double>, <2 x double>* %5, align 16
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %7
+; CHECK-NEXT: }
+
+define <8 x i8> @fn_21_8xi8(<8 x i8>) {
+entry:
+ %1 = call <8 x i8> @fn_21_8xi8(<8 x i8> %0)
+ ret <8 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_21_8xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_21_8xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <8 x i16> @fn_22_8xi16(<8 x i16>) {
+entry:
+ %1 = call <8 x i16> @fn_22_8xi16(<8 x i16> %0)
+ ret <8 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_22_8xi16(<8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <8 x i16> @fn_22_8xi16(<8 x i16> %0)
+; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-NEXT: }
+
+define <8 x i32> @fn_23_8xi32(<8 x i32>) {
+entry:
+ %1 = call <8 x i32> @fn_23_8xi32(<8 x i32> %0)
+ ret <8 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_23_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %4 = call <4 x i32> @fn_23_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16) %3, <4 x i32> %1, <4 x i32> %2)
+; CHECK-NEXT: %5 = load <4 x i32>, <4 x i32>* %3, align 16
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+; CHECK-NEXT: }
+
+define <8 x i64> @fn_24_8xi64(<8 x i64>) {
+entry:
+ %1 = call <8 x i64> @fn_24_8xi64(<8 x i64> %0)
+ ret <8 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_24_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %8 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %9 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %10 = call <2 x i64> @fn_24_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %7, <2 x i64>* nocapture nonnull dereferenceable(16) %8, <2 x i64>* nocapture nonnull dereferenceable(16) %9, <2 x i64> %3, <2 x i64> %4, <2 x i64> %5, <2 x i64> %6)
+; CHECK-NEXT: %11 = load <2 x i64>, <2 x i64>* %7, align 16
+; CHECK-NEXT: %12 = load <2 x i64>, <2 x i64>* %8, align 16
+; CHECK-NEXT: %13 = load <2 x i64>, <2 x i64>* %9, align 16
+; CHECK-NEXT: store <2 x i64> %11, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %12, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %13, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+; CHECK-NEXT: }
+
+define <8 x i8*> @fn_25_8xi8ptr(<8 x i8*>) {
+entry:
+ %1 = call <8 x i8*> @fn_25_8xi8ptr(<8 x i8*> %0)
+ ret <8 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_25_8xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %4 = call <4 x i8*> @fn_25_8xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16) %3, <4 x i8*> %1, <4 x i8*> %2)
+; CHECK-NEXT: %5 = load <4 x i8*>, <4 x i8*>* %3, align 16
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %4
+; CHECK-NEXT: }
+
+define <8 x float> @fn_26_8xfloat(<8 x float>) {
+entry:
+ %1 = call <8 x float> @fn_26_8xfloat(<8 x float> %0)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_26_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <4 x float>, align 16
+; CHECK-NEXT: %4 = call <4 x float> @fn_26_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16) %3, <4 x float> %1, <4 x float> %2)
+; CHECK-NEXT: %5 = load <4 x float>, <4 x float>* %3, align 16
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+; CHECK-NEXT: }
+
+define <8 x double> @fn_27_8xdouble(<8 x double>) {
+entry:
+ %1 = call <8 x double> @fn_27_8xdouble(<8 x double> %0)
+ ret <8 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_27_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = alloca <2 x double>, align 16
+; CHECK-NEXT: %8 = alloca <2 x double>, align 16
+; CHECK-NEXT: %9 = alloca <2 x double>, align 16
+; CHECK-NEXT: %10 = call <2 x double> @fn_27_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %7, <2 x double>* nocapture nonnull dereferenceable(16) %8, <2 x double>* nocapture nonnull dereferenceable(16) %9, <2 x double> %3, <2 x double> %4, <2 x double> %5, <2 x double> %6)
+; CHECK-NEXT: %11 = load <2 x double>, <2 x double>* %7, align 16
+; CHECK-NEXT: %12 = load <2 x double>, <2 x double>* %8, align 16
+; CHECK-NEXT: %13 = load <2 x double>, <2 x double>* %9, align 16
+; CHECK-NEXT: store <2 x double> %11, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %12, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %13, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %10
+; CHECK-NEXT: }
+
+define <12 x i8> @fn_28_12xi8(<12 x i8>) {
+entry:
+ %1 = call <12 x i8> @fn_28_12xi8(<12 x i8> %0)
+ ret <12 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_28_12xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_28_12xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <12 x i16> @fn_29_12xi16(<12 x i16>) {
+entry:
+ %1 = call <12 x i16> @fn_29_12xi16(<12 x i16> %0)
+ ret <12 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_29_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %4 = call <8 x i16> @fn_29_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16) %3, <8 x i16> %1, <8 x i16> %2)
+; CHECK-NEXT: %5 = load <8 x i16>, <8 x i16>* %3, align 16
+; CHECK-NEXT: store <8 x i16> %5, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %4
+; CHECK-NEXT: }
+
+define <12 x i32> @fn_30_12xi32(<12 x i32>) {
+entry:
+ %1 = call <12 x i32> @fn_30_12xi32(<12 x i32> %0)
+ ret <12 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_30_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %6 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %7 = call <4 x i32> @fn_30_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16) %5, <4 x i32>* nocapture nonnull dereferenceable(16) %6, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4)
+; CHECK-NEXT: %8 = load <4 x i32>, <4 x i32>* %5, align 16
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %6, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %7
+; CHECK-NEXT: }
+
+define <12 x i64> @fn_31_12xi64(<12 x i64>) {
+entry:
+ %1 = call <12 x i64> @fn_31_12xi64(<12 x i64> %0)
+ ret <12 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_31_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %12 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %13 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %14 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %15 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %16 = call <2 x i64> @fn_31_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %11, <2 x i64>* nocapture nonnull dereferenceable(16) %12, <2 x i64>* nocapture nonnull dereferenceable(16) %13, <2 x i64>* nocapture nonnull dereferenceable(16) %14, <2 x i64>* nocapture nonnull dereferenceable(16) %15, <2 x i64> %5, <2 x i64> %6, <2 x i64> %7, <2 x i64> %8, <2 x i64> %9, <2 x i64> %10)
+; CHECK-NEXT: %17 = load <2 x i64>, <2 x i64>* %11, align 16
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %12, align 16
+; CHECK-NEXT: %19 = load <2 x i64>, <2 x i64>* %13, align 16
+; CHECK-NEXT: %20 = load <2 x i64>, <2 x i64>* %14, align 16
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %15, align 16
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %16
+; CHECK-NEXT: }
+
+define <12 x i8*> @fn_32_12xi8ptr(<12 x i8*>) {
+entry:
+ %1 = call <12 x i8*> @fn_32_12xi8ptr(<12 x i8*> %0)
+ ret <12 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_32_12xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %6 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %7 = call <4 x i8*> @fn_32_12xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16) %5, <4 x i8*>* nocapture nonnull dereferenceable(16) %6, <4 x i8*> %2, <4 x i8*> %3, <4 x i8*> %4)
+; CHECK-NEXT: %8 = load <4 x i8*>, <4 x i8*>* %5, align 16
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %6, align 16
+; CHECK-NEXT: store <4 x i8*> %8, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %9, <4 x i8*>* %1, align 16
+; CHECK-NEXT: ret <4 x i8*> %7
+; CHECK-NEXT: }
+
+define <12 x float> @fn_33_12xfloat(<12 x float>) {
+entry:
+ %1 = call <12 x float> @fn_33_12xfloat(<12 x float> %0)
+ ret <12 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_33_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <4 x float>, align 16
+; CHECK-NEXT: %6 = alloca <4 x float>, align 16
+; CHECK-NEXT: %7 = call <4 x float> @fn_33_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16) %5, <4 x float>* nocapture nonnull dereferenceable(16) %6, <4 x float> %2, <4 x float> %3, <4 x float> %4)
+; CHECK-NEXT: %8 = load <4 x float>, <4 x float>* %5, align 16
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %6, align 16
+; CHECK-NEXT: store <4 x float> %8, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %7
+; CHECK-NEXT: }
+
+define <12 x double> @fn_34_12xdouble(<12 x double>) {
+entry:
+ %1 = call <12 x double> @fn_34_12xdouble(<12 x double> %0)
+ ret <12 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_34_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %11 = alloca <2 x double>, align 16
+; CHECK-NEXT: %12 = alloca <2 x double>, align 16
+; CHECK-NEXT: %13 = alloca <2 x double>, align 16
+; CHECK-NEXT: %14 = alloca <2 x double>, align 16
+; CHECK-NEXT: %15 = alloca <2 x double>, align 16
+; CHECK-NEXT: %16 = call <2 x double> @fn_34_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %11, <2 x double>* nocapture nonnull dereferenceable(16) %12, <2 x double>* nocapture nonnull dereferenceable(16) %13, <2 x double>* nocapture nonnull dereferenceable(16) %14, <2 x double>* nocapture nonnull dereferenceable(16) %15, <2 x double> %5, <2 x double> %6, <2 x double> %7, <2 x double> %8, <2 x double> %9, <2 x double> %10)
+; CHECK-NEXT: %17 = load <2 x double>, <2 x double>* %11, align 16
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %12, align 16
+; CHECK-NEXT: %19 = load <2 x double>, <2 x double>* %13, align 16
+; CHECK-NEXT: %20 = load <2 x double>, <2 x double>* %14, align 16
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %15, align 16
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %16
+; CHECK-NEXT: }
+
+define <16 x i8> @fn_35_16xi8(<16 x i8>) {
+entry:
+ %1 = call <16 x i8> @fn_35_16xi8(<16 x i8> %0)
+ ret <16 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_35_16xi8(<16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %1 = call <16 x i8> @fn_35_16xi8(<16 x i8> %0)
+; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT: }
+
+define <16 x i16> @fn_36_16xi16(<16 x i16>) {
+entry:
+ %1 = call <16 x i16> @fn_36_16xi16(<16 x i16> %0)
+ ret <16 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_36_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %4 = call <8 x i16> @fn_36_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16) %3, <8 x i16> %1, <8 x i16> %2)
+; CHECK-NEXT: %5 = load <8 x i16>, <8 x i16>* %3, align 16
+; CHECK-NEXT: store <8 x i16> %5, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %4
+; CHECK-NEXT: }
+
+define <16 x i32> @fn_37_16xi32(<16 x i32>) {
+entry:
+ %1 = call <16 x i32> @fn_37_16xi32(<16 x i32> %0)
+ ret <16 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_37_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %8 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %9 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %10 = call <4 x i32> @fn_37_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16) %7, <4 x i32>* nocapture nonnull dereferenceable(16) %8, <4 x i32>* nocapture nonnull dereferenceable(16) %9, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, <4 x i32> %6)
+; CHECK-NEXT: %11 = load <4 x i32>, <4 x i32>* %7, align 16
+; CHECK-NEXT: %12 = load <4 x i32>, <4 x i32>* %8, align 16
+; CHECK-NEXT: %13 = load <4 x i32>, <4 x i32>* %9, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+; CHECK-NEXT: }
+
+define <16 x i64> @fn_38_16xi64(<16 x i64>) {
+entry:
+ %1 = call <16 x i64> @fn_38_16xi64(<16 x i64> %0)
+ ret <16 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_38_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %15 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %16 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %17 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %18 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %19 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %20 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %21 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %22 = call <2 x i64> @fn_38_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %15, <2 x i64>* nocapture nonnull dereferenceable(16) %16, <2 x i64>* nocapture nonnull dereferenceable(16) %17, <2 x i64>* nocapture nonnull dereferenceable(16) %18, <2 x i64>* nocapture nonnull dereferenceable(16) %19, <2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64> %7, <2 x i64> %8, <2 x i64> %9, <2 x i64> %10, <2 x i64> %11, <2 x i64> %12, <2 x i64> %13, <2 x i64> %14)
+; CHECK-NEXT: %23 = load <2 x i64>, <2 x i64>* %15, align 16
+; CHECK-NEXT: %24 = load <2 x i64>, <2 x i64>* %16, align 16
+; CHECK-NEXT: %25 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %26 = load <2 x i64>, <2 x i64>* %18, align 16
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %19, align 16
+; CHECK-NEXT: %28 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %29 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %22
+; CHECK-NEXT: }
+
+define <16 x i8*> @fn_39_16xi8ptr(<16 x i8*>) {
+entry:
+ %1 = call <16 x i8*> @fn_39_16xi8ptr(<16 x i8*> %0)
+ ret <16 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_39_16xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %8 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %9 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %10 = call <4 x i8*> @fn_39_16xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16) %7, <4 x i8*>* nocapture nonnull dereferenceable(16) %8, <4 x i8*>* nocapture nonnull dereferenceable(16) %9, <4 x i8*> %3, <4 x i8*> %4, <4 x i8*> %5, <4 x i8*> %6)
+; CHECK-NEXT: %11 = load <4 x i8*>, <4 x i8*>* %7, align 16
+; CHECK-NEXT: %12 = load <4 x i8*>, <4 x i8*>* %8, align 16
+; CHECK-NEXT: %13 = load <4 x i8*>, <4 x i8*>* %9, align 16
+; CHECK-NEXT: store <4 x i8*> %11, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %12, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %13, <4 x i8*>* %2, align 16
+; CHECK-NEXT: ret <4 x i8*> %10
+; CHECK-NEXT: }
+
+define <16 x float> @fn_40_16xfloat(<16 x float>) {
+entry:
+ %1 = call <16 x float> @fn_40_16xfloat(<16 x float> %0)
+ ret <16 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_40_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %7 = alloca <4 x float>, align 16
+; CHECK-NEXT: %8 = alloca <4 x float>, align 16
+; CHECK-NEXT: %9 = alloca <4 x float>, align 16
+; CHECK-NEXT: %10 = call <4 x float> @fn_40_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16) %7, <4 x float>* nocapture nonnull dereferenceable(16) %8, <4 x float>* nocapture nonnull dereferenceable(16) %9, <4 x float> %3, <4 x float> %4, <4 x float> %5, <4 x float> %6)
+; CHECK-NEXT: %11 = load <4 x float>, <4 x float>* %7, align 16
+; CHECK-NEXT: %12 = load <4 x float>, <4 x float>* %8, align 16
+; CHECK-NEXT: %13 = load <4 x float>, <4 x float>* %9, align 16
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %12, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %13, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %10
+; CHECK-NEXT: }
+
+; Test 41: <16 x double> (1024 bits) legalizes to eight 128-bit
+; <2 x double> chunks.  The legalized return keeps chunk 0 in the normal
+; return slot; chunks 1-7 come back through seven prepended out-pointer
+; parameters (written by the callee, reloaded and forwarded by the
+; caller).  The eight argument chunks are passed by value after the
+; pointers.
+define <16 x double> @fn_41_16xdouble(<16 x double>) {
+entry:
+ %1 = call <16 x double> @fn_41_16xdouble(<16 x double> %0)
+ ret <16 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_41_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %15 = alloca <2 x double>, align 16
+; CHECK-NEXT: %16 = alloca <2 x double>, align 16
+; CHECK-NEXT: %17 = alloca <2 x double>, align 16
+; CHECK-NEXT: %18 = alloca <2 x double>, align 16
+; CHECK-NEXT: %19 = alloca <2 x double>, align 16
+; CHECK-NEXT: %20 = alloca <2 x double>, align 16
+; CHECK-NEXT: %21 = alloca <2 x double>, align 16
+; CHECK-NEXT: %22 = call <2 x double> @fn_41_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %15, <2 x double>* nocapture nonnull dereferenceable(16) %16, <2 x double>* nocapture nonnull dereferenceable(16) %17, <2 x double>* nocapture nonnull dereferenceable(16) %18, <2 x double>* nocapture nonnull dereferenceable(16) %19, <2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double> %7, <2 x double> %8, <2 x double> %9, <2 x double> %10, <2 x double> %11, <2 x double> %12, <2 x double> %13, <2 x double> %14)
+; CHECK-NEXT: %23 = load <2 x double>, <2 x double>* %15, align 16
+; CHECK-NEXT: %24 = load <2 x double>, <2 x double>* %16, align 16
+; CHECK-NEXT: %25 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %26 = load <2 x double>, <2 x double>* %18, align 16
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %19, align 16
+; CHECK-NEXT: %28 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %29 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %22
+; CHECK-NEXT: }
+
+; Tests 42-48: 20-element vectors of each element type.  Each splits
+; into ceil(bits/128) native 128-bit chunks; chunk 0 of the return stays
+; in the return slot, the remaining return chunks become prepended
+; out-pointers, and all argument chunks are passed by value.
+
+; <20 x i8> (160 bits) -> two <16 x i8> chunks: one out-pointer, two
+; by-value argument chunks.
+define <20 x i8> @fn_42_20xi8(<20 x i8>) {
+entry:
+ %1 = call <20 x i8> @fn_42_20xi8(<20 x i8> %0)
+ ret <20 x i8> %1
+}
+; CHECK-LABEL: define <16 x i8> @fn_42_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %3 = alloca <16 x i8>, align 16
+; CHECK-NEXT: %4 = call <16 x i8> @fn_42_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16) %3, <16 x i8> %1, <16 x i8> %2)
+; CHECK-NEXT: %5 = load <16 x i8>, <16 x i8>* %3, align 16
+; CHECK-NEXT: store <16 x i8> %5, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+; <20 x i16> (320 bits) -> three <8 x i16> chunks: two out-pointers.
+define <20 x i16> @fn_43_20xi16(<20 x i16>) {
+entry:
+ %1 = call <20 x i16> @fn_43_20xi16(<20 x i16> %0)
+ ret <20 x i16> %1
+}
+; CHECK-LABEL: define <8 x i16> @fn_43_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %5 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %6 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %7 = call <8 x i16> @fn_43_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16) %5, <8 x i16>* nocapture nonnull dereferenceable(16) %6, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4)
+; CHECK-NEXT: %8 = load <8 x i16>, <8 x i16>* %5, align 16
+; CHECK-NEXT: %9 = load <8 x i16>, <8 x i16>* %6, align 16
+; CHECK-NEXT: store <8 x i16> %8, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %9, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %7
+; CHECK-NEXT: }
+
+; <20 x i32> (640 bits) -> five <4 x i32> chunks: four out-pointers.
+define <20 x i32> @fn_44_20xi32(<20 x i32>) {
+entry:
+ %1 = call <20 x i32> @fn_44_20xi32(<20 x i32> %0)
+ ret <20 x i32> %1
+}
+; CHECK-LABEL: define <4 x i32> @fn_44_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %10 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %11 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %12 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %13 = call <4 x i32> @fn_44_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16) %9, <4 x i32>* nocapture nonnull dereferenceable(16) %10, <4 x i32>* nocapture nonnull dereferenceable(16) %11, <4 x i32>* nocapture nonnull dereferenceable(16) %12, <4 x i32> %4, <4 x i32> %5, <4 x i32> %6, <4 x i32> %7, <4 x i32> %8)
+; CHECK-NEXT: %14 = load <4 x i32>, <4 x i32>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %10, align 16
+; CHECK-NEXT: %16 = load <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: %17 = load <4 x i32>, <4 x i32>* %12, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %15, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %16, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %13
+; CHECK-NEXT: }
+
+; <20 x i64> (1280 bits) -> ten <2 x i64> chunks: nine out-pointers.
+define <20 x i64> @fn_45_20xi64(<20 x i64>) {
+entry:
+ %1 = call <20 x i64> @fn_45_20xi64(<20 x i64> %0)
+ ret <20 x i64> %1
+}
+; CHECK-LABEL: define <2 x i64> @fn_45_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %19 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %20 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %21 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %22 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %23 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %24 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %25 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %26 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %27 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %28 = call <2 x i64> @fn_45_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16) %19, <2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64>* nocapture nonnull dereferenceable(16) %22, <2 x i64>* nocapture nonnull dereferenceable(16) %23, <2 x i64>* nocapture nonnull dereferenceable(16) %24, <2 x i64>* nocapture nonnull dereferenceable(16) %25, <2 x i64>* nocapture nonnull dereferenceable(16) %26, <2 x i64>* nocapture nonnull dereferenceable(16) %27, <2 x i64> %9, <2 x i64> %10, <2 x i64> %11, <2 x i64> %12, <2 x i64> %13, <2 x i64> %14, <2 x i64> %15, <2 x i64> %16, <2 x i64> %17, <2 x i64> %18)
+; CHECK-NEXT: %29 = load <2 x i64>, <2 x i64>* %19, align 16
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %31 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: %32 = load <2 x i64>, <2 x i64>* %22, align 16
+; CHECK-NEXT: %33 = load <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: %34 = load <2 x i64>, <2 x i64>* %24, align 16
+; CHECK-NEXT: %35 = load <2 x i64>, <2 x i64>* %25, align 16
+; CHECK-NEXT: %36 = load <2 x i64>, <2 x i64>* %26, align 16
+; CHECK-NEXT: %37 = load <2 x i64>, <2 x i64>* %27, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %30, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %31, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %33, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %34, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %37, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %28
+; CHECK-NEXT: }
+
+; <20 x i8*> (32-bit pointers per the datalayout, 640 bits) -> five
+; <4 x i8*> chunks: four out-pointers.
+define <20 x i8*> @fn_46_20xi8ptr(<20 x i8*>) {
+entry:
+ %1 = call <20 x i8*> @fn_46_20xi8ptr(<20 x i8*> %0)
+ ret <20 x i8*> %1
+}
+; CHECK-LABEL: define <4 x i8*> @fn_46_20xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %10 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %11 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %12 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %13 = call <4 x i8*> @fn_46_20xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16) %9, <4 x i8*>* nocapture nonnull dereferenceable(16) %10, <4 x i8*>* nocapture nonnull dereferenceable(16) %11, <4 x i8*>* nocapture nonnull dereferenceable(16) %12, <4 x i8*> %4, <4 x i8*> %5, <4 x i8*> %6, <4 x i8*> %7, <4 x i8*> %8)
+; CHECK-NEXT: %14 = load <4 x i8*>, <4 x i8*>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %10, align 16
+; CHECK-NEXT: %16 = load <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: %17 = load <4 x i8*>, <4 x i8*>* %12, align 16
+; CHECK-NEXT: store <4 x i8*> %14, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %15, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %16, <4 x i8*>* %2, align 16
+; CHECK-NEXT: store <4 x i8*> %17, <4 x i8*>* %3, align 16
+; CHECK-NEXT: ret <4 x i8*> %13
+; CHECK-NEXT: }
+
+; <20 x float> (640 bits) -> five <4 x float> chunks: four out-pointers.
+define <20 x float> @fn_47_20xfloat(<20 x float>) {
+entry:
+ %1 = call <20 x float> @fn_47_20xfloat(<20 x float> %0)
+ ret <20 x float> %1
+}
+; CHECK-LABEL: define <4 x float> @fn_47_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %9 = alloca <4 x float>, align 16
+; CHECK-NEXT: %10 = alloca <4 x float>, align 16
+; CHECK-NEXT: %11 = alloca <4 x float>, align 16
+; CHECK-NEXT: %12 = alloca <4 x float>, align 16
+; CHECK-NEXT: %13 = call <4 x float> @fn_47_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16) %9, <4 x float>* nocapture nonnull dereferenceable(16) %10, <4 x float>* nocapture nonnull dereferenceable(16) %11, <4 x float>* nocapture nonnull dereferenceable(16) %12, <4 x float> %4, <4 x float> %5, <4 x float> %6, <4 x float> %7, <4 x float> %8)
+; CHECK-NEXT: %14 = load <4 x float>, <4 x float>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %10, align 16
+; CHECK-NEXT: %16 = load <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: %17 = load <4 x float>, <4 x float>* %12, align 16
+; CHECK-NEXT: store <4 x float> %14, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %15, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %16, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %17, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %13
+; CHECK-NEXT: }
+
+; <20 x double> (1280 bits) -> ten <2 x double> chunks: nine
+; out-pointers.
+define <20 x double> @fn_48_20xdouble(<20 x double>) {
+entry:
+ %1 = call <20 x double> @fn_48_20xdouble(<20 x double> %0)
+ ret <20 x double> %1
+}
+; CHECK-LABEL: define <2 x double> @fn_48_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %19 = alloca <2 x double>, align 16
+; CHECK-NEXT: %20 = alloca <2 x double>, align 16
+; CHECK-NEXT: %21 = alloca <2 x double>, align 16
+; CHECK-NEXT: %22 = alloca <2 x double>, align 16
+; CHECK-NEXT: %23 = alloca <2 x double>, align 16
+; CHECK-NEXT: %24 = alloca <2 x double>, align 16
+; CHECK-NEXT: %25 = alloca <2 x double>, align 16
+; CHECK-NEXT: %26 = alloca <2 x double>, align 16
+; CHECK-NEXT: %27 = alloca <2 x double>, align 16
+; CHECK-NEXT: %28 = call <2 x double> @fn_48_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16) %19, <2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double>* nocapture nonnull dereferenceable(16) %22, <2 x double>* nocapture nonnull dereferenceable(16) %23, <2 x double>* nocapture nonnull dereferenceable(16) %24, <2 x double>* nocapture nonnull dereferenceable(16) %25, <2 x double>* nocapture nonnull dereferenceable(16) %26, <2 x double>* nocapture nonnull dereferenceable(16) %27, <2 x double> %9, <2 x double> %10, <2 x double> %11, <2 x double> %12, <2 x double> %13, <2 x double> %14, <2 x double> %15, <2 x double> %16, <2 x double> %17, <2 x double> %18)
+; CHECK-NEXT: %29 = load <2 x double>, <2 x double>* %19, align 16
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %31 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: %32 = load <2 x double>, <2 x double>* %22, align 16
+; CHECK-NEXT: %33 = load <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: %34 = load <2 x double>, <2 x double>* %24, align 16
+; CHECK-NEXT: %35 = load <2 x double>, <2 x double>* %25, align 16
+; CHECK-NEXT: %36 = load <2 x double>, <2 x double>* %26, align 16
+; CHECK-NEXT: %37 = load <2 x double>, <2 x double>* %27, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %30, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %31, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %33, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %34, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %37, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %28
+; CHECK-NEXT: }
+
+; Tests 49-55: indirect calls through a function-pointer argument, plus
+; passing a function pointer as a call argument, for 2-element vectors.
+; Function-pointer *types* are left unlegalized; instead the pointer is
+; bitcast to the legalized signature at the indirect call site, and a
+; constant-expression bitcast converts a legalized function back to the
+; original pointer type when it is passed as a data argument.  Vectors
+; that are already native width (<2 x i64>, <2 x double>) pass through
+; unchanged, with no bitcasts emitted.
+define <2 x i8> @fn_49_2xi8_call_arg(<2 x i8> (<2 x i8>)*, <2 x i8>) {
+entry:
+ %2 = call <2 x i8> %0(<2 x i8> %1)
+ %3 = call <2 x i8> @fn_49_2xi8_call_arg(<2 x i8> (<2 x i8>)* @fn_0_2xi8, <2 x i8> %2)
+ ret <2 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_49_2xi8_call_arg(<2 x i8> (<2 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <2 x i8> (<2 x i8>)* %0 to <16 x i8> (<16 x i8>)*
+; CHECK-NEXT: %3 = call <16 x i8> %2(<16 x i8> %1)
+; CHECK-NEXT: %4 = call <16 x i8> @fn_49_2xi8_call_arg(<2 x i8> (<2 x i8>)* bitcast (<16 x i8> (<16 x i8>)* @fn_0_2xi8 to <2 x i8> (<2 x i8>)*), <16 x i8> %3)
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+define <2 x i16> @fn_50_2xi16_call_arg(<2 x i16> (<2 x i16>)*, <2 x i16>) {
+entry:
+ %2 = call <2 x i16> %0(<2 x i16> %1)
+ %3 = call <2 x i16> @fn_50_2xi16_call_arg(<2 x i16> (<2 x i16>)* @fn_1_2xi16, <2 x i16> %2)
+ ret <2 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_50_2xi16_call_arg(<2 x i16> (<2 x i16>)*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <2 x i16> (<2 x i16>)* %0 to <8 x i16> (<8 x i16>)*
+; CHECK-NEXT: %3 = call <8 x i16> %2(<8 x i16> %1)
+; CHECK-NEXT: %4 = call <8 x i16> @fn_50_2xi16_call_arg(<2 x i16> (<2 x i16>)* bitcast (<8 x i16> (<8 x i16>)* @fn_1_2xi16 to <2 x i16> (<2 x i16>)*), <8 x i16> %3)
+; CHECK-NEXT: ret <8 x i16> %4
+; CHECK-NEXT: }
+
+define <2 x i32> @fn_51_2xi32_call_arg(<2 x i32> (<2 x i32>)*, <2 x i32>) {
+entry:
+ %2 = call <2 x i32> %0(<2 x i32> %1)
+ %3 = call <2 x i32> @fn_51_2xi32_call_arg(<2 x i32> (<2 x i32>)* @fn_2_2xi32, <2 x i32> %2)
+ ret <2 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_51_2xi32_call_arg(<2 x i32> (<2 x i32>)*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <2 x i32> (<2 x i32>)* %0 to <4 x i32> (<4 x i32>)*
+; CHECK-NEXT: %3 = call <4 x i32> %2(<4 x i32> %1)
+; CHECK-NEXT: %4 = call <4 x i32> @fn_51_2xi32_call_arg(<2 x i32> (<2 x i32>)* bitcast (<4 x i32> (<4 x i32>)* @fn_2_2xi32 to <2 x i32> (<2 x i32>)*), <4 x i32> %3)
+; CHECK-NEXT: ret <4 x i32> %4
+; CHECK-NEXT: }
+
+; <2 x i64> is already native width: the whole test is unchanged.
+define <2 x i64> @fn_52_2xi64_call_arg(<2 x i64> (<2 x i64>)*, <2 x i64>) {
+entry:
+ %2 = call <2 x i64> %0(<2 x i64> %1)
+ %3 = call <2 x i64> @fn_52_2xi64_call_arg(<2 x i64> (<2 x i64>)* @fn_3_2xi64, <2 x i64> %2)
+ ret <2 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_52_2xi64_call_arg(<2 x i64> (<2 x i64>)*, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <2 x i64> %0(<2 x i64> %1)
+; CHECK-NEXT: %3 = call <2 x i64> @fn_52_2xi64_call_arg(<2 x i64> (<2 x i64>)* @fn_3_2xi64, <2 x i64> %2)
+; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-NEXT: }
+
+define <2 x i8*> @fn_53_2xi8ptr_call_arg(<2 x i8*> (<2 x i8*>)*, <2 x i8*>) {
+entry:
+ %2 = call <2 x i8*> %0(<2 x i8*> %1)
+ %3 = call <2 x i8*> @fn_53_2xi8ptr_call_arg(<2 x i8*> (<2 x i8*>)* @fn_4_2xi8ptr, <2 x i8*> %2)
+ ret <2 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_53_2xi8ptr_call_arg(<2 x i8*> (<2 x i8*>)*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <2 x i8*> (<2 x i8*>)* %0 to <4 x i8*> (<4 x i8*>)*
+; CHECK-NEXT: %3 = call <4 x i8*> %2(<4 x i8*> %1)
+; CHECK-NEXT: %4 = call <4 x i8*> @fn_53_2xi8ptr_call_arg(<2 x i8*> (<2 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>)* @fn_4_2xi8ptr to <2 x i8*> (<2 x i8*>)*), <4 x i8*> %3)
+; CHECK-NEXT: ret <4 x i8*> %4
+; CHECK-NEXT: }
+
+define <2 x float> @fn_54_2xfloat_call_arg(<2 x float> (<2 x float>)*, <2 x float>) {
+entry:
+ %2 = call <2 x float> %0(<2 x float> %1)
+ %3 = call <2 x float> @fn_54_2xfloat_call_arg(<2 x float> (<2 x float>)* @fn_5_2xfloat, <2 x float> %2)
+ ret <2 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_54_2xfloat_call_arg(<2 x float> (<2 x float>)*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <2 x float> (<2 x float>)* %0 to <4 x float> (<4 x float>)*
+; CHECK-NEXT: %3 = call <4 x float> %2(<4 x float> %1)
+; CHECK-NEXT: %4 = call <4 x float> @fn_54_2xfloat_call_arg(<2 x float> (<2 x float>)* bitcast (<4 x float> (<4 x float>)* @fn_5_2xfloat to <2 x float> (<2 x float>)*), <4 x float> %3)
+; CHECK-NEXT: ret <4 x float> %4
+; CHECK-NEXT: }
+
+; <2 x double> is already native width: the whole test is unchanged.
+define <2 x double> @fn_55_2xdouble_call_arg(<2 x double> (<2 x double>)*, <2 x double>) {
+entry:
+ %2 = call <2 x double> %0(<2 x double> %1)
+ %3 = call <2 x double> @fn_55_2xdouble_call_arg(<2 x double> (<2 x double>)* @fn_6_2xdouble, <2 x double> %2)
+ ret <2 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_55_2xdouble_call_arg(<2 x double> (<2 x double>)*, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <2 x double> %0(<2 x double> %1)
+; CHECK-NEXT: %3 = call <2 x double> @fn_55_2xdouble_call_arg(<2 x double> (<2 x double>)* @fn_6_2xdouble, <2 x double> %2)
+; CHECK-NEXT: ret <2 x double> %3
+; CHECK-NEXT: }
+
+; Tests 56-62: same indirect-call / function-pointer-argument pattern
+; for 4-element vectors.  Native-width types (<4 x i32>, <4 x i8*>,
+; <4 x float>) are untouched; <4 x i64> and <4 x double> (256 bits)
+; split into two chunks, so the indirect callee is bitcast to the
+; widened signature with one out-pointer parameter.
+define <4 x i8> @fn_56_4xi8_call_arg(<4 x i8> (<4 x i8>)*, <4 x i8>) {
+entry:
+ %2 = call <4 x i8> %0(<4 x i8> %1)
+ %3 = call <4 x i8> @fn_56_4xi8_call_arg(<4 x i8> (<4 x i8>)* @fn_7_4xi8, <4 x i8> %2)
+ ret <4 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_56_4xi8_call_arg(<4 x i8> (<4 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <4 x i8> (<4 x i8>)* %0 to <16 x i8> (<16 x i8>)*
+; CHECK-NEXT: %3 = call <16 x i8> %2(<16 x i8> %1)
+; CHECK-NEXT: %4 = call <16 x i8> @fn_56_4xi8_call_arg(<4 x i8> (<4 x i8>)* bitcast (<16 x i8> (<16 x i8>)* @fn_7_4xi8 to <4 x i8> (<4 x i8>)*), <16 x i8> %3)
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+define <4 x i16> @fn_57_4xi16_call_arg(<4 x i16> (<4 x i16>)*, <4 x i16>) {
+entry:
+ %2 = call <4 x i16> %0(<4 x i16> %1)
+ %3 = call <4 x i16> @fn_57_4xi16_call_arg(<4 x i16> (<4 x i16>)* @fn_8_4xi16, <4 x i16> %2)
+ ret <4 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_57_4xi16_call_arg(<4 x i16> (<4 x i16>)*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <4 x i16> (<4 x i16>)* %0 to <8 x i16> (<8 x i16>)*
+; CHECK-NEXT: %3 = call <8 x i16> %2(<8 x i16> %1)
+; CHECK-NEXT: %4 = call <8 x i16> @fn_57_4xi16_call_arg(<4 x i16> (<4 x i16>)* bitcast (<8 x i16> (<8 x i16>)* @fn_8_4xi16 to <4 x i16> (<4 x i16>)*), <8 x i16> %3)
+; CHECK-NEXT: ret <8 x i16> %4
+; CHECK-NEXT: }
+
+define <4 x i32> @fn_58_4xi32_call_arg(<4 x i32> (<4 x i32>)*, <4 x i32>) {
+entry:
+ %2 = call <4 x i32> %0(<4 x i32> %1)
+ %3 = call <4 x i32> @fn_58_4xi32_call_arg(<4 x i32> (<4 x i32>)* @fn_9_4xi32, <4 x i32> %2)
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_58_4xi32_call_arg(<4 x i32> (<4 x i32>)*, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <4 x i32> %0(<4 x i32> %1)
+; CHECK-NEXT: %3 = call <4 x i32> @fn_58_4xi32_call_arg(<4 x i32> (<4 x i32>)* @fn_9_4xi32, <4 x i32> %2)
+; CHECK-NEXT: ret <4 x i32> %3
+; CHECK-NEXT: }
+
+; Two-chunk return through an indirect call: the alloca %4 is reused for
+; both the indirect and the direct call's out-pointer.
+define <4 x i64> @fn_59_4xi64_call_arg(<4 x i64> (<4 x i64>)*, <4 x i64>) {
+entry:
+ %2 = call <4 x i64> %0(<4 x i64> %1)
+ %3 = call <4 x i64> @fn_59_4xi64_call_arg(<4 x i64> (<4 x i64>)* @fn_10_4xi64, <4 x i64> %2)
+ ret <4 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_59_4xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <4 x i64> (<4 x i64>)*, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %5 = bitcast <4 x i64> (<4 x i64>)* %1 to <2 x i64> (<2 x i64>*, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %6 = call <2 x i64> %5(<2 x i64>* nocapture nonnull dereferenceable(16) %4, <2 x i64> %2, <2 x i64> %3)
+; CHECK-NEXT: %7 = load <2 x i64>, <2 x i64>* %4, align 16
+; CHECK-NEXT: %8 = call <2 x i64> @fn_59_4xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %4, <4 x i64> (<4 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>, <2 x i64>)* @fn_10_4xi64 to <4 x i64> (<4 x i64>)*), <2 x i64> %6, <2 x i64> %7)
+; CHECK-NEXT: %9 = load <2 x i64>, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+; CHECK-NEXT: }
+
+define <4 x i8*> @fn_60_4xi8ptr_call_arg(<4 x i8*> (<4 x i8*>)*, <4 x i8*>) {
+entry:
+ %2 = call <4 x i8*> %0(<4 x i8*> %1)
+ %3 = call <4 x i8*> @fn_60_4xi8ptr_call_arg(<4 x i8*> (<4 x i8*>)* @fn_11_4xi8ptr, <4 x i8*> %2)
+ ret <4 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_60_4xi8ptr_call_arg(<4 x i8*> (<4 x i8*>)*, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <4 x i8*> %0(<4 x i8*> %1)
+; CHECK-NEXT: %3 = call <4 x i8*> @fn_60_4xi8ptr_call_arg(<4 x i8*> (<4 x i8*>)* @fn_11_4xi8ptr, <4 x i8*> %2)
+; CHECK-NEXT: ret <4 x i8*> %3
+; CHECK-NEXT: }
+
+define <4 x float> @fn_61_4xfloat_call_arg(<4 x float> (<4 x float>)*, <4 x float>) {
+entry:
+ %2 = call <4 x float> %0(<4 x float> %1)
+ %3 = call <4 x float> @fn_61_4xfloat_call_arg(<4 x float> (<4 x float>)* @fn_12_4xfloat, <4 x float> %2)
+ ret <4 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_61_4xfloat_call_arg(<4 x float> (<4 x float>)*, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <4 x float> %0(<4 x float> %1)
+; CHECK-NEXT: %3 = call <4 x float> @fn_61_4xfloat_call_arg(<4 x float> (<4 x float>)* @fn_12_4xfloat, <4 x float> %2)
+; CHECK-NEXT: ret <4 x float> %3
+; CHECK-NEXT: }
+
+define <4 x double> @fn_62_4xdouble_call_arg(<4 x double> (<4 x double>)*, <4 x double>) {
+entry:
+ %2 = call <4 x double> %0(<4 x double> %1)
+ %3 = call <4 x double> @fn_62_4xdouble_call_arg(<4 x double> (<4 x double>)* @fn_13_4xdouble, <4 x double> %2)
+ ret <4 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_62_4xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <4 x double> (<4 x double>)*, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <2 x double>, align 16
+; CHECK-NEXT: %5 = bitcast <4 x double> (<4 x double>)* %1 to <2 x double> (<2 x double>*, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %6 = call <2 x double> %5(<2 x double>* nocapture nonnull dereferenceable(16) %4, <2 x double> %2, <2 x double> %3)
+; CHECK-NEXT: %7 = load <2 x double>, <2 x double>* %4, align 16
+; CHECK-NEXT: %8 = call <2 x double> @fn_62_4xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %4, <4 x double> (<4 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>, <2 x double>)* @fn_13_4xdouble to <4 x double> (<4 x double>)*), <2 x double> %6, <2 x double> %7)
+; CHECK-NEXT: %9 = load <2 x double>, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+; CHECK-NEXT: }
+
+; Tests 63-69: the indirect-call pattern for 6-element vectors.
+; <6 x i8> and <6 x i16> widen in place to one native register;
+; <6 x i32>, <6 x i8*> and <6 x float> split into two chunks (one
+; out-pointer); <6 x i64> and <6 x double> (384 bits) split into three
+; chunks (two out-pointers).
+define <6 x i8> @fn_63_6xi8_call_arg(<6 x i8> (<6 x i8>)*, <6 x i8>) {
+entry:
+ %2 = call <6 x i8> %0(<6 x i8> %1)
+ %3 = call <6 x i8> @fn_63_6xi8_call_arg(<6 x i8> (<6 x i8>)* @fn_14_6xi8, <6 x i8> %2)
+ ret <6 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_63_6xi8_call_arg(<6 x i8> (<6 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <6 x i8> (<6 x i8>)* %0 to <16 x i8> (<16 x i8>)*
+; CHECK-NEXT: %3 = call <16 x i8> %2(<16 x i8> %1)
+; CHECK-NEXT: %4 = call <16 x i8> @fn_63_6xi8_call_arg(<6 x i8> (<6 x i8>)* bitcast (<16 x i8> (<16 x i8>)* @fn_14_6xi8 to <6 x i8> (<6 x i8>)*), <16 x i8> %3)
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+define <6 x i16> @fn_64_6xi16_call_arg(<6 x i16> (<6 x i16>)*, <6 x i16>) {
+entry:
+ %2 = call <6 x i16> %0(<6 x i16> %1)
+ %3 = call <6 x i16> @fn_64_6xi16_call_arg(<6 x i16> (<6 x i16>)* @fn_15_6xi16, <6 x i16> %2)
+ ret <6 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_64_6xi16_call_arg(<6 x i16> (<6 x i16>)*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <6 x i16> (<6 x i16>)* %0 to <8 x i16> (<8 x i16>)*
+; CHECK-NEXT: %3 = call <8 x i16> %2(<8 x i16> %1)
+; CHECK-NEXT: %4 = call <8 x i16> @fn_64_6xi16_call_arg(<6 x i16> (<6 x i16>)* bitcast (<8 x i16> (<8 x i16>)* @fn_15_6xi16 to <6 x i16> (<6 x i16>)*), <8 x i16> %3)
+; CHECK-NEXT: ret <8 x i16> %4
+; CHECK-NEXT: }
+
+define <6 x i32> @fn_65_6xi32_call_arg(<6 x i32> (<6 x i32>)*, <6 x i32>) {
+entry:
+ %2 = call <6 x i32> %0(<6 x i32> %1)
+ %3 = call <6 x i32> @fn_65_6xi32_call_arg(<6 x i32> (<6 x i32>)* @fn_16_6xi32, <6 x i32> %2)
+ ret <6 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_65_6xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16), <6 x i32> (<6 x i32>)*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %5 = bitcast <6 x i32> (<6 x i32>)* %1 to <4 x i32> (<4 x i32>*, <4 x i32>, <4 x i32>)*
+; CHECK-NEXT: %6 = call <4 x i32> %5(<4 x i32>* nocapture nonnull dereferenceable(16) %4, <4 x i32> %2, <4 x i32> %3)
+; CHECK-NEXT: %7 = load <4 x i32>, <4 x i32>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x i32> @fn_65_6xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16) %4, <6 x i32> (<6 x i32>)* bitcast (<4 x i32> (<4 x i32>*, <4 x i32>, <4 x i32>)* @fn_16_6xi32 to <6 x i32> (<6 x i32>)*), <4 x i32> %6, <4 x i32> %7)
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %4, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+; CHECK-NEXT: }
+
+define <6 x i64> @fn_66_6xi64_call_arg(<6 x i64> (<6 x i64>)*, <6 x i64>) {
+entry:
+ %2 = call <6 x i64> %0(<6 x i64> %1)
+ %3 = call <6 x i64> @fn_66_6xi64_call_arg(<6 x i64> (<6 x i64>)* @fn_17_6xi64, <6 x i64> %2)
+ ret <6 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_66_6xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <6 x i64> (<6 x i64>)*, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %7 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %8 = bitcast <6 x i64> (<6 x i64>)* %2 to <2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %9 = call <2 x i64> %8(<2 x i64>* nocapture nonnull dereferenceable(16) %6, <2 x i64>* nocapture nonnull dereferenceable(16) %7, <2 x i64> %3, <2 x i64> %4, <2 x i64> %5)
+; CHECK-NEXT: %10 = load <2 x i64>, <2 x i64>* %6, align 16
+; CHECK-NEXT: %11 = load <2 x i64>, <2 x i64>* %7, align 16
+; CHECK-NEXT: %12 = call <2 x i64> @fn_66_6xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %6, <2 x i64>* nocapture nonnull dereferenceable(16) %7, <6 x i64> (<6 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>)* @fn_17_6xi64 to <6 x i64> (<6 x i64>)*), <2 x i64> %9, <2 x i64> %10, <2 x i64> %11)
+; CHECK-NEXT: %13 = load <2 x i64>, <2 x i64>* %6, align 16
+; CHECK-NEXT: %14 = load <2 x i64>, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %13, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+; CHECK-NEXT: }
+
+define <6 x i8*> @fn_67_6xi8ptr_call_arg(<6 x i8*> (<6 x i8*>)*, <6 x i8*>) {
+entry:
+ %2 = call <6 x i8*> %0(<6 x i8*> %1)
+ %3 = call <6 x i8*> @fn_67_6xi8ptr_call_arg(<6 x i8*> (<6 x i8*>)* @fn_18_6xi8ptr, <6 x i8*> %2)
+ ret <6 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_67_6xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16), <6 x i8*> (<6 x i8*>)*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %5 = bitcast <6 x i8*> (<6 x i8*>)* %1 to <4 x i8*> (<4 x i8*>*, <4 x i8*>, <4 x i8*>)*
+; CHECK-NEXT: %6 = call <4 x i8*> %5(<4 x i8*>* nocapture nonnull dereferenceable(16) %4, <4 x i8*> %2, <4 x i8*> %3)
+; CHECK-NEXT: %7 = load <4 x i8*>, <4 x i8*>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x i8*> @fn_67_6xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16) %4, <6 x i8*> (<6 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>*, <4 x i8*>, <4 x i8*>)* @fn_18_6xi8ptr to <6 x i8*> (<6 x i8*>)*), <4 x i8*> %6, <4 x i8*> %7)
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %4, align 16
+; CHECK-NEXT: store <4 x i8*> %9, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %8
+; CHECK-NEXT: }
+
+define <6 x float> @fn_68_6xfloat_call_arg(<6 x float> (<6 x float>)*, <6 x float>) {
+entry:
+ %2 = call <6 x float> %0(<6 x float> %1)
+ %3 = call <6 x float> @fn_68_6xfloat_call_arg(<6 x float> (<6 x float>)* @fn_19_6xfloat, <6 x float> %2)
+ ret <6 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_68_6xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16), <6 x float> (<6 x float>)*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x float>, align 16
+; CHECK-NEXT: %5 = bitcast <6 x float> (<6 x float>)* %1 to <4 x float> (<4 x float>*, <4 x float>, <4 x float>)*
+; CHECK-NEXT: %6 = call <4 x float> %5(<4 x float>* nocapture nonnull dereferenceable(16) %4, <4 x float> %2, <4 x float> %3)
+; CHECK-NEXT: %7 = load <4 x float>, <4 x float>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x float> @fn_68_6xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16) %4, <6 x float> (<6 x float>)* bitcast (<4 x float> (<4 x float>*, <4 x float>, <4 x float>)* @fn_19_6xfloat to <6 x float> (<6 x float>)*), <4 x float> %6, <4 x float> %7)
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %4, align 16
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %8
+; CHECK-NEXT: }
+
+define <6 x double> @fn_69_6xdouble_call_arg(<6 x double> (<6 x double>)*, <6 x double>) {
+entry:
+ %2 = call <6 x double> %0(<6 x double> %1)
+ %3 = call <6 x double> @fn_69_6xdouble_call_arg(<6 x double> (<6 x double>)* @fn_20_6xdouble, <6 x double> %2)
+ ret <6 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_69_6xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <6 x double> (<6 x double>)*, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <2 x double>, align 16
+; CHECK-NEXT: %7 = alloca <2 x double>, align 16
+; CHECK-NEXT: %8 = bitcast <6 x double> (<6 x double>)* %2 to <2 x double> (<2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %9 = call <2 x double> %8(<2 x double>* nocapture nonnull dereferenceable(16) %6, <2 x double>* nocapture nonnull dereferenceable(16) %7, <2 x double> %3, <2 x double> %4, <2 x double> %5)
+; CHECK-NEXT: %10 = load <2 x double>, <2 x double>* %6, align 16
+; CHECK-NEXT: %11 = load <2 x double>, <2 x double>* %7, align 16
+; CHECK-NEXT: %12 = call <2 x double> @fn_69_6xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %6, <2 x double>* nocapture nonnull dereferenceable(16) %7, <6 x double> (<6 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>)* @fn_20_6xdouble to <6 x double> (<6 x double>)*), <2 x double> %9, <2 x double> %10, <2 x double> %11)
+; CHECK-NEXT: %13 = load <2 x double>, <2 x double>* %6, align 16
+; CHECK-NEXT: %14 = load <2 x double>, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %13, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %12
+; CHECK-NEXT: }
+
+; <8 x i8> (64 bits) is narrower than the 128-bit legal width, so it is widened
+; to <16 x i8> rather than split. The function-pointer parameter keeps its
+; original <8 x i8> type and is bitcast at each call site.
+define <8 x i8> @fn_70_8xi8_call_arg(<8 x i8> (<8 x i8>)*, <8 x i8>) {
+entry:
+ %2 = call <8 x i8> %0(<8 x i8> %1)
+ %3 = call <8 x i8> @fn_70_8xi8_call_arg(<8 x i8> (<8 x i8>)* @fn_21_8xi8, <8 x i8> %2)
+ ret <8 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_70_8xi8_call_arg(<8 x i8> (<8 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <8 x i8> (<8 x i8>)* %0 to <16 x i8> (<16 x i8>)*
+; CHECK-NEXT: %3 = call <16 x i8> %2(<16 x i8> %1)
+; CHECK-NEXT: %4 = call <16 x i8> @fn_70_8xi8_call_arg(<8 x i8> (<8 x i8>)* bitcast (<16 x i8> (<16 x i8>)* @fn_21_8xi8 to <8 x i8> (<8 x i8>)*), <16 x i8> %3)
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+; <8 x i16> is exactly 128 bits and therefore already legal: the expected
+; output is identical to the input IR (no widening, splitting, or bitcasts).
+define <8 x i16> @fn_71_8xi16_call_arg(<8 x i16> (<8 x i16>)*, <8 x i16>) {
+entry:
+ %2 = call <8 x i16> %0(<8 x i16> %1)
+ %3 = call <8 x i16> @fn_71_8xi16_call_arg(<8 x i16> (<8 x i16>)* @fn_22_8xi16, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_71_8xi16_call_arg(<8 x i16> (<8 x i16>)*, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <8 x i16> %0(<8 x i16> %1)
+; CHECK-NEXT: %3 = call <8 x i16> @fn_71_8xi16_call_arg(<8 x i16> (<8 x i16>)* @fn_22_8xi16, <8 x i16> %2)
+; CHECK-NEXT: ret <8 x i16> %3
+; CHECK-NEXT: }
+
+; <8 x i32> (256 bits) splits into two <4 x i32> parts: one returned directly,
+; one via a single out-pointer parameter. The same alloca (%4) backs the extra
+; result of both the indirect and the recursive call.
+define <8 x i32> @fn_72_8xi32_call_arg(<8 x i32> (<8 x i32>)*, <8 x i32>) {
+entry:
+ %2 = call <8 x i32> %0(<8 x i32> %1)
+ %3 = call <8 x i32> @fn_72_8xi32_call_arg(<8 x i32> (<8 x i32>)* @fn_23_8xi32, <8 x i32> %2)
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_72_8xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i32> (<8 x i32>)*, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %5 = bitcast <8 x i32> (<8 x i32>)* %1 to <4 x i32> (<4 x i32>*, <4 x i32>, <4 x i32>)*
+; CHECK-NEXT: %6 = call <4 x i32> %5(<4 x i32>* nocapture nonnull dereferenceable(16) %4, <4 x i32> %2, <4 x i32> %3)
+; CHECK-NEXT: %7 = load <4 x i32>, <4 x i32>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x i32> @fn_72_8xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16) %4, <8 x i32> (<8 x i32>)* bitcast (<4 x i32> (<4 x i32>*, <4 x i32>, <4 x i32>)* @fn_23_8xi32 to <8 x i32> (<8 x i32>)*), <4 x i32> %6, <4 x i32> %7)
+; CHECK-NEXT: %9 = load <4 x i32>, <4 x i32>* %4, align 16
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+; CHECK-NEXT: }
+
+; <8 x i64> (512 bits) splits into four <2 x i64> parts: one returned directly,
+; three via out-pointer parameters.
+define <8 x i64> @fn_73_8xi64_call_arg(<8 x i64> (<8 x i64>)*, <8 x i64>) {
+entry:
+ %2 = call <8 x i64> %0(<8 x i64> %1)
+ %3 = call <8 x i64> @fn_73_8xi64_call_arg(<8 x i64> (<8 x i64>)* @fn_24_8xi64, <8 x i64> %2)
+ ret <8 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_73_8xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i64> (<8 x i64>)*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %8 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %9 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %10 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %11 = bitcast <8 x i64> (<8 x i64>)* %3 to <2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %12 = call <2 x i64> %11(<2 x i64>* nocapture nonnull dereferenceable(16) %8, <2 x i64>* nocapture nonnull dereferenceable(16) %9, <2 x i64>* nocapture nonnull dereferenceable(16) %10, <2 x i64> %4, <2 x i64> %5, <2 x i64> %6, <2 x i64> %7)
+; CHECK-NEXT: %13 = load <2 x i64>, <2 x i64>* %8, align 16
+; CHECK-NEXT: %14 = load <2 x i64>, <2 x i64>* %9, align 16
+; CHECK-NEXT: %15 = load <2 x i64>, <2 x i64>* %10, align 16
+; CHECK-NEXT: %16 = call <2 x i64> @fn_73_8xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %8, <2 x i64>* nocapture nonnull dereferenceable(16) %9, <2 x i64>* nocapture nonnull dereferenceable(16) %10, <8 x i64> (<8 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)* @fn_24_8xi64 to <8 x i64> (<8 x i64>)*), <2 x i64> %12, <2 x i64> %13, <2 x i64> %14, <2 x i64> %15)
+; CHECK-NEXT: %17 = load <2 x i64>, <2 x i64>* %8, align 16
+; CHECK-NEXT: %18 = load <2 x i64>, <2 x i64>* %9, align 16
+; CHECK-NEXT: %19 = load <2 x i64>, <2 x i64>* %10, align 16
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %16
+; CHECK-NEXT: }
+
+; Pointers are 32-bit (p:32:32:32), so <8 x i8*> is 256 bits and splits into
+; two <4 x i8*> parts: one returned directly, one via an out-pointer parameter.
+define <8 x i8*> @fn_74_8xi8ptr_call_arg(<8 x i8*> (<8 x i8*>)*, <8 x i8*>) {
+entry:
+ %2 = call <8 x i8*> %0(<8 x i8*> %1)
+ %3 = call <8 x i8*> @fn_74_8xi8ptr_call_arg(<8 x i8*> (<8 x i8*>)* @fn_25_8xi8ptr, <8 x i8*> %2)
+ ret <8 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_74_8xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16), <8 x i8*> (<8 x i8*>)*, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %5 = bitcast <8 x i8*> (<8 x i8*>)* %1 to <4 x i8*> (<4 x i8*>*, <4 x i8*>, <4 x i8*>)*
+; CHECK-NEXT: %6 = call <4 x i8*> %5(<4 x i8*>* nocapture nonnull dereferenceable(16) %4, <4 x i8*> %2, <4 x i8*> %3)
+; CHECK-NEXT: %7 = load <4 x i8*>, <4 x i8*>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x i8*> @fn_74_8xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16) %4, <8 x i8*> (<8 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>*, <4 x i8*>, <4 x i8*>)* @fn_25_8xi8ptr to <8 x i8*> (<8 x i8*>)*), <4 x i8*> %6, <4 x i8*> %7)
+; CHECK-NEXT: %9 = load <4 x i8*>, <4 x i8*>* %4, align 16
+; CHECK-NEXT: store <4 x i8*> %9, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %8
+; CHECK-NEXT: }
+
+; <8 x float> (256 bits) splits into two <4 x float> parts: one returned
+; directly, one via an out-pointer parameter.
+define <8 x float> @fn_75_8xfloat_call_arg(<8 x float> (<8 x float>)*, <8 x float>) {
+entry:
+ %2 = call <8 x float> %0(<8 x float> %1)
+ %3 = call <8 x float> @fn_75_8xfloat_call_arg(<8 x float> (<8 x float>)* @fn_26_8xfloat, <8 x float> %2)
+ ret <8 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_75_8xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16), <8 x float> (<8 x float>)*, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <4 x float>, align 16
+; CHECK-NEXT: %5 = bitcast <8 x float> (<8 x float>)* %1 to <4 x float> (<4 x float>*, <4 x float>, <4 x float>)*
+; CHECK-NEXT: %6 = call <4 x float> %5(<4 x float>* nocapture nonnull dereferenceable(16) %4, <4 x float> %2, <4 x float> %3)
+; CHECK-NEXT: %7 = load <4 x float>, <4 x float>* %4, align 16
+; CHECK-NEXT: %8 = call <4 x float> @fn_75_8xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16) %4, <8 x float> (<8 x float>)* bitcast (<4 x float> (<4 x float>*, <4 x float>, <4 x float>)* @fn_26_8xfloat to <8 x float> (<8 x float>)*), <4 x float> %6, <4 x float> %7)
+; CHECK-NEXT: %9 = load <4 x float>, <4 x float>* %4, align 16
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %8
+; CHECK-NEXT: }
+
+; <8 x double> (512 bits) splits into four <2 x double> parts: one returned
+; directly, three via out-pointer parameters.
+define <8 x double> @fn_76_8xdouble_call_arg(<8 x double> (<8 x double>)*, <8 x double>) {
+entry:
+ %2 = call <8 x double> %0(<8 x double> %1)
+ %3 = call <8 x double> @fn_76_8xdouble_call_arg(<8 x double> (<8 x double>)* @fn_27_8xdouble, <8 x double> %2)
+ ret <8 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_76_8xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x double> (<8 x double>)*, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %8 = alloca <2 x double>, align 16
+; CHECK-NEXT: %9 = alloca <2 x double>, align 16
+; CHECK-NEXT: %10 = alloca <2 x double>, align 16
+; CHECK-NEXT: %11 = bitcast <8 x double> (<8 x double>)* %3 to <2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %12 = call <2 x double> %11(<2 x double>* nocapture nonnull dereferenceable(16) %8, <2 x double>* nocapture nonnull dereferenceable(16) %9, <2 x double>* nocapture nonnull dereferenceable(16) %10, <2 x double> %4, <2 x double> %5, <2 x double> %6, <2 x double> %7)
+; CHECK-NEXT: %13 = load <2 x double>, <2 x double>* %8, align 16
+; CHECK-NEXT: %14 = load <2 x double>, <2 x double>* %9, align 16
+; CHECK-NEXT: %15 = load <2 x double>, <2 x double>* %10, align 16
+; CHECK-NEXT: %16 = call <2 x double> @fn_76_8xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %8, <2 x double>* nocapture nonnull dereferenceable(16) %9, <2 x double>* nocapture nonnull dereferenceable(16) %10, <8 x double> (<8 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>)* @fn_27_8xdouble to <8 x double> (<8 x double>)*), <2 x double> %12, <2 x double> %13, <2 x double> %14, <2 x double> %15)
+; CHECK-NEXT: %17 = load <2 x double>, <2 x double>* %8, align 16
+; CHECK-NEXT: %18 = load <2 x double>, <2 x double>* %9, align 16
+; CHECK-NEXT: %19 = load <2 x double>, <2 x double>* %10, align 16
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %16
+; CHECK-NEXT: }
+
+; <12 x i8> (96 bits) is sub-128-bit, so it is widened to <16 x i8> (not
+; split); the function-pointer parameter keeps its <12 x i8> type and is
+; bitcast at each call site.
+define <12 x i8> @fn_77_12xi8_call_arg(<12 x i8> (<12 x i8>)*, <12 x i8>) {
+entry:
+ %2 = call <12 x i8> %0(<12 x i8> %1)
+ %3 = call <12 x i8> @fn_77_12xi8_call_arg(<12 x i8> (<12 x i8>)* @fn_28_12xi8, <12 x i8> %2)
+ ret <12 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_77_12xi8_call_arg(<12 x i8> (<12 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = bitcast <12 x i8> (<12 x i8>)* %0 to <16 x i8> (<16 x i8>)*
+; CHECK-NEXT: %3 = call <16 x i8> %2(<16 x i8> %1)
+; CHECK-NEXT: %4 = call <16 x i8> @fn_77_12xi8_call_arg(<12 x i8> (<12 x i8>)* bitcast (<16 x i8> (<16 x i8>)* @fn_28_12xi8 to <12 x i8> (<12 x i8>)*), <16 x i8> %3)
+; CHECK-NEXT: ret <16 x i8> %4
+; CHECK-NEXT: }
+
+; <12 x i16> (192 bits) legalizes to two <8 x i16> parts (not a 128-bit
+; multiple, so the last part is padded): one returned directly, one via an
+; out-pointer parameter.
+define <12 x i16> @fn_78_12xi16_call_arg(<12 x i16> (<12 x i16>)*, <12 x i16>) {
+entry:
+ %2 = call <12 x i16> %0(<12 x i16> %1)
+ %3 = call <12 x i16> @fn_78_12xi16_call_arg(<12 x i16> (<12 x i16>)* @fn_29_12xi16, <12 x i16> %2)
+ ret <12 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_78_12xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16), <12 x i16> (<12 x i16>)*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %5 = bitcast <12 x i16> (<12 x i16>)* %1 to <8 x i16> (<8 x i16>*, <8 x i16>, <8 x i16>)*
+; CHECK-NEXT: %6 = call <8 x i16> %5(<8 x i16>* nocapture nonnull dereferenceable(16) %4, <8 x i16> %2, <8 x i16> %3)
+; CHECK-NEXT: %7 = load <8 x i16>, <8 x i16>* %4, align 16
+; CHECK-NEXT: %8 = call <8 x i16> @fn_78_12xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16) %4, <12 x i16> (<12 x i16>)* bitcast (<8 x i16> (<8 x i16>*, <8 x i16>, <8 x i16>)* @fn_29_12xi16 to <12 x i16> (<12 x i16>)*), <8 x i16> %6, <8 x i16> %7)
+; CHECK-NEXT: %9 = load <8 x i16>, <8 x i16>* %4, align 16
+; CHECK-NEXT: store <8 x i16> %9, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %8
+; CHECK-NEXT: }
+
+; <12 x i32> (384 bits) splits into three <4 x i32> parts: one returned
+; directly, two via out-pointer parameters.
+define <12 x i32> @fn_79_12xi32_call_arg(<12 x i32> (<12 x i32>)*, <12 x i32>) {
+entry:
+ %2 = call <12 x i32> %0(<12 x i32> %1)
+ %3 = call <12 x i32> @fn_79_12xi32_call_arg(<12 x i32> (<12 x i32>)* @fn_30_12xi32, <12 x i32> %2)
+ ret <12 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_79_12xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <12 x i32> (<12 x i32>)*, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %7 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %8 = bitcast <12 x i32> (<12 x i32>)* %2 to <4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>)*
+; CHECK-NEXT: %9 = call <4 x i32> %8(<4 x i32>* nocapture nonnull dereferenceable(16) %6, <4 x i32>* nocapture nonnull dereferenceable(16) %7, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5)
+; CHECK-NEXT: %10 = load <4 x i32>, <4 x i32>* %6, align 16
+; CHECK-NEXT: %11 = load <4 x i32>, <4 x i32>* %7, align 16
+; CHECK-NEXT: %12 = call <4 x i32> @fn_79_12xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16) %6, <4 x i32>* nocapture nonnull dereferenceable(16) %7, <12 x i32> (<12 x i32>)* bitcast (<4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>)* @fn_30_12xi32 to <12 x i32> (<12 x i32>)*), <4 x i32> %9, <4 x i32> %10, <4 x i32> %11)
+; CHECK-NEXT: %13 = load <4 x i32>, <4 x i32>* %6, align 16
+; CHECK-NEXT: %14 = load <4 x i32>, <4 x i32>* %7, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %12
+; CHECK-NEXT: }
+
+; <12 x i64> (768 bits) splits into six <2 x i64> parts: one returned
+; directly, five via out-pointer parameters.
+define <12 x i64> @fn_80_12xi64_call_arg(<12 x i64> (<12 x i64>)*, <12 x i64>) {
+entry:
+ %2 = call <12 x i64> %0(<12 x i64> %1)
+ %3 = call <12 x i64> @fn_80_12xi64_call_arg(<12 x i64> (<12 x i64>)* @fn_31_12xi64, <12 x i64> %2)
+ ret <12 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_80_12xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <12 x i64> (<12 x i64>)*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %12 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %13 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %14 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %15 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %16 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %17 = bitcast <12 x i64> (<12 x i64>)* %5 to <2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %18 = call <2 x i64> %17(<2 x i64>* nocapture nonnull dereferenceable(16) %12, <2 x i64>* nocapture nonnull dereferenceable(16) %13, <2 x i64>* nocapture nonnull dereferenceable(16) %14, <2 x i64>* nocapture nonnull dereferenceable(16) %15, <2 x i64>* nocapture nonnull dereferenceable(16) %16, <2 x i64> %6, <2 x i64> %7, <2 x i64> %8, <2 x i64> %9, <2 x i64> %10, <2 x i64> %11)
+; CHECK-NEXT: %19 = load <2 x i64>, <2 x i64>* %12, align 16
+; CHECK-NEXT: %20 = load <2 x i64>, <2 x i64>* %13, align 16
+; CHECK-NEXT: %21 = load <2 x i64>, <2 x i64>* %14, align 16
+; CHECK-NEXT: %22 = load <2 x i64>, <2 x i64>* %15, align 16
+; CHECK-NEXT: %23 = load <2 x i64>, <2 x i64>* %16, align 16
+; CHECK-NEXT: %24 = call <2 x i64> @fn_80_12xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %12, <2 x i64>* nocapture nonnull dereferenceable(16) %13, <2 x i64>* nocapture nonnull dereferenceable(16) %14, <2 x i64>* nocapture nonnull dereferenceable(16) %15, <2 x i64>* nocapture nonnull dereferenceable(16) %16, <12 x i64> (<12 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)* @fn_31_12xi64 to <12 x i64> (<12 x i64>)*), <2 x i64> %18, <2 x i64> %19, <2 x i64> %20, <2 x i64> %21, <2 x i64> %22, <2 x i64> %23)
+; CHECK-NEXT: %25 = load <2 x i64>, <2 x i64>* %12, align 16
+; CHECK-NEXT: %26 = load <2 x i64>, <2 x i64>* %13, align 16
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %14, align 16
+; CHECK-NEXT: %28 = load <2 x i64>, <2 x i64>* %15, align 16
+; CHECK-NEXT: %29 = load <2 x i64>, <2 x i64>* %16, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %24
+; CHECK-NEXT: }
+
+; <12 x i8*> (384 bits with 32-bit pointers) splits into three <4 x i8*>
+; parts: one returned directly, two via out-pointer parameters.
+define <12 x i8*> @fn_81_12xi8ptr_call_arg(<12 x i8*> (<12 x i8*>)*, <12 x i8*>) {
+entry:
+ %2 = call <12 x i8*> %0(<12 x i8*> %1)
+ %3 = call <12 x i8*> @fn_81_12xi8ptr_call_arg(<12 x i8*> (<12 x i8*>)* @fn_32_12xi8ptr, <12 x i8*> %2)
+ ret <12 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_81_12xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <12 x i8*> (<12 x i8*>)*, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %7 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %8 = bitcast <12 x i8*> (<12 x i8*>)* %2 to <4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>)*
+; CHECK-NEXT: %9 = call <4 x i8*> %8(<4 x i8*>* nocapture nonnull dereferenceable(16) %6, <4 x i8*>* nocapture nonnull dereferenceable(16) %7, <4 x i8*> %3, <4 x i8*> %4, <4 x i8*> %5)
+; CHECK-NEXT: %10 = load <4 x i8*>, <4 x i8*>* %6, align 16
+; CHECK-NEXT: %11 = load <4 x i8*>, <4 x i8*>* %7, align 16
+; CHECK-NEXT: %12 = call <4 x i8*> @fn_81_12xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16) %6, <4 x i8*>* nocapture nonnull dereferenceable(16) %7, <12 x i8*> (<12 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>)* @fn_32_12xi8ptr to <12 x i8*> (<12 x i8*>)*), <4 x i8*> %9, <4 x i8*> %10, <4 x i8*> %11)
+; CHECK-NEXT: %13 = load <4 x i8*>, <4 x i8*>* %6, align 16
+; CHECK-NEXT: %14 = load <4 x i8*>, <4 x i8*>* %7, align 16
+; CHECK-NEXT: store <4 x i8*> %13, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %14, <4 x i8*>* %1, align 16
+; CHECK-NEXT: ret <4 x i8*> %12
+; CHECK-NEXT: }
+
+; <12 x float> (384 bits) splits into three <4 x float> parts: one returned
+; directly, two via out-pointer parameters.
+define <12 x float> @fn_82_12xfloat_call_arg(<12 x float> (<12 x float>)*, <12 x float>) {
+entry:
+ %2 = call <12 x float> %0(<12 x float> %1)
+ %3 = call <12 x float> @fn_82_12xfloat_call_arg(<12 x float> (<12 x float>)* @fn_33_12xfloat, <12 x float> %2)
+ ret <12 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_82_12xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <12 x float> (<12 x float>)*, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <4 x float>, align 16
+; CHECK-NEXT: %7 = alloca <4 x float>, align 16
+; CHECK-NEXT: %8 = bitcast <12 x float> (<12 x float>)* %2 to <4 x float> (<4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>)*
+; CHECK-NEXT: %9 = call <4 x float> %8(<4 x float>* nocapture nonnull dereferenceable(16) %6, <4 x float>* nocapture nonnull dereferenceable(16) %7, <4 x float> %3, <4 x float> %4, <4 x float> %5)
+; CHECK-NEXT: %10 = load <4 x float>, <4 x float>* %6, align 16
+; CHECK-NEXT: %11 = load <4 x float>, <4 x float>* %7, align 16
+; CHECK-NEXT: %12 = call <4 x float> @fn_82_12xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16) %6, <4 x float>* nocapture nonnull dereferenceable(16) %7, <12 x float> (<12 x float>)* bitcast (<4 x float> (<4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>)* @fn_33_12xfloat to <12 x float> (<12 x float>)*), <4 x float> %9, <4 x float> %10, <4 x float> %11)
+; CHECK-NEXT: %13 = load <4 x float>, <4 x float>* %6, align 16
+; CHECK-NEXT: %14 = load <4 x float>, <4 x float>* %7, align 16
+; CHECK-NEXT: store <4 x float> %13, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %14, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %12
+; CHECK-NEXT: }
+
+; <12 x double> (768 bits) splits into six <2 x double> parts: one returned
+; directly, five via out-pointer parameters.
+define <12 x double> @fn_83_12xdouble_call_arg(<12 x double> (<12 x double>)*, <12 x double>) {
+entry:
+ %2 = call <12 x double> %0(<12 x double> %1)
+ %3 = call <12 x double> @fn_83_12xdouble_call_arg(<12 x double> (<12 x double>)* @fn_34_12xdouble, <12 x double> %2)
+ ret <12 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_83_12xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <12 x double> (<12 x double>)*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %12 = alloca <2 x double>, align 16
+; CHECK-NEXT: %13 = alloca <2 x double>, align 16
+; CHECK-NEXT: %14 = alloca <2 x double>, align 16
+; CHECK-NEXT: %15 = alloca <2 x double>, align 16
+; CHECK-NEXT: %16 = alloca <2 x double>, align 16
+; CHECK-NEXT: %17 = bitcast <12 x double> (<12 x double>)* %5 to <2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %18 = call <2 x double> %17(<2 x double>* nocapture nonnull dereferenceable(16) %12, <2 x double>* nocapture nonnull dereferenceable(16) %13, <2 x double>* nocapture nonnull dereferenceable(16) %14, <2 x double>* nocapture nonnull dereferenceable(16) %15, <2 x double>* nocapture nonnull dereferenceable(16) %16, <2 x double> %6, <2 x double> %7, <2 x double> %8, <2 x double> %9, <2 x double> %10, <2 x double> %11)
+; CHECK-NEXT: %19 = load <2 x double>, <2 x double>* %12, align 16
+; CHECK-NEXT: %20 = load <2 x double>, <2 x double>* %13, align 16
+; CHECK-NEXT: %21 = load <2 x double>, <2 x double>* %14, align 16
+; CHECK-NEXT: %22 = load <2 x double>, <2 x double>* %15, align 16
+; CHECK-NEXT: %23 = load <2 x double>, <2 x double>* %16, align 16
+; CHECK-NEXT: %24 = call <2 x double> @fn_83_12xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %12, <2 x double>* nocapture nonnull dereferenceable(16) %13, <2 x double>* nocapture nonnull dereferenceable(16) %14, <2 x double>* nocapture nonnull dereferenceable(16) %15, <2 x double>* nocapture nonnull dereferenceable(16) %16, <12 x double> (<12 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)* @fn_34_12xdouble to <12 x double> (<12 x double>)*), <2 x double> %18, <2 x double> %19, <2 x double> %20, <2 x double> %21, <2 x double> %22, <2 x double> %23)
+; CHECK-NEXT: %25 = load <2 x double>, <2 x double>* %12, align 16
+; CHECK-NEXT: %26 = load <2 x double>, <2 x double>* %13, align 16
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %14, align 16
+; CHECK-NEXT: %28 = load <2 x double>, <2 x double>* %15, align 16
+; CHECK-NEXT: %29 = load <2 x double>, <2 x double>* %16, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %24
+; CHECK-NEXT: }
+
+; <16 x i8> is exactly 128 bits and therefore already legal: the expected
+; output is identical to the input IR.
+define <16 x i8> @fn_84_16xi8_call_arg(<16 x i8> (<16 x i8>)*, <16 x i8>) {
+entry:
+ %2 = call <16 x i8> %0(<16 x i8> %1)
+ %3 = call <16 x i8> @fn_84_16xi8_call_arg(<16 x i8> (<16 x i8>)* @fn_35_16xi8, <16 x i8> %2)
+ ret <16 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_84_16xi8_call_arg(<16 x i8> (<16 x i8>)*, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %2 = call <16 x i8> %0(<16 x i8> %1)
+; CHECK-NEXT: %3 = call <16 x i8> @fn_84_16xi8_call_arg(<16 x i8> (<16 x i8>)* @fn_35_16xi8, <16 x i8> %2)
+; CHECK-NEXT: ret <16 x i8> %3
+; CHECK-NEXT: }
+
+; <16 x i16> (256 bits) splits into two <8 x i16> parts: one returned
+; directly, one via an out-pointer parameter.
+define <16 x i16> @fn_85_16xi16_call_arg(<16 x i16> (<16 x i16>)*, <16 x i16>) {
+entry:
+ %2 = call <16 x i16> %0(<16 x i16> %1)
+ %3 = call <16 x i16> @fn_85_16xi16_call_arg(<16 x i16> (<16 x i16>)* @fn_36_16xi16, <16 x i16> %2)
+ ret <16 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_85_16xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16), <16 x i16> (<16 x i16>)*, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %5 = bitcast <16 x i16> (<16 x i16>)* %1 to <8 x i16> (<8 x i16>*, <8 x i16>, <8 x i16>)*
+; CHECK-NEXT: %6 = call <8 x i16> %5(<8 x i16>* nocapture nonnull dereferenceable(16) %4, <8 x i16> %2, <8 x i16> %3)
+; CHECK-NEXT: %7 = load <8 x i16>, <8 x i16>* %4, align 16
+; CHECK-NEXT: %8 = call <8 x i16> @fn_85_16xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16) %4, <16 x i16> (<16 x i16>)* bitcast (<8 x i16> (<8 x i16>*, <8 x i16>, <8 x i16>)* @fn_36_16xi16 to <16 x i16> (<16 x i16>)*), <8 x i16> %6, <8 x i16> %7)
+; CHECK-NEXT: %9 = load <8 x i16>, <8 x i16>* %4, align 16
+; CHECK-NEXT: store <8 x i16> %9, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %8
+; CHECK-NEXT: }
+
+; <16 x i32> (512 bits) splits into four <4 x i32> parts: one returned
+; directly, three via out-pointer parameters.
+define <16 x i32> @fn_86_16xi32_call_arg(<16 x i32> (<16 x i32>)*, <16 x i32>) {
+entry:
+ %2 = call <16 x i32> %0(<16 x i32> %1)
+ %3 = call <16 x i32> @fn_86_16xi32_call_arg(<16 x i32> (<16 x i32>)* @fn_37_16xi32, <16 x i32> %2)
+ ret <16 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_86_16xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i32> (<16 x i32>)*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %8 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %9 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %10 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %11 = bitcast <16 x i32> (<16 x i32>)* %3 to <4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*
+; CHECK-NEXT: %12 = call <4 x i32> %11(<4 x i32>* nocapture nonnull dereferenceable(16) %8, <4 x i32>* nocapture nonnull dereferenceable(16) %9, <4 x i32>* nocapture nonnull dereferenceable(16) %10, <4 x i32> %4, <4 x i32> %5, <4 x i32> %6, <4 x i32> %7)
+; CHECK-NEXT: %13 = load <4 x i32>, <4 x i32>* %8, align 16
+; CHECK-NEXT: %14 = load <4 x i32>, <4 x i32>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x i32>, <4 x i32>* %10, align 16
+; CHECK-NEXT: %16 = call <4 x i32> @fn_86_16xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16) %8, <4 x i32>* nocapture nonnull dereferenceable(16) %9, <4 x i32>* nocapture nonnull dereferenceable(16) %10, <16 x i32> (<16 x i32>)* bitcast (<4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)* @fn_37_16xi32 to <16 x i32> (<16 x i32>)*), <4 x i32> %12, <4 x i32> %13, <4 x i32> %14, <4 x i32> %15)
+; CHECK-NEXT: %17 = load <4 x i32>, <4 x i32>* %8, align 16
+; CHECK-NEXT: %18 = load <4 x i32>, <4 x i32>* %9, align 16
+; CHECK-NEXT: %19 = load <4 x i32>, <4 x i32>* %10, align 16
+; CHECK-NEXT: store <4 x i32> %17, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %18, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %19, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+; CHECK-NEXT: }
+
+; <16 x i64> (1024 bits) is the widest split case here: eight <2 x i64> parts,
+; one returned directly and seven via out-pointer parameters.
+define <16 x i64> @fn_87_16xi64_call_arg(<16 x i64> (<16 x i64>)*, <16 x i64>) {
+entry:
+ %2 = call <16 x i64> %0(<16 x i64> %1)
+ %3 = call <16 x i64> @fn_87_16xi64_call_arg(<16 x i64> (<16 x i64>)* @fn_38_16xi64, <16 x i64> %2)
+ ret <16 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_87_16xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i64> (<16 x i64>)*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %16 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %17 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %18 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %19 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %20 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %21 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %22 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %23 = bitcast <16 x i64> (<16 x i64>)* %7 to <2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %24 = call <2 x i64> %23(<2 x i64>* nocapture nonnull dereferenceable(16) %16, <2 x i64>* nocapture nonnull dereferenceable(16) %17, <2 x i64>* nocapture nonnull dereferenceable(16) %18, <2 x i64>* nocapture nonnull dereferenceable(16) %19, <2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64>* nocapture nonnull dereferenceable(16) %22, <2 x i64> %8, <2 x i64> %9, <2 x i64> %10, <2 x i64> %11, <2 x i64> %12, <2 x i64> %13, <2 x i64> %14, <2 x i64> %15)
+; CHECK-NEXT: %25 = load <2 x i64>, <2 x i64>* %16, align 16
+; CHECK-NEXT: %26 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %27 = load <2 x i64>, <2 x i64>* %18, align 16
+; CHECK-NEXT: %28 = load <2 x i64>, <2 x i64>* %19, align 16
+; CHECK-NEXT: %29 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %30 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: %31 = load <2 x i64>, <2 x i64>* %22, align 16
+; CHECK-NEXT: %32 = call <2 x i64> @fn_87_16xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %16, <2 x i64>* nocapture nonnull dereferenceable(16) %17, <2 x i64>* nocapture nonnull dereferenceable(16) %18, <2 x i64>* nocapture nonnull dereferenceable(16) %19, <2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64>* nocapture nonnull dereferenceable(16) %22, <16 x i64> (<16 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)* @fn_38_16xi64 to <16 x i64> (<16 x i64>)*), <2 x i64> %24, <2 x i64> %25, <2 x i64> %26, <2 x i64> %27, <2 x i64> %28, <2 x i64> %29, <2 x i64> %30, <2 x i64> %31)
+; CHECK-NEXT: %33 = load <2 x i64>, <2 x i64>* %16, align 16
+; CHECK-NEXT: %34 = load <2 x i64>, <2 x i64>* %17, align 16
+; CHECK-NEXT: %35 = load <2 x i64>, <2 x i64>* %18, align 16
+; CHECK-NEXT: %36 = load <2 x i64>, <2 x i64>* %19, align 16
+; CHECK-NEXT: %37 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %38 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: %39 = load <2 x i64>, <2 x i64>* %22, align 16
+; CHECK-NEXT: store <2 x i64> %33, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %34, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %37, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %39, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %32
+; CHECK-NEXT: }
+
+; <16 x i8*> (512 bits with 32-bit pointers) splits into four <4 x i8*> parts:
+; one returned directly, three via out-pointer parameters.
+define <16 x i8*> @fn_88_16xi8ptr_call_arg(<16 x i8*> (<16 x i8*>)*, <16 x i8*>) {
+entry:
+ %2 = call <16 x i8*> %0(<16 x i8*> %1)
+ %3 = call <16 x i8*> @fn_88_16xi8ptr_call_arg(<16 x i8*> (<16 x i8*>)* @fn_39_16xi8ptr, <16 x i8*> %2)
+ ret <16 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_88_16xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <16 x i8*> (<16 x i8*>)*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %8 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %9 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %10 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %11 = bitcast <16 x i8*> (<16 x i8*>)* %3 to <4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)*
+; CHECK-NEXT: %12 = call <4 x i8*> %11(<4 x i8*>* nocapture nonnull dereferenceable(16) %8, <4 x i8*>* nocapture nonnull dereferenceable(16) %9, <4 x i8*>* nocapture nonnull dereferenceable(16) %10, <4 x i8*> %4, <4 x i8*> %5, <4 x i8*> %6, <4 x i8*> %7)
+; CHECK-NEXT: %13 = load <4 x i8*>, <4 x i8*>* %8, align 16
+; CHECK-NEXT: %14 = load <4 x i8*>, <4 x i8*>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x i8*>, <4 x i8*>* %10, align 16
+; CHECK-NEXT: %16 = call <4 x i8*> @fn_88_16xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16) %8, <4 x i8*>* nocapture nonnull dereferenceable(16) %9, <4 x i8*>* nocapture nonnull dereferenceable(16) %10, <16 x i8*> (<16 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)* @fn_39_16xi8ptr to <16 x i8*> (<16 x i8*>)*), <4 x i8*> %12, <4 x i8*> %13, <4 x i8*> %14, <4 x i8*> %15)
+; CHECK-NEXT: %17 = load <4 x i8*>, <4 x i8*>* %8, align 16
+; CHECK-NEXT: %18 = load <4 x i8*>, <4 x i8*>* %9, align 16
+; CHECK-NEXT: %19 = load <4 x i8*>, <4 x i8*>* %10, align 16
+; CHECK-NEXT: store <4 x i8*> %17, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %18, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %19, <4 x i8*>* %2, align 16
+; CHECK-NEXT: ret <4 x i8*> %16
+; CHECK-NEXT: }
+
+; Call with <16 x float> argument/return: legalized into four <4 x float>
+; chunks -- one passed/returned directly, three via dereferenceable(16)
+; out-pointer parameters. Indirect calls go through a bitcast of the function
+; pointer to the legalized signature; the direct callee @fn_40_16xfloat is
+; wrapped in the inverse constant bitcast.
+define <16 x float> @fn_89_16xfloat_call_arg(<16 x float> (<16 x float>)*, <16 x float>) {
+entry:
+ %2 = call <16 x float> %0(<16 x float> %1)
+ %3 = call <16 x float> @fn_89_16xfloat_call_arg(<16 x float> (<16 x float>)* @fn_40_16xfloat, <16 x float> %2)
+ ret <16 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_89_16xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x float> (<16 x float>)*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %8 = alloca <4 x float>, align 16
+; CHECK-NEXT: %9 = alloca <4 x float>, align 16
+; CHECK-NEXT: %10 = alloca <4 x float>, align 16
+; CHECK-NEXT: %11 = bitcast <16 x float> (<16 x float>)* %3 to <4 x float> (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>)*
+; CHECK-NEXT: %12 = call <4 x float> %11(<4 x float>* nocapture nonnull dereferenceable(16) %8, <4 x float>* nocapture nonnull dereferenceable(16) %9, <4 x float>* nocapture nonnull dereferenceable(16) %10, <4 x float> %4, <4 x float> %5, <4 x float> %6, <4 x float> %7)
+; CHECK-NEXT: %13 = load <4 x float>, <4 x float>* %8, align 16
+; CHECK-NEXT: %14 = load <4 x float>, <4 x float>* %9, align 16
+; CHECK-NEXT: %15 = load <4 x float>, <4 x float>* %10, align 16
+; CHECK-NEXT: %16 = call <4 x float> @fn_89_16xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16) %8, <4 x float>* nocapture nonnull dereferenceable(16) %9, <4 x float>* nocapture nonnull dereferenceable(16) %10, <16 x float> (<16 x float>)* bitcast (<4 x float> (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>)* @fn_40_16xfloat to <16 x float> (<16 x float>)*), <4 x float> %12, <4 x float> %13, <4 x float> %14, <4 x float> %15)
+; CHECK-NEXT: %17 = load <4 x float>, <4 x float>* %8, align 16
+; CHECK-NEXT: %18 = load <4 x float>, <4 x float>* %9, align 16
+; CHECK-NEXT: %19 = load <4 x float>, <4 x float>* %10, align 16
+; CHECK-NEXT: store <4 x float> %17, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %18, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %19, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %16
+; CHECK-NEXT: }
+
+; Call with <16 x double> argument/return: legalized into eight <2 x double>
+; chunks -- one passed/returned directly, seven via dereferenceable(16)
+; out-pointer parameters. Exercises the widest split in this group (16-byte
+; chunks of a two-element double vector) plus the function-pointer bitcasts
+; for both the indirect callee %0 and the direct callee @fn_41_16xdouble.
+define <16 x double> @fn_90_16xdouble_call_arg(<16 x double> (<16 x double>)*, <16 x double>) {
+entry:
+ %2 = call <16 x double> %0(<16 x double> %1)
+ %3 = call <16 x double> @fn_90_16xdouble_call_arg(<16 x double> (<16 x double>)* @fn_41_16xdouble, <16 x double> %2)
+ ret <16 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_90_16xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x double> (<16 x double>)*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %16 = alloca <2 x double>, align 16
+; CHECK-NEXT: %17 = alloca <2 x double>, align 16
+; CHECK-NEXT: %18 = alloca <2 x double>, align 16
+; CHECK-NEXT: %19 = alloca <2 x double>, align 16
+; CHECK-NEXT: %20 = alloca <2 x double>, align 16
+; CHECK-NEXT: %21 = alloca <2 x double>, align 16
+; CHECK-NEXT: %22 = alloca <2 x double>, align 16
+; CHECK-NEXT: %23 = bitcast <16 x double> (<16 x double>)* %7 to <2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %24 = call <2 x double> %23(<2 x double>* nocapture nonnull dereferenceable(16) %16, <2 x double>* nocapture nonnull dereferenceable(16) %17, <2 x double>* nocapture nonnull dereferenceable(16) %18, <2 x double>* nocapture nonnull dereferenceable(16) %19, <2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double>* nocapture nonnull dereferenceable(16) %22, <2 x double> %8, <2 x double> %9, <2 x double> %10, <2 x double> %11, <2 x double> %12, <2 x double> %13, <2 x double> %14, <2 x double> %15)
+; CHECK-NEXT: %25 = load <2 x double>, <2 x double>* %16, align 16
+; CHECK-NEXT: %26 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %27 = load <2 x double>, <2 x double>* %18, align 16
+; CHECK-NEXT: %28 = load <2 x double>, <2 x double>* %19, align 16
+; CHECK-NEXT: %29 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %30 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: %31 = load <2 x double>, <2 x double>* %22, align 16
+; CHECK-NEXT: %32 = call <2 x double> @fn_90_16xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %16, <2 x double>* nocapture nonnull dereferenceable(16) %17, <2 x double>* nocapture nonnull dereferenceable(16) %18, <2 x double>* nocapture nonnull dereferenceable(16) %19, <2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double>* nocapture nonnull dereferenceable(16) %22, <16 x double> (<16 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)* @fn_41_16xdouble to <16 x double> (<16 x double>)*), <2 x double> %24, <2 x double> %25, <2 x double> %26, <2 x double> %27, <2 x double> %28, <2 x double> %29, <2 x double> %30, <2 x double> %31)
+; CHECK-NEXT: %33 = load <2 x double>, <2 x double>* %16, align 16
+; CHECK-NEXT: %34 = load <2 x double>, <2 x double>* %17, align 16
+; CHECK-NEXT: %35 = load <2 x double>, <2 x double>* %18, align 16
+; CHECK-NEXT: %36 = load <2 x double>, <2 x double>* %19, align 16
+; CHECK-NEXT: %37 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %38 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: %39 = load <2 x double>, <2 x double>* %22, align 16
+; CHECK-NEXT: store <2 x double> %33, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %34, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %37, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %39, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %32
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x i8> argument/return: legalized into two
+; <16 x i8> chunks -- one passed/returned directly, one via a single
+; dereferenceable(16) out-pointer parameter. Also checks the bitcasts of the
+; indirect callee %0 and the direct callee @fn_42_20xi8 between the original
+; and legalized function types.
+define <20 x i8> @fn_91_20xi8_call_arg(<20 x i8> (<20 x i8>)*, <20 x i8>) {
+entry:
+ %2 = call <20 x i8> %0(<20 x i8> %1)
+ %3 = call <20 x i8> @fn_91_20xi8_call_arg(<20 x i8> (<20 x i8>)* @fn_42_20xi8, <20 x i8> %2)
+ ret <20 x i8> %3
+}
+; CHECK-LABEL: define <16 x i8> @fn_91_20xi8_call_arg(<16 x i8>* nocapture nonnull dereferenceable(16), <20 x i8> (<20 x i8>)*, <16 x i8>, <16 x i8>) {
+; CHECK: entry:
+; CHECK-NEXT: %4 = alloca <16 x i8>, align 16
+; CHECK-NEXT: %5 = bitcast <20 x i8> (<20 x i8>)* %1 to <16 x i8> (<16 x i8>*, <16 x i8>, <16 x i8>)*
+; CHECK-NEXT: %6 = call <16 x i8> %5(<16 x i8>* nocapture nonnull dereferenceable(16) %4, <16 x i8> %2, <16 x i8> %3)
+; CHECK-NEXT: %7 = load <16 x i8>, <16 x i8>* %4, align 16
+; CHECK-NEXT: %8 = call <16 x i8> @fn_91_20xi8_call_arg(<16 x i8>* nocapture nonnull dereferenceable(16) %4, <20 x i8> (<20 x i8>)* bitcast (<16 x i8> (<16 x i8>*, <16 x i8>, <16 x i8>)* @fn_42_20xi8 to <20 x i8> (<20 x i8>)*), <16 x i8> %6, <16 x i8> %7)
+; CHECK-NEXT: %9 = load <16 x i8>, <16 x i8>* %4, align 16
+; CHECK-NEXT: store <16 x i8> %9, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %8
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x i16> argument/return: legalized into
+; three <8 x i16> chunks -- one passed/returned directly, two via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_43_20xi16 bitcast between the original and
+; legalized function types.
+define <20 x i16> @fn_92_20xi16_call_arg(<20 x i16> (<20 x i16>)*, <20 x i16>) {
+entry:
+ %2 = call <20 x i16> %0(<20 x i16> %1)
+ %3 = call <20 x i16> @fn_92_20xi16_call_arg(<20 x i16> (<20 x i16>)* @fn_43_20xi16, <20 x i16> %2)
+ ret <20 x i16> %3
+}
+; CHECK-LABEL: define <8 x i16> @fn_92_20xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <20 x i16> (<20 x i16>)*, <8 x i16>, <8 x i16>, <8 x i16>) {
+; CHECK: entry:
+; CHECK-NEXT: %6 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %7 = alloca <8 x i16>, align 16
+; CHECK-NEXT: %8 = bitcast <20 x i16> (<20 x i16>)* %2 to <8 x i16> (<8 x i16>*, <8 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>)*
+; CHECK-NEXT: %9 = call <8 x i16> %8(<8 x i16>* nocapture nonnull dereferenceable(16) %6, <8 x i16>* nocapture nonnull dereferenceable(16) %7, <8 x i16> %3, <8 x i16> %4, <8 x i16> %5)
+; CHECK-NEXT: %10 = load <8 x i16>, <8 x i16>* %6, align 16
+; CHECK-NEXT: %11 = load <8 x i16>, <8 x i16>* %7, align 16
+; CHECK-NEXT: %12 = call <8 x i16> @fn_92_20xi16_call_arg(<8 x i16>* nocapture nonnull dereferenceable(16) %6, <8 x i16>* nocapture nonnull dereferenceable(16) %7, <20 x i16> (<20 x i16>)* bitcast (<8 x i16> (<8 x i16>*, <8 x i16>*, <8 x i16>, <8 x i16>, <8 x i16>)* @fn_43_20xi16 to <20 x i16> (<20 x i16>)*), <8 x i16> %9, <8 x i16> %10, <8 x i16> %11)
+; CHECK-NEXT: %13 = load <8 x i16>, <8 x i16>* %6, align 16
+; CHECK-NEXT: %14 = load <8 x i16>, <8 x i16>* %7, align 16
+; CHECK-NEXT: store <8 x i16> %13, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %14, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %12
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x i32> argument/return: legalized into
+; five <4 x i32> chunks -- one passed/returned directly, four via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_44_20xi32 bitcast between the original and
+; legalized function types.
+define <20 x i32> @fn_93_20xi32_call_arg(<20 x i32> (<20 x i32>)*, <20 x i32>) {
+entry:
+ %2 = call <20 x i32> %0(<20 x i32> %1)
+ %3 = call <20 x i32> @fn_93_20xi32_call_arg(<20 x i32> (<20 x i32>)* @fn_44_20xi32, <20 x i32> %2)
+ ret <20 x i32> %3
+}
+; CHECK-LABEL: define <4 x i32> @fn_93_20xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <20 x i32> (<20 x i32>)*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) {
+; CHECK: entry:
+; CHECK-NEXT: %10 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %11 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %12 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %13 = alloca <4 x i32>, align 16
+; CHECK-NEXT: %14 = bitcast <20 x i32> (<20 x i32>)* %4 to <4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*
+; CHECK-NEXT: %15 = call <4 x i32> %14(<4 x i32>* nocapture nonnull dereferenceable(16) %10, <4 x i32>* nocapture nonnull dereferenceable(16) %11, <4 x i32>* nocapture nonnull dereferenceable(16) %12, <4 x i32>* nocapture nonnull dereferenceable(16) %13, <4 x i32> %5, <4 x i32> %6, <4 x i32> %7, <4 x i32> %8, <4 x i32> %9)
+; CHECK-NEXT: %16 = load <4 x i32>, <4 x i32>* %10, align 16
+; CHECK-NEXT: %17 = load <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: %18 = load <4 x i32>, <4 x i32>* %12, align 16
+; CHECK-NEXT: %19 = load <4 x i32>, <4 x i32>* %13, align 16
+; CHECK-NEXT: %20 = call <4 x i32> @fn_93_20xi32_call_arg(<4 x i32>* nocapture nonnull dereferenceable(16) %10, <4 x i32>* nocapture nonnull dereferenceable(16) %11, <4 x i32>* nocapture nonnull dereferenceable(16) %12, <4 x i32>* nocapture nonnull dereferenceable(16) %13, <20 x i32> (<20 x i32>)* bitcast (<4 x i32> (<4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)* @fn_44_20xi32 to <20 x i32> (<20 x i32>)*), <4 x i32> %15, <4 x i32> %16, <4 x i32> %17, <4 x i32> %18, <4 x i32> %19)
+; CHECK-NEXT: %21 = load <4 x i32>, <4 x i32>* %10, align 16
+; CHECK-NEXT: %22 = load <4 x i32>, <4 x i32>* %11, align 16
+; CHECK-NEXT: %23 = load <4 x i32>, <4 x i32>* %12, align 16
+; CHECK-NEXT: %24 = load <4 x i32>, <4 x i32>* %13, align 16
+; CHECK-NEXT: store <4 x i32> %21, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %23, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %24, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %20
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x i64> argument/return: legalized into
+; ten <2 x i64> chunks -- one passed/returned directly, nine via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_45_20xi64 bitcast between the original and
+; legalized function types.
+define <20 x i64> @fn_94_20xi64_call_arg(<20 x i64> (<20 x i64>)*, <20 x i64>) {
+entry:
+ %2 = call <20 x i64> %0(<20 x i64> %1)
+ %3 = call <20 x i64> @fn_94_20xi64_call_arg(<20 x i64> (<20 x i64>)* @fn_45_20xi64, <20 x i64> %2)
+ ret <20 x i64> %3
+}
+; CHECK-LABEL: define <2 x i64> @fn_94_20xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <20 x i64> (<20 x i64>)*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) {
+; CHECK: entry:
+; CHECK-NEXT: %20 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %21 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %22 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %23 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %24 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %25 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %26 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %27 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %28 = alloca <2 x i64>, align 16
+; CHECK-NEXT: %29 = bitcast <20 x i64> (<20 x i64>)* %9 to <2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)*
+; CHECK-NEXT: %30 = call <2 x i64> %29(<2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64>* nocapture nonnull dereferenceable(16) %22, <2 x i64>* nocapture nonnull dereferenceable(16) %23, <2 x i64>* nocapture nonnull dereferenceable(16) %24, <2 x i64>* nocapture nonnull dereferenceable(16) %25, <2 x i64>* nocapture nonnull dereferenceable(16) %26, <2 x i64>* nocapture nonnull dereferenceable(16) %27, <2 x i64>* nocapture nonnull dereferenceable(16) %28, <2 x i64> %10, <2 x i64> %11, <2 x i64> %12, <2 x i64> %13, <2 x i64> %14, <2 x i64> %15, <2 x i64> %16, <2 x i64> %17, <2 x i64> %18, <2 x i64> %19)
+; CHECK-NEXT: %31 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %32 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: %33 = load <2 x i64>, <2 x i64>* %22, align 16
+; CHECK-NEXT: %34 = load <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: %35 = load <2 x i64>, <2 x i64>* %24, align 16
+; CHECK-NEXT: %36 = load <2 x i64>, <2 x i64>* %25, align 16
+; CHECK-NEXT: %37 = load <2 x i64>, <2 x i64>* %26, align 16
+; CHECK-NEXT: %38 = load <2 x i64>, <2 x i64>* %27, align 16
+; CHECK-NEXT: %39 = load <2 x i64>, <2 x i64>* %28, align 16
+; CHECK-NEXT: %40 = call <2 x i64> @fn_94_20xi64_call_arg(<2 x i64>* nocapture nonnull dereferenceable(16) %20, <2 x i64>* nocapture nonnull dereferenceable(16) %21, <2 x i64>* nocapture nonnull dereferenceable(16) %22, <2 x i64>* nocapture nonnull dereferenceable(16) %23, <2 x i64>* nocapture nonnull dereferenceable(16) %24, <2 x i64>* nocapture nonnull dereferenceable(16) %25, <2 x i64>* nocapture nonnull dereferenceable(16) %26, <2 x i64>* nocapture nonnull dereferenceable(16) %27, <2 x i64>* nocapture nonnull dereferenceable(16) %28, <20 x i64> (<20 x i64>)* bitcast (<2 x i64> (<2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)* @fn_45_20xi64 to <20 x i64> (<20 x i64>)*), <2 x i64> %30, <2 x i64> %31, <2 x i64> %32, <2 x i64> %33, <2 x i64> %34, <2 x i64> %35, <2 x i64> %36, <2 x i64> %37, <2 x i64> %38, <2 x i64> %39)
+; CHECK-NEXT: %41 = load <2 x i64>, <2 x i64>* %20, align 16
+; CHECK-NEXT: %42 = load <2 x i64>, <2 x i64>* %21, align 16
+; CHECK-NEXT: %43 = load <2 x i64>, <2 x i64>* %22, align 16
+; CHECK-NEXT: %44 = load <2 x i64>, <2 x i64>* %23, align 16
+; CHECK-NEXT: %45 = load <2 x i64>, <2 x i64>* %24, align 16
+; CHECK-NEXT: %46 = load <2 x i64>, <2 x i64>* %25, align 16
+; CHECK-NEXT: %47 = load <2 x i64>, <2 x i64>* %26, align 16
+; CHECK-NEXT: %48 = load <2 x i64>, <2 x i64>* %27, align 16
+; CHECK-NEXT: %49 = load <2 x i64>, <2 x i64>* %28, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %42, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %43, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %45, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %46, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %48, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %49, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %40
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x i8*> argument/return: legalized into
+; five <4 x i8*> chunks -- one passed/returned directly, four via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_46_20xi8ptr bitcast between the original and
+; legalized function types.
+define <20 x i8*> @fn_95_20xi8ptr_call_arg(<20 x i8*> (<20 x i8*>)*, <20 x i8*>) {
+entry:
+ %2 = call <20 x i8*> %0(<20 x i8*> %1)
+ %3 = call <20 x i8*> @fn_95_20xi8ptr_call_arg(<20 x i8*> (<20 x i8*>)* @fn_46_20xi8ptr, <20 x i8*> %2)
+ ret <20 x i8*> %3
+}
+; CHECK-LABEL: define <4 x i8*> @fn_95_20xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <20 x i8*> (<20 x i8*>)*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>) {
+; CHECK: entry:
+; CHECK-NEXT: %10 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %11 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %12 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %13 = alloca <4 x i8*>, align 16
+; CHECK-NEXT: %14 = bitcast <20 x i8*> (<20 x i8*>)* %4 to <4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)*
+; CHECK-NEXT: %15 = call <4 x i8*> %14(<4 x i8*>* nocapture nonnull dereferenceable(16) %10, <4 x i8*>* nocapture nonnull dereferenceable(16) %11, <4 x i8*>* nocapture nonnull dereferenceable(16) %12, <4 x i8*>* nocapture nonnull dereferenceable(16) %13, <4 x i8*> %5, <4 x i8*> %6, <4 x i8*> %7, <4 x i8*> %8, <4 x i8*> %9)
+; CHECK-NEXT: %16 = load <4 x i8*>, <4 x i8*>* %10, align 16
+; CHECK-NEXT: %17 = load <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: %18 = load <4 x i8*>, <4 x i8*>* %12, align 16
+; CHECK-NEXT: %19 = load <4 x i8*>, <4 x i8*>* %13, align 16
+; CHECK-NEXT: %20 = call <4 x i8*> @fn_95_20xi8ptr_call_arg(<4 x i8*>* nocapture nonnull dereferenceable(16) %10, <4 x i8*>* nocapture nonnull dereferenceable(16) %11, <4 x i8*>* nocapture nonnull dereferenceable(16) %12, <4 x i8*>* nocapture nonnull dereferenceable(16) %13, <20 x i8*> (<20 x i8*>)* bitcast (<4 x i8*> (<4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>*, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)* @fn_46_20xi8ptr to <20 x i8*> (<20 x i8*>)*), <4 x i8*> %15, <4 x i8*> %16, <4 x i8*> %17, <4 x i8*> %18, <4 x i8*> %19)
+; CHECK-NEXT: %21 = load <4 x i8*>, <4 x i8*>* %10, align 16
+; CHECK-NEXT: %22 = load <4 x i8*>, <4 x i8*>* %11, align 16
+; CHECK-NEXT: %23 = load <4 x i8*>, <4 x i8*>* %12, align 16
+; CHECK-NEXT: %24 = load <4 x i8*>, <4 x i8*>* %13, align 16
+; CHECK-NEXT: store <4 x i8*> %21, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %22, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %23, <4 x i8*>* %2, align 16
+; CHECK-NEXT: store <4 x i8*> %24, <4 x i8*>* %3, align 16
+; CHECK-NEXT: ret <4 x i8*> %20
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x float> argument/return: legalized into
+; five <4 x float> chunks -- one passed/returned directly, four via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_47_20xfloat bitcast between the original and
+; legalized function types.
+define <20 x float> @fn_96_20xfloat_call_arg(<20 x float> (<20 x float>)*, <20 x float>) {
+entry:
+ %2 = call <20 x float> %0(<20 x float> %1)
+ %3 = call <20 x float> @fn_96_20xfloat_call_arg(<20 x float> (<20 x float>)* @fn_47_20xfloat, <20 x float> %2)
+ ret <20 x float> %3
+}
+; CHECK-LABEL: define <4 x float> @fn_96_20xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <20 x float> (<20 x float>)*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>) {
+; CHECK: entry:
+; CHECK-NEXT: %10 = alloca <4 x float>, align 16
+; CHECK-NEXT: %11 = alloca <4 x float>, align 16
+; CHECK-NEXT: %12 = alloca <4 x float>, align 16
+; CHECK-NEXT: %13 = alloca <4 x float>, align 16
+; CHECK-NEXT: %14 = bitcast <20 x float> (<20 x float>)* %4 to <4 x float> (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)*
+; CHECK-NEXT: %15 = call <4 x float> %14(<4 x float>* nocapture nonnull dereferenceable(16) %10, <4 x float>* nocapture nonnull dereferenceable(16) %11, <4 x float>* nocapture nonnull dereferenceable(16) %12, <4 x float>* nocapture nonnull dereferenceable(16) %13, <4 x float> %5, <4 x float> %6, <4 x float> %7, <4 x float> %8, <4 x float> %9)
+; CHECK-NEXT: %16 = load <4 x float>, <4 x float>* %10, align 16
+; CHECK-NEXT: %17 = load <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: %18 = load <4 x float>, <4 x float>* %12, align 16
+; CHECK-NEXT: %19 = load <4 x float>, <4 x float>* %13, align 16
+; CHECK-NEXT: %20 = call <4 x float> @fn_96_20xfloat_call_arg(<4 x float>* nocapture nonnull dereferenceable(16) %10, <4 x float>* nocapture nonnull dereferenceable(16) %11, <4 x float>* nocapture nonnull dereferenceable(16) %12, <4 x float>* nocapture nonnull dereferenceable(16) %13, <20 x float> (<20 x float>)* bitcast (<4 x float> (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)* @fn_47_20xfloat to <20 x float> (<20 x float>)*), <4 x float> %15, <4 x float> %16, <4 x float> %17, <4 x float> %18, <4 x float> %19)
+; CHECK-NEXT: %21 = load <4 x float>, <4 x float>* %10, align 16
+; CHECK-NEXT: %22 = load <4 x float>, <4 x float>* %11, align 16
+; CHECK-NEXT: %23 = load <4 x float>, <4 x float>* %12, align 16
+; CHECK-NEXT: %24 = load <4 x float>, <4 x float>* %13, align 16
+; CHECK-NEXT: store <4 x float> %21, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %22, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %23, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %24, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %20
+; CHECK-NEXT: }
+
+; Call with a non-power-of-two <20 x double> argument/return: legalized into
+; ten <2 x double> chunks -- one passed/returned directly, nine via
+; dereferenceable(16) out-pointer parameters -- with the indirect callee %0
+; and the direct callee @fn_48_20xdouble bitcast between the original and
+; legalized function types.
+define <20 x double> @fn_97_20xdouble_call_arg(<20 x double> (<20 x double>)*, <20 x double>) {
+entry:
+ %2 = call <20 x double> %0(<20 x double> %1)
+ %3 = call <20 x double> @fn_97_20xdouble_call_arg(<20 x double> (<20 x double>)* @fn_48_20xdouble, <20 x double> %2)
+ ret <20 x double> %3
+}
+; CHECK-LABEL: define <2 x double> @fn_97_20xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <20 x double> (<20 x double>)*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>) {
+; CHECK: entry:
+; CHECK-NEXT: %20 = alloca <2 x double>, align 16
+; CHECK-NEXT: %21 = alloca <2 x double>, align 16
+; CHECK-NEXT: %22 = alloca <2 x double>, align 16
+; CHECK-NEXT: %23 = alloca <2 x double>, align 16
+; CHECK-NEXT: %24 = alloca <2 x double>, align 16
+; CHECK-NEXT: %25 = alloca <2 x double>, align 16
+; CHECK-NEXT: %26 = alloca <2 x double>, align 16
+; CHECK-NEXT: %27 = alloca <2 x double>, align 16
+; CHECK-NEXT: %28 = alloca <2 x double>, align 16
+; CHECK-NEXT: %29 = bitcast <20 x double> (<20 x double>)* %9 to <2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)*
+; CHECK-NEXT: %30 = call <2 x double> %29(<2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double>* nocapture nonnull dereferenceable(16) %22, <2 x double>* nocapture nonnull dereferenceable(16) %23, <2 x double>* nocapture nonnull dereferenceable(16) %24, <2 x double>* nocapture nonnull dereferenceable(16) %25, <2 x double>* nocapture nonnull dereferenceable(16) %26, <2 x double>* nocapture nonnull dereferenceable(16) %27, <2 x double>* nocapture nonnull dereferenceable(16) %28, <2 x double> %10, <2 x double> %11, <2 x double> %12, <2 x double> %13, <2 x double> %14, <2 x double> %15, <2 x double> %16, <2 x double> %17, <2 x double> %18, <2 x double> %19)
+; CHECK-NEXT: %31 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %32 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: %33 = load <2 x double>, <2 x double>* %22, align 16
+; CHECK-NEXT: %34 = load <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: %35 = load <2 x double>, <2 x double>* %24, align 16
+; CHECK-NEXT: %36 = load <2 x double>, <2 x double>* %25, align 16
+; CHECK-NEXT: %37 = load <2 x double>, <2 x double>* %26, align 16
+; CHECK-NEXT: %38 = load <2 x double>, <2 x double>* %27, align 16
+; CHECK-NEXT: %39 = load <2 x double>, <2 x double>* %28, align 16
+; CHECK-NEXT: %40 = call <2 x double> @fn_97_20xdouble_call_arg(<2 x double>* nocapture nonnull dereferenceable(16) %20, <2 x double>* nocapture nonnull dereferenceable(16) %21, <2 x double>* nocapture nonnull dereferenceable(16) %22, <2 x double>* nocapture nonnull dereferenceable(16) %23, <2 x double>* nocapture nonnull dereferenceable(16) %24, <2 x double>* nocapture nonnull dereferenceable(16) %25, <2 x double>* nocapture nonnull dereferenceable(16) %26, <2 x double>* nocapture nonnull dereferenceable(16) %27, <2 x double>* nocapture nonnull dereferenceable(16) %28, <20 x double> (<20 x double>)* bitcast (<2 x double> (<2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)* @fn_48_20xdouble to <20 x double> (<20 x double>)*), <2 x double> %30, <2 x double> %31, <2 x double> %32, <2 x double> %33, <2 x double> %34, <2 x double> %35, <2 x double> %36, <2 x double> %37, <2 x double> %38, <2 x double> %39)
+; CHECK-NEXT: %41 = load <2 x double>, <2 x double>* %20, align 16
+; CHECK-NEXT: %42 = load <2 x double>, <2 x double>* %21, align 16
+; CHECK-NEXT: %43 = load <2 x double>, <2 x double>* %22, align 16
+; CHECK-NEXT: %44 = load <2 x double>, <2 x double>* %23, align 16
+; CHECK-NEXT: %45 = load <2 x double>, <2 x double>* %24, align 16
+; CHECK-NEXT: %46 = load <2 x double>, <2 x double>* %25, align 16
+; CHECK-NEXT: %47 = load <2 x double>, <2 x double>* %26, align 16
+; CHECK-NEXT: %48 = load <2 x double>, <2 x double>* %27, align 16
+; CHECK-NEXT: %49 = load <2 x double>, <2 x double>* %28, align 16
+; CHECK-NEXT: store <2 x double> %41, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %42, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %43, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %45, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %46, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %47, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %48, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %49, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %40
+; CHECK-NEXT: }
+
« no previous file with comments | « test/Transforms/NaCl/vector-canonicalization-binops.ll ('k') | test/Transforms/NaCl/vector-canonicalization-casts.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698