Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(126)

Unified Diff: test/Transforms/NaCl/vector-canonicalization-casts.ll

Issue 1423873002: PNaCl: Add a vector type legalization pass. Base URL: https://chromium.googlesource.com/native_client/pnacl-llvm.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: test/Transforms/NaCl/vector-canonicalization-casts.ll
diff --git a/test/Transforms/NaCl/vector-canonicalization-casts.ll b/test/Transforms/NaCl/vector-canonicalization-casts.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b0adf51354c55ad67f61e119103805bcb1cd3de3
--- /dev/null
+++ b/test/Transforms/NaCl/vector-canonicalization-casts.ll
@@ -0,0 +1,12476 @@
+; RUN: opt -S -pnacl-vector-canonicalization %s | FileCheck %s
+
+; Auto-generated tests for all casts. Each function is followed by the output expected from the pass invoked above.
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128" ; little-endian; 32-bit pointers; 128-bit vectors: 32-bit ABI alignment, 128-bit preferred alignment
+
+define <2 x i8> @fptoui_cast_2xfloat_to_2xi8(<2 x float>) { ; fptoui 2 x float -> 2 x i8: expected output has signature <16 x i8>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x float> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_2xfloat_to_2xi8(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptoui float %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptoui float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+define <2 x i16> @fptoui_cast_2xfloat_to_2xi16(<2 x float>) { ; fptoui 2 x float -> 2 x i16: expected output has signature <8 x i16>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x float> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_2xfloat_to_2xi16(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptoui float %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptoui float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+define <2 x i32> @fptoui_cast_2xfloat_to_2xi32(<2 x float>) { ; fptoui 2 x float -> 2 x i32: expected output has signature <4 x i32>(<4 x float>) and a single vector fptoui
+ %2 = fptoui <2 x float> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_2xfloat_to_2xi32(<4 x float>)
+; CHECK-NEXT: %2 = fptoui <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+define <2 x i64> @fptoui_cast_2xfloat_to_2xi64(<2 x float>) { ; fptoui 2 x float -> 2 x i64: expected output has signature <2 x i64>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x float> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_2xfloat_to_2xi64(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptoui float %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptoui float %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+define <2 x i8> @fptoui_cast_2xdouble_to_2xi8(<2 x double>) { ; fptoui 2 x double -> 2 x i8: expected output has signature <16 x i8>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x double> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_2xdouble_to_2xi8(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptoui double %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptoui double %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+define <2 x i16> @fptoui_cast_2xdouble_to_2xi16(<2 x double>) { ; fptoui 2 x double -> 2 x i16: expected output has signature <8 x i16>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x double> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_2xdouble_to_2xi16(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptoui double %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptoui double %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+define <2 x i32> @fptoui_cast_2xdouble_to_2xi32(<2 x double>) { ; fptoui 2 x double -> 2 x i32: expected output has signature <4 x i32>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptoui <2 x double> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_2xdouble_to_2xi32(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptoui double %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptoui double %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i64> @fptoui_cast_2xdouble_to_2xi64(<2 x double>) { ; fptoui 2 x double -> 2 x i64: expected output is identical to the input (same signature, single vector fptoui)
+ %2 = fptoui <2 x double> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_2xdouble_to_2xi64(<2 x double>)
+; CHECK-NEXT: %2 = fptoui <2 x double> %0 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+define <2 x i8> @fptosi_cast_2xfloat_to_2xi8(<2 x float>) { ; fptosi 2 x float -> 2 x i8: expected output has signature <16 x i8>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x float> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_2xfloat_to_2xi8(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptosi float %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptosi float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+define <2 x i16> @fptosi_cast_2xfloat_to_2xi16(<2 x float>) { ; fptosi 2 x float -> 2 x i16: expected output has signature <8 x i16>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x float> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_2xfloat_to_2xi16(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptosi float %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptosi float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+define <2 x i32> @fptosi_cast_2xfloat_to_2xi32(<2 x float>) { ; fptosi 2 x float -> 2 x i32: expected output has signature <4 x i32>(<4 x float>) and a single vector fptosi
+ %2 = fptosi <2 x float> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_2xfloat_to_2xi32(<4 x float>)
+; CHECK-NEXT: %2 = fptosi <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+define <2 x i64> @fptosi_cast_2xfloat_to_2xi64(<2 x float>) { ; fptosi 2 x float -> 2 x i64: expected output has signature <2 x i64>(<4 x float>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x float> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_2xfloat_to_2xi64(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptosi float %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptosi float %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+define <2 x i8> @fptosi_cast_2xdouble_to_2xi8(<2 x double>) { ; fptosi 2 x double -> 2 x i8: expected output has signature <16 x i8>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x double> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_2xdouble_to_2xi8(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptosi double %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptosi double %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+define <2 x i16> @fptosi_cast_2xdouble_to_2xi16(<2 x double>) { ; fptosi 2 x double -> 2 x i16: expected output has signature <8 x i16>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x double> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_2xdouble_to_2xi16(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptosi double %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptosi double %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+define <2 x i32> @fptosi_cast_2xdouble_to_2xi32(<2 x double>) { ; fptosi 2 x double -> 2 x i32: expected output has signature <4 x i32>(<2 x double>) and converts lanes 0 and 1 one scalar at a time
+ %2 = fptosi <2 x double> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_2xdouble_to_2xi32(<2 x double>)
+; CHECK-NEXT: %2 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %3 = fptosi double %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %6 = fptosi double %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+define <2 x i64> @fptosi_cast_2xdouble_to_2xi64(<2 x double>) { ; fptosi 2 x double -> 2 x i64: expected output is identical to the input (same signature, single vector fptosi)
+ %2 = fptosi <2 x double> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_2xdouble_to_2xi64(<2 x double>)
+; CHECK-NEXT: %2 = fptosi <2 x double> %0 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+define <2 x float> @uitofp_cast_2xi8_to_2xfloat(<2 x i8>) { ; uitofp 2 x i8 -> 2 x float: expected output has signature <4 x float>(<16 x i8>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i8> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_2xi8_to_2xfloat(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i8 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x float> @uitofp_cast_2xi16_to_2xfloat(<2 x i16>) { ; uitofp 2 x i16 -> 2 x float: expected output has signature <4 x float>(<8 x i16>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i16> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_2xi16_to_2xfloat(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i16 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x float> @uitofp_cast_2xi32_to_2xfloat(<2 x i32>) { ; uitofp 2 x i32 -> 2 x float: expected output has signature <4 x float>(<4 x i32>) and a single vector uitofp
+ %2 = uitofp <2 x i32> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_2xi32_to_2xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = uitofp <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+define <2 x float> @uitofp_cast_2xi64_to_2xfloat(<2 x i64>) { ; uitofp 2 x i64 -> 2 x float: expected output has signature <4 x float>(<2 x i64>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i64> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_2xi64_to_2xfloat(<2 x i64>)
+; CHECK-NEXT: %2 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i64 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i64 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x double> @uitofp_cast_2xi8_to_2xdouble(<2 x i8>) { ; uitofp 2 x i8 -> 2 x double: expected output has signature <2 x double>(<16 x i8>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i8> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_2xi8_to_2xdouble(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i8 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i8 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @uitofp_cast_2xi16_to_2xdouble(<2 x i16>) { ; uitofp 2 x i16 -> 2 x double: expected output has signature <2 x double>(<8 x i16>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i16> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_2xi16_to_2xdouble(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i16 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i16 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @uitofp_cast_2xi32_to_2xdouble(<2 x i32>) { ; uitofp 2 x i32 -> 2 x double: expected output has signature <2 x double>(<4 x i32>) and converts lanes 0 and 1 one scalar at a time
+ %2 = uitofp <2 x i32> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_2xi32_to_2xdouble(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i32 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i32 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @uitofp_cast_2xi64_to_2xdouble(<2 x i64>) { ; uitofp 2 x i64 -> 2 x double: expected output is identical to the input (same signature, single vector uitofp)
+ %2 = uitofp <2 x i64> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_2xi64_to_2xdouble(<2 x i64>)
+; CHECK-NEXT: %2 = uitofp <2 x i64> %0 to <2 x double>
+; CHECK-NEXT: ret <2 x double> %2
+
+define <2 x float> @sitofp_cast_2xi8_to_2xfloat(<2 x i8>) { ; sitofp 2 x i8 -> 2 x float: expected output has signature <4 x float>(<16 x i8>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i8> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_2xi8_to_2xfloat(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i8 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x float> @sitofp_cast_2xi16_to_2xfloat(<2 x i16>) { ; sitofp 2 x i16 -> 2 x float: expected output has signature <4 x float>(<8 x i16>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i16> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_2xi16_to_2xfloat(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i16 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x float> @sitofp_cast_2xi32_to_2xfloat(<2 x i32>) { ; sitofp 2 x i32 -> 2 x float: expected output has signature <4 x float>(<4 x i32>) and a single vector sitofp
+ %2 = sitofp <2 x i32> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_2xi32_to_2xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = sitofp <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+define <2 x float> @sitofp_cast_2xi64_to_2xfloat(<2 x i64>) { ; sitofp 2 x i64 -> 2 x float: expected output has signature <4 x float>(<2 x i64>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i64> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_2xi64_to_2xfloat(<2 x i64>)
+; CHECK-NEXT: %2 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i64 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i64 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: ret <4 x float> %7
+
+define <2 x double> @sitofp_cast_2xi8_to_2xdouble(<2 x i8>) { ; sitofp 2 x i8 -> 2 x double: expected output has signature <2 x double>(<16 x i8>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i8> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_2xi8_to_2xdouble(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i8 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i8 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @sitofp_cast_2xi16_to_2xdouble(<2 x i16>) { ; sitofp 2 x i16 -> 2 x double: expected output has signature <2 x double>(<8 x i16>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i16> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_2xi16_to_2xdouble(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i16 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i16 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @sitofp_cast_2xi32_to_2xdouble(<2 x i32>) { ; sitofp 2 x i32 -> 2 x double: expected output has signature <2 x double>(<4 x i32>) and converts lanes 0 and 1 one scalar at a time
+ %2 = sitofp <2 x i32> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_2xi32_to_2xdouble(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i32 %2 to double
+; CHECK-NEXT: %4 = insertelement <2 x double> undef, double %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i32 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> %4, double %6, i32 1
+; CHECK-NEXT: ret <2 x double> %7
+
+define <2 x double> @sitofp_cast_2xi64_to_2xdouble(<2 x i64>) { ; sitofp 2 x i64 -> 2 x double: expected output is identical to the input (same signature, single vector sitofp)
+ %2 = sitofp <2 x i64> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_2xi64_to_2xdouble(<2 x i64>)
+; CHECK-NEXT: %2 = sitofp <2 x i64> %0 to <2 x double>
+; CHECK-NEXT: ret <2 x double> %2
+
+define <4 x i8> @fptoui_cast_4xfloat_to_4xi8(<4 x float>) { ; fptoui 4 x float -> 4 x i8: expected output has signature <16 x i8>(<4 x float>) and converts lanes 0-3 one scalar at a time
+ %2 = fptoui <4 x float> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_4xfloat_to_4xi8(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptoui float %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptoui float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %9 = fptoui float %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %12 = fptoui float %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: ret <16 x i8> %13
+
+define <4 x i16> @fptoui_cast_4xfloat_to_4xi16(<4 x float>) { ; fptoui 4 x float -> 4 x i16: expected output has signature <8 x i16>(<4 x float>) and converts lanes 0-3 one scalar at a time
+ %2 = fptoui <4 x float> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_4xfloat_to_4xi16(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptoui float %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptoui float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %9 = fptoui float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %12 = fptoui float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: ret <8 x i16> %13
+
+define <4 x i32> @fptoui_cast_4xfloat_to_4xi32(<4 x float>) { ; fptoui 4 x float -> 4 x i32: expected output is identical to the input (same signature, single vector fptoui)
+ %2 = fptoui <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_4xfloat_to_4xi32(<4 x float>)
+; CHECK-NEXT: %2 = fptoui <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+define <4 x i64> @fptoui_cast_4xfloat_to_4xi64(<4 x float>) { ; fptoui 4 x float -> 4 x i64: expected output converts lanes one scalar at a time, returns lanes 0-1 as <2 x i64>, and stores lanes 2-3 through an added leading pointer argument
+ %2 = fptoui <4 x float> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_4xfloat_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %4 = fptoui float %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %7 = fptoui float %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %10 = fptoui float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %13 = fptoui float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+define <4 x i8> @fptoui_cast_4xdouble_to_4xi8(<4 x double>) { ; fptoui 4 x double -> 4 x i8: expected output has signature <16 x i8>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptoui <4 x double> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_4xdouble_to_4xi8(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptoui double %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptoui double %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptoui double %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptoui double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: ret <16 x i8> %14
+
+define <4 x i16> @fptoui_cast_4xdouble_to_4xi16(<4 x double>) { ; fptoui 4 x double -> 4 x i16: expected output has signature <8 x i16>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptoui <4 x double> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_4xdouble_to_4xi16(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptoui double %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptoui double %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptoui double %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptoui double %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: ret <8 x i16> %14
+
+define <4 x i32> @fptoui_cast_4xdouble_to_4xi32(<4 x double>) { ; fptoui 4 x double -> 4 x i32: expected output has signature <4 x i32>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptoui <4 x double> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_4xdouble_to_4xi32(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptoui double %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptoui double %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptoui double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptoui double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <4 x i64> @fptoui_cast_4xdouble_to_4xi64(<4 x double>) { ; fptoui 4 x double -> 4 x i64: expected output casts each <2 x double> argument as a vector, returns the first result, and stores the second through an added leading pointer argument
+ %2 = fptoui <4 x double> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_4xdouble_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = fptoui <2 x double> %1 to <2 x i64>
+; CHECK-NEXT: %5 = fptoui <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %4
+
+define <4 x i8> @fptosi_cast_4xfloat_to_4xi8(<4 x float>) { ; fptosi 4 x float -> 4 x i8: expected output has signature <16 x i8>(<4 x float>) and converts lanes 0-3 one scalar at a time
+ %2 = fptosi <4 x float> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_4xfloat_to_4xi8(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptosi float %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptosi float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %9 = fptosi float %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %12 = fptosi float %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: ret <16 x i8> %13
+
+define <4 x i16> @fptosi_cast_4xfloat_to_4xi16(<4 x float>) { ; fptosi 4 x float -> 4 x i16: expected output has signature <8 x i16>(<4 x float>) and converts lanes 0-3 one scalar at a time
+ %2 = fptosi <4 x float> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_4xfloat_to_4xi16(<4 x float>)
+; CHECK-NEXT: %2 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %3 = fptosi float %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %6 = fptosi float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %9 = fptosi float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %12 = fptosi float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: ret <8 x i16> %13
+
+define <4 x i32> @fptosi_cast_4xfloat_to_4xi32(<4 x float>) { ; fptosi 4 x float -> 4 x i32: expected output is identical to the input (same signature, single vector fptosi)
+ %2 = fptosi <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_4xfloat_to_4xi32(<4 x float>)
+; CHECK-NEXT: %2 = fptosi <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+define <4 x i64> @fptosi_cast_4xfloat_to_4xi64(<4 x float>) { ; fptosi 4 x float -> 4 x i64: expected output converts lanes one scalar at a time, returns lanes 0-1 as <2 x i64>, and stores lanes 2-3 through an added leading pointer argument
+ %2 = fptosi <4 x float> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_4xfloat_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %4 = fptosi float %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %7 = fptosi float %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %10 = fptosi float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %13 = fptosi float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+define <4 x i8> @fptosi_cast_4xdouble_to_4xi8(<4 x double>) { ; fptosi 4 x double -> 4 x i8: expected output has signature <16 x i8>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptosi <4 x double> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_4xdouble_to_4xi8(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptosi double %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptosi double %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptosi double %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptosi double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: ret <16 x i8> %14
+
+define <4 x i16> @fptosi_cast_4xdouble_to_4xi16(<4 x double>) { ; fptosi 4 x double -> 4 x i16: expected output has signature <8 x i16>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptosi <4 x double> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_4xdouble_to_4xi16(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptosi double %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptosi double %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptosi double %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptosi double %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: ret <8 x i16> %14
+
+define <4 x i32> @fptosi_cast_4xdouble_to_4xi32(<4 x double>) { ; fptosi 4 x double -> 4 x i32: expected output has signature <4 x i32>(<2 x double>, <2 x double>) and converts both lanes of each argument one scalar at a time
+ %2 = fptosi <4 x double> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_4xdouble_to_4xi32(<2 x double>, <2 x double>)
+; CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %4 = fptosi double %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %7 = fptosi double %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %10 = fptosi double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %13 = fptosi double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <4 x i64> @fptosi_cast_4xdouble_to_4xi64(<4 x double>) { ; fptosi 4 x double -> 4 x i64: expected output casts each <2 x double> argument as a vector, returns the first result, and stores the second through an added leading pointer argument
+ %2 = fptosi <4 x double> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_4xdouble_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = fptosi <2 x double> %1 to <2 x i64>
+; CHECK-NEXT: %5 = fptosi <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %4
+
+define <4 x float> @uitofp_cast_4xi8_to_4xfloat(<4 x i8>) { ; uitofp 4 x i8 -> 4 x float: expected output has signature <4 x float>(<16 x i8>) and converts lanes 0-3 one scalar at a time
+ %2 = uitofp <4 x i8> %0 to <4 x float>
+ ret <4 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_4xi8_to_4xfloat(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i8 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = uitofp i8 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = uitofp i8 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 3
+; CHECK-NEXT: ret <4 x float> %13
+
+define <4 x float> @uitofp_cast_4xi16_to_4xfloat(<4 x i16>) { ; uitofp 4 x i16 -> 4 x float: expected output has signature <4 x float>(<8 x i16>) and converts lanes 0-3 one scalar at a time
+ %2 = uitofp <4 x i16> %0 to <4 x float>
+ ret <4 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_4xi16_to_4xfloat(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = uitofp i16 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = uitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = uitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = uitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 3
+; CHECK-NEXT: ret <4 x float> %13
+
+define <4 x float> @uitofp_cast_4xi32_to_4xfloat(<4 x i32>) { ; uitofp 4 x i32 -> 4 x float: expected output is identical to the input (same signature, single vector uitofp)
+ %2 = uitofp <4 x i32> %0 to <4 x float>
+ ret <4 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_4xi32_to_4xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = uitofp <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+define <4 x float> @uitofp_cast_4xi64_to_4xfloat(<4 x i64>) { ; Input: unsigned-int-to-float cast of a 4-lane i64 vector.
+ %2 = uitofp <4 x i64> %0 to <4 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x float> %2 ; Expected output below: argument split into two <2 x i64>; each lane converted as a scalar and inserted into one <4 x float>.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_4xi64_to_4xfloat(<2 x i64>, <2 x i64>)
+; CHECK-NEXT: %3 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %4 = uitofp i64 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %7 = uitofp i64 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %10 = uitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %13 = uitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: ret <4 x float> %14
+
+define <4 x double> @uitofp_cast_4xi8_to_4xdouble(<4 x i8>) { ; Input: unsigned-int-to-double cast of a 4-lane i8 vector.
+ %2 = uitofp <4 x i8> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument widened to <16 x i8>; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_4xi8_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i8 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i8 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i8 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @uitofp_cast_4xi16_to_4xdouble(<4 x i16>) { ; Input: unsigned-int-to-double cast of a 4-lane i16 vector.
+ %2 = uitofp <4 x i16> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument widened to <8 x i16>; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_4xi16_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i16 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i16 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i16 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i16 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @uitofp_cast_4xi32_to_4xdouble(<4 x i32>) { ; Input: unsigned-int-to-double cast of a 4-lane i32 vector.
+ %2 = uitofp <4 x i32> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument stays <4 x i32> but is converted lane by lane; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_4xi32_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i32 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i32 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @uitofp_cast_4xi64_to_4xdouble(<4 x i64>) { ; Input: unsigned-int-to-double cast of a 4-lane i64 vector.
+ %2 = uitofp <4 x i64> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument split into two <2 x i64>, each converted by one vector uitofp; first result returned, second stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_4xi64_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %4 = uitofp <2 x i64> %1 to <2 x double>
+; CHECK-NEXT: %5 = uitofp <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %4
+
+define <4 x float> @sitofp_cast_4xi8_to_4xfloat(<4 x i8>) { ; Input: signed-int-to-float cast of a 4-lane i8 vector.
+ %2 = sitofp <4 x i8> %0 to <4 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x float> %2 ; Expected output below: argument widened to <16 x i8>; lanes 0-3 converted as scalars and reassembled into <4 x float>.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_4xi8_to_4xfloat(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i8 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = sitofp i8 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = sitofp i8 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 3
+; CHECK-NEXT: ret <4 x float> %13
+
+define <4 x float> @sitofp_cast_4xi16_to_4xfloat(<4 x i16>) { ; Input: signed-int-to-float cast of a 4-lane i16 vector.
+ %2 = sitofp <4 x i16> %0 to <4 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x float> %2 ; Expected output below: argument widened to <8 x i16>; lanes 0-3 converted as scalars and reassembled into <4 x float>.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_4xi16_to_4xfloat(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sitofp i16 %2 to float
+; CHECK-NEXT: %4 = insertelement <4 x float> undef, float %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> %4, float %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = sitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = sitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 3
+; CHECK-NEXT: ret <4 x float> %13
+
+define <4 x float> @sitofp_cast_4xi32_to_4xfloat(<4 x i32>) { ; Input: signed-int-to-float cast of a 4-lane i32 vector.
+ %2 = sitofp <4 x i32> %0 to <4 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x float> %2 ; Expected output below: signature and body unchanged (one vector sitofp).
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_4xi32_to_4xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = sitofp <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+define <4 x float> @sitofp_cast_4xi64_to_4xfloat(<4 x i64>) { ; Input: signed-int-to-float cast of a 4-lane i64 vector.
+ %2 = sitofp <4 x i64> %0 to <4 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x float> %2 ; Expected output below: argument split into two <2 x i64>; each lane converted as a scalar and inserted into one <4 x float>.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_4xi64_to_4xfloat(<2 x i64>, <2 x i64>)
+; CHECK-NEXT: %3 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %4 = sitofp i64 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %7 = sitofp i64 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %10 = sitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %13 = sitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: ret <4 x float> %14
+
+define <4 x double> @sitofp_cast_4xi8_to_4xdouble(<4 x i8>) { ; Input: signed-int-to-double cast of a 4-lane i8 vector.
+ %2 = sitofp <4 x i8> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument widened to <16 x i8>; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_4xi8_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i8 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i8 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i8 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @sitofp_cast_4xi16_to_4xdouble(<4 x i16>) { ; Input: signed-int-to-double cast of a 4-lane i16 vector.
+ %2 = sitofp <4 x i16> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument widened to <8 x i16>; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_4xi16_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i16 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i16 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i16 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i16 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @sitofp_cast_4xi32_to_4xdouble(<4 x i32>) { ; Input: signed-int-to-double cast of a 4-lane i32 vector.
+ %2 = sitofp <4 x i32> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument stays <4 x i32> but is converted lane by lane; lanes 0-1 returned as <2 x double>, lanes 2-3 stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_4xi32_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i32 %3 to double
+; CHECK-NEXT: %5 = insertelement <2 x double> undef, double %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i32 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> %5, double %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <4 x double> @sitofp_cast_4xi64_to_4xdouble(<4 x i64>) { ; Input: signed-int-to-double cast of a 4-lane i64 vector.
+ %2 = sitofp <4 x i64> %0 to <4 x double> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <4 x double> %2 ; Expected output below: argument split into two <2 x i64>, each converted by one vector sitofp; first result returned, second stored (align 16) through a new leading <2 x double>* parameter.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_4xi64_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %4 = sitofp <2 x i64> %1 to <2 x double>
+; CHECK-NEXT: %5 = sitofp <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %4
+
+define <6 x i8> @fptoui_cast_6xfloat_to_6xi8(<6 x float>) { ; Input: float-to-unsigned-int cast of a 6-lane float vector to i8.
+ %2 = fptoui <6 x float> %0 to <6 x i8> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i8> %2 ; Expected output below: argument split into two <4 x float> (only lanes 0-1 of the second are read); return widened to <16 x i8>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_6xfloat_to_6xi8(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptoui float %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptoui float %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptoui float %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptoui float %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptoui float %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptoui float %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: ret <16 x i8> %20
+
+define <6 x i16> @fptoui_cast_6xfloat_to_6xi16(<6 x float>) { ; Input: float-to-unsigned-int cast of a 6-lane float vector to i16.
+ %2 = fptoui <6 x float> %0 to <6 x i16> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i16> %2 ; Expected output below: argument split into two <4 x float> (only lanes 0-1 of the second are read); return widened to <8 x i16>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_6xfloat_to_6xi16(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptoui float %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptoui float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptoui float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptoui float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptoui float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptoui float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: ret <8 x i16> %20
+
+define <6 x i32> @fptoui_cast_6xfloat_to_6xi32(<6 x float>) { ; Input: float-to-unsigned-int cast of a 6-lane float vector to i32.
+ %2 = fptoui <6 x float> %0 to <6 x i32> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i32> %2 ; Expected output below: argument split into two <4 x float>, each converted by one vector fptoui; first <4 x i32> returned, second stored (align 16) through a new leading <4 x i32>* parameter.
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_6xfloat_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = fptoui <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = fptoui <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i64> @fptoui_cast_6xfloat_to_6xi64(<6 x float>) { ; Input: float-to-unsigned-int cast of a 6-lane float vector to i64.
+ %2 = fptoui <6 x float> %0 to <6 x i64> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i64> %2 ; Expected output below: argument split into two <4 x float>, six scalar conversions; lanes 0-1 returned as <2 x i64>, lanes 2-3 and 4-5 stored (align 16) through two new leading <2 x i64>* parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_6xfloat_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %6 = fptoui float %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %9 = fptoui float %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %12 = fptoui float %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %15 = fptoui float %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %18 = fptoui float %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %21 = fptoui float %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <6 x i8> @fptoui_cast_6xdouble_to_6xi8(<6 x double>) { ; Input: double-to-unsigned-int cast of a 6-lane double vector to i8.
+ %2 = fptoui <6 x double> %0 to <6 x i8> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i8> %2 ; Expected output below: argument split into three <2 x double>; return widened to <16 x i8>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_6xdouble_to_6xi8(<2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %5 = fptoui double %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %8 = fptoui double %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptoui double %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptoui double %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptoui double %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptoui double %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: ret <16 x i8> %21
+
+define <6 x i16> @fptoui_cast_6xdouble_to_6xi16(<6 x double>) { ; Input: double-to-unsigned-int cast of a 6-lane double vector to i16.
+ %2 = fptoui <6 x double> %0 to <6 x i16> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i16> %2 ; Expected output below: argument split into three <2 x double>; return widened to <8 x i16>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_6xdouble_to_6xi16(<2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %5 = fptoui double %4 to i16
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %8 = fptoui double %7 to i16
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptoui double %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptoui double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptoui double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptoui double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 5
+; CHECK-NEXT: ret <8 x i16> %21
+
+define <6 x i32> @fptoui_cast_6xdouble_to_6xi32(<6 x double>) { ; Input: double-to-unsigned-int cast of a 6-lane double vector to i32.
+ %2 = fptoui <6 x double> %0 to <6 x i32> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i32> %2 ; Expected output below: argument split into three <2 x double>, six scalar conversions; lanes 0-3 returned as <4 x i32>, lanes 4-5 placed in lanes 0-1 of a second <4 x i32> stored (align 16) through a new leading <4 x i32>* parameter.
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_6xdouble_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %6 = fptoui double %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %9 = fptoui double %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %12 = fptoui double %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %15 = fptoui double %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %18 = fptoui double %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %21 = fptoui double %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <6 x i64> @fptoui_cast_6xdouble_to_6xi64(<6 x double>) { ; Input: double-to-unsigned-int cast of a 6-lane double vector to i64.
+ %2 = fptoui <6 x double> %0 to <6 x i64> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i64> %2 ; Expected output below: argument split into three <2 x double>, each converted by one vector fptoui; first result returned, the other two stored (align 16) through two new leading <2 x i64>* parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_6xdouble_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %6 = fptoui <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: %7 = fptoui <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %8 = fptoui <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %6
+
+define <6 x i8> @fptosi_cast_6xfloat_to_6xi8(<6 x float>) { ; Input: float-to-signed-int cast of a 6-lane float vector to i8.
+ %2 = fptosi <6 x float> %0 to <6 x i8> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i8> %2 ; Expected output below: argument split into two <4 x float> (only lanes 0-1 of the second are read); return widened to <16 x i8>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_6xfloat_to_6xi8(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptosi float %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptosi float %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptosi float %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptosi float %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptosi float %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptosi float %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: ret <16 x i8> %20
+
+define <6 x i16> @fptosi_cast_6xfloat_to_6xi16(<6 x float>) { ; Input: float-to-signed-int cast of a 6-lane float vector to i16.
+ %2 = fptosi <6 x float> %0 to <6 x i16> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i16> %2 ; Expected output below: argument split into two <4 x float> (only lanes 0-1 of the second are read); return widened to <8 x i16>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_6xfloat_to_6xi16(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptosi float %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptosi float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptosi float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptosi float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptosi float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptosi float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: ret <8 x i16> %20
+
+define <6 x i32> @fptosi_cast_6xfloat_to_6xi32(<6 x float>) { ; Input: float-to-signed-int cast of a 6-lane float vector to i32.
+ %2 = fptosi <6 x float> %0 to <6 x i32> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i32> %2 ; Expected output below: argument split into two <4 x float>, each converted by one vector fptosi; first <4 x i32> returned, second stored (align 16) through a new leading <4 x i32>* parameter.
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_6xfloat_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = fptosi <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = fptosi <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+define <6 x i64> @fptosi_cast_6xfloat_to_6xi64(<6 x float>) { ; Input: float-to-signed-int cast of a 6-lane float vector to i64.
+ %2 = fptosi <6 x float> %0 to <6 x i64> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i64> %2 ; Expected output below: argument split into two <4 x float>, six scalar conversions; lanes 0-1 returned as <2 x i64>, lanes 2-3 and 4-5 stored (align 16) through two new leading <2 x i64>* parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_6xfloat_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %6 = fptosi float %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %9 = fptosi float %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %12 = fptosi float %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %15 = fptosi float %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %18 = fptosi float %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %21 = fptosi float %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <6 x i8> @fptosi_cast_6xdouble_to_6xi8(<6 x double>) { ; Input: double-to-signed-int cast of a 6-lane double vector to i8.
+ %2 = fptosi <6 x double> %0 to <6 x i8> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i8> %2 ; Expected output below: argument split into three <2 x double>; return widened to <16 x i8>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_6xdouble_to_6xi8(<2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %5 = fptosi double %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %8 = fptosi double %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptosi double %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptosi double %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptosi double %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptosi double %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: ret <16 x i8> %21
+
+define <6 x i16> @fptosi_cast_6xdouble_to_6xi16(<6 x double>) { ; Input: double-to-signed-int cast of a 6-lane double vector to i16.
+ %2 = fptosi <6 x double> %0 to <6 x i16> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i16> %2 ; Expected output below: argument split into three <2 x double>; return widened to <8 x i16>, lanes 0-5 filled by scalar conversions.
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_6xdouble_to_6xi16(<2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %5 = fptosi double %4 to i16
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %8 = fptosi double %7 to i16
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptosi double %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptosi double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptosi double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptosi double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 5
+; CHECK-NEXT: ret <8 x i16> %21
+
+define <6 x i32> @fptosi_cast_6xdouble_to_6xi32(<6 x double>) { ; Input: double-to-signed-int cast of a 6-lane double vector to i32.
+ %2 = fptosi <6 x double> %0 to <6 x i32> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i32> %2 ; Expected output below: argument split into three <2 x double>, six scalar conversions; lanes 0-3 returned as <4 x i32>, lanes 4-5 placed in lanes 0-1 of a second <4 x i32> stored (align 16) through a new leading <4 x i32>* parameter.
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_6xdouble_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %6 = fptosi double %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %9 = fptosi double %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %12 = fptosi double %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %15 = fptosi double %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %18 = fptosi double %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %21 = fptosi double %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <6 x i64> @fptosi_cast_6xdouble_to_6xi64(<6 x double>) { ; Input: double-to-signed-int cast of a 6-lane double vector to i64.
+ %2 = fptosi <6 x double> %0 to <6 x i64> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x i64> %2 ; Expected output below: argument split into three <2 x double>, each converted by one vector fptosi; first result returned, the other two stored (align 16) through two new leading <2 x i64>* parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_6xdouble_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %6 = fptosi <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: %7 = fptosi <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %8 = fptosi <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %6
+
+define <6 x float> @uitofp_cast_6xi8_to_6xfloat(<6 x i8>) { ; Input: unsigned-int-to-float cast of a 6-lane i8 vector.
+ %2 = uitofp <6 x i8> %0 to <6 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x float> %2 ; Expected output below: argument widened to <16 x i8>, six scalar conversions; lanes 0-3 returned as <4 x float>, lanes 4-5 placed in lanes 0-1 of a second <4 x float> stored (align 16) through a new leading <4 x float>* parameter.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_6xi8_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i8 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i8 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i8 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i8 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = uitofp i8 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = uitofp i8 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: store <4 x float> %20, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+define <6 x float> @uitofp_cast_6xi16_to_6xfloat(<6 x i16>) { ; Input: unsigned-int-to-float cast of a 6-lane i16 vector.
+ %2 = uitofp <6 x i16> %0 to <6 x float> ; %0 is the unnamed argument; the unnamed entry block takes %1, so this is %2.
+ ret <6 x float> %2 ; Expected output below: argument widened to <8 x i16>, six scalar conversions; lanes 0-3 returned as <4 x float>, lanes 4-5 placed in lanes 0-1 of a second <4 x float> stored (align 16) through a new leading <4 x float>* parameter.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_6xi16_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i16 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = uitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = uitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: store <4 x float> %20, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+; Test input: uitofp of a 6-element i32 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes two <4 x i32> values,
+; each is cast with a single vector uitofp, the first result is returned and
+; the second is stored through the added <4 x float>* argument.
+define <6 x float> @uitofp_cast_6xi32_to_6xfloat(<6 x i32>) {
+ %2 = uitofp <6 x i32> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_6xi32_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = uitofp <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = uitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+; Test input: uitofp of a 6-element i64 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes three <2 x i64> values,
+; the cast is done one element at a time, lanes 0-3 are returned as a
+; <4 x float>, and lanes 4-5 are stored through the added <4 x float>* argument.
+define <6 x float> @uitofp_cast_6xi64_to_6xfloat(<6 x i64>) {
+ %2 = uitofp <6 x i64> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_6xi64_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %5 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %6 = uitofp i64 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %9 = uitofp i64 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %12 = uitofp i64 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %15 = uitofp i64 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %18 = uitofp i64 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %21 = uitofp i64 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: store <4 x float> %22, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+; Test input: uitofp of a 6-element i8 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes one <16 x i8>, the cast
+; is done one element at a time into three <2 x double> values; the first is
+; returned and the other two are stored through the two added pointer arguments.
+define <6 x double> @uitofp_cast_6xi8_to_6xdouble(<6 x i8>) {
+ %2 = uitofp <6 x i8> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_6xi8_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = uitofp i8 %4 to double
+; CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = uitofp i8 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> %6, double %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = uitofp i8 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = uitofp i8 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = uitofp i8 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = uitofp i8 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %9
+
+; Test input: uitofp of a 6-element i16 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes one <8 x i16>, the cast
+; is done one element at a time into three <2 x double> values; the first is
+; returned and the other two are stored through the two added pointer arguments.
+define <6 x double> @uitofp_cast_6xi16_to_6xdouble(<6 x i16>) {
+ %2 = uitofp <6 x i16> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_6xi16_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %4 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %5 = uitofp i16 %4 to double
+; CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+; CHECK-NEXT: %7 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %8 = uitofp i16 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> %6, double %8, i32 1
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %11 = uitofp i16 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %14 = uitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %17 = uitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %20 = uitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %9
+
+; Test input: uitofp of a 6-element i32 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes two <4 x i32> values,
+; the cast is done one element at a time into three <2 x double> values; the
+; first is returned and the other two are stored through the added pointers.
+define <6 x double> @uitofp_cast_6xi32_to_6xdouble(<6 x i32>) {
+ %2 = uitofp <6 x i32> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_6xi32_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %6 = uitofp i32 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %9 = uitofp i32 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %12 = uitofp i32 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %15 = uitofp i32 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %18 = uitofp i32 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %21 = uitofp i32 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+; Test input: uitofp of a 6-element i64 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes three <2 x i64> values,
+; each is cast with a single vector uitofp, the first result is returned and
+; the other two are stored through the two added <2 x double>* arguments.
+define <6 x double> @uitofp_cast_6xi64_to_6xdouble(<6 x i64>) {
+ %2 = uitofp <6 x i64> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_6xi64_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = uitofp <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: %7 = uitofp <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %8 = uitofp <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %6
+
+; Test input: sitofp of a 6-element i8 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes one <16 x i8>, the cast
+; is done one element at a time, lanes 0-3 are returned as a <4 x float>, and
+; lanes 4-5 are stored through the added <4 x float>* argument.
+define <6 x float> @sitofp_cast_6xi8_to_6xfloat(<6 x i8>) {
+ %2 = sitofp <6 x i8> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_6xi8_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i8 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i8 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i8 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i8 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sitofp i8 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sitofp i8 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: store <4 x float> %20, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+; Test input: sitofp of a 6-element i16 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes one <8 x i16>, the cast
+; is done one element at a time, lanes 0-3 are returned as a <4 x float>, and
+; lanes 4-5 are stored through the added <4 x float>* argument.
+define <6 x float> @sitofp_cast_6xi16_to_6xfloat(<6 x i16>) {
+ %2 = sitofp <6 x i16> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_6xi16_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i16 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = sitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = sitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: store <4 x float> %20, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+; Test input: sitofp of a 6-element i32 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes two <4 x i32> values,
+; each is cast with a single vector sitofp, the first result is returned and
+; the second is stored through the added <4 x float>* argument.
+define <6 x float> @sitofp_cast_6xi32_to_6xfloat(<6 x i32>) {
+ %2 = sitofp <6 x i32> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_6xi32_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = sitofp <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = sitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+; Test input: sitofp of a 6-element i64 vector to a 6-element float vector.
+; Per the expected output below, the argument becomes three <2 x i64> values,
+; the cast is done one element at a time, lanes 0-3 are returned as a
+; <4 x float>, and lanes 4-5 are stored through the added <4 x float>* argument.
+define <6 x float> @sitofp_cast_6xi64_to_6xfloat(<6 x i64>) {
+ %2 = sitofp <6 x i64> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_6xi64_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %5 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %6 = sitofp i64 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %9 = sitofp i64 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %12 = sitofp i64 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %15 = sitofp i64 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %18 = sitofp i64 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %21 = sitofp i64 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: store <4 x float> %22, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+; Test input: sitofp of a 6-element i8 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes one <16 x i8>, the cast
+; is done one element at a time into three <2 x double> values; the first is
+; returned and the other two are stored through the two added pointer arguments.
+define <6 x double> @sitofp_cast_6xi8_to_6xdouble(<6 x i8>) {
+ %2 = sitofp <6 x i8> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_6xi8_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = sitofp i8 %4 to double
+; CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = sitofp i8 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> %6, double %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = sitofp i8 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = sitofp i8 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = sitofp i8 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = sitofp i8 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %9
+
+; Test input: sitofp of a 6-element i16 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes one <8 x i16>, the cast
+; is done one element at a time into three <2 x double> values; the first is
+; returned and the other two are stored through the two added pointer arguments.
+define <6 x double> @sitofp_cast_6xi16_to_6xdouble(<6 x i16>) {
+ %2 = sitofp <6 x i16> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_6xi16_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %4 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %5 = sitofp i16 %4 to double
+; CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+; CHECK-NEXT: %7 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %8 = sitofp i16 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> %6, double %8, i32 1
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %11 = sitofp i16 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %14 = sitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %17 = sitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %20 = sitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %9
+
+; Test input: sitofp of a 6-element i32 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes two <4 x i32> values,
+; the cast is done one element at a time into three <2 x double> values; the
+; first is returned and the other two are stored through the added pointers.
+define <6 x double> @sitofp_cast_6xi32_to_6xdouble(<6 x i32>) {
+ %2 = sitofp <6 x i32> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_6xi32_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %6 = sitofp i32 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %9 = sitofp i32 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %12 = sitofp i32 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %15 = sitofp i32 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %18 = sitofp i32 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %21 = sitofp i32 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+; Test input: sitofp of a 6-element i64 vector to a 6-element double vector.
+; Per the expected output below, the argument becomes three <2 x i64> values,
+; each is cast with a single vector sitofp, the first result is returned and
+; the other two are stored through the two added <2 x double>* arguments.
+define <6 x double> @sitofp_cast_6xi64_to_6xdouble(<6 x i64>) {
+ %2 = sitofp <6 x i64> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_6xi64_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = sitofp <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: %7 = sitofp <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %8 = sitofp <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %6
+
+; Test input: fptoui of an 8-element float vector to an 8-element i8 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time, and the eight results fill lanes 0-7
+; of a single returned <16 x i8> (no pointer arguments are added).
+define <8 x i8> @fptoui_cast_8xfloat_to_8xi8(<8 x float>) {
+ %2 = fptoui <8 x float> %0 to <8 x i8>
+ ret <8 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_8xfloat_to_8xi8(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptoui float %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptoui float %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptoui float %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptoui float %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptoui float %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptoui float %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %22 = fptoui float %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %25 = fptoui float %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 7
+; CHECK-NEXT: ret <16 x i8> %26
+
+; Test input: fptoui of an 8-element float vector to an 8-element i16 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time, and the eight results fill all lanes
+; of a single returned <8 x i16> (no pointer arguments are added).
+define <8 x i16> @fptoui_cast_8xfloat_to_8xi16(<8 x float>) {
+ %2 = fptoui <8 x float> %0 to <8 x i16>
+ ret <8 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_8xfloat_to_8xi16(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptoui float %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptoui float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptoui float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptoui float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptoui float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptoui float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %22 = fptoui float %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %25 = fptoui float %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: ret <8 x i16> %26
+
+; Test input: fptoui of an 8-element float vector to an 8-element i32 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; each is cast with a single vector fptoui, the first result is returned and
+; the second is stored through the added <4 x i32>* argument.
+define <8 x i32> @fptoui_cast_8xfloat_to_8xi32(<8 x float>) {
+ %2 = fptoui <8 x float> %0 to <8 x i32>
+ ret <8 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_8xfloat_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = fptoui <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = fptoui <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Test input: fptoui of an 8-element float vector to an 8-element i64 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time into four <2 x i64> values; the first
+; is returned and the other three are stored through three added pointers.
+define <8 x i64> @fptoui_cast_8xfloat_to_8xi64(<8 x float>) {
+ %2 = fptoui <8 x float> %0 to <8 x i64>
+ ret <8 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_8xfloat_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %7 = fptoui float %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> undef, i64 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %10 = fptoui float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> %8, i64 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %13 = fptoui float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %16 = fptoui float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %19 = fptoui float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %22 = fptoui float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %25 = fptoui float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %28 = fptoui float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %11
+
+; Test input: fptoui of an 8-element double vector to an 8-element i8 vector.
+; Per the expected output below, the argument becomes four <2 x double> values,
+; the cast is done one element at a time, and the eight results fill lanes 0-7
+; of a single returned <16 x i8> (no pointer arguments are added).
+define <8 x i8> @fptoui_cast_8xdouble_to_8xi8(<8 x double>) {
+ %2 = fptoui <8 x double> %0 to <8 x i8>
+ ret <8 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_8xdouble_to_8xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %6 = fptoui double %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %9 = fptoui double %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %12 = fptoui double %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %15 = fptoui double %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %18 = fptoui double %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %21 = fptoui double %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %24 = fptoui double %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %27 = fptoui double %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: ret <16 x i8> %28
+
+; Test input: fptoui of an 8-element double vector to an 8-element i16 vector.
+; Per the expected output below, the argument becomes four <2 x double> values,
+; the cast is done one element at a time, and the eight results fill all lanes
+; of a single returned <8 x i16> (no pointer arguments are added).
+define <8 x i16> @fptoui_cast_8xdouble_to_8xi16(<8 x double>) {
+ %2 = fptoui <8 x double> %0 to <8 x i16>
+ ret <8 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_8xdouble_to_8xi16(<2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %6 = fptoui double %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %9 = fptoui double %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %12 = fptoui double %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %15 = fptoui double %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %18 = fptoui double %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %21 = fptoui double %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %24 = fptoui double %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %27 = fptoui double %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: ret <8 x i16> %28
+
+; Test input: fptoui of an 8-element double vector to an 8-element i32 vector.
+; Per the expected output below, the argument becomes four <2 x double> values,
+; the cast is done one element at a time into two <4 x i32> values; the first
+; is returned and the second is stored through the added <4 x i32>* argument.
+define <8 x i32> @fptoui_cast_8xdouble_to_8xi32(<8 x double>) {
+ %2 = fptoui <8 x double> %0 to <8 x i32>
+ ret <8 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_8xdouble_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %6 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %7 = fptoui double %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> undef, i32 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %10 = fptoui double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %13 = fptoui double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %16 = fptoui double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %19 = fptoui double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> undef, i32 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %22 = fptoui double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %25 = fptoui double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 2
+; CHECK-NEXT: %27 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %28 = fptoui double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 3
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+; Test input: fptoui of an 8-element double vector to an 8-element i64 vector.
+; Per the expected output below, the argument becomes four <2 x double> values,
+; each is cast with a single vector fptoui, the first result is returned and
+; the other three are stored through the three added <2 x i64>* arguments.
+define <8 x i64> @fptoui_cast_8xdouble_to_8xi64(<8 x double>) {
+ %2 = fptoui <8 x double> %0 to <8 x i64>
+ ret <8 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_8xdouble_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %8 = fptoui <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %9 = fptoui <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: %10 = fptoui <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %11 = fptoui <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %11, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: fptosi of an 8-element float vector to an 8-element i8 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time, and the eight results fill lanes 0-7
+; of a single returned <16 x i8> (no pointer arguments are added).
+define <8 x i8> @fptosi_cast_8xfloat_to_8xi8(<8 x float>) {
+ %2 = fptosi <8 x float> %0 to <8 x i8>
+ ret <8 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_8xfloat_to_8xi8(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptosi float %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptosi float %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptosi float %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptosi float %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptosi float %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptosi float %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %22 = fptosi float %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %25 = fptosi float %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 7
+; CHECK-NEXT: ret <16 x i8> %26
+
+; Test input: fptosi of an 8-element float vector to an 8-element i16 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time, and the eight results fill all lanes
+; of a single returned <8 x i16> (no pointer arguments are added).
+define <8 x i16> @fptosi_cast_8xfloat_to_8xi16(<8 x float>) {
+ %2 = fptosi <8 x float> %0 to <8 x i16>
+ ret <8 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_8xfloat_to_8xi16(<4 x float>, <4 x float>)
+; CHECK-NEXT: %3 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %4 = fptosi float %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %7 = fptosi float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %10 = fptosi float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %13 = fptosi float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %16 = fptosi float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %19 = fptosi float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %22 = fptosi float %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %25 = fptosi float %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: ret <8 x i16> %26
+
+; Test input: fptosi of an 8-element float vector to an 8-element i32 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; each is cast with a single vector fptosi, the first result is returned and
+; the second is stored through the added <4 x i32>* argument.
+define <8 x i32> @fptosi_cast_8xfloat_to_8xi32(<8 x float>) {
+ %2 = fptosi <8 x float> %0 to <8 x i32>
+ ret <8 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_8xfloat_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = fptosi <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = fptosi <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Test input: fptosi of an 8-element float vector to an 8-element i64 vector.
+; Per the expected output below, the argument becomes two <4 x float> values,
+; the cast is done one element at a time into four <2 x i64> values; the first
+; is returned and the other three are stored through three added pointers.
+define <8 x i64> @fptosi_cast_8xfloat_to_8xi64(<8 x float>) {
+ %2 = fptosi <8 x float> %0 to <8 x i64>
+ ret <8 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_8xfloat_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %7 = fptosi float %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> undef, i64 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %10 = fptosi float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> %8, i64 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %13 = fptosi float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %16 = fptosi float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %19 = fptosi float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %22 = fptosi float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %25 = fptosi float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %28 = fptosi float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %11
+
+define <8 x i8> @fptosi_cast_8xdouble_to_8xi8(<8 x double>) { ; Test: fptosi <8 x double> -> <8 x i8>; the argument is expected to arrive as four <2 x double> values (%0..%3).
+ %2 = fptosi <8 x double> %0 to <8 x i8> ; Expected to be scalarized: extractelement, scalar fptosi, insertelement for each of the 8 elements.
+ ret <8 x i8> %2 ; Expected: result is returned as a single <16 x i8>; only elements 0..7 are inserted, starting from undef.
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_8xdouble_to_8xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %6 = fptosi double %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %9 = fptosi double %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %12 = fptosi double %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %15 = fptosi double %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %18 = fptosi double %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %21 = fptosi double %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %24 = fptosi double %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %27 = fptosi double %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: ret <16 x i8> %28
+
+define <8 x i16> @fptosi_cast_8xdouble_to_8xi16(<8 x double>) { ; Test: fptosi <8 x double> -> <8 x i16>; the argument is expected to arrive as four <2 x double> values (%0..%3).
+ %2 = fptosi <8 x double> %0 to <8 x i16> ; Expected to be scalarized: extractelement, scalar fptosi, insertelement for each of the 8 elements.
+ ret <8 x i16> %2 ; Expected: the return type stays <8 x i16> and all 8 elements are inserted into one vector.
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_8xdouble_to_8xi16(<2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %5 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %6 = fptosi double %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %9 = fptosi double %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %12 = fptosi double %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %15 = fptosi double %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %18 = fptosi double %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %21 = fptosi double %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %24 = fptosi double %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %27 = fptosi double %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <8 x i32> @fptosi_cast_8xdouble_to_8xi32(<8 x double>) { ; Test: fptosi <8 x double> -> <8 x i32>; the argument is expected to arrive as four <2 x double> values (%1..%4).
+ %2 = fptosi <8 x double> %0 to <8 x i32> ; Expected to be scalarized: extractelement, scalar fptosi, insertelement for each of the 8 elements.
+ ret <8 x i32> %2 ; Expected: result is two <4 x i32>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_8xdouble_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %6 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %7 = fptosi double %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> undef, i32 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %10 = fptosi double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %13 = fptosi double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %16 = fptosi double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %19 = fptosi double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> undef, i32 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %22 = fptosi double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %25 = fptosi double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 2
+; CHECK-NEXT: %27 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %28 = fptosi double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 3
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <8 x i64> @fptosi_cast_8xdouble_to_8xi64(<8 x double>) { ; Test: fptosi <8 x double> -> <8 x i64>; the argument is expected to arrive as four <2 x double> values (%3..%6).
+ %2 = fptosi <8 x double> %0 to <8 x i64> ; Expected to stay vectorized: one <2 x double> -> <2 x i64> fptosi per argument piece.
+ ret <8 x i64> %2 ; Expected: the first <2 x i64> is returned; the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_8xdouble_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %8 = fptosi <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %9 = fptosi <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: %10 = fptosi <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %11 = fptosi <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %11, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+define <8 x float> @uitofp_cast_8xi8_to_8xfloat(<8 x i8>) { ; Test: uitofp <8 x i8> -> <8 x float>; the argument is expected to arrive as one <16 x i8> (%1) of which only elements 0..7 are read.
+ %2 = uitofp <8 x i8> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_8xi8_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i8 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i8 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i8 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i8 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = uitofp i8 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = uitofp i8 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = uitofp i8 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = uitofp i8 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: store <4 x float> %26, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+define <8 x float> @uitofp_cast_8xi16_to_8xfloat(<8 x i16>) { ; Test: uitofp <8 x i16> -> <8 x float>; the argument is expected to stay a single <8 x i16> (%1).
+ %2 = uitofp <8 x i16> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_8xi16_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = uitofp i16 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = uitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = uitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = uitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = uitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = uitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %22 = uitofp i16 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %25 = uitofp i16 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: store <4 x float> %26, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+define <8 x float> @uitofp_cast_8xi32_to_8xfloat(<8 x i32>) { ; Test: uitofp <8 x i32> -> <8 x float>; the argument is expected to arrive as two <4 x i32> values (%1, %2).
+ %2 = uitofp <8 x i32> %0 to <8 x float> ; Expected to stay vectorized: one <4 x i32> -> <4 x float> uitofp per argument half.
+ ret <8 x float> %2 ; Expected: the first <4 x float> is returned; the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_8xi32_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = uitofp <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = uitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+define <8 x float> @uitofp_cast_8xi64_to_8xfloat(<8 x i64>) { ; Test: uitofp <8 x i64> -> <8 x float>; the argument is expected to arrive as four <2 x i64> values (%1..%4).
+ %2 = uitofp <8 x i64> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_8xi64_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %7 = uitofp i64 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> undef, float %7, i32 0
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %10 = uitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 1
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %13 = uitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 2
+; CHECK-NEXT: %15 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %16 = uitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 3
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %19 = uitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> undef, float %19, i32 0
+; CHECK-NEXT: %21 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %22 = uitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 1
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %25 = uitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 2
+; CHECK-NEXT: %27 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %28 = uitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 3
+; CHECK-NEXT: store <4 x float> %29, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %17
+
+define <8 x double> @uitofp_cast_8xi8_to_8xdouble(<8 x i8>) { ; Test: uitofp <8 x i8> -> <8 x double>; the argument is expected to arrive as one <16 x i8> (%3) of which only elements 0..7 are read.
+ %2 = uitofp <8 x i8> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_8xi8_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = uitofp i8 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = uitofp i8 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = uitofp i8 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = uitofp i8 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = uitofp i8 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = uitofp i8 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = uitofp i8 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> undef, double %24, i32 0
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = uitofp i8 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> %25, double %27, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+define <8 x double> @uitofp_cast_8xi16_to_8xdouble(<8 x i16>) { ; Test: uitofp <8 x i16> -> <8 x double>; the argument is expected to stay a single <8 x i16> (%3).
+ %2 = uitofp <8 x i16> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_8xi16_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %6 = uitofp i16 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %9 = uitofp i16 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %12 = uitofp i16 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %15 = uitofp i16 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %18 = uitofp i16 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %21 = uitofp i16 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %24 = uitofp i16 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> undef, double %24, i32 0
+; CHECK-NEXT: %26 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %27 = uitofp i16 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> %25, double %27, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+define <8 x double> @uitofp_cast_8xi32_to_8xdouble(<8 x i32>) { ; Test: uitofp <8 x i32> -> <8 x double>; the argument is expected to arrive as two <4 x i32> values (%3, %4).
+ %2 = uitofp <8 x i32> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar uitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_8xi32_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %7 = uitofp i32 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> undef, double %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %10 = uitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> %8, double %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %13 = uitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %16 = uitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %19 = uitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %22 = uitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %25 = uitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %28 = uitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %11
+
+define <8 x double> @uitofp_cast_8xi64_to_8xdouble(<8 x i64>) { ; Test: uitofp <8 x i64> -> <8 x double>; the argument is expected to arrive as four <2 x i64> values (%3..%6).
+ %2 = uitofp <8 x i64> %0 to <8 x double> ; Expected to stay vectorized: one <2 x i64> -> <2 x double> uitofp per argument piece.
+ ret <8 x double> %2 ; Expected: the first <2 x double> is returned; the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_8xi64_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %8 = uitofp <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %9 = uitofp <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: %10 = uitofp <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %11 = uitofp <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %11, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <8 x float> @sitofp_cast_8xi8_to_8xfloat(<8 x i8>) { ; Test: sitofp <8 x i8> -> <8 x float>; the argument is expected to arrive as one <16 x i8> (%1) of which only elements 0..7 are read.
+ %2 = sitofp <8 x i8> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_8xi8_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i8 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i8 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i8 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i8 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sitofp i8 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sitofp i8 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = sitofp i8 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = sitofp i8 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: store <4 x float> %26, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+define <8 x float> @sitofp_cast_8xi16_to_8xfloat(<8 x i16>) { ; Test: sitofp <8 x i16> -> <8 x float>; the argument is expected to stay a single <8 x i16> (%1).
+ %2 = sitofp <8 x i16> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_8xi16_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sitofp i16 %3 to float
+; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> %5, float %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = sitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = sitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %22 = sitofp i16 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %25 = sitofp i16 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: store <4 x float> %26, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %14
+
+define <8 x float> @sitofp_cast_8xi32_to_8xfloat(<8 x i32>) { ; Test: sitofp <8 x i32> -> <8 x float>; the argument is expected to arrive as two <4 x i32> values (%1, %2).
+ %2 = sitofp <8 x i32> %0 to <8 x float> ; Expected to stay vectorized: one <4 x i32> -> <4 x float> sitofp per argument half.
+ ret <8 x float> %2 ; Expected: the first <4 x float> is returned; the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_8xi32_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = sitofp <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = sitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+define <8 x float> @sitofp_cast_8xi64_to_8xfloat(<8 x i64>) { ; Test: sitofp <8 x i64> -> <8 x float>; the argument is expected to arrive as four <2 x i64> values (%1..%4).
+ %2 = sitofp <8 x i64> %0 to <8 x float> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x float> %2 ; Expected: result is two <4 x float>; the first is returned, the second is stored through the pointer argument added as %0.
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_8xi64_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %7 = sitofp i64 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> undef, float %7, i32 0
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %10 = sitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 1
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %13 = sitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 2
+; CHECK-NEXT: %15 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %16 = sitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 3
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %19 = sitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> undef, float %19, i32 0
+; CHECK-NEXT: %21 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %22 = sitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 1
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %25 = sitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 2
+; CHECK-NEXT: %27 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %28 = sitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 3
+; CHECK-NEXT: store <4 x float> %29, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %17
+
+define <8 x double> @sitofp_cast_8xi8_to_8xdouble(<8 x i8>) { ; Test: sitofp <8 x i8> -> <8 x double>; the argument is expected to arrive as one <16 x i8> (%3) of which only elements 0..7 are read.
+ %2 = sitofp <8 x i8> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_8xi8_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = sitofp i8 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = sitofp i8 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = sitofp i8 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = sitofp i8 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = sitofp i8 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = sitofp i8 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = sitofp i8 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> undef, double %24, i32 0
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = sitofp i8 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> %25, double %27, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+define <8 x double> @sitofp_cast_8xi16_to_8xdouble(<8 x i16>) { ; Test: sitofp <8 x i16> -> <8 x double>; the argument is expected to stay a single <8 x i16> (%3).
+ %2 = sitofp <8 x i16> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_8xi16_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %6 = sitofp i16 %5 to double
+; CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %9 = sitofp i16 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> %7, double %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %12 = sitofp i16 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> undef, double %12, i32 0
+; CHECK-NEXT: %14 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %15 = sitofp i16 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> %13, double %15, i32 1
+; CHECK-NEXT: %17 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %18 = sitofp i16 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> undef, double %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %21 = sitofp i16 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> %19, double %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %24 = sitofp i16 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> undef, double %24, i32 0
+; CHECK-NEXT: %26 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %27 = sitofp i16 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> %25, double %27, i32 1
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %10
+
+define <8 x double> @sitofp_cast_8xi32_to_8xdouble(<8 x i32>) { ; Test: sitofp <8 x i32> -> <8 x double>; the argument is expected to arrive as two <4 x i32> values (%3, %4).
+ %2 = sitofp <8 x i32> %0 to <8 x double> ; Expected to be scalarized: extractelement, scalar sitofp, insertelement for each of the 8 elements.
+ ret <8 x double> %2 ; Expected: result is four <2 x double>; the first is returned, the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_8xi32_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %7 = sitofp i32 %6 to double
+; CHECK-NEXT: %8 = insertelement <2 x double> undef, double %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %10 = sitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> %8, double %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %13 = sitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %16 = sitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %19 = sitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %22 = sitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %25 = sitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %28 = sitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %11
+
+define <8 x double> @sitofp_cast_8xi64_to_8xdouble(<8 x i64>) { ; Test: sitofp <8 x i64> -> <8 x double>; the argument is expected to arrive as four <2 x i64> values (%3..%6).
+ %2 = sitofp <8 x i64> %0 to <8 x double> ; Expected to stay vectorized: one <2 x i64> -> <2 x double> sitofp per argument piece.
+ ret <8 x double> %2 ; Expected: the first <2 x double> is returned; the other three are stored through pointer arguments %0, %1, %2.
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_8xi64_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %8 = sitofp <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %9 = sitofp <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: %10 = sitofp <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %11 = sitofp <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %11, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+define <12 x i8> @fptoui_cast_12xfloat_to_12xi8(<12 x float>) { ; Test input: fp-to-unsigned-int cast of a 12-lane float vector down to i8; the single unnamed argument is %0.
+ %2 = fptoui <12 x float> %0 to <12 x i8> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i8> %2 ; Expected output (directives below): three <4 x float> arguments, each lane extracted and cast as a scalar, then inserted into lanes 0-11 of one <16 x i8> that is returned.
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_12xfloat_to_12xi8(<4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %5 = fptoui float %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %8 = fptoui float %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %11 = fptoui float %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %14 = fptoui float %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %17 = fptoui float %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %20 = fptoui float %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: %22 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %23 = fptoui float %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 6
+; CHECK-NEXT: %25 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %26 = fptoui float %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 7
+; CHECK-NEXT: %28 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %29 = fptoui float %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 8
+; CHECK-NEXT: %31 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %32 = fptoui float %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 9
+; CHECK-NEXT: %34 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %35 = fptoui float %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 10
+; CHECK-NEXT: %37 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %38 = fptoui float %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 11
+; CHECK-NEXT: ret <16 x i8> %39
+
+define <12 x i16> @fptoui_cast_12xfloat_to_12xi16(<12 x float>) { ; Test input: fp-to-unsigned-int cast of a 12-lane float vector to i16; the single unnamed argument is %0.
+ %2 = fptoui <12 x float> %0 to <12 x i16> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i16> %2 ; Expected output (directives below): lane-by-lane scalar casts; lanes 0-7 fill the returned <8 x i16>, lanes 8-11 fill lanes 0-3 of a second <8 x i16> stored through the leading pointer parameter.
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_12xfloat_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %6 = fptoui float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %9 = fptoui float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %12 = fptoui float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %15 = fptoui float %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %18 = fptoui float %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %21 = fptoui float %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %24 = fptoui float %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %27 = fptoui float %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %30 = fptoui float %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> undef, i16 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %33 = fptoui float %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> %31, i16 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %36 = fptoui float %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %39 = fptoui float %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 3
+; CHECK-NEXT: store <8 x i16> %40, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <12 x i32> @fptoui_cast_12xfloat_to_12xi32(<12 x float>) { ; Test input: fp-to-unsigned-int cast of a 12-lane float vector to i32; the single unnamed argument is %0.
+ %2 = fptoui <12 x float> %0 to <12 x i32> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i32> %2 ; Expected output (directives below): three whole-vector <4 x float> to <4 x i32> casts; first result returned, the other two stored through the two leading pointer parameters.
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_12xfloat_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = fptoui <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: %7 = fptoui <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %8 = fptoui <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <12 x i64> @fptoui_cast_12xfloat_to_12xi64(<12 x float>) { ; Test input: fp-to-unsigned-int cast of a 12-lane float vector to i64; the single unnamed argument is %0.
+ %2 = fptoui <12 x float> %0 to <12 x i64> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i64> %2 ; Expected output (directives below): lane-by-lane scalar casts from three <4 x float> arguments, paired into six <2 x i64>; first pair returned, the other five stored through the five leading pointer parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_12xfloat_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %9 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %10 = fptoui float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %13 = fptoui float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %16 = fptoui float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %19 = fptoui float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x float> %6, i32 0
+; CHECK-NEXT: %22 = fptoui float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x float> %6, i32 1
+; CHECK-NEXT: %25 = fptoui float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x float> %6, i32 2
+; CHECK-NEXT: %28 = fptoui float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x float> %6, i32 3
+; CHECK-NEXT: %31 = fptoui float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x float> %7, i32 0
+; CHECK-NEXT: %34 = fptoui float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x float> %7, i32 1
+; CHECK-NEXT: %37 = fptoui float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x float> %7, i32 2
+; CHECK-NEXT: %40 = fptoui float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x float> %7, i32 3
+; CHECK-NEXT: %43 = fptoui float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+define <12 x i8> @fptoui_cast_12xdouble_to_12xi8(<12 x double>) { ; Test input: fp-to-unsigned-int cast of a 12-lane double vector down to i8; the single unnamed argument is %0.
+ %2 = fptoui <12 x double> %0 to <12 x i8> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i8> %2 ; Expected output (directives below): six <2 x double> arguments, each lane extracted and cast as a scalar, then inserted into lanes 0-11 of one <16 x i8> that is returned.
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_12xdouble_to_12xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %8 = fptoui double %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %11 = fptoui double %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %14 = fptoui double %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %17 = fptoui double %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %20 = fptoui double %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %23 = fptoui double %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %26 = fptoui double %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %29 = fptoui double %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %32 = fptoui double %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %35 = fptoui double %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %38 = fptoui double %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %41 = fptoui double %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: ret <16 x i8> %42
+
+define <12 x i16> @fptoui_cast_12xdouble_to_12xi16(<12 x double>) { ; Test input: fp-to-unsigned-int cast of a 12-lane double vector to i16; the single unnamed argument is %0.
+ %2 = fptoui <12 x double> %0 to <12 x i16> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i16> %2 ; Expected output (directives below): lane-by-lane scalar casts from six <2 x double> arguments; lanes 0-7 fill the returned <8 x i16>, lanes 8-11 fill lanes 0-3 of a second <8 x i16> stored through the leading pointer parameter.
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_12xdouble_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %9 = fptoui double %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %12 = fptoui double %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %15 = fptoui double %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %18 = fptoui double %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %21 = fptoui double %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %24 = fptoui double %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %27 = fptoui double %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %30 = fptoui double %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %33 = fptoui double %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %36 = fptoui double %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %39 = fptoui double %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %42 = fptoui double %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: store <8 x i16> %43, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+define <12 x i32> @fptoui_cast_12xdouble_to_12xi32(<12 x double>) { ; Test input: fp-to-unsigned-int cast of a 12-lane double vector to i32; the single unnamed argument is %0.
+ %2 = fptoui <12 x double> %0 to <12 x i32> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i32> %2 ; Expected output (directives below): lane-by-lane scalar casts from six <2 x double> arguments, grouped into three <4 x i32>; first group returned, the other two stored through the two leading pointer parameters.
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_12xdouble_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %9 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %10 = fptoui double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> undef, i32 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %13 = fptoui double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %16 = fptoui double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %19 = fptoui double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %22 = fptoui double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> undef, i32 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %25 = fptoui double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %28 = fptoui double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 2
+; CHECK-NEXT: %30 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %31 = fptoui double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 3
+; CHECK-NEXT: %33 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %34 = fptoui double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> undef, i32 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %37 = fptoui double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %40 = fptoui double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 2
+; CHECK-NEXT: %42 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %43 = fptoui double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 3
+; CHECK-NEXT: store <4 x i32> %32, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %44, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %20
+
+define <12 x i64> @fptoui_cast_12xdouble_to_12xi64(<12 x double>) { ; Test input: fp-to-unsigned-int cast of a 12-lane double vector to i64; the single unnamed argument is %0.
+ %2 = fptoui <12 x double> %0 to <12 x i64> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i64> %2 ; Expected output (directives below): six whole-vector <2 x double> to <2 x i64> casts; first result returned, the other five stored through the five leading pointer parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_12xdouble_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = fptoui <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %13 = fptoui <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: %14 = fptoui <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %15 = fptoui <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %16 = fptoui <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %17 = fptoui <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %13, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+
+define <12 x i8> @fptosi_cast_12xfloat_to_12xi8(<12 x float>) { ; Test input: fp-to-signed-int cast of a 12-lane float vector down to i8; the single unnamed argument is %0.
+ %2 = fptosi <12 x float> %0 to <12 x i8> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i8> %2 ; Expected output (directives below): three <4 x float> arguments, each lane extracted and cast as a scalar, then inserted into lanes 0-11 of one <16 x i8> that is returned.
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_12xfloat_to_12xi8(<4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %5 = fptosi float %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %8 = fptosi float %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %11 = fptosi float %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %14 = fptosi float %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %17 = fptosi float %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %20 = fptosi float %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: %22 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %23 = fptosi float %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 6
+; CHECK-NEXT: %25 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %26 = fptosi float %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 7
+; CHECK-NEXT: %28 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %29 = fptosi float %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 8
+; CHECK-NEXT: %31 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %32 = fptosi float %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 9
+; CHECK-NEXT: %34 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %35 = fptosi float %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 10
+; CHECK-NEXT: %37 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %38 = fptosi float %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 11
+; CHECK-NEXT: ret <16 x i8> %39
+
+define <12 x i16> @fptosi_cast_12xfloat_to_12xi16(<12 x float>) { ; Test input: fp-to-signed-int cast of a 12-lane float vector to i16; the single unnamed argument is %0.
+ %2 = fptosi <12 x float> %0 to <12 x i16> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i16> %2 ; Expected output (directives below): lane-by-lane scalar casts; lanes 0-7 fill the returned <8 x i16>, lanes 8-11 fill lanes 0-3 of a second <8 x i16> stored through the leading pointer parameter.
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_12xfloat_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %6 = fptosi float %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %9 = fptosi float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %12 = fptosi float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %15 = fptosi float %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %18 = fptosi float %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %21 = fptosi float %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %24 = fptosi float %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %27 = fptosi float %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %30 = fptosi float %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> undef, i16 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %33 = fptosi float %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> %31, i16 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %36 = fptosi float %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %39 = fptosi float %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 3
+; CHECK-NEXT: store <8 x i16> %40, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <12 x i32> @fptosi_cast_12xfloat_to_12xi32(<12 x float>) { ; Test input: fp-to-signed-int cast of a 12-lane float vector to i32; the single unnamed argument is %0.
+ %2 = fptosi <12 x float> %0 to <12 x i32> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i32> %2 ; Expected output (directives below): three whole-vector <4 x float> to <4 x i32> casts; first result returned, the other two stored through the two leading pointer parameters.
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_12xfloat_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = fptosi <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: %7 = fptosi <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %8 = fptosi <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+define <12 x i64> @fptosi_cast_12xfloat_to_12xi64(<12 x float>) { ; Test input: fp-to-signed-int cast of a 12-lane float vector to i64; the single unnamed argument is %0.
+ %2 = fptosi <12 x float> %0 to <12 x i64> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i64> %2 ; Expected output (directives below): lane-by-lane scalar casts from three <4 x float> arguments, paired into six <2 x i64>; first pair returned, the other five stored through the five leading pointer parameters.
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_12xfloat_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %9 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %10 = fptosi float %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %13 = fptosi float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %16 = fptosi float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %19 = fptosi float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x float> %6, i32 0
+; CHECK-NEXT: %22 = fptosi float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x float> %6, i32 1
+; CHECK-NEXT: %25 = fptosi float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x float> %6, i32 2
+; CHECK-NEXT: %28 = fptosi float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x float> %6, i32 3
+; CHECK-NEXT: %31 = fptosi float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x float> %7, i32 0
+; CHECK-NEXT: %34 = fptosi float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x float> %7, i32 1
+; CHECK-NEXT: %37 = fptosi float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x float> %7, i32 2
+; CHECK-NEXT: %40 = fptosi float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x float> %7, i32 3
+; CHECK-NEXT: %43 = fptosi float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+define <12 x i8> @fptosi_cast_12xdouble_to_12xi8(<12 x double>) { ; Test input: fp-to-signed-int cast of a 12-lane double vector down to i8; the single unnamed argument is %0.
+ %2 = fptosi <12 x double> %0 to <12 x i8> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i8> %2 ; Expected output (directives below): six <2 x double> arguments, each lane extracted and cast as a scalar, then inserted into lanes 0-11 of one <16 x i8> that is returned.
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_12xdouble_to_12xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %8 = fptosi double %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %11 = fptosi double %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %14 = fptosi double %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %17 = fptosi double %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %20 = fptosi double %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %23 = fptosi double %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %26 = fptosi double %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %29 = fptosi double %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %32 = fptosi double %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %35 = fptosi double %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %38 = fptosi double %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %41 = fptosi double %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: ret <16 x i8> %42
+
+define <12 x i16> @fptosi_cast_12xdouble_to_12xi16(<12 x double>) { ; Test input: fp-to-signed-int cast of a 12-lane double vector to i16; the single unnamed argument is %0.
+ %2 = fptosi <12 x double> %0 to <12 x i16> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i16> %2 ; Expected output (directives below): lane-by-lane scalar casts from six <2 x double> arguments; lanes 0-7 fill the returned <8 x i16>, lanes 8-11 fill lanes 0-3 of a second <8 x i16> stored through the leading pointer parameter.
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_12xdouble_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %9 = fptosi double %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %12 = fptosi double %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %15 = fptosi double %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %18 = fptosi double %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %21 = fptosi double %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %24 = fptosi double %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %27 = fptosi double %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %30 = fptosi double %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %33 = fptosi double %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %36 = fptosi double %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %39 = fptosi double %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %42 = fptosi double %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: store <8 x i16> %43, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+define <12 x i32> @fptosi_cast_12xdouble_to_12xi32(<12 x double>) { ; Test input: fp-to-signed-int cast of a 12-lane double vector to i32; the single unnamed argument is %0.
+ %2 = fptosi <12 x double> %0 to <12 x i32> ; Numbered %2 because the unnamed entry block implicitly takes %1.
+ ret <12 x i32> %2 ; Expected output (directives below): lane-by-lane scalar casts from six <2 x double> arguments, grouped into three <4 x i32>; first group returned, the other two stored through the two leading pointer parameters.
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_12xdouble_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %9 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %10 = fptosi double %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> undef, i32 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %13 = fptosi double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %16 = fptosi double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %19 = fptosi double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %22 = fptosi double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> undef, i32 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %25 = fptosi double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %28 = fptosi double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 2
+; CHECK-NEXT: %30 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %31 = fptosi double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 3
+; CHECK-NEXT: %33 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %34 = fptosi double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> undef, i32 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %37 = fptosi double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %40 = fptosi double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 2
+; CHECK-NEXT: %42 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %43 = fptosi double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 3
+; CHECK-NEXT: store <4 x i32> %32, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %44, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %20
+
+define <12 x i64> @fptosi_cast_12xdouble_to_12xi64(<12 x double>) { ; expected legalized signature: <2 x i64> return value, five <2 x i64>* out-pointers, six <2 x double> inputs
+ %2 = fptosi <12 x double> %0 to <12 x i64> ; expected lowering: one vector fptosi <2 x double> to <2 x i64> per input, with no per-element scalarization
+ ret <12 x i64> %2 ; expected: the first <2 x i64> result is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_12xdouble_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = fptosi <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %13 = fptosi <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: %14 = fptosi <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %15 = fptosi <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %16 = fptosi <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %17 = fptosi <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %13, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+
+define <12 x float> @uitofp_cast_12xi8_to_12xfloat(<12 x i8>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, one <16 x i8> input
+ %2 = uitofp <12 x i8> %0 to <12 x float> ; expected lowering: lanes 0-11 of the <16 x i8> input go through extractelement, scalar uitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_12xi8_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = uitofp i8 %4 to float
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = uitofp i8 %7 to float
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = uitofp i8 %10 to float
+; CHECK-NEXT: %12 = insertelement <4 x float> %9, float %11, i32 2
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = uitofp i8 %13 to float
+; CHECK-NEXT: %15 = insertelement <4 x float> %12, float %14, i32 3
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = uitofp i8 %16 to float
+; CHECK-NEXT: %18 = insertelement <4 x float> undef, float %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = uitofp i8 %19 to float
+; CHECK-NEXT: %21 = insertelement <4 x float> %18, float %20, i32 1
+; CHECK-NEXT: %22 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %23 = uitofp i8 %22 to float
+; CHECK-NEXT: %24 = insertelement <4 x float> %21, float %23, i32 2
+; CHECK-NEXT: %25 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %26 = uitofp i8 %25 to float
+; CHECK-NEXT: %27 = insertelement <4 x float> %24, float %26, i32 3
+; CHECK-NEXT: %28 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %29 = uitofp i8 %28 to float
+; CHECK-NEXT: %30 = insertelement <4 x float> undef, float %29, i32 0
+; CHECK-NEXT: %31 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %32 = uitofp i8 %31 to float
+; CHECK-NEXT: %33 = insertelement <4 x float> %30, float %32, i32 1
+; CHECK-NEXT: %34 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %35 = uitofp i8 %34 to float
+; CHECK-NEXT: %36 = insertelement <4 x float> %33, float %35, i32 2
+; CHECK-NEXT: %37 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %38 = uitofp i8 %37 to float
+; CHECK-NEXT: %39 = insertelement <4 x float> %36, float %38, i32 3
+; CHECK-NEXT: store <4 x float> %27, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %39, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %15
+
+define <12 x float> @uitofp_cast_12xi16_to_12xfloat(<12 x i16>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, two <8 x i16> inputs
+ %2 = uitofp <12 x i16> %0 to <12 x float> ; expected lowering: lanes 0-7 of the first <8 x i16> input and lanes 0-3 of the second go through extractelement, scalar uitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_12xi16_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %6 = uitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %9 = uitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %12 = uitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %15 = uitofp i16 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %18 = uitofp i16 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %21 = uitofp i16 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %2, i32 6
+; CHECK-NEXT: %24 = uitofp i16 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 2
+; CHECK-NEXT: %26 = extractelement <8 x i16> %2, i32 7
+; CHECK-NEXT: %27 = uitofp i16 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 3
+; CHECK-NEXT: %29 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %30 = uitofp i16 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> undef, float %30, i32 0
+; CHECK-NEXT: %32 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %33 = uitofp i16 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> %31, float %33, i32 1
+; CHECK-NEXT: %35 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %36 = uitofp i16 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 2
+; CHECK-NEXT: %38 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %39 = uitofp i16 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 3
+; CHECK-NEXT: store <4 x float> %28, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %40, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+define <12 x float> @uitofp_cast_12xi32_to_12xfloat(<12 x i32>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, three <4 x i32> inputs
+ %2 = uitofp <12 x i32> %0 to <12 x float> ; expected lowering: one vector uitofp <4 x i32> to <4 x float> per input, with no per-element scalarization
+ ret <12 x float> %2 ; expected: the first <4 x float> result is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_12xi32_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = uitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: %7 = uitofp <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %8 = uitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: store <4 x float> %7, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %8, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %6
+
+define <12 x float> @uitofp_cast_12xi64_to_12xfloat(<12 x i64>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, six <2 x i64> inputs
+ %2 = uitofp <12 x i64> %0 to <12 x float> ; expected lowering: both lanes of each <2 x i64> input go through extractelement, scalar uitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_12xi64_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %9 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %10 = uitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> undef, float %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %13 = uitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %16 = uitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %19 = uitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %22 = uitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> undef, float %22, i32 0
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %25 = uitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 1
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %28 = uitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 2
+; CHECK-NEXT: %30 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %31 = uitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 3
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %34 = uitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> undef, float %34, i32 0
+; CHECK-NEXT: %36 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %37 = uitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 1
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %40 = uitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 2
+; CHECK-NEXT: %42 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %43 = uitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 3
+; CHECK-NEXT: store <4 x float> %32, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %44, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %20
+
+define <12 x double> @uitofp_cast_12xi8_to_12xdouble(<12 x i8>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, one <16 x i8> input
+ %2 = uitofp <12 x i8> %0 to <12 x double> ; expected lowering: lanes 0-11 of the <16 x i8> input go through extractelement, scalar uitofp, insertelement, filling six <2 x double> parts
+ ret <12 x double> %2 ; expected: the first <2 x double> part is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_12xi8_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %8 = uitofp i8 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> undef, double %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %11 = uitofp i8 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> %9, double %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %14 = uitofp i8 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> undef, double %14, i32 0
+; CHECK-NEXT: %16 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %17 = uitofp i8 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> %15, double %17, i32 1
+; CHECK-NEXT: %19 = extractelement <16 x i8> %5, i32 4
+; CHECK-NEXT: %20 = uitofp i8 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> undef, double %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %5, i32 5
+; CHECK-NEXT: %23 = uitofp i8 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> %21, double %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %5, i32 6
+; CHECK-NEXT: %26 = uitofp i8 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> undef, double %26, i32 0
+; CHECK-NEXT: %28 = extractelement <16 x i8> %5, i32 7
+; CHECK-NEXT: %29 = uitofp i8 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> %27, double %29, i32 1
+; CHECK-NEXT: %31 = extractelement <16 x i8> %5, i32 8
+; CHECK-NEXT: %32 = uitofp i8 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> undef, double %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %5, i32 9
+; CHECK-NEXT: %35 = uitofp i8 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> %33, double %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %5, i32 10
+; CHECK-NEXT: %38 = uitofp i8 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> undef, double %38, i32 0
+; CHECK-NEXT: %40 = extractelement <16 x i8> %5, i32 11
+; CHECK-NEXT: %41 = uitofp i8 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> %39, double %41, i32 1
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %30, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %42, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %12
+
+define <12 x double> @uitofp_cast_12xi16_to_12xdouble(<12 x i16>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, two <8 x i16> inputs
+ %2 = uitofp <12 x i16> %0 to <12 x double> ; expected lowering: lanes 0-7 of the first <8 x i16> input and lanes 0-3 of the second go through extractelement, scalar uitofp, insertelement, filling six <2 x double> parts
+ ret <12 x double> %2 ; expected: the first <2 x double> part is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_12xi16_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %9 = uitofp i16 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> undef, double %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %12 = uitofp i16 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> %10, double %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %15 = uitofp i16 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> undef, double %15, i32 0
+; CHECK-NEXT: %17 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %18 = uitofp i16 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> %16, double %18, i32 1
+; CHECK-NEXT: %20 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %21 = uitofp i16 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> undef, double %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %24 = uitofp i16 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> %22, double %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %27 = uitofp i16 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> undef, double %27, i32 0
+; CHECK-NEXT: %29 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %30 = uitofp i16 %29 to double
+; CHECK-NEXT: %31 = insertelement <2 x double> %28, double %30, i32 1
+; CHECK-NEXT: %32 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %33 = uitofp i16 %32 to double
+; CHECK-NEXT: %34 = insertelement <2 x double> undef, double %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %36 = uitofp i16 %35 to double
+; CHECK-NEXT: %37 = insertelement <2 x double> %34, double %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %39 = uitofp i16 %38 to double
+; CHECK-NEXT: %40 = insertelement <2 x double> undef, double %39, i32 0
+; CHECK-NEXT: %41 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %42 = uitofp i16 %41 to double
+; CHECK-NEXT: %43 = insertelement <2 x double> %40, double %42, i32 1
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %31, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %37, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %43, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %13
+
+define <12 x double> @uitofp_cast_12xi32_to_12xdouble(<12 x i32>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, three <4 x i32> inputs
+ %2 = uitofp <12 x i32> %0 to <12 x double> ; expected lowering: all four lanes of each <4 x i32> input go through extractelement, scalar uitofp, insertelement, filling six <2 x double> parts
+ ret <12 x double> %2 ; expected: the first <2 x double> part is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_12xi32_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %10 = uitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %13 = uitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %16 = uitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %19 = uitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %6, i32 0
+; CHECK-NEXT: %22 = uitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %6, i32 1
+; CHECK-NEXT: %25 = uitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %6, i32 2
+; CHECK-NEXT: %28 = uitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %6, i32 3
+; CHECK-NEXT: %31 = uitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %34 = uitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %37 = uitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %40 = uitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %43 = uitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %14
+
+define <12 x double> @uitofp_cast_12xi64_to_12xdouble(<12 x i64>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, six <2 x i64> inputs
+ %2 = uitofp <12 x i64> %0 to <12 x double> ; expected lowering: one vector uitofp <2 x i64> to <2 x double> per input, with no per-element scalarization
+ ret <12 x double> %2 ; expected: the first <2 x double> result is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_12xi64_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = uitofp <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %13 = uitofp <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: %14 = uitofp <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %15 = uitofp <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %16 = uitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %17 = uitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: store <2 x double> %13, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %12
+
+define <12 x float> @sitofp_cast_12xi8_to_12xfloat(<12 x i8>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, one <16 x i8> input
+ %2 = sitofp <12 x i8> %0 to <12 x float> ; expected lowering: lanes 0-11 of the <16 x i8> input go through extractelement, scalar sitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_12xi8_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = sitofp i8 %4 to float
+; CHECK-NEXT: %6 = insertelement <4 x float> undef, float %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = sitofp i8 %7 to float
+; CHECK-NEXT: %9 = insertelement <4 x float> %6, float %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = sitofp i8 %10 to float
+; CHECK-NEXT: %12 = insertelement <4 x float> %9, float %11, i32 2
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = sitofp i8 %13 to float
+; CHECK-NEXT: %15 = insertelement <4 x float> %12, float %14, i32 3
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = sitofp i8 %16 to float
+; CHECK-NEXT: %18 = insertelement <4 x float> undef, float %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = sitofp i8 %19 to float
+; CHECK-NEXT: %21 = insertelement <4 x float> %18, float %20, i32 1
+; CHECK-NEXT: %22 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %23 = sitofp i8 %22 to float
+; CHECK-NEXT: %24 = insertelement <4 x float> %21, float %23, i32 2
+; CHECK-NEXT: %25 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %26 = sitofp i8 %25 to float
+; CHECK-NEXT: %27 = insertelement <4 x float> %24, float %26, i32 3
+; CHECK-NEXT: %28 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %29 = sitofp i8 %28 to float
+; CHECK-NEXT: %30 = insertelement <4 x float> undef, float %29, i32 0
+; CHECK-NEXT: %31 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %32 = sitofp i8 %31 to float
+; CHECK-NEXT: %33 = insertelement <4 x float> %30, float %32, i32 1
+; CHECK-NEXT: %34 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %35 = sitofp i8 %34 to float
+; CHECK-NEXT: %36 = insertelement <4 x float> %33, float %35, i32 2
+; CHECK-NEXT: %37 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %38 = sitofp i8 %37 to float
+; CHECK-NEXT: %39 = insertelement <4 x float> %36, float %38, i32 3
+; CHECK-NEXT: store <4 x float> %27, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %39, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %15
+
+define <12 x float> @sitofp_cast_12xi16_to_12xfloat(<12 x i16>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, two <8 x i16> inputs
+ %2 = sitofp <12 x i16> %0 to <12 x float> ; expected lowering: lanes 0-7 of the first <8 x i16> input and lanes 0-3 of the second go through extractelement, scalar sitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_12xi16_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %6 = sitofp i16 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %9 = sitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %12 = sitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %15 = sitofp i16 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %18 = sitofp i16 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %21 = sitofp i16 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %2, i32 6
+; CHECK-NEXT: %24 = sitofp i16 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 2
+; CHECK-NEXT: %26 = extractelement <8 x i16> %2, i32 7
+; CHECK-NEXT: %27 = sitofp i16 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 3
+; CHECK-NEXT: %29 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %30 = sitofp i16 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> undef, float %30, i32 0
+; CHECK-NEXT: %32 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %33 = sitofp i16 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> %31, float %33, i32 1
+; CHECK-NEXT: %35 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %36 = sitofp i16 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 2
+; CHECK-NEXT: %38 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %39 = sitofp i16 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 3
+; CHECK-NEXT: store <4 x float> %28, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %40, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+define <12 x float> @sitofp_cast_12xi32_to_12xfloat(<12 x i32>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, three <4 x i32> inputs
+ %2 = sitofp <12 x i32> %0 to <12 x float> ; expected lowering: one vector sitofp <4 x i32> to <4 x float> per input, with no per-element scalarization
+ ret <12 x float> %2 ; expected: the first <4 x float> result is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_12xi32_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = sitofp <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: %7 = sitofp <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %8 = sitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: store <4 x float> %7, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %8, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %6
+
+define <12 x float> @sitofp_cast_12xi64_to_12xfloat(<12 x i64>) { ; expected legalized signature: <4 x float> return value, two <4 x float>* out-pointers, six <2 x i64> inputs
+ %2 = sitofp <12 x i64> %0 to <12 x float> ; expected lowering: both lanes of each <2 x i64> input go through extractelement, scalar sitofp, insertelement, filling three <4 x float> parts
+ ret <12 x float> %2 ; expected: the first <4 x float> part is returned; the second and third are stored (align 16) through the two out-pointers
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_12xi64_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %9 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %10 = sitofp i64 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> undef, float %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %13 = sitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %16 = sitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %19 = sitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %22 = sitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> undef, float %22, i32 0
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %25 = sitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 1
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %28 = sitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 2
+; CHECK-NEXT: %30 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %31 = sitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 3
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %34 = sitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> undef, float %34, i32 0
+; CHECK-NEXT: %36 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %37 = sitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 1
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %40 = sitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 2
+; CHECK-NEXT: %42 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %43 = sitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 3
+; CHECK-NEXT: store <4 x float> %32, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %44, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %20
+
+define <12 x double> @sitofp_cast_12xi8_to_12xdouble(<12 x i8>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, one <16 x i8> input
+ %2 = sitofp <12 x i8> %0 to <12 x double> ; expected lowering: lanes 0-11 of the <16 x i8> input go through extractelement, scalar sitofp, insertelement, filling six <2 x double> parts
+ ret <12 x double> %2 ; expected: the first <2 x double> part is returned; the other five are stored (align 16) through the out-pointers in order
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_12xi8_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %8 = sitofp i8 %7 to double
+; CHECK-NEXT: %9 = insertelement <2 x double> undef, double %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %11 = sitofp i8 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> %9, double %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %14 = sitofp i8 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> undef, double %14, i32 0
+; CHECK-NEXT: %16 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %17 = sitofp i8 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> %15, double %17, i32 1
+; CHECK-NEXT: %19 = extractelement <16 x i8> %5, i32 4
+; CHECK-NEXT: %20 = sitofp i8 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> undef, double %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %5, i32 5
+; CHECK-NEXT: %23 = sitofp i8 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> %21, double %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %5, i32 6
+; CHECK-NEXT: %26 = sitofp i8 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> undef, double %26, i32 0
+; CHECK-NEXT: %28 = extractelement <16 x i8> %5, i32 7
+; CHECK-NEXT: %29 = sitofp i8 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> %27, double %29, i32 1
+; CHECK-NEXT: %31 = extractelement <16 x i8> %5, i32 8
+; CHECK-NEXT: %32 = sitofp i8 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> undef, double %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %5, i32 9
+; CHECK-NEXT: %35 = sitofp i8 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> %33, double %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %5, i32 10
+; CHECK-NEXT: %38 = sitofp i8 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> undef, double %38, i32 0
+; CHECK-NEXT: %40 = extractelement <16 x i8> %5, i32 11
+; CHECK-NEXT: %41 = sitofp i8 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> %39, double %41, i32 1
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %30, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %42, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %12
+
+define <12 x double> @sitofp_cast_12xi16_to_12xdouble(<12 x i16>) { ; expected legalized signature: <2 x double> return value, five <2 x double>* out-pointers, two <8 x i16> inputs
+ %2 = sitofp <12 x i16> %0 to <12 x double> ; expected lowering: input lanes go through extractelement, scalar sitofp, insertelement, building <2 x double> parts two lanes at a time
+ ret <12 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_12xi16_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %9 = sitofp i16 %8 to double
+; CHECK-NEXT: %10 = insertelement <2 x double> undef, double %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %12 = sitofp i16 %11 to double
+; CHECK-NEXT: %13 = insertelement <2 x double> %10, double %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %15 = sitofp i16 %14 to double
+; CHECK-NEXT: %16 = insertelement <2 x double> undef, double %15, i32 0
+; CHECK-NEXT: %17 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %18 = sitofp i16 %17 to double
+; CHECK-NEXT: %19 = insertelement <2 x double> %16, double %18, i32 1
+; CHECK-NEXT: %20 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %21 = sitofp i16 %20 to double
+; CHECK-NEXT: %22 = insertelement <2 x double> undef, double %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %24 = sitofp i16 %23 to double
+; CHECK-NEXT: %25 = insertelement <2 x double> %22, double %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %27 = sitofp i16 %26 to double
+; CHECK-NEXT: %28 = insertelement <2 x double> undef, double %27, i32 0
+; CHECK-NEXT: %29 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %30 = sitofp i16 %29 to double
+; CHECK-NEXT: %31 = insertelement <2 x double> %28, double %30, i32 1
+; CHECK-NEXT: %32 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %33 = sitofp i16 %32 to double
+; CHECK-NEXT: %34 = insertelement <2 x double> undef, double %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %36 = sitofp i16 %35 to double
+; CHECK-NEXT: %37 = insertelement <2 x double> %34, double %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %39 = sitofp i16 %38 to double
+; CHECK-NEXT: %40 = insertelement <2 x double> undef, double %39, i32 0
+; CHECK-NEXT: %41 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %42 = sitofp i16 %41 to double
+; CHECK-NEXT: %43 = insertelement <2 x double> %40, double %42, i32 1
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %31, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %37, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %43, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %13
+
+define <12 x double> @sitofp_cast_12xi32_to_12xdouble(<12 x i32>) { ; pass input: signed i32 -> double conversion over a 12-lane vector
+ %2 = sitofp <12 x i32> %0 to <12 x double> ; expected to be scalarized per lane, reading from three <4 x i32> arguments
+ ret <12 x double> %2 ; expected: the first <2 x double> pair is returned by value, the other five pairs are stored through the five leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_12xi32_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %10 = sitofp i32 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %13 = sitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %16 = sitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %19 = sitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %6, i32 0
+; CHECK-NEXT: %22 = sitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %6, i32 1
+; CHECK-NEXT: %25 = sitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %6, i32 2
+; CHECK-NEXT: %28 = sitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %6, i32 3
+; CHECK-NEXT: %31 = sitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %34 = sitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %37 = sitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %40 = sitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %43 = sitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %14
+
+define <12 x double> @sitofp_cast_12xi64_to_12xdouble(<12 x i64>) { ; pass input: signed i64 -> double conversion over a 12-lane vector
+ %2 = sitofp <12 x i64> %0 to <12 x double> ; expected to become six <2 x i64> -> <2 x double> vector casts, one per <2 x i64> argument (no per-lane scalarization)
+ ret <12 x double> %2 ; expected: the first cast result is returned by value, the other five are stored through the five leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_12xi64_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = sitofp <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %13 = sitofp <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: %14 = sitofp <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %15 = sitofp <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %16 = sitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %17 = sitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: store <2 x double> %13, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %12
+
+define <16 x i8> @fptoui_cast_16xfloat_to_16xi8(<16 x float>) { ; pass input: float -> unsigned i8 conversion over a 16-lane vector
+ %2 = fptoui <16 x float> %0 to <16 x i8> ; expected to be scalarized per lane from four <4 x float> arguments, building up one <16 x i8>
+ ret <16 x i8> %2 ; expected: the single <16 x i8> is returned directly; the expected signature has no pointer parameters
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_16xfloat_to_16xi8(<4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %6 = fptoui float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %9 = fptoui float %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %12 = fptoui float %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %15 = fptoui float %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %18 = fptoui float %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %21 = fptoui float %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %24 = fptoui float %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %27 = fptoui float %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %30 = fptoui float %29 to i8
+; CHECK-NEXT: %31 = insertelement <16 x i8> %28, i8 %30, i32 8
+; CHECK-NEXT: %32 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %33 = fptoui float %32 to i8
+; CHECK-NEXT: %34 = insertelement <16 x i8> %31, i8 %33, i32 9
+; CHECK-NEXT: %35 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %36 = fptoui float %35 to i8
+; CHECK-NEXT: %37 = insertelement <16 x i8> %34, i8 %36, i32 10
+; CHECK-NEXT: %38 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %39 = fptoui float %38 to i8
+; CHECK-NEXT: %40 = insertelement <16 x i8> %37, i8 %39, i32 11
+; CHECK-NEXT: %41 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %42 = fptoui float %41 to i8
+; CHECK-NEXT: %43 = insertelement <16 x i8> %40, i8 %42, i32 12
+; CHECK-NEXT: %44 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %45 = fptoui float %44 to i8
+; CHECK-NEXT: %46 = insertelement <16 x i8> %43, i8 %45, i32 13
+; CHECK-NEXT: %47 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %48 = fptoui float %47 to i8
+; CHECK-NEXT: %49 = insertelement <16 x i8> %46, i8 %48, i32 14
+; CHECK-NEXT: %50 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %51 = fptoui float %50 to i8
+; CHECK-NEXT: %52 = insertelement <16 x i8> %49, i8 %51, i32 15
+; CHECK-NEXT: ret <16 x i8> %52
+
+define <16 x i16> @fptoui_cast_16xfloat_to_16xi16(<16 x float>) { ; pass input: float -> unsigned i16 conversion over a 16-lane vector
+ %2 = fptoui <16 x float> %0 to <16 x i16> ; expected to be scalarized per lane from four <4 x float> arguments into two <8 x i16> halves
+ ret <16 x i16> %2 ; expected: the first <8 x i16> half is returned by value, the second is stored through the single leading pointer parameter
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_16xfloat_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %7 = fptoui float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> undef, i16 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %10 = fptoui float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %13 = fptoui float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %16 = fptoui float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %19 = fptoui float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 4
+; CHECK-NEXT: %21 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %22 = fptoui float %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 5
+; CHECK-NEXT: %24 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %25 = fptoui float %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 6
+; CHECK-NEXT: %27 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %28 = fptoui float %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> %26, i16 %28, i32 7
+; CHECK-NEXT: %30 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %31 = fptoui float %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> undef, i16 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %34 = fptoui float %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %37 = fptoui float %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 2
+; CHECK-NEXT: %39 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %40 = fptoui float %39 to i16
+; CHECK-NEXT: %41 = insertelement <8 x i16> %38, i16 %40, i32 3
+; CHECK-NEXT: %42 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %43 = fptoui float %42 to i16
+; CHECK-NEXT: %44 = insertelement <8 x i16> %41, i16 %43, i32 4
+; CHECK-NEXT: %45 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %46 = fptoui float %45 to i16
+; CHECK-NEXT: %47 = insertelement <8 x i16> %44, i16 %46, i32 5
+; CHECK-NEXT: %48 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %49 = fptoui float %48 to i16
+; CHECK-NEXT: %50 = insertelement <8 x i16> %47, i16 %49, i32 6
+; CHECK-NEXT: %51 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %52 = fptoui float %51 to i16
+; CHECK-NEXT: %53 = insertelement <8 x i16> %50, i16 %52, i32 7
+; CHECK-NEXT: store <8 x i16> %53, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %29
+
+define <16 x i32> @fptoui_cast_16xfloat_to_16xi32(<16 x float>) { ; pass input: float -> unsigned i32 conversion over a 16-lane vector
+ %2 = fptoui <16 x float> %0 to <16 x i32> ; expected to become four <4 x float> -> <4 x i32> vector casts, one per <4 x float> argument (no per-lane scalarization)
+ ret <16 x i32> %2 ; expected: the first cast result is returned by value, the other three are stored through the three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_16xfloat_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %8 = fptoui <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %9 = fptoui <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %10 = fptoui <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %11 = fptoui <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+define <16 x i64> @fptoui_cast_16xfloat_to_16xi64(<16 x float>) { ; pass input: float -> unsigned i64 conversion over a 16-lane vector
+ %2 = fptoui <16 x float> %0 to <16 x i64> ; expected to be scalarized per lane from four <4 x float> arguments into eight <2 x i64> pairs
+ ret <16 x i64> %2 ; expected: the first <2 x i64> pair is returned by value, the other seven are stored through the seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_16xfloat_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %12 = extractelement <4 x float> %7, i32 0
+; CHECK-NEXT: %13 = fptoui float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x float> %7, i32 1
+; CHECK-NEXT: %16 = fptoui float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x float> %7, i32 2
+; CHECK-NEXT: %19 = fptoui float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x float> %7, i32 3
+; CHECK-NEXT: %22 = fptoui float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x float> %8, i32 0
+; CHECK-NEXT: %25 = fptoui float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x float> %8, i32 1
+; CHECK-NEXT: %28 = fptoui float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x float> %8, i32 2
+; CHECK-NEXT: %31 = fptoui float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x float> %8, i32 3
+; CHECK-NEXT: %34 = fptoui float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT: %37 = fptoui float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x float> %9, i32 1
+; CHECK-NEXT: %40 = fptoui float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x float> %9, i32 2
+; CHECK-NEXT: %43 = fptoui float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x float> %9, i32 3
+; CHECK-NEXT: %46 = fptoui float %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x float> %10, i32 0
+; CHECK-NEXT: %49 = fptoui float %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x float> %10, i32 1
+; CHECK-NEXT: %52 = fptoui float %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x float> %10, i32 2
+; CHECK-NEXT: %55 = fptoui float %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x float> %10, i32 3
+; CHECK-NEXT: %58 = fptoui float %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+define <16 x i8> @fptoui_cast_16xdouble_to_16xi8(<16 x double>) { ; pass input: double -> unsigned i8 conversion over a 16-lane vector
+ %2 = fptoui <16 x double> %0 to <16 x i8> ; expected to be scalarized per lane from eight <2 x double> arguments, building up one <16 x i8>
+ ret <16 x i8> %2 ; expected: the single <16 x i8> is returned directly; the expected signature has no pointer parameters
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_16xdouble_to_16xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %9 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %10 = fptoui double %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> undef, i8 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %13 = fptoui double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %16 = fptoui double %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %19 = fptoui double %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %22 = fptoui double %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 4
+; CHECK-NEXT: %24 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %25 = fptoui double %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 5
+; CHECK-NEXT: %27 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %28 = fptoui double %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 6
+; CHECK-NEXT: %30 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %31 = fptoui double %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 7
+; CHECK-NEXT: %33 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %34 = fptoui double %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 8
+; CHECK-NEXT: %36 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %37 = fptoui double %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 9
+; CHECK-NEXT: %39 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %40 = fptoui double %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 10
+; CHECK-NEXT: %42 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %43 = fptoui double %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 11
+; CHECK-NEXT: %45 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %46 = fptoui double %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 12
+; CHECK-NEXT: %48 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %49 = fptoui double %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 13
+; CHECK-NEXT: %51 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %52 = fptoui double %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 14
+; CHECK-NEXT: %54 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %55 = fptoui double %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 15
+; CHECK-NEXT: ret <16 x i8> %56
+
+define <16 x i16> @fptoui_cast_16xdouble_to_16xi16(<16 x double>) { ; pass input: double -> unsigned i16 conversion over a 16-lane vector
+ %2 = fptoui <16 x double> %0 to <16 x i16> ; expected to be scalarized per lane from eight <2 x double> arguments into two <8 x i16> halves
+ ret <16 x i16> %2 ; expected: the first <8 x i16> half is returned by value, the second is stored through the single leading pointer parameter
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_16xdouble_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptoui double %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> undef, i16 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptoui double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptoui double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 2
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptoui double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 3
+; CHECK-NEXT: %22 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %23 = fptoui double %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 4
+; CHECK-NEXT: %25 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %26 = fptoui double %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 5
+; CHECK-NEXT: %28 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %29 = fptoui double %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 6
+; CHECK-NEXT: %31 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %32 = fptoui double %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 7
+; CHECK-NEXT: %34 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %35 = fptoui double %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> undef, i16 %35, i32 0
+; CHECK-NEXT: %37 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %38 = fptoui double %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> %36, i16 %38, i32 1
+; CHECK-NEXT: %40 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %41 = fptoui double %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 2
+; CHECK-NEXT: %43 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %44 = fptoui double %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 3
+; CHECK-NEXT: %46 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %47 = fptoui double %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 4
+; CHECK-NEXT: %49 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %50 = fptoui double %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 5
+; CHECK-NEXT: %52 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %53 = fptoui double %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 6
+; CHECK-NEXT: %55 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %56 = fptoui double %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 7
+; CHECK-NEXT: store <8 x i16> %57, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %33
+
+define <16 x i32> @fptoui_cast_16xdouble_to_16xi32(<16 x double>) { ; pass input: double -> unsigned i32 conversion over a 16-lane vector
+ %2 = fptoui <16 x double> %0 to <16 x i32> ; expected to be scalarized per lane from eight <2 x double> arguments into four <4 x i32> quarters
+ ret <16 x i32> %2 ; expected: the first <4 x i32> quarter is returned by value, the other three are stored through the three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_16xdouble_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %13 = fptoui double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> undef, i32 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %16 = fptoui double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %19 = fptoui double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %22 = fptoui double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %25 = fptoui double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> undef, i32 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %28 = fptoui double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %31 = fptoui double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 2
+; CHECK-NEXT: %33 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %34 = fptoui double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 3
+; CHECK-NEXT: %36 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %37 = fptoui double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> undef, i32 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %40 = fptoui double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %43 = fptoui double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 2
+; CHECK-NEXT: %45 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %46 = fptoui double %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 3
+; CHECK-NEXT: %48 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %49 = fptoui double %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> undef, i32 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %52 = fptoui double %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> %50, i32 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %55 = fptoui double %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 2
+; CHECK-NEXT: %57 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %58 = fptoui double %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 3
+; CHECK-NEXT: store <4 x i32> %35, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %47, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %59, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %23
+
+define <16 x i64> @fptoui_cast_16xdouble_to_16xi64(<16 x double>) { ; pass input: double -> unsigned i64 conversion over a 16-lane vector
+ %2 = fptoui <16 x double> %0 to <16 x i64> ; expected to become eight <2 x double> -> <2 x i64> vector casts, one per <2 x double> argument (no per-lane scalarization)
+ ret <16 x i64> %2 ; expected: the first cast result is returned by value, the other seven are stored through the seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_16xdouble_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %16 = fptoui <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %17 = fptoui <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %18 = fptoui <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %19 = fptoui <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %20 = fptoui <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %21 = fptoui <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %22 = fptoui <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %23 = fptoui <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %16
+
+define <16 x i8> @fptosi_cast_16xfloat_to_16xi8(<16 x float>) { ; pass input: float -> signed i8 conversion over a 16-lane vector
+ %2 = fptosi <16 x float> %0 to <16 x i8> ; expected to be scalarized per lane from four <4 x float> arguments, building up one <16 x i8>
+ ret <16 x i8> %2 ; expected: the single <16 x i8> is returned directly; the expected signature has no pointer parameters
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_16xfloat_to_16xi8(<4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %5 = extractelement <4 x float> %0, i32 0
+; CHECK-NEXT: %6 = fptosi float %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x float> %0, i32 1
+; CHECK-NEXT: %9 = fptosi float %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x float> %0, i32 2
+; CHECK-NEXT: %12 = fptosi float %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x float> %0, i32 3
+; CHECK-NEXT: %15 = fptosi float %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %18 = fptosi float %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %21 = fptosi float %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %24 = fptosi float %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %27 = fptosi float %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %30 = fptosi float %29 to i8
+; CHECK-NEXT: %31 = insertelement <16 x i8> %28, i8 %30, i32 8
+; CHECK-NEXT: %32 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %33 = fptosi float %32 to i8
+; CHECK-NEXT: %34 = insertelement <16 x i8> %31, i8 %33, i32 9
+; CHECK-NEXT: %35 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %36 = fptosi float %35 to i8
+; CHECK-NEXT: %37 = insertelement <16 x i8> %34, i8 %36, i32 10
+; CHECK-NEXT: %38 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %39 = fptosi float %38 to i8
+; CHECK-NEXT: %40 = insertelement <16 x i8> %37, i8 %39, i32 11
+; CHECK-NEXT: %41 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %42 = fptosi float %41 to i8
+; CHECK-NEXT: %43 = insertelement <16 x i8> %40, i8 %42, i32 12
+; CHECK-NEXT: %44 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %45 = fptosi float %44 to i8
+; CHECK-NEXT: %46 = insertelement <16 x i8> %43, i8 %45, i32 13
+; CHECK-NEXT: %47 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %48 = fptosi float %47 to i8
+; CHECK-NEXT: %49 = insertelement <16 x i8> %46, i8 %48, i32 14
+; CHECK-NEXT: %50 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %51 = fptosi float %50 to i8
+; CHECK-NEXT: %52 = insertelement <16 x i8> %49, i8 %51, i32 15
+; CHECK-NEXT: ret <16 x i8> %52
+
+define <16 x i16> @fptosi_cast_16xfloat_to_16xi16(<16 x float>) { ; pass input: float -> signed i16 conversion over a 16-lane vector
+ %2 = fptosi <16 x float> %0 to <16 x i16> ; the expectations that follow scalarize this per lane (extractelement / scalar fptosi / insertelement into <8 x i16>)
+ ret <16 x i16> %2 ; expected signature returns <8 x i16> and takes one <8 x i16>* parameter followed by four <4 x float> inputs
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_16xfloat_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %7 = fptosi float %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> undef, i16 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %10 = fptosi float %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %13 = fptosi float %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %16 = fptosi float %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %19 = fptosi float %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 4
+; CHECK-NEXT: %21 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %22 = fptosi float %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 5
+; CHECK-NEXT: %24 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %25 = fptosi float %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 6
+; CHECK-NEXT: %27 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %28 = fptosi float %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> %26, i16 %28, i32 7
+; CHECK-NEXT: %30 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %31 = fptosi float %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> undef, i16 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %34 = fptosi float %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %37 = fptosi float %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 2
+; CHECK-NEXT: %39 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %40 = fptosi float %39 to i16
+; CHECK-NEXT: %41 = insertelement <8 x i16> %38, i16 %40, i32 3
+; CHECK-NEXT: %42 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %43 = fptosi float %42 to i16
+; CHECK-NEXT: %44 = insertelement <8 x i16> %41, i16 %43, i32 4
+; CHECK-NEXT: %45 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %46 = fptosi float %45 to i16
+; CHECK-NEXT: %47 = insertelement <8 x i16> %44, i16 %46, i32 5
+; CHECK-NEXT: %48 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %49 = fptosi float %48 to i16
+; CHECK-NEXT: %50 = insertelement <8 x i16> %47, i16 %49, i32 6
+; CHECK-NEXT: %51 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %52 = fptosi float %51 to i16
+; CHECK-NEXT: %53 = insertelement <8 x i16> %50, i16 %52, i32 7
+; CHECK-NEXT: store <8 x i16> %53, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %29
+
+; Test input: signed float-to-int cast, <16 x float> -> <16 x i32>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, no scalarization is expected here: each
+; of the four <4 x float> arguments is converted by one vector fptosi to
+; <4 x i32>. The first result is returned; the other three are stored through
+; the three leading <4 x i32>* out-pointer arguments.
+define <16 x i32> @fptosi_cast_16xfloat_to_16xi32(<16 x float>) {
+ %2 = fptosi <16 x float> %0 to <16 x i32>
+ ret <16 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_16xfloat_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %8 = fptosi <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %9 = fptosi <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %10 = fptosi <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %11 = fptosi <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+; Test input: signed float-to-int cast, <16 x float> -> <16 x i64>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as four <4 x float>
+; arguments and every lane is converted with a scalar fptosi. The result is
+; built as eight <2 x i64> pieces: the first is returned and the remaining
+; seven are stored through the seven leading <2 x i64>* out-pointer arguments.
+define <16 x i64> @fptosi_cast_16xfloat_to_16xi64(<16 x float>) {
+ %2 = fptosi <16 x float> %0 to <16 x i64>
+ ret <16 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_16xfloat_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %12 = extractelement <4 x float> %7, i32 0
+; CHECK-NEXT: %13 = fptosi float %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x float> %7, i32 1
+; CHECK-NEXT: %16 = fptosi float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x float> %7, i32 2
+; CHECK-NEXT: %19 = fptosi float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x float> %7, i32 3
+; CHECK-NEXT: %22 = fptosi float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x float> %8, i32 0
+; CHECK-NEXT: %25 = fptosi float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x float> %8, i32 1
+; CHECK-NEXT: %28 = fptosi float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x float> %8, i32 2
+; CHECK-NEXT: %31 = fptosi float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x float> %8, i32 3
+; CHECK-NEXT: %34 = fptosi float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT: %37 = fptosi float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x float> %9, i32 1
+; CHECK-NEXT: %40 = fptosi float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x float> %9, i32 2
+; CHECK-NEXT: %43 = fptosi float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x float> %9, i32 3
+; CHECK-NEXT: %46 = fptosi float %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x float> %10, i32 0
+; CHECK-NEXT: %49 = fptosi float %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x float> %10, i32 1
+; CHECK-NEXT: %52 = fptosi float %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x float> %10, i32 2
+; CHECK-NEXT: %55 = fptosi float %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x float> %10, i32 3
+; CHECK-NEXT: %58 = fptosi float %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+; Test input: signed float-to-int cast, <16 x double> -> <16 x i8>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as eight <2 x double>
+; arguments and every lane is converted with a scalar fptosi. All sixteen
+; results are inserted into one <16 x i8>, which is returned directly; no
+; out-pointer arguments are expected.
+define <16 x i8> @fptosi_cast_16xdouble_to_16xi8(<16 x double>) {
+ %2 = fptosi <16 x double> %0 to <16 x i8>
+ ret <16 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_16xdouble_to_16xi8(<2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %9 = extractelement <2 x double> %0, i32 0
+; CHECK-NEXT: %10 = fptosi double %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> undef, i8 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x double> %0, i32 1
+; CHECK-NEXT: %13 = fptosi double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %16 = fptosi double %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %19 = fptosi double %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %22 = fptosi double %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 4
+; CHECK-NEXT: %24 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %25 = fptosi double %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 5
+; CHECK-NEXT: %27 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %28 = fptosi double %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 6
+; CHECK-NEXT: %30 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %31 = fptosi double %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 7
+; CHECK-NEXT: %33 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %34 = fptosi double %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 8
+; CHECK-NEXT: %36 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %37 = fptosi double %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 9
+; CHECK-NEXT: %39 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %40 = fptosi double %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 10
+; CHECK-NEXT: %42 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %43 = fptosi double %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 11
+; CHECK-NEXT: %45 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %46 = fptosi double %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 12
+; CHECK-NEXT: %48 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %49 = fptosi double %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 13
+; CHECK-NEXT: %51 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %52 = fptosi double %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 14
+; CHECK-NEXT: %54 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %55 = fptosi double %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 15
+; CHECK-NEXT: ret <16 x i8> %56
+
+; Test input: signed float-to-int cast, <16 x double> -> <16 x i16>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as eight <2 x double>
+; arguments and every lane is converted with a scalar fptosi. The result is
+; built as two <8 x i16> pieces: the first is returned and the second is
+; stored through the leading <8 x i16>* out-pointer argument.
+define <16 x i16> @fptosi_cast_16xdouble_to_16xi16(<16 x double>) {
+ %2 = fptosi <16 x double> %0 to <16 x i16>
+ ret <16 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_16xdouble_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %10 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %11 = fptosi double %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> undef, i16 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %14 = fptosi double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %17 = fptosi double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 2
+; CHECK-NEXT: %19 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %20 = fptosi double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 3
+; CHECK-NEXT: %22 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %23 = fptosi double %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 4
+; CHECK-NEXT: %25 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %26 = fptosi double %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 5
+; CHECK-NEXT: %28 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %29 = fptosi double %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 6
+; CHECK-NEXT: %31 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %32 = fptosi double %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 7
+; CHECK-NEXT: %34 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %35 = fptosi double %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> undef, i16 %35, i32 0
+; CHECK-NEXT: %37 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %38 = fptosi double %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> %36, i16 %38, i32 1
+; CHECK-NEXT: %40 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %41 = fptosi double %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 2
+; CHECK-NEXT: %43 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %44 = fptosi double %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 3
+; CHECK-NEXT: %46 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %47 = fptosi double %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 4
+; CHECK-NEXT: %49 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %50 = fptosi double %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 5
+; CHECK-NEXT: %52 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %53 = fptosi double %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 6
+; CHECK-NEXT: %55 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %56 = fptosi double %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 7
+; CHECK-NEXT: store <8 x i16> %57, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %33
+
+; Test input: signed float-to-int cast, <16 x double> -> <16 x i32>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as eight <2 x double>
+; arguments and every lane is converted with a scalar fptosi. The result is
+; built as four <4 x i32> pieces: the first is returned and the other three
+; are stored through the three leading <4 x i32>* out-pointer arguments.
+define <16 x i32> @fptosi_cast_16xdouble_to_16xi32(<16 x double>) {
+ %2 = fptosi <16 x double> %0 to <16 x i32>
+ ret <16 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_16xdouble_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %13 = fptosi double %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> undef, i32 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %16 = fptosi double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %19 = fptosi double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %22 = fptosi double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %25 = fptosi double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> undef, i32 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %28 = fptosi double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %31 = fptosi double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 2
+; CHECK-NEXT: %33 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %34 = fptosi double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 3
+; CHECK-NEXT: %36 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %37 = fptosi double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> undef, i32 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %40 = fptosi double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %43 = fptosi double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 2
+; CHECK-NEXT: %45 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %46 = fptosi double %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 3
+; CHECK-NEXT: %48 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %49 = fptosi double %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> undef, i32 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %52 = fptosi double %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> %50, i32 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %55 = fptosi double %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 2
+; CHECK-NEXT: %57 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %58 = fptosi double %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 3
+; CHECK-NEXT: store <4 x i32> %35, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %47, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %59, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %23
+
+; Test input: signed float-to-int cast, <16 x double> -> <16 x i64>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, no scalarization is expected here: each
+; of the eight <2 x double> arguments is converted by one vector fptosi to
+; <2 x i64>. The first result is returned; the other seven are stored through
+; the seven leading <2 x i64>* out-pointer arguments.
+define <16 x i64> @fptosi_cast_16xdouble_to_16xi64(<16 x double>) {
+ %2 = fptosi <16 x double> %0 to <16 x i64>
+ ret <16 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_16xdouble_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %16 = fptosi <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %17 = fptosi <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %18 = fptosi <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %19 = fptosi <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %20 = fptosi <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %21 = fptosi <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %22 = fptosi <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %23 = fptosi <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %16
+
+; Test input: unsigned int-to-float cast, <16 x i8> -> <16 x float>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand stays a single <16 x i8>
+; argument and every lane is converted with a scalar uitofp. The result is
+; built as four <4 x float> pieces: the first is returned and the other three
+; are stored through the three leading <4 x float>* out-pointer arguments.
+define <16 x float> @uitofp_cast_16xi8_to_16xfloat(<16 x i8>) {
+ %2 = uitofp <16 x i8> %0 to <16 x float>
+ ret <16 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_16xi8_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = uitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = uitofp i8 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = uitofp i8 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = uitofp i8 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = uitofp i8 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = uitofp i8 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = uitofp i8 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 2
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = uitofp i8 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 3
+; CHECK-NEXT: %29 = extractelement <16 x i8> %3, i32 8
+; CHECK-NEXT: %30 = uitofp i8 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> undef, float %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %3, i32 9
+; CHECK-NEXT: %33 = uitofp i8 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> %31, float %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %3, i32 10
+; CHECK-NEXT: %36 = uitofp i8 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %3, i32 11
+; CHECK-NEXT: %39 = uitofp i8 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %3, i32 12
+; CHECK-NEXT: %42 = uitofp i8 %41 to float
+; CHECK-NEXT: %43 = insertelement <4 x float> undef, float %42, i32 0
+; CHECK-NEXT: %44 = extractelement <16 x i8> %3, i32 13
+; CHECK-NEXT: %45 = uitofp i8 %44 to float
+; CHECK-NEXT: %46 = insertelement <4 x float> %43, float %45, i32 1
+; CHECK-NEXT: %47 = extractelement <16 x i8> %3, i32 14
+; CHECK-NEXT: %48 = uitofp i8 %47 to float
+; CHECK-NEXT: %49 = insertelement <4 x float> %46, float %48, i32 2
+; CHECK-NEXT: %50 = extractelement <16 x i8> %3, i32 15
+; CHECK-NEXT: %51 = uitofp i8 %50 to float
+; CHECK-NEXT: %52 = insertelement <4 x float> %49, float %51, i32 3
+; CHECK-NEXT: store <4 x float> %28, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %40, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %52, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+; Test input: unsigned int-to-float cast, <16 x i16> -> <16 x float>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as two <8 x i16>
+; arguments and every lane is converted with a scalar uitofp. The result is
+; built as four <4 x float> pieces: the first is returned and the other three
+; are stored through the three leading <4 x float>* out-pointer arguments.
+define <16 x float> @uitofp_cast_16xi16_to_16xfloat(<16 x i16>) {
+ %2 = uitofp <16 x i16> %0 to <16 x float>
+ ret <16 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_16xi16_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %6 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %7 = uitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> undef, float %7, i32 0
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %10 = uitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 1
+; CHECK-NEXT: %12 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %13 = uitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 2
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %16 = uitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 3
+; CHECK-NEXT: %18 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %19 = uitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> undef, float %19, i32 0
+; CHECK-NEXT: %21 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %22 = uitofp i16 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 1
+; CHECK-NEXT: %24 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %25 = uitofp i16 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 2
+; CHECK-NEXT: %27 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %28 = uitofp i16 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 3
+; CHECK-NEXT: %30 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %31 = uitofp i16 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> undef, float %31, i32 0
+; CHECK-NEXT: %33 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %34 = uitofp i16 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 1
+; CHECK-NEXT: %36 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %37 = uitofp i16 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 2
+; CHECK-NEXT: %39 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %40 = uitofp i16 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 3
+; CHECK-NEXT: %42 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %43 = uitofp i16 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> undef, float %43, i32 0
+; CHECK-NEXT: %45 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %46 = uitofp i16 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 1
+; CHECK-NEXT: %48 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %49 = uitofp i16 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> %47, float %49, i32 2
+; CHECK-NEXT: %51 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %52 = uitofp i16 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> %50, float %52, i32 3
+; CHECK-NEXT: store <4 x float> %29, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %41, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %53, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %17
+
+; Test input: unsigned int-to-float cast, <16 x i32> -> <16 x float>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, no scalarization is expected here: each
+; of the four <4 x i32> arguments is converted by one vector uitofp to
+; <4 x float>. The first result is returned; the other three are stored
+; through the three leading <4 x float>* out-pointer arguments.
+define <16 x float> @uitofp_cast_16xi32_to_16xfloat(<16 x i32>) {
+ %2 = uitofp <16 x i32> %0 to <16 x float>
+ ret <16 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_16xi32_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = uitofp <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %9 = uitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %10 = uitofp <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %11 = uitofp <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %10, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %8
+
+; Test input: unsigned int-to-float cast, <16 x i64> -> <16 x float>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand arrives as eight <2 x i64>
+; arguments and every lane is converted with a scalar uitofp. The result is
+; built as four <4 x float> pieces: the first is returned and the other three
+; are stored through the three leading <4 x float>* out-pointer arguments.
+define <16 x float> @uitofp_cast_16xi64_to_16xfloat(<16 x i64>) {
+ %2 = uitofp <16 x i64> %0 to <16 x float>
+ ret <16 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_16xi64_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %13 = uitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> undef, float %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %16 = uitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %19 = uitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %22 = uitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %25 = uitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> undef, float %25, i32 0
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %28 = uitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 1
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %31 = uitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 2
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %34 = uitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 3
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %37 = uitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> undef, float %37, i32 0
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %40 = uitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 1
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %43 = uitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 2
+; CHECK-NEXT: %45 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %46 = uitofp i64 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 3
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %49 = uitofp i64 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> undef, float %49, i32 0
+; CHECK-NEXT: %51 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %52 = uitofp i64 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> %50, float %52, i32 1
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %55 = uitofp i64 %54 to float
+; CHECK-NEXT: %56 = insertelement <4 x float> %53, float %55, i32 2
+; CHECK-NEXT: %57 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %58 = uitofp i64 %57 to float
+; CHECK-NEXT: %59 = insertelement <4 x float> %56, float %58, i32 3
+; CHECK-NEXT: store <4 x float> %35, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %47, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %59, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %23
+
+; Test input: unsigned int-to-float cast, <16 x i8> -> <16 x double>.
+; %0 is the single unnamed argument; the cast result is %2.
+; Per the expectations that follow, the operand stays a single <16 x i8>
+; argument and every lane is converted with a scalar uitofp. The result is
+; built as eight <2 x double> pieces: the first is returned and the other
+; seven are stored through the seven leading <2 x double>* out-pointer
+; arguments.
+define <16 x double> @uitofp_cast_16xi8_to_16xdouble(<16 x i8>) {
+ %2 = uitofp <16 x i8> %0 to <16 x double>
+ ret <16 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_16xi8_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %9 = extractelement <16 x i8> %7, i32 0
+; CHECK-NEXT: %10 = uitofp i8 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %7, i32 1
+; CHECK-NEXT: %13 = uitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: %15 = extractelement <16 x i8> %7, i32 2
+; CHECK-NEXT: %16 = uitofp i8 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %7, i32 3
+; CHECK-NEXT: %19 = uitofp i8 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %7, i32 4
+; CHECK-NEXT: %22 = uitofp i8 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <16 x i8> %7, i32 5
+; CHECK-NEXT: %25 = uitofp i8 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <16 x i8> %7, i32 6
+; CHECK-NEXT: %28 = uitofp i8 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %7, i32 7
+; CHECK-NEXT: %31 = uitofp i8 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %7, i32 8
+; CHECK-NEXT: %34 = uitofp i8 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <16 x i8> %7, i32 9
+; CHECK-NEXT: %37 = uitofp i8 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <16 x i8> %7, i32 10
+; CHECK-NEXT: %40 = uitofp i8 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <16 x i8> %7, i32 11
+; CHECK-NEXT: %43 = uitofp i8 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: %45 = extractelement <16 x i8> %7, i32 12
+; CHECK-NEXT: %46 = uitofp i8 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> undef, double %46, i32 0
+; CHECK-NEXT: %48 = extractelement <16 x i8> %7, i32 13
+; CHECK-NEXT: %49 = uitofp i8 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> %47, double %49, i32 1
+; CHECK-NEXT: %51 = extractelement <16 x i8> %7, i32 14
+; CHECK-NEXT: %52 = uitofp i8 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> undef, double %52, i32 0
+; CHECK-NEXT: %54 = extractelement <16 x i8> %7, i32 15
+; CHECK-NEXT: %55 = uitofp i8 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> %53, double %55, i32 1
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %50, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %56, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %14
+
+define <16 x double> @uitofp_cast_16xi16_to_16xdouble(<16 x i16>) { ; test input: unsigned i16 -> double conversion over 16 lanes
+ %2 = uitofp <16 x i16> %0 to <16 x double> ; expected lowering: operand arrives as two <8 x i16> pieces; each lane is extracted, converted as a scalar and re-packed in pairs into <2 x double>
+ ret <16 x double> %2 ; expected lowering: the first <2 x double> pair is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_16xi16_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %10 = extractelement <8 x i16> %7, i32 0
+; CHECK-NEXT: %11 = uitofp i16 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %7, i32 1
+; CHECK-NEXT: %14 = uitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %7, i32 2
+; CHECK-NEXT: %17 = uitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %7, i32 3
+; CHECK-NEXT: %20 = uitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: %22 = extractelement <8 x i16> %7, i32 4
+; CHECK-NEXT: %23 = uitofp i16 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> undef, double %23, i32 0
+; CHECK-NEXT: %25 = extractelement <8 x i16> %7, i32 5
+; CHECK-NEXT: %26 = uitofp i16 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> %24, double %26, i32 1
+; CHECK-NEXT: %28 = extractelement <8 x i16> %7, i32 6
+; CHECK-NEXT: %29 = uitofp i16 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> undef, double %29, i32 0
+; CHECK-NEXT: %31 = extractelement <8 x i16> %7, i32 7
+; CHECK-NEXT: %32 = uitofp i16 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> %30, double %32, i32 1
+; CHECK-NEXT: %34 = extractelement <8 x i16> %8, i32 0
+; CHECK-NEXT: %35 = uitofp i16 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> undef, double %35, i32 0
+; CHECK-NEXT: %37 = extractelement <8 x i16> %8, i32 1
+; CHECK-NEXT: %38 = uitofp i16 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> %36, double %38, i32 1
+; CHECK-NEXT: %40 = extractelement <8 x i16> %8, i32 2
+; CHECK-NEXT: %41 = uitofp i16 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> undef, double %41, i32 0
+; CHECK-NEXT: %43 = extractelement <8 x i16> %8, i32 3
+; CHECK-NEXT: %44 = uitofp i16 %43 to double
+; CHECK-NEXT: %45 = insertelement <2 x double> %42, double %44, i32 1
+; CHECK-NEXT: %46 = extractelement <8 x i16> %8, i32 4
+; CHECK-NEXT: %47 = uitofp i16 %46 to double
+; CHECK-NEXT: %48 = insertelement <2 x double> undef, double %47, i32 0
+; CHECK-NEXT: %49 = extractelement <8 x i16> %8, i32 5
+; CHECK-NEXT: %50 = uitofp i16 %49 to double
+; CHECK-NEXT: %51 = insertelement <2 x double> %48, double %50, i32 1
+; CHECK-NEXT: %52 = extractelement <8 x i16> %8, i32 6
+; CHECK-NEXT: %53 = uitofp i16 %52 to double
+; CHECK-NEXT: %54 = insertelement <2 x double> undef, double %53, i32 0
+; CHECK-NEXT: %55 = extractelement <8 x i16> %8, i32 7
+; CHECK-NEXT: %56 = uitofp i16 %55 to double
+; CHECK-NEXT: %57 = insertelement <2 x double> %54, double %56, i32 1
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %33, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %39, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %45, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %51, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %57, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %15
+
+define <16 x double> @uitofp_cast_16xi32_to_16xdouble(<16 x i32>) { ; test input: unsigned i32 -> double conversion over 16 lanes
+ %2 = uitofp <16 x i32> %0 to <16 x double> ; expected lowering: operand arrives as four <4 x i32> pieces; each lane is extracted, converted as a scalar and re-packed in pairs into <2 x double>
+ ret <16 x double> %2 ; expected lowering: the first <2 x double> pair is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_16xi32_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %13 = uitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %16 = uitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %19 = uitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %22 = uitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %8, i32 0
+; CHECK-NEXT: %25 = uitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %8, i32 1
+; CHECK-NEXT: %28 = uitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x i32> %8, i32 2
+; CHECK-NEXT: %31 = uitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> undef, double %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x i32> %8, i32 3
+; CHECK-NEXT: %34 = uitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> %32, double %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %37 = uitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> undef, double %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %40 = uitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> %38, double %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %43 = uitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> undef, double %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %46 = uitofp i32 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> %44, double %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %49 = uitofp i32 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> undef, double %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %52 = uitofp i32 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> %50, double %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %55 = uitofp i32 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> undef, double %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %58 = uitofp i32 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> %56, double %58, i32 1
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %41, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %47, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %53, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %59, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %17
+
+define <16 x double> @uitofp_cast_16xi64_to_16xdouble(<16 x i64>) { ; test input: unsigned i64 -> double conversion over 16 lanes
+ %2 = uitofp <16 x i64> %0 to <16 x double> ; expected lowering: operand arrives as eight <2 x i64> pieces, each converted by one vector uitofp to <2 x double> (no per-lane scalarization)
+ ret <16 x double> %2 ; expected lowering: the first converted piece is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_16xi64_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %16 = uitofp <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %17 = uitofp <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %18 = uitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %19 = uitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %20 = uitofp <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %21 = uitofp <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %22 = uitofp <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %23 = uitofp <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %16
+
+define <16 x float> @sitofp_cast_16xi8_to_16xfloat(<16 x i8>) { ; test input: signed i8 -> float conversion over 16 lanes
+ %2 = sitofp <16 x i8> %0 to <16 x float> ; expected lowering: operand stays a single <16 x i8>; each lane is extracted, converted as a scalar and re-packed four at a time into <4 x float>
+ ret <16 x float> %2 ; expected lowering: the first <4 x float> group is returned, the other three are stored (align 16) through three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_16xi8_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = sitofp i8 %5 to float
+; CHECK-NEXT: %7 = insertelement <4 x float> undef, float %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = sitofp i8 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> %7, float %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = sitofp i8 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = sitofp i8 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = sitofp i8 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> undef, float %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = sitofp i8 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> %19, float %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = sitofp i8 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 2
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = sitofp i8 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 3
+; CHECK-NEXT: %29 = extractelement <16 x i8> %3, i32 8
+; CHECK-NEXT: %30 = sitofp i8 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> undef, float %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %3, i32 9
+; CHECK-NEXT: %33 = sitofp i8 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> %31, float %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %3, i32 10
+; CHECK-NEXT: %36 = sitofp i8 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %3, i32 11
+; CHECK-NEXT: %39 = sitofp i8 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %3, i32 12
+; CHECK-NEXT: %42 = sitofp i8 %41 to float
+; CHECK-NEXT: %43 = insertelement <4 x float> undef, float %42, i32 0
+; CHECK-NEXT: %44 = extractelement <16 x i8> %3, i32 13
+; CHECK-NEXT: %45 = sitofp i8 %44 to float
+; CHECK-NEXT: %46 = insertelement <4 x float> %43, float %45, i32 1
+; CHECK-NEXT: %47 = extractelement <16 x i8> %3, i32 14
+; CHECK-NEXT: %48 = sitofp i8 %47 to float
+; CHECK-NEXT: %49 = insertelement <4 x float> %46, float %48, i32 2
+; CHECK-NEXT: %50 = extractelement <16 x i8> %3, i32 15
+; CHECK-NEXT: %51 = sitofp i8 %50 to float
+; CHECK-NEXT: %52 = insertelement <4 x float> %49, float %51, i32 3
+; CHECK-NEXT: store <4 x float> %28, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %40, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %52, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %16
+
+define <16 x float> @sitofp_cast_16xi16_to_16xfloat(<16 x i16>) { ; test input: signed i16 -> float conversion over 16 lanes
+ %2 = sitofp <16 x i16> %0 to <16 x float> ; expected lowering: operand arrives as two <8 x i16> pieces; each lane is extracted, converted as a scalar and re-packed four at a time into <4 x float>
+ ret <16 x float> %2 ; expected lowering: the first <4 x float> group is returned, the other three are stored (align 16) through three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_16xi16_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %6 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %7 = sitofp i16 %6 to float
+; CHECK-NEXT: %8 = insertelement <4 x float> undef, float %7, i32 0
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %10 = sitofp i16 %9 to float
+; CHECK-NEXT: %11 = insertelement <4 x float> %8, float %10, i32 1
+; CHECK-NEXT: %12 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %13 = sitofp i16 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> %11, float %13, i32 2
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %16 = sitofp i16 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 3
+; CHECK-NEXT: %18 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %19 = sitofp i16 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> undef, float %19, i32 0
+; CHECK-NEXT: %21 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %22 = sitofp i16 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 1
+; CHECK-NEXT: %24 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %25 = sitofp i16 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 2
+; CHECK-NEXT: %27 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %28 = sitofp i16 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 3
+; CHECK-NEXT: %30 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %31 = sitofp i16 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> undef, float %31, i32 0
+; CHECK-NEXT: %33 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %34 = sitofp i16 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 1
+; CHECK-NEXT: %36 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %37 = sitofp i16 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 2
+; CHECK-NEXT: %39 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %40 = sitofp i16 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 3
+; CHECK-NEXT: %42 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %43 = sitofp i16 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> undef, float %43, i32 0
+; CHECK-NEXT: %45 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %46 = sitofp i16 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 1
+; CHECK-NEXT: %48 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %49 = sitofp i16 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> %47, float %49, i32 2
+; CHECK-NEXT: %51 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %52 = sitofp i16 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> %50, float %52, i32 3
+; CHECK-NEXT: store <4 x float> %29, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %41, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %53, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %17
+
+define <16 x float> @sitofp_cast_16xi32_to_16xfloat(<16 x i32>) { ; test input: signed i32 -> float conversion over 16 lanes
+ %2 = sitofp <16 x i32> %0 to <16 x float> ; expected lowering: operand arrives as four <4 x i32> pieces, each converted by one vector sitofp to <4 x float> (no per-lane scalarization)
+ ret <16 x float> %2 ; expected lowering: the first converted piece is returned, the other three are stored (align 16) through three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_16xi32_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = sitofp <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %9 = sitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %10 = sitofp <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %11 = sitofp <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %10, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %8
+
+define <16 x float> @sitofp_cast_16xi64_to_16xfloat(<16 x i64>) { ; test input: signed i64 -> float conversion over 16 lanes
+ %2 = sitofp <16 x i64> %0 to <16 x float> ; expected lowering: operand arrives as eight <2 x i64> pieces; lanes are converted as scalars and each <4 x float> result is assembled from two consecutive input pieces
+ ret <16 x float> %2 ; expected lowering: the first <4 x float> group is returned, the other three are stored (align 16) through three leading pointer parameters
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_16xi64_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %13 = sitofp i64 %12 to float
+; CHECK-NEXT: %14 = insertelement <4 x float> undef, float %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %16 = sitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> %14, float %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %19 = sitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %22 = sitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %25 = sitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> undef, float %25, i32 0
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %28 = sitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> %26, float %28, i32 1
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %31 = sitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 2
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %34 = sitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 3
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %37 = sitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> undef, float %37, i32 0
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %40 = sitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> %38, float %40, i32 1
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %43 = sitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 2
+; CHECK-NEXT: %45 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %46 = sitofp i64 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 3
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %49 = sitofp i64 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> undef, float %49, i32 0
+; CHECK-NEXT: %51 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %52 = sitofp i64 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> %50, float %52, i32 1
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %55 = sitofp i64 %54 to float
+; CHECK-NEXT: %56 = insertelement <4 x float> %53, float %55, i32 2
+; CHECK-NEXT: %57 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %58 = sitofp i64 %57 to float
+; CHECK-NEXT: %59 = insertelement <4 x float> %56, float %58, i32 3
+; CHECK-NEXT: store <4 x float> %35, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %47, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %59, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %23
+
+define <16 x double> @sitofp_cast_16xi8_to_16xdouble(<16 x i8>) { ; test input: signed i8 -> double conversion over 16 lanes
+ %2 = sitofp <16 x i8> %0 to <16 x double> ; expected lowering: operand stays a single <16 x i8>; each lane is extracted, converted as a scalar and re-packed in pairs into <2 x double>
+ ret <16 x double> %2 ; expected lowering: the first <2 x double> pair is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_16xi8_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %9 = extractelement <16 x i8> %7, i32 0
+; CHECK-NEXT: %10 = sitofp i8 %9 to double
+; CHECK-NEXT: %11 = insertelement <2 x double> undef, double %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %7, i32 1
+; CHECK-NEXT: %13 = sitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> %11, double %13, i32 1
+; CHECK-NEXT: %15 = extractelement <16 x i8> %7, i32 2
+; CHECK-NEXT: %16 = sitofp i8 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %7, i32 3
+; CHECK-NEXT: %19 = sitofp i8 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %7, i32 4
+; CHECK-NEXT: %22 = sitofp i8 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <16 x i8> %7, i32 5
+; CHECK-NEXT: %25 = sitofp i8 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <16 x i8> %7, i32 6
+; CHECK-NEXT: %28 = sitofp i8 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %7, i32 7
+; CHECK-NEXT: %31 = sitofp i8 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %7, i32 8
+; CHECK-NEXT: %34 = sitofp i8 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <16 x i8> %7, i32 9
+; CHECK-NEXT: %37 = sitofp i8 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <16 x i8> %7, i32 10
+; CHECK-NEXT: %40 = sitofp i8 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <16 x i8> %7, i32 11
+; CHECK-NEXT: %43 = sitofp i8 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: %45 = extractelement <16 x i8> %7, i32 12
+; CHECK-NEXT: %46 = sitofp i8 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> undef, double %46, i32 0
+; CHECK-NEXT: %48 = extractelement <16 x i8> %7, i32 13
+; CHECK-NEXT: %49 = sitofp i8 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> %47, double %49, i32 1
+; CHECK-NEXT: %51 = extractelement <16 x i8> %7, i32 14
+; CHECK-NEXT: %52 = sitofp i8 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> undef, double %52, i32 0
+; CHECK-NEXT: %54 = extractelement <16 x i8> %7, i32 15
+; CHECK-NEXT: %55 = sitofp i8 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> %53, double %55, i32 1
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %50, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %56, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %14
+
+define <16 x double> @sitofp_cast_16xi16_to_16xdouble(<16 x i16>) { ; test input: signed i16 -> double conversion over 16 lanes
+ %2 = sitofp <16 x i16> %0 to <16 x double> ; expected lowering: operand arrives as two <8 x i16> pieces; each lane is extracted, converted as a scalar and re-packed in pairs into <2 x double>
+ ret <16 x double> %2 ; expected lowering: the first <2 x double> pair is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_16xi16_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %10 = extractelement <8 x i16> %7, i32 0
+; CHECK-NEXT: %11 = sitofp i16 %10 to double
+; CHECK-NEXT: %12 = insertelement <2 x double> undef, double %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %7, i32 1
+; CHECK-NEXT: %14 = sitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> %12, double %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %7, i32 2
+; CHECK-NEXT: %17 = sitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> undef, double %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %7, i32 3
+; CHECK-NEXT: %20 = sitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> %18, double %20, i32 1
+; CHECK-NEXT: %22 = extractelement <8 x i16> %7, i32 4
+; CHECK-NEXT: %23 = sitofp i16 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> undef, double %23, i32 0
+; CHECK-NEXT: %25 = extractelement <8 x i16> %7, i32 5
+; CHECK-NEXT: %26 = sitofp i16 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> %24, double %26, i32 1
+; CHECK-NEXT: %28 = extractelement <8 x i16> %7, i32 6
+; CHECK-NEXT: %29 = sitofp i16 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> undef, double %29, i32 0
+; CHECK-NEXT: %31 = extractelement <8 x i16> %7, i32 7
+; CHECK-NEXT: %32 = sitofp i16 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> %30, double %32, i32 1
+; CHECK-NEXT: %34 = extractelement <8 x i16> %8, i32 0
+; CHECK-NEXT: %35 = sitofp i16 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> undef, double %35, i32 0
+; CHECK-NEXT: %37 = extractelement <8 x i16> %8, i32 1
+; CHECK-NEXT: %38 = sitofp i16 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> %36, double %38, i32 1
+; CHECK-NEXT: %40 = extractelement <8 x i16> %8, i32 2
+; CHECK-NEXT: %41 = sitofp i16 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> undef, double %41, i32 0
+; CHECK-NEXT: %43 = extractelement <8 x i16> %8, i32 3
+; CHECK-NEXT: %44 = sitofp i16 %43 to double
+; CHECK-NEXT: %45 = insertelement <2 x double> %42, double %44, i32 1
+; CHECK-NEXT: %46 = extractelement <8 x i16> %8, i32 4
+; CHECK-NEXT: %47 = sitofp i16 %46 to double
+; CHECK-NEXT: %48 = insertelement <2 x double> undef, double %47, i32 0
+; CHECK-NEXT: %49 = extractelement <8 x i16> %8, i32 5
+; CHECK-NEXT: %50 = sitofp i16 %49 to double
+; CHECK-NEXT: %51 = insertelement <2 x double> %48, double %50, i32 1
+; CHECK-NEXT: %52 = extractelement <8 x i16> %8, i32 6
+; CHECK-NEXT: %53 = sitofp i16 %52 to double
+; CHECK-NEXT: %54 = insertelement <2 x double> undef, double %53, i32 0
+; CHECK-NEXT: %55 = extractelement <8 x i16> %8, i32 7
+; CHECK-NEXT: %56 = sitofp i16 %55 to double
+; CHECK-NEXT: %57 = insertelement <2 x double> %54, double %56, i32 1
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %33, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %39, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %45, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %51, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %57, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %15
+
+define <16 x double> @sitofp_cast_16xi32_to_16xdouble(<16 x i32>) { ; test input: signed i32 -> double conversion over 16 lanes
+ %2 = sitofp <16 x i32> %0 to <16 x double> ; expected lowering: operand arrives as four <4 x i32> pieces; each lane is extracted, converted as a scalar and re-packed in pairs into <2 x double>
+ ret <16 x double> %2 ; expected lowering: the first <2 x double> pair is returned, the other seven are stored (align 16) through seven leading pointer parameters
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_16xi32_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %13 = sitofp i32 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %16 = sitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %19 = sitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %22 = sitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %8, i32 0
+; CHECK-NEXT: %25 = sitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %8, i32 1
+; CHECK-NEXT: %28 = sitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x i32> %8, i32 2
+; CHECK-NEXT: %31 = sitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> undef, double %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x i32> %8, i32 3
+; CHECK-NEXT: %34 = sitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> %32, double %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %37 = sitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> undef, double %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %40 = sitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> %38, double %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %43 = sitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> undef, double %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %46 = sitofp i32 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> %44, double %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %49 = sitofp i32 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> undef, double %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %52 = sitofp i32 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> %50, double %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %55 = sitofp i32 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> undef, double %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %58 = sitofp i32 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> %56, double %58, i32 1
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %41, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %47, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %53, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %59, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %17
+
+define <16 x double> @sitofp_cast_16xi64_to_16xdouble(<16 x i64>) { ; test input: one <16 x i64> argument, <16 x double> result
+ %2 = sitofp <16 x i64> %0 to <16 x double> ; a single signed-int-to-double cast over all 16 lanes
+ ret <16 x double> %2 ; expected output below: eight whole-vector <2 x i64> -> <2 x double> casts; the first is returned, the other seven are stored through the seven leading pointer arguments
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_16xi64_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %16 = sitofp <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %17 = sitofp <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %18 = sitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %19 = sitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %20 = sitofp <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %21 = sitofp <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %22 = sitofp <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %23 = sitofp <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %16
+
+define <20 x i8> @fptoui_cast_20xfloat_to_20xi8(<20 x float>) { ; test input: one <20 x float> argument, <20 x i8> result
+ %2 = fptoui <20 x float> %0 to <20 x i8> ; a single float-to-unsigned-int cast over all 20 lanes
+ ret <20 x i8> %2 ; expected output below: lane-by-lane extractelement / fptoui / insertelement over five <4 x float> arguments; lanes 0-15 are returned as one <16 x i8>, lanes 16-19 fill lanes 0-3 of a second <16 x i8> stored through the leading pointer argument
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_20xfloat_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %7 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %8 = fptoui float %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %11 = fptoui float %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %14 = fptoui float %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %17 = fptoui float %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %20 = fptoui float %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %23 = fptoui float %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %26 = fptoui float %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %29 = fptoui float %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %32 = fptoui float %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %35 = fptoui float %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %38 = fptoui float %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %41 = fptoui float %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: %43 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %44 = fptoui float %43 to i8
+; CHECK-NEXT: %45 = insertelement <16 x i8> %42, i8 %44, i32 12
+; CHECK-NEXT: %46 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %47 = fptoui float %46 to i8
+; CHECK-NEXT: %48 = insertelement <16 x i8> %45, i8 %47, i32 13
+; CHECK-NEXT: %49 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %50 = fptoui float %49 to i8
+; CHECK-NEXT: %51 = insertelement <16 x i8> %48, i8 %50, i32 14
+; CHECK-NEXT: %52 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %53 = fptoui float %52 to i8
+; CHECK-NEXT: %54 = insertelement <16 x i8> %51, i8 %53, i32 15
+; CHECK-NEXT: %55 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %56 = fptoui float %55 to i8
+; CHECK-NEXT: %57 = insertelement <16 x i8> undef, i8 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %59 = fptoui float %58 to i8
+; CHECK-NEXT: %60 = insertelement <16 x i8> %57, i8 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %62 = fptoui float %61 to i8
+; CHECK-NEXT: %63 = insertelement <16 x i8> %60, i8 %62, i32 2
+; CHECK-NEXT: %64 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %65 = fptoui float %64 to i8
+; CHECK-NEXT: %66 = insertelement <16 x i8> %63, i8 %65, i32 3
+; CHECK-NEXT: store <16 x i8> %66, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %54
+
+define <20 x i16> @fptoui_cast_20xfloat_to_20xi16(<20 x float>) { ; test input: one <20 x float> argument, <20 x i16> result
+ %2 = fptoui <20 x float> %0 to <20 x i16> ; a single float-to-unsigned-int cast over all 20 lanes
+ ret <20 x i16> %2 ; expected output below: lane-by-lane conversion of five <4 x float> arguments into three <8 x i16> values; lanes 0-7 are returned, lanes 8-15 and lanes 16-19 (in lanes 0-3 of the third value) are stored through the two leading pointer arguments
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_20xfloat_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %9 = fptoui float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %12 = fptoui float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %15 = fptoui float %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %18 = fptoui float %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %21 = fptoui float %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %24 = fptoui float %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %27 = fptoui float %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %30 = fptoui float %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %33 = fptoui float %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %36 = fptoui float %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %39 = fptoui float %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %42 = fptoui float %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: %44 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %45 = fptoui float %44 to i16
+; CHECK-NEXT: %46 = insertelement <8 x i16> %43, i16 %45, i32 4
+; CHECK-NEXT: %47 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %48 = fptoui float %47 to i16
+; CHECK-NEXT: %49 = insertelement <8 x i16> %46, i16 %48, i32 5
+; CHECK-NEXT: %50 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %51 = fptoui float %50 to i16
+; CHECK-NEXT: %52 = insertelement <8 x i16> %49, i16 %51, i32 6
+; CHECK-NEXT: %53 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %54 = fptoui float %53 to i16
+; CHECK-NEXT: %55 = insertelement <8 x i16> %52, i16 %54, i32 7
+; CHECK-NEXT: %56 = extractelement <4 x float> %6, i32 0
+; CHECK-NEXT: %57 = fptoui float %56 to i16
+; CHECK-NEXT: %58 = insertelement <8 x i16> undef, i16 %57, i32 0
+; CHECK-NEXT: %59 = extractelement <4 x float> %6, i32 1
+; CHECK-NEXT: %60 = fptoui float %59 to i16
+; CHECK-NEXT: %61 = insertelement <8 x i16> %58, i16 %60, i32 1
+; CHECK-NEXT: %62 = extractelement <4 x float> %6, i32 2
+; CHECK-NEXT: %63 = fptoui float %62 to i16
+; CHECK-NEXT: %64 = insertelement <8 x i16> %61, i16 %63, i32 2
+; CHECK-NEXT: %65 = extractelement <4 x float> %6, i32 3
+; CHECK-NEXT: %66 = fptoui float %65 to i16
+; CHECK-NEXT: %67 = insertelement <8 x i16> %64, i16 %66, i32 3
+; CHECK-NEXT: store <8 x i16> %55, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %67, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+define <20 x i32> @fptoui_cast_20xfloat_to_20xi32(<20 x float>) { ; test input: one <20 x float> argument, <20 x i32> result
+ %2 = fptoui <20 x float> %0 to <20 x i32> ; a single float-to-unsigned-int cast over all 20 lanes
+ ret <20 x i32> %2 ; expected output below: five whole-vector <4 x float> -> <4 x i32> casts; the first is returned, the other four are stored through the four leading pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_20xfloat_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %10 = fptoui <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %11 = fptoui <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %12 = fptoui <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: %13 = fptoui <4 x float> %7 to <4 x i32>
+; CHECK-NEXT: %14 = fptoui <4 x float> %8 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+define <20 x i64> @fptoui_cast_20xfloat_to_20xi64(<20 x float>) { ; test input: one <20 x float> argument, <20 x i64> result
+ %2 = fptoui <20 x float> %0 to <20 x i64> ; a single float-to-unsigned-int cast over all 20 lanes
+ ret <20 x i64> %2 ; expected output below: lane-by-lane conversion where each of the five <4 x float> arguments feeds two <2 x i64> values; the first of the ten is returned, the other nine are stored through the nine leading pointer arguments
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_20xfloat_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %15 = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT: %16 = fptoui float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x float> %9, i32 1
+; CHECK-NEXT: %19 = fptoui float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x float> %9, i32 2
+; CHECK-NEXT: %22 = fptoui float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x float> %9, i32 3
+; CHECK-NEXT: %25 = fptoui float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x float> %10, i32 0
+; CHECK-NEXT: %28 = fptoui float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x float> %10, i32 1
+; CHECK-NEXT: %31 = fptoui float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x float> %10, i32 2
+; CHECK-NEXT: %34 = fptoui float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x float> %10, i32 3
+; CHECK-NEXT: %37 = fptoui float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x float> %11, i32 0
+; CHECK-NEXT: %40 = fptoui float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x float> %11, i32 1
+; CHECK-NEXT: %43 = fptoui float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x float> %11, i32 2
+; CHECK-NEXT: %46 = fptoui float %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x float> %11, i32 3
+; CHECK-NEXT: %49 = fptoui float %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x float> %12, i32 0
+; CHECK-NEXT: %52 = fptoui float %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x float> %12, i32 1
+; CHECK-NEXT: %55 = fptoui float %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x float> %12, i32 2
+; CHECK-NEXT: %58 = fptoui float %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> undef, i64 %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x float> %12, i32 3
+; CHECK-NEXT: %61 = fptoui float %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> %59, i64 %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x float> %13, i32 0
+; CHECK-NEXT: %64 = fptoui float %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> undef, i64 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x float> %13, i32 1
+; CHECK-NEXT: %67 = fptoui float %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> %65, i64 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x float> %13, i32 2
+; CHECK-NEXT: %70 = fptoui float %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> undef, i64 %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x float> %13, i32 3
+; CHECK-NEXT: %73 = fptoui float %72 to i64
+; CHECK-NEXT: %74 = insertelement <2 x i64> %71, i64 %73, i32 1
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %62, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %68, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %74, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+define <20 x i8> @fptoui_cast_20xdouble_to_20xi8(<20 x double>) { ; test input: one <20 x double> argument, <20 x i8> result
+ %2 = fptoui <20 x double> %0 to <20 x i8> ; a single double-to-unsigned-int cast over all 20 lanes
+ ret <20 x i8> %2 ; expected output below: lane-by-lane conversion of ten <2 x double> arguments; lanes 0-15 are returned as one <16 x i8>, lanes 16-19 fill lanes 0-3 of a second <16 x i8> stored through the leading pointer argument
+}
+; CHECK-LABEL: define <16 x i8> @fptoui_cast_20xdouble_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %13 = fptoui double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> undef, i8 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %16 = fptoui double %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %19 = fptoui double %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %22 = fptoui double %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %25 = fptoui double %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 4
+; CHECK-NEXT: %27 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %28 = fptoui double %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 5
+; CHECK-NEXT: %30 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %31 = fptoui double %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 6
+; CHECK-NEXT: %33 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %34 = fptoui double %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 7
+; CHECK-NEXT: %36 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %37 = fptoui double %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 8
+; CHECK-NEXT: %39 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %40 = fptoui double %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 9
+; CHECK-NEXT: %42 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %43 = fptoui double %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 10
+; CHECK-NEXT: %45 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %46 = fptoui double %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 11
+; CHECK-NEXT: %48 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %49 = fptoui double %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 12
+; CHECK-NEXT: %51 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %52 = fptoui double %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 13
+; CHECK-NEXT: %54 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %55 = fptoui double %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 14
+; CHECK-NEXT: %57 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %58 = fptoui double %57 to i8
+; CHECK-NEXT: %59 = insertelement <16 x i8> %56, i8 %58, i32 15
+; CHECK-NEXT: %60 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %61 = fptoui double %60 to i8
+; CHECK-NEXT: %62 = insertelement <16 x i8> undef, i8 %61, i32 0
+; CHECK-NEXT: %63 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %64 = fptoui double %63 to i8
+; CHECK-NEXT: %65 = insertelement <16 x i8> %62, i8 %64, i32 1
+; CHECK-NEXT: %66 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %67 = fptoui double %66 to i8
+; CHECK-NEXT: %68 = insertelement <16 x i8> %65, i8 %67, i32 2
+; CHECK-NEXT: %69 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %70 = fptoui double %69 to i8
+; CHECK-NEXT: %71 = insertelement <16 x i8> %68, i8 %70, i32 3
+; CHECK-NEXT: store <16 x i8> %71, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %59
+
+define <20 x i16> @fptoui_cast_20xdouble_to_20xi16(<20 x double>) { ; test input: one <20 x double> argument, <20 x i16> result
+ %2 = fptoui <20 x double> %0 to <20 x i16> ; a single double-to-unsigned-int cast over all 20 lanes
+ ret <20 x i16> %2 ; expected output below: lane-by-lane conversion of ten <2 x double> arguments into three <8 x i16> values; lanes 0-7 are returned, lanes 8-15 and lanes 16-19 (in lanes 0-3 of the third value) are stored through the two leading pointer arguments
+}
+; CHECK-LABEL: define <8 x i16> @fptoui_cast_20xdouble_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %13 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %14 = fptoui double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> undef, i16 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %17 = fptoui double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %20 = fptoui double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 2
+; CHECK-NEXT: %22 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %23 = fptoui double %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 3
+; CHECK-NEXT: %25 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %26 = fptoui double %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 4
+; CHECK-NEXT: %28 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %29 = fptoui double %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 5
+; CHECK-NEXT: %31 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %32 = fptoui double %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 6
+; CHECK-NEXT: %34 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %35 = fptoui double %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> %33, i16 %35, i32 7
+; CHECK-NEXT: %37 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %38 = fptoui double %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> undef, i16 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %41 = fptoui double %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 1
+; CHECK-NEXT: %43 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %44 = fptoui double %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 2
+; CHECK-NEXT: %46 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %47 = fptoui double %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 3
+; CHECK-NEXT: %49 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %50 = fptoui double %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 4
+; CHECK-NEXT: %52 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %53 = fptoui double %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 5
+; CHECK-NEXT: %55 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %56 = fptoui double %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 6
+; CHECK-NEXT: %58 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %59 = fptoui double %58 to i16
+; CHECK-NEXT: %60 = insertelement <8 x i16> %57, i16 %59, i32 7
+; CHECK-NEXT: %61 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %62 = fptoui double %61 to i16
+; CHECK-NEXT: %63 = insertelement <8 x i16> undef, i16 %62, i32 0
+; CHECK-NEXT: %64 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %65 = fptoui double %64 to i16
+; CHECK-NEXT: %66 = insertelement <8 x i16> %63, i16 %65, i32 1
+; CHECK-NEXT: %67 = extractelement <2 x double> %11, i32 0
+; CHECK-NEXT: %68 = fptoui double %67 to i16
+; CHECK-NEXT: %69 = insertelement <8 x i16> %66, i16 %68, i32 2
+; CHECK-NEXT: %70 = extractelement <2 x double> %11, i32 1
+; CHECK-NEXT: %71 = fptoui double %70 to i16
+; CHECK-NEXT: %72 = insertelement <8 x i16> %69, i16 %71, i32 3
+; CHECK-NEXT: store <8 x i16> %60, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %72, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %36
+
+define <20 x i32> @fptoui_cast_20xdouble_to_20xi32(<20 x double>) { ; test input: one <20 x double> argument, <20 x i32> result
+ %2 = fptoui <20 x double> %0 to <20 x i32> ; a single double-to-unsigned-int cast over all 20 lanes
+ ret <20 x i32> %2 ; expected output below: lane-by-lane conversion where each pair of <2 x double> arguments fills one <4 x i32>; the first of the five is returned, the other four are stored through the four leading pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @fptoui_cast_20xdouble_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %15 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %16 = fptoui double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %19 = fptoui double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %22 = fptoui double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %25 = fptoui double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: %27 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %28 = fptoui double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> undef, i32 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %31 = fptoui double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %34 = fptoui double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %37 = fptoui double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 3
+; CHECK-NEXT: %39 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %40 = fptoui double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> undef, i32 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %43 = fptoui double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %46 = fptoui double %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 2
+; CHECK-NEXT: %48 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %49 = fptoui double %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> %47, i32 %49, i32 3
+; CHECK-NEXT: %51 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %52 = fptoui double %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> undef, i32 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %55 = fptoui double %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <2 x double> %11, i32 0
+; CHECK-NEXT: %58 = fptoui double %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 2
+; CHECK-NEXT: %60 = extractelement <2 x double> %11, i32 1
+; CHECK-NEXT: %61 = fptoui double %60 to i32
+; CHECK-NEXT: %62 = insertelement <4 x i32> %59, i32 %61, i32 3
+; CHECK-NEXT: %63 = extractelement <2 x double> %12, i32 0
+; CHECK-NEXT: %64 = fptoui double %63 to i32
+; CHECK-NEXT: %65 = insertelement <4 x i32> undef, i32 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <2 x double> %12, i32 1
+; CHECK-NEXT: %67 = fptoui double %66 to i32
+; CHECK-NEXT: %68 = insertelement <4 x i32> %65, i32 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <2 x double> %13, i32 0
+; CHECK-NEXT: %70 = fptoui double %69 to i32
+; CHECK-NEXT: %71 = insertelement <4 x i32> %68, i32 %70, i32 2
+; CHECK-NEXT: %72 = extractelement <2 x double> %13, i32 1
+; CHECK-NEXT: %73 = fptoui double %72 to i32
+; CHECK-NEXT: %74 = insertelement <4 x i32> %71, i32 %73, i32 3
+; CHECK-NEXT: store <4 x i32> %38, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %50, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %62, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %74, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %26
+
+define <20 x i64> @fptoui_cast_20xdouble_to_20xi64(<20 x double>) { ; test input: one <20 x double> argument, <20 x i64> result
+ %2 = fptoui <20 x double> %0 to <20 x i64> ; a single double-to-unsigned-int cast over all 20 lanes
+ ret <20 x i64> %2 ; expected output below: ten whole-vector <2 x double> -> <2 x i64> casts; the first is returned, the other nine are stored through the nine leading pointer arguments
+}
+; CHECK-LABEL: define <2 x i64> @fptoui_cast_20xdouble_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %20 = fptoui <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %21 = fptoui <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %22 = fptoui <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %23 = fptoui <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %24 = fptoui <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %25 = fptoui <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: %26 = fptoui <2 x double> %15 to <2 x i64>
+; CHECK-NEXT: %27 = fptoui <2 x double> %16 to <2 x i64>
+; CHECK-NEXT: %28 = fptoui <2 x double> %17 to <2 x i64>
+; CHECK-NEXT: %29 = fptoui <2 x double> %18 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+define <20 x i8> @fptosi_cast_20xfloat_to_20xi8(<20 x float>) { ; test input: one <20 x float> argument, <20 x i8> result
+ %2 = fptosi <20 x float> %0 to <20 x i8> ; a single float-to-signed-int cast over all 20 lanes
+ ret <20 x i8> %2 ; expected output below: lane-by-lane extractelement / fptosi / insertelement over five <4 x float> arguments; lanes 0-15 are returned as one <16 x i8>, lanes 16-19 fill lanes 0-3 of a second <16 x i8> stored through the leading pointer argument
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_20xfloat_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %7 = extractelement <4 x float> %1, i32 0
+; CHECK-NEXT: %8 = fptosi float %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <4 x float> %1, i32 1
+; CHECK-NEXT: %11 = fptosi float %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <4 x float> %1, i32 2
+; CHECK-NEXT: %14 = fptosi float %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <4 x float> %1, i32 3
+; CHECK-NEXT: %17 = fptosi float %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %20 = fptosi float %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %23 = fptosi float %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %26 = fptosi float %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %29 = fptosi float %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %32 = fptosi float %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %35 = fptosi float %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %38 = fptosi float %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %41 = fptosi float %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: %43 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %44 = fptosi float %43 to i8
+; CHECK-NEXT: %45 = insertelement <16 x i8> %42, i8 %44, i32 12
+; CHECK-NEXT: %46 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %47 = fptosi float %46 to i8
+; CHECK-NEXT: %48 = insertelement <16 x i8> %45, i8 %47, i32 13
+; CHECK-NEXT: %49 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %50 = fptosi float %49 to i8
+; CHECK-NEXT: %51 = insertelement <16 x i8> %48, i8 %50, i32 14
+; CHECK-NEXT: %52 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %53 = fptosi float %52 to i8
+; CHECK-NEXT: %54 = insertelement <16 x i8> %51, i8 %53, i32 15
+; CHECK-NEXT: %55 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %56 = fptosi float %55 to i8
+; CHECK-NEXT: %57 = insertelement <16 x i8> undef, i8 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %59 = fptosi float %58 to i8
+; CHECK-NEXT: %60 = insertelement <16 x i8> %57, i8 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %62 = fptosi float %61 to i8
+; CHECK-NEXT: %63 = insertelement <16 x i8> %60, i8 %62, i32 2
+; CHECK-NEXT: %64 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %65 = fptosi float %64 to i8
+; CHECK-NEXT: %66 = insertelement <16 x i8> %63, i8 %65, i32 3
+; CHECK-NEXT: store <16 x i8> %66, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %54
+
+; fptosi <20 x float> -> <20 x i16>.
+; Expected result: the argument arrives as five <4 x float> values (%2-%6) and
+; the result is rebuilt as three <8 x i16> chunks, one scalar lane at a time
+; (extractelement / fptosi / insertelement). The first chunk (%31) is returned;
+; the second (%55) and the third (%67, only lanes 0-3 written) are stored
+; through the two leading pointer parameters %0 and %1.
+define <20 x i16> @fptosi_cast_20xfloat_to_20xi16(<20 x float>) {
+ %2 = fptosi <20 x float> %0 to <20 x i16>
+ ret <20 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_20xfloat_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %8 = extractelement <4 x float> %2, i32 0
+; CHECK-NEXT: %9 = fptosi float %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <4 x float> %2, i32 1
+; CHECK-NEXT: %12 = fptosi float %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <4 x float> %2, i32 2
+; CHECK-NEXT: %15 = fptosi float %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <4 x float> %2, i32 3
+; CHECK-NEXT: %18 = fptosi float %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT: %21 = fptosi float %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <4 x float> %3, i32 1
+; CHECK-NEXT: %24 = fptosi float %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <4 x float> %3, i32 2
+; CHECK-NEXT: %27 = fptosi float %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <4 x float> %3, i32 3
+; CHECK-NEXT: %30 = fptosi float %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <4 x float> %4, i32 0
+; CHECK-NEXT: %33 = fptosi float %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <4 x float> %4, i32 1
+; CHECK-NEXT: %36 = fptosi float %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <4 x float> %4, i32 2
+; CHECK-NEXT: %39 = fptosi float %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <4 x float> %4, i32 3
+; CHECK-NEXT: %42 = fptosi float %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: %44 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT: %45 = fptosi float %44 to i16
+; CHECK-NEXT: %46 = insertelement <8 x i16> %43, i16 %45, i32 4
+; CHECK-NEXT: %47 = extractelement <4 x float> %5, i32 1
+; CHECK-NEXT: %48 = fptosi float %47 to i16
+; CHECK-NEXT: %49 = insertelement <8 x i16> %46, i16 %48, i32 5
+; CHECK-NEXT: %50 = extractelement <4 x float> %5, i32 2
+; CHECK-NEXT: %51 = fptosi float %50 to i16
+; CHECK-NEXT: %52 = insertelement <8 x i16> %49, i16 %51, i32 6
+; CHECK-NEXT: %53 = extractelement <4 x float> %5, i32 3
+; CHECK-NEXT: %54 = fptosi float %53 to i16
+; CHECK-NEXT: %55 = insertelement <8 x i16> %52, i16 %54, i32 7
+; CHECK-NEXT: %56 = extractelement <4 x float> %6, i32 0
+; CHECK-NEXT: %57 = fptosi float %56 to i16
+; CHECK-NEXT: %58 = insertelement <8 x i16> undef, i16 %57, i32 0
+; CHECK-NEXT: %59 = extractelement <4 x float> %6, i32 1
+; CHECK-NEXT: %60 = fptosi float %59 to i16
+; CHECK-NEXT: %61 = insertelement <8 x i16> %58, i16 %60, i32 1
+; CHECK-NEXT: %62 = extractelement <4 x float> %6, i32 2
+; CHECK-NEXT: %63 = fptosi float %62 to i16
+; CHECK-NEXT: %64 = insertelement <8 x i16> %61, i16 %63, i32 2
+; CHECK-NEXT: %65 = extractelement <4 x float> %6, i32 3
+; CHECK-NEXT: %66 = fptosi float %65 to i16
+; CHECK-NEXT: %67 = insertelement <8 x i16> %64, i16 %66, i32 3
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <8 x i16> %55, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %67, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+; fptosi <20 x float> -> <20 x i32>.
+; Expected result: source and destination chunks both hold four lanes, so each
+; <4 x float> argument (%4-%8) is converted by a single vector fptosi with no
+; per-lane extract/insert. The first result (%10) is returned; the other four
+; (%11-%14) are stored through the leading pointer parameters %0-%3.
+define <20 x i32> @fptosi_cast_20xfloat_to_20xi32(<20 x float>) {
+ %2 = fptosi <20 x float> %0 to <20 x i32>
+ ret <20 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_20xfloat_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %10 = fptosi <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %11 = fptosi <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %12 = fptosi <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: %13 = fptosi <4 x float> %7 to <4 x i32>
+; CHECK-NEXT: %14 = fptosi <4 x float> %8 to <4 x i32>
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+; fptosi <20 x float> -> <20 x i64>.
+; Expected result: the argument arrives as five <4 x float> values (%9-%13) and
+; the result is rebuilt as ten <2 x i64> chunks, one scalar lane at a time
+; (extractelement / fptosi / insertelement), so every <4 x float> feeds two
+; result chunks. The first chunk (%20) is returned; the remaining nine
+; (%26, %32, ..., %74) are stored through the leading pointer parameters %0-%8.
+define <20 x i64> @fptosi_cast_20xfloat_to_20xi64(<20 x float>) {
+ %2 = fptosi <20 x float> %0 to <20 x i64>
+ ret <20 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_20xfloat_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %15 = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT: %16 = fptosi float %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x float> %9, i32 1
+; CHECK-NEXT: %19 = fptosi float %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x float> %9, i32 2
+; CHECK-NEXT: %22 = fptosi float %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x float> %9, i32 3
+; CHECK-NEXT: %25 = fptosi float %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x float> %10, i32 0
+; CHECK-NEXT: %28 = fptosi float %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x float> %10, i32 1
+; CHECK-NEXT: %31 = fptosi float %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x float> %10, i32 2
+; CHECK-NEXT: %34 = fptosi float %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x float> %10, i32 3
+; CHECK-NEXT: %37 = fptosi float %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x float> %11, i32 0
+; CHECK-NEXT: %40 = fptosi float %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x float> %11, i32 1
+; CHECK-NEXT: %43 = fptosi float %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x float> %11, i32 2
+; CHECK-NEXT: %46 = fptosi float %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x float> %11, i32 3
+; CHECK-NEXT: %49 = fptosi float %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x float> %12, i32 0
+; CHECK-NEXT: %52 = fptosi float %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x float> %12, i32 1
+; CHECK-NEXT: %55 = fptosi float %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x float> %12, i32 2
+; CHECK-NEXT: %58 = fptosi float %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> undef, i64 %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x float> %12, i32 3
+; CHECK-NEXT: %61 = fptosi float %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> %59, i64 %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x float> %13, i32 0
+; CHECK-NEXT: %64 = fptosi float %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> undef, i64 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x float> %13, i32 1
+; CHECK-NEXT: %67 = fptosi float %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> %65, i64 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x float> %13, i32 2
+; CHECK-NEXT: %70 = fptosi float %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> undef, i64 %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x float> %13, i32 3
+; CHECK-NEXT: %73 = fptosi float %72 to i64
+; CHECK-NEXT: %74 = insertelement <2 x i64> %71, i64 %73, i32 1
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %62, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %68, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %74, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+; fptosi <20 x double> -> <20 x i8>.
+; Expected result: the argument arrives as ten <2 x double> values (%1-%10) and
+; the result is rebuilt as two <16 x i8> chunks, one scalar lane at a time
+; (extractelement / fptosi / insertelement). The first chunk (%59, all 16
+; lanes) is returned; the second (%71, only lanes 0-3 written) is stored
+; through the single leading pointer parameter %0.
+define <20 x i8> @fptosi_cast_20xdouble_to_20xi8(<20 x double>) {
+ %2 = fptosi <20 x double> %0 to <20 x i8>
+ ret <20 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @fptosi_cast_20xdouble_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = extractelement <2 x double> %1, i32 0
+; CHECK-NEXT: %13 = fptosi double %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> undef, i8 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x double> %1, i32 1
+; CHECK-NEXT: %16 = fptosi double %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %19 = fptosi double %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %22 = fptosi double %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %25 = fptosi double %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 4
+; CHECK-NEXT: %27 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %28 = fptosi double %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 5
+; CHECK-NEXT: %30 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %31 = fptosi double %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 6
+; CHECK-NEXT: %33 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %34 = fptosi double %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 7
+; CHECK-NEXT: %36 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %37 = fptosi double %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 8
+; CHECK-NEXT: %39 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %40 = fptosi double %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 9
+; CHECK-NEXT: %42 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %43 = fptosi double %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 10
+; CHECK-NEXT: %45 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %46 = fptosi double %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 11
+; CHECK-NEXT: %48 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %49 = fptosi double %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 12
+; CHECK-NEXT: %51 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %52 = fptosi double %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 13
+; CHECK-NEXT: %54 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %55 = fptosi double %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 14
+; CHECK-NEXT: %57 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %58 = fptosi double %57 to i8
+; CHECK-NEXT: %59 = insertelement <16 x i8> %56, i8 %58, i32 15
+; CHECK-NEXT: %60 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %61 = fptosi double %60 to i8
+; CHECK-NEXT: %62 = insertelement <16 x i8> undef, i8 %61, i32 0
+; CHECK-NEXT: %63 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %64 = fptosi double %63 to i8
+; CHECK-NEXT: %65 = insertelement <16 x i8> %62, i8 %64, i32 1
+; CHECK-NEXT: %66 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %67 = fptosi double %66 to i8
+; CHECK-NEXT: %68 = insertelement <16 x i8> %65, i8 %67, i32 2
+; CHECK-NEXT: %69 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %70 = fptosi double %69 to i8
+; CHECK-NEXT: %71 = insertelement <16 x i8> %68, i8 %70, i32 3
+; The chunk after the first leaves the function through the pointer parameter.
+; CHECK-NEXT: store <16 x i8> %71, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %59
+
+; fptosi <20 x double> -> <20 x i16>.
+; Expected result: the argument arrives as ten <2 x double> values (%2-%11) and
+; the result is rebuilt as three <8 x i16> chunks, one scalar lane at a time
+; (extractelement / fptosi / insertelement). The first chunk (%36) is returned;
+; the second (%60) and the third (%72, only lanes 0-3 written) are stored
+; through the two leading pointer parameters %0 and %1.
+define <20 x i16> @fptosi_cast_20xdouble_to_20xi16(<20 x double>) {
+ %2 = fptosi <20 x double> %0 to <20 x i16>
+ ret <20 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @fptosi_cast_20xdouble_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %13 = extractelement <2 x double> %2, i32 0
+; CHECK-NEXT: %14 = fptosi double %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> undef, i16 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <2 x double> %2, i32 1
+; CHECK-NEXT: %17 = fptosi double %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT: %20 = fptosi double %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 2
+; CHECK-NEXT: %22 = extractelement <2 x double> %3, i32 1
+; CHECK-NEXT: %23 = fptosi double %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 3
+; CHECK-NEXT: %25 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %26 = fptosi double %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 4
+; CHECK-NEXT: %28 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %29 = fptosi double %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 5
+; CHECK-NEXT: %31 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %32 = fptosi double %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 6
+; CHECK-NEXT: %34 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %35 = fptosi double %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> %33, i16 %35, i32 7
+; CHECK-NEXT: %37 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %38 = fptosi double %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> undef, i16 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %41 = fptosi double %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 1
+; CHECK-NEXT: %43 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %44 = fptosi double %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 2
+; CHECK-NEXT: %46 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %47 = fptosi double %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 3
+; CHECK-NEXT: %49 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %50 = fptosi double %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 4
+; CHECK-NEXT: %52 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %53 = fptosi double %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 5
+; CHECK-NEXT: %55 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %56 = fptosi double %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 6
+; CHECK-NEXT: %58 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %59 = fptosi double %58 to i16
+; CHECK-NEXT: %60 = insertelement <8 x i16> %57, i16 %59, i32 7
+; CHECK-NEXT: %61 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %62 = fptosi double %61 to i16
+; CHECK-NEXT: %63 = insertelement <8 x i16> undef, i16 %62, i32 0
+; CHECK-NEXT: %64 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %65 = fptosi double %64 to i16
+; CHECK-NEXT: %66 = insertelement <8 x i16> %63, i16 %65, i32 1
+; CHECK-NEXT: %67 = extractelement <2 x double> %11, i32 0
+; CHECK-NEXT: %68 = fptosi double %67 to i16
+; CHECK-NEXT: %69 = insertelement <8 x i16> %66, i16 %68, i32 2
+; CHECK-NEXT: %70 = extractelement <2 x double> %11, i32 1
+; CHECK-NEXT: %71 = fptosi double %70 to i16
+; CHECK-NEXT: %72 = insertelement <8 x i16> %69, i16 %71, i32 3
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <8 x i16> %60, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %72, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %36
+
+; fptosi <20 x double> -> <20 x i32>.
+; Expected result: the argument arrives as ten <2 x double> values (%4-%13) and
+; the result is rebuilt as five <4 x i32> chunks, one scalar lane at a time
+; (extractelement / fptosi / insertelement), two <2 x double> inputs per
+; chunk. The first chunk (%26) is returned; the other four (%38, %50, %62,
+; %74) are stored through the leading pointer parameters %0-%3.
+define <20 x i32> @fptosi_cast_20xdouble_to_20xi32(<20 x double>) {
+ %2 = fptosi <20 x double> %0 to <20 x i32>
+ ret <20 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @fptosi_cast_20xdouble_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %15 = extractelement <2 x double> %4, i32 0
+; CHECK-NEXT: %16 = fptosi double %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <2 x double> %4, i32 1
+; CHECK-NEXT: %19 = fptosi double %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT: %22 = fptosi double %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <2 x double> %5, i32 1
+; CHECK-NEXT: %25 = fptosi double %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: %27 = extractelement <2 x double> %6, i32 0
+; CHECK-NEXT: %28 = fptosi double %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> undef, i32 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <2 x double> %6, i32 1
+; CHECK-NEXT: %31 = fptosi double %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <2 x double> %7, i32 0
+; CHECK-NEXT: %34 = fptosi double %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <2 x double> %7, i32 1
+; CHECK-NEXT: %37 = fptosi double %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 3
+; CHECK-NEXT: %39 = extractelement <2 x double> %8, i32 0
+; CHECK-NEXT: %40 = fptosi double %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> undef, i32 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <2 x double> %8, i32 1
+; CHECK-NEXT: %43 = fptosi double %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <2 x double> %9, i32 0
+; CHECK-NEXT: %46 = fptosi double %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 2
+; CHECK-NEXT: %48 = extractelement <2 x double> %9, i32 1
+; CHECK-NEXT: %49 = fptosi double %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> %47, i32 %49, i32 3
+; CHECK-NEXT: %51 = extractelement <2 x double> %10, i32 0
+; CHECK-NEXT: %52 = fptosi double %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> undef, i32 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <2 x double> %10, i32 1
+; CHECK-NEXT: %55 = fptosi double %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <2 x double> %11, i32 0
+; CHECK-NEXT: %58 = fptosi double %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 2
+; CHECK-NEXT: %60 = extractelement <2 x double> %11, i32 1
+; CHECK-NEXT: %61 = fptosi double %60 to i32
+; CHECK-NEXT: %62 = insertelement <4 x i32> %59, i32 %61, i32 3
+; CHECK-NEXT: %63 = extractelement <2 x double> %12, i32 0
+; CHECK-NEXT: %64 = fptosi double %63 to i32
+; CHECK-NEXT: %65 = insertelement <4 x i32> undef, i32 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <2 x double> %12, i32 1
+; CHECK-NEXT: %67 = fptosi double %66 to i32
+; CHECK-NEXT: %68 = insertelement <4 x i32> %65, i32 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <2 x double> %13, i32 0
+; CHECK-NEXT: %70 = fptosi double %69 to i32
+; CHECK-NEXT: %71 = insertelement <4 x i32> %68, i32 %70, i32 2
+; CHECK-NEXT: %72 = extractelement <2 x double> %13, i32 1
+; CHECK-NEXT: %73 = fptosi double %72 to i32
+; CHECK-NEXT: %74 = insertelement <4 x i32> %71, i32 %73, i32 3
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <4 x i32> %38, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %50, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %62, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %74, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %26
+
+; fptosi <20 x double> -> <20 x i64>.
+; Expected result: source and destination chunks both hold two lanes, so each
+; <2 x double> argument (%9-%18) is converted by a single vector fptosi with
+; no per-lane extract/insert. The first result (%20) is returned; the other
+; nine (%21-%29) are stored through the leading pointer parameters %0-%8.
+define <20 x i64> @fptosi_cast_20xdouble_to_20xi64(<20 x double>) {
+ %2 = fptosi <20 x double> %0 to <20 x i64>
+ ret <20 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @fptosi_cast_20xdouble_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %20 = fptosi <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %21 = fptosi <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %22 = fptosi <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %23 = fptosi <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %24 = fptosi <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %25 = fptosi <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: %26 = fptosi <2 x double> %15 to <2 x i64>
+; CHECK-NEXT: %27 = fptosi <2 x double> %16 to <2 x i64>
+; CHECK-NEXT: %28 = fptosi <2 x double> %17 to <2 x i64>
+; CHECK-NEXT: %29 = fptosi <2 x double> %18 to <2 x i64>
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+; uitofp <20 x i8> -> <20 x float>.
+; Expected result: the argument arrives as two <16 x i8> values (%4, %5) and
+; the result is rebuilt as five <4 x float> chunks, one scalar lane at a time
+; (extractelement / uitofp / insertelement). All 16 lanes of %4 are read but
+; only lanes 0-3 of %5. The first chunk (%18) is returned; the other four
+; (%30, %42, %54, %66) are stored through the leading pointer parameters %0-%3.
+define <20 x float> @uitofp_cast_20xi8_to_20xfloat(<20 x i8>) {
+ %2 = uitofp <20 x i8> %0 to <20 x float>
+ ret <20 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_20xi8_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %4, i32 0
+; CHECK-NEXT: %8 = uitofp i8 %7 to float
+; CHECK-NEXT: %9 = insertelement <4 x float> undef, float %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %4, i32 1
+; CHECK-NEXT: %11 = uitofp i8 %10 to float
+; CHECK-NEXT: %12 = insertelement <4 x float> %9, float %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %4, i32 2
+; CHECK-NEXT: %14 = uitofp i8 %13 to float
+; CHECK-NEXT: %15 = insertelement <4 x float> %12, float %14, i32 2
+; CHECK-NEXT: %16 = extractelement <16 x i8> %4, i32 3
+; CHECK-NEXT: %17 = uitofp i8 %16 to float
+; CHECK-NEXT: %18 = insertelement <4 x float> %15, float %17, i32 3
+; CHECK-NEXT: %19 = extractelement <16 x i8> %4, i32 4
+; CHECK-NEXT: %20 = uitofp i8 %19 to float
+; CHECK-NEXT: %21 = insertelement <4 x float> undef, float %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %4, i32 5
+; CHECK-NEXT: %23 = uitofp i8 %22 to float
+; CHECK-NEXT: %24 = insertelement <4 x float> %21, float %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %4, i32 6
+; CHECK-NEXT: %26 = uitofp i8 %25 to float
+; CHECK-NEXT: %27 = insertelement <4 x float> %24, float %26, i32 2
+; CHECK-NEXT: %28 = extractelement <16 x i8> %4, i32 7
+; CHECK-NEXT: %29 = uitofp i8 %28 to float
+; CHECK-NEXT: %30 = insertelement <4 x float> %27, float %29, i32 3
+; CHECK-NEXT: %31 = extractelement <16 x i8> %4, i32 8
+; CHECK-NEXT: %32 = uitofp i8 %31 to float
+; CHECK-NEXT: %33 = insertelement <4 x float> undef, float %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %4, i32 9
+; CHECK-NEXT: %35 = uitofp i8 %34 to float
+; CHECK-NEXT: %36 = insertelement <4 x float> %33, float %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %4, i32 10
+; CHECK-NEXT: %38 = uitofp i8 %37 to float
+; CHECK-NEXT: %39 = insertelement <4 x float> %36, float %38, i32 2
+; CHECK-NEXT: %40 = extractelement <16 x i8> %4, i32 11
+; CHECK-NEXT: %41 = uitofp i8 %40 to float
+; CHECK-NEXT: %42 = insertelement <4 x float> %39, float %41, i32 3
+; CHECK-NEXT: %43 = extractelement <16 x i8> %4, i32 12
+; CHECK-NEXT: %44 = uitofp i8 %43 to float
+; CHECK-NEXT: %45 = insertelement <4 x float> undef, float %44, i32 0
+; CHECK-NEXT: %46 = extractelement <16 x i8> %4, i32 13
+; CHECK-NEXT: %47 = uitofp i8 %46 to float
+; CHECK-NEXT: %48 = insertelement <4 x float> %45, float %47, i32 1
+; CHECK-NEXT: %49 = extractelement <16 x i8> %4, i32 14
+; CHECK-NEXT: %50 = uitofp i8 %49 to float
+; CHECK-NEXT: %51 = insertelement <4 x float> %48, float %50, i32 2
+; CHECK-NEXT: %52 = extractelement <16 x i8> %4, i32 15
+; CHECK-NEXT: %53 = uitofp i8 %52 to float
+; CHECK-NEXT: %54 = insertelement <4 x float> %51, float %53, i32 3
+; CHECK-NEXT: %55 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %56 = uitofp i8 %55 to float
+; CHECK-NEXT: %57 = insertelement <4 x float> undef, float %56, i32 0
+; CHECK-NEXT: %58 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %59 = uitofp i8 %58 to float
+; CHECK-NEXT: %60 = insertelement <4 x float> %57, float %59, i32 1
+; CHECK-NEXT: %61 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %62 = uitofp i8 %61 to float
+; CHECK-NEXT: %63 = insertelement <4 x float> %60, float %62, i32 2
+; CHECK-NEXT: %64 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %65 = uitofp i8 %64 to float
+; CHECK-NEXT: %66 = insertelement <4 x float> %63, float %65, i32 3
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <4 x float> %30, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %42, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %54, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %66, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %18
+
+; uitofp <20 x i16> -> <20 x float>.
+; Expected result: the argument arrives as three <8 x i16> values (%4-%6) and
+; the result is rebuilt as five <4 x float> chunks, one scalar lane at a time
+; (extractelement / uitofp / insertelement). All 8 lanes of %4 and %5 are read
+; but only lanes 0-3 of %6. The first chunk (%19) is returned; the other four
+; (%31, %43, %55, %67) are stored through the leading pointer parameters %0-%3.
+define <20 x float> @uitofp_cast_20xi16_to_20xfloat(<20 x i16>) {
+ %2 = uitofp <20 x i16> %0 to <20 x float>
+ ret <20 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_20xi16_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %9 = uitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> undef, float %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %12 = uitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %15 = uitofp i16 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 2
+; CHECK-NEXT: %17 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %18 = uitofp i16 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> %16, float %18, i32 3
+; CHECK-NEXT: %20 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %21 = uitofp i16 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> undef, float %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %24 = uitofp i16 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %27 = uitofp i16 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 2
+; CHECK-NEXT: %29 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %30 = uitofp i16 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> %28, float %30, i32 3
+; CHECK-NEXT: %32 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %33 = uitofp i16 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> undef, float %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %36 = uitofp i16 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %39 = uitofp i16 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 2
+; CHECK-NEXT: %41 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %42 = uitofp i16 %41 to float
+; CHECK-NEXT: %43 = insertelement <4 x float> %40, float %42, i32 3
+; CHECK-NEXT: %44 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %45 = uitofp i16 %44 to float
+; CHECK-NEXT: %46 = insertelement <4 x float> undef, float %45, i32 0
+; CHECK-NEXT: %47 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %48 = uitofp i16 %47 to float
+; CHECK-NEXT: %49 = insertelement <4 x float> %46, float %48, i32 1
+; CHECK-NEXT: %50 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %51 = uitofp i16 %50 to float
+; CHECK-NEXT: %52 = insertelement <4 x float> %49, float %51, i32 2
+; CHECK-NEXT: %53 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %54 = uitofp i16 %53 to float
+; CHECK-NEXT: %55 = insertelement <4 x float> %52, float %54, i32 3
+; CHECK-NEXT: %56 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %57 = uitofp i16 %56 to float
+; CHECK-NEXT: %58 = insertelement <4 x float> undef, float %57, i32 0
+; CHECK-NEXT: %59 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %60 = uitofp i16 %59 to float
+; CHECK-NEXT: %61 = insertelement <4 x float> %58, float %60, i32 1
+; CHECK-NEXT: %62 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %63 = uitofp i16 %62 to float
+; CHECK-NEXT: %64 = insertelement <4 x float> %61, float %63, i32 2
+; CHECK-NEXT: %65 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %66 = uitofp i16 %65 to float
+; CHECK-NEXT: %67 = insertelement <4 x float> %64, float %66, i32 3
+; Chunks after the first leave the function through the pointer parameters.
+; CHECK-NEXT: store <4 x float> %31, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %43, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %55, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %67, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %19
+
+define <20 x float> @uitofp_cast_20xi32_to_20xfloat(<20 x i32>) { ; test input: unsigned i32 -> float cast on a 20-lane vector
+ %2 = uitofp <20 x i32> %0 to <20 x float> ; expected output keeps this vectorized: five <4 x i32> -> <4 x float> casts
+ ret <20 x float> %2 ; expected output returns the first <4 x float> by value and stores the other four through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_20xi32_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = uitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %11 = uitofp <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %12 = uitofp <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: %13 = uitofp <4 x i32> %7 to <4 x float>
+; CHECK-NEXT: %14 = uitofp <4 x i32> %8 to <4 x float>
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %12, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %13, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %14, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %10
+
+define <20 x float> @uitofp_cast_20xi64_to_20xfloat(<20 x i64>) { ; test input: unsigned i64 -> float cast on a 20-lane vector; expected output takes the operand as ten <2 x i64> arguments
+ %2 = uitofp <20 x i64> %0 to <20 x float> ; expected output scalarizes this: extractelement, scalar uitofp, insertelement, with two <2 x i64> inputs filling each <4 x float>
+ ret <20 x float> %2 ; expected output returns the first <4 x float> by value and stores the other four through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x float> @uitofp_cast_20xi64_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %15 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %16 = uitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %19 = uitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %22 = uitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %25 = uitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: %27 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %28 = uitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> undef, float %28, i32 0
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %31 = uitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 1
+; CHECK-NEXT: %33 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %34 = uitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 2
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %37 = uitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 3
+; CHECK-NEXT: %39 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %40 = uitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> undef, float %40, i32 0
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %43 = uitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 1
+; CHECK-NEXT: %45 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %46 = uitofp i64 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 2
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %49 = uitofp i64 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> %47, float %49, i32 3
+; CHECK-NEXT: %51 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %52 = uitofp i64 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> undef, float %52, i32 0
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %55 = uitofp i64 %54 to float
+; CHECK-NEXT: %56 = insertelement <4 x float> %53, float %55, i32 1
+; CHECK-NEXT: %57 = extractelement <2 x i64> %11, i32 0
+; CHECK-NEXT: %58 = uitofp i64 %57 to float
+; CHECK-NEXT: %59 = insertelement <4 x float> %56, float %58, i32 2
+; CHECK-NEXT: %60 = extractelement <2 x i64> %11, i32 1
+; CHECK-NEXT: %61 = uitofp i64 %60 to float
+; CHECK-NEXT: %62 = insertelement <4 x float> %59, float %61, i32 3
+; CHECK-NEXT: %63 = extractelement <2 x i64> %12, i32 0
+; CHECK-NEXT: %64 = uitofp i64 %63 to float
+; CHECK-NEXT: %65 = insertelement <4 x float> undef, float %64, i32 0
+; CHECK-NEXT: %66 = extractelement <2 x i64> %12, i32 1
+; CHECK-NEXT: %67 = uitofp i64 %66 to float
+; CHECK-NEXT: %68 = insertelement <4 x float> %65, float %67, i32 1
+; CHECK-NEXT: %69 = extractelement <2 x i64> %13, i32 0
+; CHECK-NEXT: %70 = uitofp i64 %69 to float
+; CHECK-NEXT: %71 = insertelement <4 x float> %68, float %70, i32 2
+; CHECK-NEXT: %72 = extractelement <2 x i64> %13, i32 1
+; CHECK-NEXT: %73 = uitofp i64 %72 to float
+; CHECK-NEXT: %74 = insertelement <4 x float> %71, float %73, i32 3
+; CHECK-NEXT: store <4 x float> %38, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %50, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %62, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %74, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %26
+
+define <20 x double> @uitofp_cast_20xi8_to_20xdouble(<20 x i8>) { ; test input: unsigned i8 -> double cast on a 20-lane vector; expected output takes the operand as two <16 x i8> arguments
+ %2 = uitofp <20 x i8> %0 to <20 x double> ; expected output scalarizes this lane by lane; only lanes 0-3 of the second <16 x i8> are read
+ ret <20 x double> %2 ; expected output returns the first <2 x double> by value and stores the other nine through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_20xi8_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %12 = extractelement <16 x i8> %9, i32 0
+; CHECK-NEXT: %13 = uitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <16 x i8> %9, i32 1
+; CHECK-NEXT: %16 = uitofp i8 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <16 x i8> %9, i32 2
+; CHECK-NEXT: %19 = uitofp i8 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <16 x i8> %9, i32 3
+; CHECK-NEXT: %22 = uitofp i8 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <16 x i8> %9, i32 4
+; CHECK-NEXT: %25 = uitofp i8 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <16 x i8> %9, i32 5
+; CHECK-NEXT: %28 = uitofp i8 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: %30 = extractelement <16 x i8> %9, i32 6
+; CHECK-NEXT: %31 = uitofp i8 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> undef, double %31, i32 0
+; CHECK-NEXT: %33 = extractelement <16 x i8> %9, i32 7
+; CHECK-NEXT: %34 = uitofp i8 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> %32, double %34, i32 1
+; CHECK-NEXT: %36 = extractelement <16 x i8> %9, i32 8
+; CHECK-NEXT: %37 = uitofp i8 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> undef, double %37, i32 0
+; CHECK-NEXT: %39 = extractelement <16 x i8> %9, i32 9
+; CHECK-NEXT: %40 = uitofp i8 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> %38, double %40, i32 1
+; CHECK-NEXT: %42 = extractelement <16 x i8> %9, i32 10
+; CHECK-NEXT: %43 = uitofp i8 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> undef, double %43, i32 0
+; CHECK-NEXT: %45 = extractelement <16 x i8> %9, i32 11
+; CHECK-NEXT: %46 = uitofp i8 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> %44, double %46, i32 1
+; CHECK-NEXT: %48 = extractelement <16 x i8> %9, i32 12
+; CHECK-NEXT: %49 = uitofp i8 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> undef, double %49, i32 0
+; CHECK-NEXT: %51 = extractelement <16 x i8> %9, i32 13
+; CHECK-NEXT: %52 = uitofp i8 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> %50, double %52, i32 1
+; CHECK-NEXT: %54 = extractelement <16 x i8> %9, i32 14
+; CHECK-NEXT: %55 = uitofp i8 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> undef, double %55, i32 0
+; CHECK-NEXT: %57 = extractelement <16 x i8> %9, i32 15
+; CHECK-NEXT: %58 = uitofp i8 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> %56, double %58, i32 1
+; CHECK-NEXT: %60 = extractelement <16 x i8> %10, i32 0
+; CHECK-NEXT: %61 = uitofp i8 %60 to double
+; CHECK-NEXT: %62 = insertelement <2 x double> undef, double %61, i32 0
+; CHECK-NEXT: %63 = extractelement <16 x i8> %10, i32 1
+; CHECK-NEXT: %64 = uitofp i8 %63 to double
+; CHECK-NEXT: %65 = insertelement <2 x double> %62, double %64, i32 1
+; CHECK-NEXT: %66 = extractelement <16 x i8> %10, i32 2
+; CHECK-NEXT: %67 = uitofp i8 %66 to double
+; CHECK-NEXT: %68 = insertelement <2 x double> undef, double %67, i32 0
+; CHECK-NEXT: %69 = extractelement <16 x i8> %10, i32 3
+; CHECK-NEXT: %70 = uitofp i8 %69 to double
+; CHECK-NEXT: %71 = insertelement <2 x double> %68, double %70, i32 1
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %41, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %47, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %53, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %59, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %65, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %71, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %17
+
+define <20 x double> @uitofp_cast_20xi16_to_20xdouble(<20 x i16>) { ; test input: unsigned i16 -> double cast on a 20-lane vector; expected output takes the operand as three <8 x i16> arguments
+ %2 = uitofp <20 x i16> %0 to <20 x double> ; expected output scalarizes this lane by lane; only lanes 0-3 of the third <8 x i16> are read
+ ret <20 x double> %2 ; expected output returns the first <2 x double> by value and stores the other nine through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_20xi16_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %13 = extractelement <8 x i16> %9, i32 0
+; CHECK-NEXT: %14 = uitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> undef, double %14, i32 0
+; CHECK-NEXT: %16 = extractelement <8 x i16> %9, i32 1
+; CHECK-NEXT: %17 = uitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> %15, double %17, i32 1
+; CHECK-NEXT: %19 = extractelement <8 x i16> %9, i32 2
+; CHECK-NEXT: %20 = uitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> undef, double %20, i32 0
+; CHECK-NEXT: %22 = extractelement <8 x i16> %9, i32 3
+; CHECK-NEXT: %23 = uitofp i16 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> %21, double %23, i32 1
+; CHECK-NEXT: %25 = extractelement <8 x i16> %9, i32 4
+; CHECK-NEXT: %26 = uitofp i16 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> undef, double %26, i32 0
+; CHECK-NEXT: %28 = extractelement <8 x i16> %9, i32 5
+; CHECK-NEXT: %29 = uitofp i16 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> %27, double %29, i32 1
+; CHECK-NEXT: %31 = extractelement <8 x i16> %9, i32 6
+; CHECK-NEXT: %32 = uitofp i16 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> undef, double %32, i32 0
+; CHECK-NEXT: %34 = extractelement <8 x i16> %9, i32 7
+; CHECK-NEXT: %35 = uitofp i16 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> %33, double %35, i32 1
+; CHECK-NEXT: %37 = extractelement <8 x i16> %10, i32 0
+; CHECK-NEXT: %38 = uitofp i16 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> undef, double %38, i32 0
+; CHECK-NEXT: %40 = extractelement <8 x i16> %10, i32 1
+; CHECK-NEXT: %41 = uitofp i16 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> %39, double %41, i32 1
+; CHECK-NEXT: %43 = extractelement <8 x i16> %10, i32 2
+; CHECK-NEXT: %44 = uitofp i16 %43 to double
+; CHECK-NEXT: %45 = insertelement <2 x double> undef, double %44, i32 0
+; CHECK-NEXT: %46 = extractelement <8 x i16> %10, i32 3
+; CHECK-NEXT: %47 = uitofp i16 %46 to double
+; CHECK-NEXT: %48 = insertelement <2 x double> %45, double %47, i32 1
+; CHECK-NEXT: %49 = extractelement <8 x i16> %10, i32 4
+; CHECK-NEXT: %50 = uitofp i16 %49 to double
+; CHECK-NEXT: %51 = insertelement <2 x double> undef, double %50, i32 0
+; CHECK-NEXT: %52 = extractelement <8 x i16> %10, i32 5
+; CHECK-NEXT: %53 = uitofp i16 %52 to double
+; CHECK-NEXT: %54 = insertelement <2 x double> %51, double %53, i32 1
+; CHECK-NEXT: %55 = extractelement <8 x i16> %10, i32 6
+; CHECK-NEXT: %56 = uitofp i16 %55 to double
+; CHECK-NEXT: %57 = insertelement <2 x double> undef, double %56, i32 0
+; CHECK-NEXT: %58 = extractelement <8 x i16> %10, i32 7
+; CHECK-NEXT: %59 = uitofp i16 %58 to double
+; CHECK-NEXT: %60 = insertelement <2 x double> %57, double %59, i32 1
+; CHECK-NEXT: %61 = extractelement <8 x i16> %11, i32 0
+; CHECK-NEXT: %62 = uitofp i16 %61 to double
+; CHECK-NEXT: %63 = insertelement <2 x double> undef, double %62, i32 0
+; CHECK-NEXT: %64 = extractelement <8 x i16> %11, i32 1
+; CHECK-NEXT: %65 = uitofp i16 %64 to double
+; CHECK-NEXT: %66 = insertelement <2 x double> %63, double %65, i32 1
+; CHECK-NEXT: %67 = extractelement <8 x i16> %11, i32 2
+; CHECK-NEXT: %68 = uitofp i16 %67 to double
+; CHECK-NEXT: %69 = insertelement <2 x double> undef, double %68, i32 0
+; CHECK-NEXT: %70 = extractelement <8 x i16> %11, i32 3
+; CHECK-NEXT: %71 = uitofp i16 %70 to double
+; CHECK-NEXT: %72 = insertelement <2 x double> %69, double %71, i32 1
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %30, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %42, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %48, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %54, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %60, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %66, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %72, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %18
+
+define <20 x double> @uitofp_cast_20xi32_to_20xdouble(<20 x i32>) { ; test input: unsigned i32 -> double cast on a 20-lane vector; expected output takes the operand as five <4 x i32> arguments
+ %2 = uitofp <20 x i32> %0 to <20 x double> ; expected output scalarizes this lane by lane; each <4 x i32> input yields two <2 x double> results
+ ret <20 x double> %2 ; expected output returns the first <2 x double> by value and stores the other nine through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_20xi32_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %16 = uitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %19 = uitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %22 = uitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %25 = uitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %28 = uitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %31 = uitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %34 = uitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %37 = uitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %11, i32 0
+; CHECK-NEXT: %40 = uitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %11, i32 1
+; CHECK-NEXT: %43 = uitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x i32> %11, i32 2
+; CHECK-NEXT: %46 = uitofp i32 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> undef, double %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x i32> %11, i32 3
+; CHECK-NEXT: %49 = uitofp i32 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> %47, double %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x i32> %12, i32 0
+; CHECK-NEXT: %52 = uitofp i32 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> undef, double %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x i32> %12, i32 1
+; CHECK-NEXT: %55 = uitofp i32 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> %53, double %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x i32> %12, i32 2
+; CHECK-NEXT: %58 = uitofp i32 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> undef, double %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x i32> %12, i32 3
+; CHECK-NEXT: %61 = uitofp i32 %60 to double
+; CHECK-NEXT: %62 = insertelement <2 x double> %59, double %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x i32> %13, i32 0
+; CHECK-NEXT: %64 = uitofp i32 %63 to double
+; CHECK-NEXT: %65 = insertelement <2 x double> undef, double %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x i32> %13, i32 1
+; CHECK-NEXT: %67 = uitofp i32 %66 to double
+; CHECK-NEXT: %68 = insertelement <2 x double> %65, double %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x i32> %13, i32 2
+; CHECK-NEXT: %70 = uitofp i32 %69 to double
+; CHECK-NEXT: %71 = insertelement <2 x double> undef, double %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x i32> %13, i32 3
+; CHECK-NEXT: %73 = uitofp i32 %72 to double
+; CHECK-NEXT: %74 = insertelement <2 x double> %71, double %73, i32 1
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %50, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %56, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %62, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %68, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %74, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %20
+
+define <20 x double> @uitofp_cast_20xi64_to_20xdouble(<20 x i64>) { ; test input: unsigned i64 -> double cast on a 20-lane vector; expected output takes the operand as ten <2 x i64> arguments
+ %2 = uitofp <20 x i64> %0 to <20 x double> ; expected output keeps this vectorized: ten <2 x i64> -> <2 x double> casts
+ ret <20 x double> %2 ; expected output returns the first <2 x double> by value and stores the other nine through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x double> @uitofp_cast_20xi64_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %20 = uitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %21 = uitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %22 = uitofp <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %23 = uitofp <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %24 = uitofp <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %25 = uitofp <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: %26 = uitofp <2 x i64> %15 to <2 x double>
+; CHECK-NEXT: %27 = uitofp <2 x i64> %16 to <2 x double>
+; CHECK-NEXT: %28 = uitofp <2 x i64> %17 to <2 x double>
+; CHECK-NEXT: %29 = uitofp <2 x i64> %18 to <2 x double>
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %20
+
+define <20 x float> @sitofp_cast_20xi8_to_20xfloat(<20 x i8>) { ; test input: signed i8 -> float cast on a 20-lane vector; expected output takes the operand as two <16 x i8> arguments
+ %2 = sitofp <20 x i8> %0 to <20 x float> ; expected output scalarizes this lane by lane; only lanes 0-3 of the second <16 x i8> are read
+ ret <20 x float> %2 ; expected output returns the first <4 x float> by value and stores the other four through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_20xi8_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %4, i32 0
+; CHECK-NEXT: %8 = sitofp i8 %7 to float
+; CHECK-NEXT: %9 = insertelement <4 x float> undef, float %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %4, i32 1
+; CHECK-NEXT: %11 = sitofp i8 %10 to float
+; CHECK-NEXT: %12 = insertelement <4 x float> %9, float %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %4, i32 2
+; CHECK-NEXT: %14 = sitofp i8 %13 to float
+; CHECK-NEXT: %15 = insertelement <4 x float> %12, float %14, i32 2
+; CHECK-NEXT: %16 = extractelement <16 x i8> %4, i32 3
+; CHECK-NEXT: %17 = sitofp i8 %16 to float
+; CHECK-NEXT: %18 = insertelement <4 x float> %15, float %17, i32 3
+; CHECK-NEXT: %19 = extractelement <16 x i8> %4, i32 4
+; CHECK-NEXT: %20 = sitofp i8 %19 to float
+; CHECK-NEXT: %21 = insertelement <4 x float> undef, float %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %4, i32 5
+; CHECK-NEXT: %23 = sitofp i8 %22 to float
+; CHECK-NEXT: %24 = insertelement <4 x float> %21, float %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %4, i32 6
+; CHECK-NEXT: %26 = sitofp i8 %25 to float
+; CHECK-NEXT: %27 = insertelement <4 x float> %24, float %26, i32 2
+; CHECK-NEXT: %28 = extractelement <16 x i8> %4, i32 7
+; CHECK-NEXT: %29 = sitofp i8 %28 to float
+; CHECK-NEXT: %30 = insertelement <4 x float> %27, float %29, i32 3
+; CHECK-NEXT: %31 = extractelement <16 x i8> %4, i32 8
+; CHECK-NEXT: %32 = sitofp i8 %31 to float
+; CHECK-NEXT: %33 = insertelement <4 x float> undef, float %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %4, i32 9
+; CHECK-NEXT: %35 = sitofp i8 %34 to float
+; CHECK-NEXT: %36 = insertelement <4 x float> %33, float %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %4, i32 10
+; CHECK-NEXT: %38 = sitofp i8 %37 to float
+; CHECK-NEXT: %39 = insertelement <4 x float> %36, float %38, i32 2
+; CHECK-NEXT: %40 = extractelement <16 x i8> %4, i32 11
+; CHECK-NEXT: %41 = sitofp i8 %40 to float
+; CHECK-NEXT: %42 = insertelement <4 x float> %39, float %41, i32 3
+; CHECK-NEXT: %43 = extractelement <16 x i8> %4, i32 12
+; CHECK-NEXT: %44 = sitofp i8 %43 to float
+; CHECK-NEXT: %45 = insertelement <4 x float> undef, float %44, i32 0
+; CHECK-NEXT: %46 = extractelement <16 x i8> %4, i32 13
+; CHECK-NEXT: %47 = sitofp i8 %46 to float
+; CHECK-NEXT: %48 = insertelement <4 x float> %45, float %47, i32 1
+; CHECK-NEXT: %49 = extractelement <16 x i8> %4, i32 14
+; CHECK-NEXT: %50 = sitofp i8 %49 to float
+; CHECK-NEXT: %51 = insertelement <4 x float> %48, float %50, i32 2
+; CHECK-NEXT: %52 = extractelement <16 x i8> %4, i32 15
+; CHECK-NEXT: %53 = sitofp i8 %52 to float
+; CHECK-NEXT: %54 = insertelement <4 x float> %51, float %53, i32 3
+; CHECK-NEXT: %55 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %56 = sitofp i8 %55 to float
+; CHECK-NEXT: %57 = insertelement <4 x float> undef, float %56, i32 0
+; CHECK-NEXT: %58 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %59 = sitofp i8 %58 to float
+; CHECK-NEXT: %60 = insertelement <4 x float> %57, float %59, i32 1
+; CHECK-NEXT: %61 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %62 = sitofp i8 %61 to float
+; CHECK-NEXT: %63 = insertelement <4 x float> %60, float %62, i32 2
+; CHECK-NEXT: %64 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %65 = sitofp i8 %64 to float
+; CHECK-NEXT: %66 = insertelement <4 x float> %63, float %65, i32 3
+; CHECK-NEXT: store <4 x float> %30, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %42, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %54, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %66, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %18
+
+define <20 x float> @sitofp_cast_20xi16_to_20xfloat(<20 x i16>) { ; test input: signed i16 -> float cast on a 20-lane vector; expected output takes the operand as three <8 x i16> arguments
+ %2 = sitofp <20 x i16> %0 to <20 x float> ; expected output scalarizes this lane by lane; only lanes 0-3 of the third <8 x i16> are read
+ ret <20 x float> %2 ; expected output returns the first <4 x float> by value and stores the other four through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_20xi16_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %9 = sitofp i16 %8 to float
+; CHECK-NEXT: %10 = insertelement <4 x float> undef, float %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %12 = sitofp i16 %11 to float
+; CHECK-NEXT: %13 = insertelement <4 x float> %10, float %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %15 = sitofp i16 %14 to float
+; CHECK-NEXT: %16 = insertelement <4 x float> %13, float %15, i32 2
+; CHECK-NEXT: %17 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %18 = sitofp i16 %17 to float
+; CHECK-NEXT: %19 = insertelement <4 x float> %16, float %18, i32 3
+; CHECK-NEXT: %20 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %21 = sitofp i16 %20 to float
+; CHECK-NEXT: %22 = insertelement <4 x float> undef, float %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %24 = sitofp i16 %23 to float
+; CHECK-NEXT: %25 = insertelement <4 x float> %22, float %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %27 = sitofp i16 %26 to float
+; CHECK-NEXT: %28 = insertelement <4 x float> %25, float %27, i32 2
+; CHECK-NEXT: %29 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %30 = sitofp i16 %29 to float
+; CHECK-NEXT: %31 = insertelement <4 x float> %28, float %30, i32 3
+; CHECK-NEXT: %32 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %33 = sitofp i16 %32 to float
+; CHECK-NEXT: %34 = insertelement <4 x float> undef, float %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %36 = sitofp i16 %35 to float
+; CHECK-NEXT: %37 = insertelement <4 x float> %34, float %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %39 = sitofp i16 %38 to float
+; CHECK-NEXT: %40 = insertelement <4 x float> %37, float %39, i32 2
+; CHECK-NEXT: %41 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %42 = sitofp i16 %41 to float
+; CHECK-NEXT: %43 = insertelement <4 x float> %40, float %42, i32 3
+; CHECK-NEXT: %44 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %45 = sitofp i16 %44 to float
+; CHECK-NEXT: %46 = insertelement <4 x float> undef, float %45, i32 0
+; CHECK-NEXT: %47 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %48 = sitofp i16 %47 to float
+; CHECK-NEXT: %49 = insertelement <4 x float> %46, float %48, i32 1
+; CHECK-NEXT: %50 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %51 = sitofp i16 %50 to float
+; CHECK-NEXT: %52 = insertelement <4 x float> %49, float %51, i32 2
+; CHECK-NEXT: %53 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %54 = sitofp i16 %53 to float
+; CHECK-NEXT: %55 = insertelement <4 x float> %52, float %54, i32 3
+; CHECK-NEXT: %56 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %57 = sitofp i16 %56 to float
+; CHECK-NEXT: %58 = insertelement <4 x float> undef, float %57, i32 0
+; CHECK-NEXT: %59 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %60 = sitofp i16 %59 to float
+; CHECK-NEXT: %61 = insertelement <4 x float> %58, float %60, i32 1
+; CHECK-NEXT: %62 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %63 = sitofp i16 %62 to float
+; CHECK-NEXT: %64 = insertelement <4 x float> %61, float %63, i32 2
+; CHECK-NEXT: %65 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %66 = sitofp i16 %65 to float
+; CHECK-NEXT: %67 = insertelement <4 x float> %64, float %66, i32 3
+; CHECK-NEXT: store <4 x float> %31, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %43, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %55, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %67, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %19
+
+define <20 x float> @sitofp_cast_20xi32_to_20xfloat(<20 x i32>) { ; test input: signed i32 -> float cast on a 20-lane vector
+ %2 = sitofp <20 x i32> %0 to <20 x float> ; expected output keeps this vectorized: five <4 x i32> -> <4 x float> casts
+ ret <20 x float> %2 ; expected output returns the first <4 x float> by value and stores the other four through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_20xi32_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = sitofp <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %11 = sitofp <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %12 = sitofp <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: %13 = sitofp <4 x i32> %7 to <4 x float>
+; CHECK-NEXT: %14 = sitofp <4 x i32> %8 to <4 x float>
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %12, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %13, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %14, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %10
+
+; Input: signed-int-to-float cast of a 20-lane i64 vector.
+; Per the expectations below, the operand arrives as ten <2 x i64> pieces and the
+; cast is done lane by lane (extractelement, scalar sitofp, insertelement), with
+; every two input pieces filling one <4 x float>. The first <4 x float> is
+; returned; the other four are stored through the leading pointer parameters.
+define <20 x float> @sitofp_cast_20xi64_to_20xfloat(<20 x i64>) {
+ %2 = sitofp <20 x i64> %0 to <20 x float>
+ ret <20 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @sitofp_cast_20xi64_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %15 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %16 = sitofp i64 %15 to float
+; CHECK-NEXT: %17 = insertelement <4 x float> undef, float %16, i32 0
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %19 = sitofp i64 %18 to float
+; CHECK-NEXT: %20 = insertelement <4 x float> %17, float %19, i32 1
+; CHECK-NEXT: %21 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %22 = sitofp i64 %21 to float
+; CHECK-NEXT: %23 = insertelement <4 x float> %20, float %22, i32 2
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %25 = sitofp i64 %24 to float
+; CHECK-NEXT: %26 = insertelement <4 x float> %23, float %25, i32 3
+; CHECK-NEXT: %27 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %28 = sitofp i64 %27 to float
+; CHECK-NEXT: %29 = insertelement <4 x float> undef, float %28, i32 0
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %31 = sitofp i64 %30 to float
+; CHECK-NEXT: %32 = insertelement <4 x float> %29, float %31, i32 1
+; CHECK-NEXT: %33 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %34 = sitofp i64 %33 to float
+; CHECK-NEXT: %35 = insertelement <4 x float> %32, float %34, i32 2
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %37 = sitofp i64 %36 to float
+; CHECK-NEXT: %38 = insertelement <4 x float> %35, float %37, i32 3
+; CHECK-NEXT: %39 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %40 = sitofp i64 %39 to float
+; CHECK-NEXT: %41 = insertelement <4 x float> undef, float %40, i32 0
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %43 = sitofp i64 %42 to float
+; CHECK-NEXT: %44 = insertelement <4 x float> %41, float %43, i32 1
+; CHECK-NEXT: %45 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %46 = sitofp i64 %45 to float
+; CHECK-NEXT: %47 = insertelement <4 x float> %44, float %46, i32 2
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %49 = sitofp i64 %48 to float
+; CHECK-NEXT: %50 = insertelement <4 x float> %47, float %49, i32 3
+; CHECK-NEXT: %51 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %52 = sitofp i64 %51 to float
+; CHECK-NEXT: %53 = insertelement <4 x float> undef, float %52, i32 0
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %55 = sitofp i64 %54 to float
+; CHECK-NEXT: %56 = insertelement <4 x float> %53, float %55, i32 1
+; CHECK-NEXT: %57 = extractelement <2 x i64> %11, i32 0
+; CHECK-NEXT: %58 = sitofp i64 %57 to float
+; CHECK-NEXT: %59 = insertelement <4 x float> %56, float %58, i32 2
+; CHECK-NEXT: %60 = extractelement <2 x i64> %11, i32 1
+; CHECK-NEXT: %61 = sitofp i64 %60 to float
+; CHECK-NEXT: %62 = insertelement <4 x float> %59, float %61, i32 3
+; CHECK-NEXT: %63 = extractelement <2 x i64> %12, i32 0
+; CHECK-NEXT: %64 = sitofp i64 %63 to float
+; CHECK-NEXT: %65 = insertelement <4 x float> undef, float %64, i32 0
+; CHECK-NEXT: %66 = extractelement <2 x i64> %12, i32 1
+; CHECK-NEXT: %67 = sitofp i64 %66 to float
+; CHECK-NEXT: %68 = insertelement <4 x float> %65, float %67, i32 1
+; CHECK-NEXT: %69 = extractelement <2 x i64> %13, i32 0
+; CHECK-NEXT: %70 = sitofp i64 %69 to float
+; CHECK-NEXT: %71 = insertelement <4 x float> %68, float %70, i32 2
+; CHECK-NEXT: %72 = extractelement <2 x i64> %13, i32 1
+; CHECK-NEXT: %73 = sitofp i64 %72 to float
+; CHECK-NEXT: %74 = insertelement <4 x float> %71, float %73, i32 3
+; CHECK-NEXT: store <4 x float> %38, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %50, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %62, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %74, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %26
+
+; Input: signed-int-to-double cast of a 20-lane i8 vector.
+; Per the expectations below, the operand arrives as two <16 x i8> pieces and the
+; cast is done lane by lane, building ten <2 x double> values; only lanes 0-3 of
+; the second piece are read. The first <2 x double> is returned; the other nine
+; are stored through the leading pointer parameters.
+define <20 x double> @sitofp_cast_20xi8_to_20xdouble(<20 x i8>) {
+ %2 = sitofp <20 x i8> %0 to <20 x double>
+ ret <20 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_20xi8_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %12 = extractelement <16 x i8> %9, i32 0
+; CHECK-NEXT: %13 = sitofp i8 %12 to double
+; CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+; CHECK-NEXT: %15 = extractelement <16 x i8> %9, i32 1
+; CHECK-NEXT: %16 = sitofp i8 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> %14, double %16, i32 1
+; CHECK-NEXT: %18 = extractelement <16 x i8> %9, i32 2
+; CHECK-NEXT: %19 = sitofp i8 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> undef, double %19, i32 0
+; CHECK-NEXT: %21 = extractelement <16 x i8> %9, i32 3
+; CHECK-NEXT: %22 = sitofp i8 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> %20, double %22, i32 1
+; CHECK-NEXT: %24 = extractelement <16 x i8> %9, i32 4
+; CHECK-NEXT: %25 = sitofp i8 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> undef, double %25, i32 0
+; CHECK-NEXT: %27 = extractelement <16 x i8> %9, i32 5
+; CHECK-NEXT: %28 = sitofp i8 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> %26, double %28, i32 1
+; CHECK-NEXT: %30 = extractelement <16 x i8> %9, i32 6
+; CHECK-NEXT: %31 = sitofp i8 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> undef, double %31, i32 0
+; CHECK-NEXT: %33 = extractelement <16 x i8> %9, i32 7
+; CHECK-NEXT: %34 = sitofp i8 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> %32, double %34, i32 1
+; CHECK-NEXT: %36 = extractelement <16 x i8> %9, i32 8
+; CHECK-NEXT: %37 = sitofp i8 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> undef, double %37, i32 0
+; CHECK-NEXT: %39 = extractelement <16 x i8> %9, i32 9
+; CHECK-NEXT: %40 = sitofp i8 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> %38, double %40, i32 1
+; CHECK-NEXT: %42 = extractelement <16 x i8> %9, i32 10
+; CHECK-NEXT: %43 = sitofp i8 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> undef, double %43, i32 0
+; CHECK-NEXT: %45 = extractelement <16 x i8> %9, i32 11
+; CHECK-NEXT: %46 = sitofp i8 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> %44, double %46, i32 1
+; CHECK-NEXT: %48 = extractelement <16 x i8> %9, i32 12
+; CHECK-NEXT: %49 = sitofp i8 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> undef, double %49, i32 0
+; CHECK-NEXT: %51 = extractelement <16 x i8> %9, i32 13
+; CHECK-NEXT: %52 = sitofp i8 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> %50, double %52, i32 1
+; CHECK-NEXT: %54 = extractelement <16 x i8> %9, i32 14
+; CHECK-NEXT: %55 = sitofp i8 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> undef, double %55, i32 0
+; CHECK-NEXT: %57 = extractelement <16 x i8> %9, i32 15
+; CHECK-NEXT: %58 = sitofp i8 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> %56, double %58, i32 1
+; CHECK-NEXT: %60 = extractelement <16 x i8> %10, i32 0
+; CHECK-NEXT: %61 = sitofp i8 %60 to double
+; CHECK-NEXT: %62 = insertelement <2 x double> undef, double %61, i32 0
+; CHECK-NEXT: %63 = extractelement <16 x i8> %10, i32 1
+; CHECK-NEXT: %64 = sitofp i8 %63 to double
+; CHECK-NEXT: %65 = insertelement <2 x double> %62, double %64, i32 1
+; CHECK-NEXT: %66 = extractelement <16 x i8> %10, i32 2
+; CHECK-NEXT: %67 = sitofp i8 %66 to double
+; CHECK-NEXT: %68 = insertelement <2 x double> undef, double %67, i32 0
+; CHECK-NEXT: %69 = extractelement <16 x i8> %10, i32 3
+; CHECK-NEXT: %70 = sitofp i8 %69 to double
+; CHECK-NEXT: %71 = insertelement <2 x double> %68, double %70, i32 1
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %35, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %41, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %47, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %53, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %59, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %65, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %71, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %17
+
+; Input: signed-int-to-double cast of a 20-lane i16 vector.
+; Per the expectations below, the operand arrives as three <8 x i16> pieces and
+; the cast is done lane by lane, building ten <2 x double> values; only lanes 0-3
+; of the third piece are read. The first <2 x double> is returned; the other nine
+; are stored through the leading pointer parameters.
+define <20 x double> @sitofp_cast_20xi16_to_20xdouble(<20 x i16>) {
+ %2 = sitofp <20 x i16> %0 to <20 x double>
+ ret <20 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_20xi16_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %13 = extractelement <8 x i16> %9, i32 0
+; CHECK-NEXT: %14 = sitofp i16 %13 to double
+; CHECK-NEXT: %15 = insertelement <2 x double> undef, double %14, i32 0
+; CHECK-NEXT: %16 = extractelement <8 x i16> %9, i32 1
+; CHECK-NEXT: %17 = sitofp i16 %16 to double
+; CHECK-NEXT: %18 = insertelement <2 x double> %15, double %17, i32 1
+; CHECK-NEXT: %19 = extractelement <8 x i16> %9, i32 2
+; CHECK-NEXT: %20 = sitofp i16 %19 to double
+; CHECK-NEXT: %21 = insertelement <2 x double> undef, double %20, i32 0
+; CHECK-NEXT: %22 = extractelement <8 x i16> %9, i32 3
+; CHECK-NEXT: %23 = sitofp i16 %22 to double
+; CHECK-NEXT: %24 = insertelement <2 x double> %21, double %23, i32 1
+; CHECK-NEXT: %25 = extractelement <8 x i16> %9, i32 4
+; CHECK-NEXT: %26 = sitofp i16 %25 to double
+; CHECK-NEXT: %27 = insertelement <2 x double> undef, double %26, i32 0
+; CHECK-NEXT: %28 = extractelement <8 x i16> %9, i32 5
+; CHECK-NEXT: %29 = sitofp i16 %28 to double
+; CHECK-NEXT: %30 = insertelement <2 x double> %27, double %29, i32 1
+; CHECK-NEXT: %31 = extractelement <8 x i16> %9, i32 6
+; CHECK-NEXT: %32 = sitofp i16 %31 to double
+; CHECK-NEXT: %33 = insertelement <2 x double> undef, double %32, i32 0
+; CHECK-NEXT: %34 = extractelement <8 x i16> %9, i32 7
+; CHECK-NEXT: %35 = sitofp i16 %34 to double
+; CHECK-NEXT: %36 = insertelement <2 x double> %33, double %35, i32 1
+; CHECK-NEXT: %37 = extractelement <8 x i16> %10, i32 0
+; CHECK-NEXT: %38 = sitofp i16 %37 to double
+; CHECK-NEXT: %39 = insertelement <2 x double> undef, double %38, i32 0
+; CHECK-NEXT: %40 = extractelement <8 x i16> %10, i32 1
+; CHECK-NEXT: %41 = sitofp i16 %40 to double
+; CHECK-NEXT: %42 = insertelement <2 x double> %39, double %41, i32 1
+; CHECK-NEXT: %43 = extractelement <8 x i16> %10, i32 2
+; CHECK-NEXT: %44 = sitofp i16 %43 to double
+; CHECK-NEXT: %45 = insertelement <2 x double> undef, double %44, i32 0
+; CHECK-NEXT: %46 = extractelement <8 x i16> %10, i32 3
+; CHECK-NEXT: %47 = sitofp i16 %46 to double
+; CHECK-NEXT: %48 = insertelement <2 x double> %45, double %47, i32 1
+; CHECK-NEXT: %49 = extractelement <8 x i16> %10, i32 4
+; CHECK-NEXT: %50 = sitofp i16 %49 to double
+; CHECK-NEXT: %51 = insertelement <2 x double> undef, double %50, i32 0
+; CHECK-NEXT: %52 = extractelement <8 x i16> %10, i32 5
+; CHECK-NEXT: %53 = sitofp i16 %52 to double
+; CHECK-NEXT: %54 = insertelement <2 x double> %51, double %53, i32 1
+; CHECK-NEXT: %55 = extractelement <8 x i16> %10, i32 6
+; CHECK-NEXT: %56 = sitofp i16 %55 to double
+; CHECK-NEXT: %57 = insertelement <2 x double> undef, double %56, i32 0
+; CHECK-NEXT: %58 = extractelement <8 x i16> %10, i32 7
+; CHECK-NEXT: %59 = sitofp i16 %58 to double
+; CHECK-NEXT: %60 = insertelement <2 x double> %57, double %59, i32 1
+; CHECK-NEXT: %61 = extractelement <8 x i16> %11, i32 0
+; CHECK-NEXT: %62 = sitofp i16 %61 to double
+; CHECK-NEXT: %63 = insertelement <2 x double> undef, double %62, i32 0
+; CHECK-NEXT: %64 = extractelement <8 x i16> %11, i32 1
+; CHECK-NEXT: %65 = sitofp i16 %64 to double
+; CHECK-NEXT: %66 = insertelement <2 x double> %63, double %65, i32 1
+; CHECK-NEXT: %67 = extractelement <8 x i16> %11, i32 2
+; CHECK-NEXT: %68 = sitofp i16 %67 to double
+; CHECK-NEXT: %69 = insertelement <2 x double> undef, double %68, i32 0
+; CHECK-NEXT: %70 = extractelement <8 x i16> %11, i32 3
+; CHECK-NEXT: %71 = sitofp i16 %70 to double
+; CHECK-NEXT: %72 = insertelement <2 x double> %69, double %71, i32 1
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %30, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %36, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %42, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %48, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %54, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %60, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %66, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %72, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %18
+
+; Input: signed-int-to-double cast of a 20-lane i32 vector.
+; Per the expectations below, the operand arrives as five <4 x i32> pieces and
+; the cast is done lane by lane, each input piece producing two <2 x double>
+; values. The first <2 x double> is returned; the other nine are stored through
+; the leading pointer parameters.
+define <20 x double> @sitofp_cast_20xi32_to_20xdouble(<20 x i32>) {
+ %2 = sitofp <20 x i32> %0 to <20 x double>
+ ret <20 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_20xi32_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %16 = sitofp i32 %15 to double
+; CHECK-NEXT: %17 = insertelement <2 x double> undef, double %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %19 = sitofp i32 %18 to double
+; CHECK-NEXT: %20 = insertelement <2 x double> %17, double %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %22 = sitofp i32 %21 to double
+; CHECK-NEXT: %23 = insertelement <2 x double> undef, double %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %25 = sitofp i32 %24 to double
+; CHECK-NEXT: %26 = insertelement <2 x double> %23, double %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %28 = sitofp i32 %27 to double
+; CHECK-NEXT: %29 = insertelement <2 x double> undef, double %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %31 = sitofp i32 %30 to double
+; CHECK-NEXT: %32 = insertelement <2 x double> %29, double %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %34 = sitofp i32 %33 to double
+; CHECK-NEXT: %35 = insertelement <2 x double> undef, double %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %37 = sitofp i32 %36 to double
+; CHECK-NEXT: %38 = insertelement <2 x double> %35, double %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %11, i32 0
+; CHECK-NEXT: %40 = sitofp i32 %39 to double
+; CHECK-NEXT: %41 = insertelement <2 x double> undef, double %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %11, i32 1
+; CHECK-NEXT: %43 = sitofp i32 %42 to double
+; CHECK-NEXT: %44 = insertelement <2 x double> %41, double %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x i32> %11, i32 2
+; CHECK-NEXT: %46 = sitofp i32 %45 to double
+; CHECK-NEXT: %47 = insertelement <2 x double> undef, double %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x i32> %11, i32 3
+; CHECK-NEXT: %49 = sitofp i32 %48 to double
+; CHECK-NEXT: %50 = insertelement <2 x double> %47, double %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x i32> %12, i32 0
+; CHECK-NEXT: %52 = sitofp i32 %51 to double
+; CHECK-NEXT: %53 = insertelement <2 x double> undef, double %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x i32> %12, i32 1
+; CHECK-NEXT: %55 = sitofp i32 %54 to double
+; CHECK-NEXT: %56 = insertelement <2 x double> %53, double %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x i32> %12, i32 2
+; CHECK-NEXT: %58 = sitofp i32 %57 to double
+; CHECK-NEXT: %59 = insertelement <2 x double> undef, double %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x i32> %12, i32 3
+; CHECK-NEXT: %61 = sitofp i32 %60 to double
+; CHECK-NEXT: %62 = insertelement <2 x double> %59, double %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x i32> %13, i32 0
+; CHECK-NEXT: %64 = sitofp i32 %63 to double
+; CHECK-NEXT: %65 = insertelement <2 x double> undef, double %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x i32> %13, i32 1
+; CHECK-NEXT: %67 = sitofp i32 %66 to double
+; CHECK-NEXT: %68 = insertelement <2 x double> %65, double %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x i32> %13, i32 2
+; CHECK-NEXT: %70 = sitofp i32 %69 to double
+; CHECK-NEXT: %71 = insertelement <2 x double> undef, double %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x i32> %13, i32 3
+; CHECK-NEXT: %73 = sitofp i32 %72 to double
+; CHECK-NEXT: %74 = insertelement <2 x double> %71, double %73, i32 1
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %32, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %38, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %44, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %50, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %56, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %62, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %68, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %74, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %20
+
+; Input: signed-int-to-double cast of a 20-lane i64 vector.
+; Per the expectations below, the operand arrives as ten <2 x i64> pieces, each
+; converted by one vector sitofp; the first <2 x double> result is returned and
+; the other nine are stored through the leading pointer parameters.
+define <20 x double> @sitofp_cast_20xi64_to_20xdouble(<20 x i64>) {
+ %2 = sitofp <20 x i64> %0 to <20 x double>
+ ret <20 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @sitofp_cast_20xi64_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %20 = sitofp <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %21 = sitofp <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %22 = sitofp <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %23 = sitofp <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %24 = sitofp <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %25 = sitofp <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: %26 = sitofp <2 x i64> %15 to <2 x double>
+; CHECK-NEXT: %27 = sitofp <2 x i64> %16 to <2 x double>
+; CHECK-NEXT: %28 = sitofp <2 x i64> %17 to <2 x double>
+; CHECK-NEXT: %29 = sitofp <2 x i64> %18 to <2 x double>
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %20
+
+; Input: ptrtoint on a 2-lane vector of i8*.
+; Per the expectations below, both the parameter and the result are widened to
+; four lanes and the cast stays a single vector ptrtoint.
+define <2 x i32> @ptrtoint_cast_2xi8ptr_to_2xi32(<2 x i8*>) {
+ %2 = ptrtoint <2 x i8*> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_2xi8ptr_to_2xi32(<4 x i8*>)
+; CHECK-NEXT: %2 = ptrtoint <4 x i8*> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+; Input: inttoptr on a 2-lane i32 vector.
+; Per the expectations below, both the parameter and the result are widened to
+; four lanes and the cast stays a single vector inttoptr.
+define <2 x i8*> @inttoptr_cast_2xi32_to_2xi8ptr(<2 x i32>) {
+ %2 = inttoptr <2 x i32> %0 to <2 x i8*>
+ ret <2 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_2xi32_to_2xi8ptr(<4 x i32>)
+; CHECK-NEXT: %2 = inttoptr <4 x i32> %0 to <4 x i8*>
+; CHECK-NEXT: ret <4 x i8*> %2
+
+; Input: bitcast of a 2-lane i32 vector to a 2-lane float vector.
+; Per the expectations below, both the parameter and the result are widened to
+; four lanes and the cast stays a single vector bitcast.
+define <2 x float> @bitcast_cast_2xi32_to_2xfloat(<2 x i32>) {
+ %2 = bitcast <2 x i32> %0 to <2 x float>
+ ret <2 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_2xi32_to_2xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = bitcast <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+; Input: bitcast of a 2-lane float vector to a 2-lane i32 vector.
+; Per the expectations below, both the parameter and the result are widened to
+; four lanes and the cast stays a single vector bitcast.
+define <2 x i32> @bitcast_cast_2xfloat_to_2xi32(<2 x float>) {
+ %2 = bitcast <2 x float> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_2xfloat_to_2xi32(<4 x float>)
+; CHECK-NEXT: %2 = bitcast <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+; Input: bitcast of <2 x i64> to <2 x double>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <2 x double> @bitcast_cast_2xi64_to_2xdouble(<2 x i64>) {
+ %2 = bitcast <2 x i64> %0 to <2 x double>
+ ret <2 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_2xi64_to_2xdouble(<2 x i64>)
+; CHECK-NEXT: %2 = bitcast <2 x i64> %0 to <2 x double>
+; CHECK-NEXT: ret <2 x double> %2
+
+; Input: bitcast of <2 x double> to <2 x i64>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <2 x i64> @bitcast_cast_2xdouble_to_2xi64(<2 x double>) {
+ %2 = bitcast <2 x double> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_2xdouble_to_2xi64(<2 x double>)
+; CHECK-NEXT: %2 = bitcast <2 x double> %0 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+; Input: ptrtoint on <4 x i8*>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <4 x i32> @ptrtoint_cast_4xi8ptr_to_4xi32(<4 x i8*>) {
+ %2 = ptrtoint <4 x i8*> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_4xi8ptr_to_4xi32(<4 x i8*>)
+; CHECK-NEXT: %2 = ptrtoint <4 x i8*> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+; Input: inttoptr on <4 x i32>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <4 x i8*> @inttoptr_cast_4xi32_to_4xi8ptr(<4 x i32>) {
+ %2 = inttoptr <4 x i32> %0 to <4 x i8*>
+ ret <4 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_4xi32_to_4xi8ptr(<4 x i32>)
+; CHECK-NEXT: %2 = inttoptr <4 x i32> %0 to <4 x i8*>
+; CHECK-NEXT: ret <4 x i8*> %2
+
+; Input: bitcast of <4 x i32> to <4 x float>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <4 x float> @bitcast_cast_4xi32_to_4xfloat(<4 x i32>) {
+ %2 = bitcast <4 x i32> %0 to <4 x float>
+ ret <4 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_4xi32_to_4xfloat(<4 x i32>)
+; CHECK-NEXT: %2 = bitcast <4 x i32> %0 to <4 x float>
+; CHECK-NEXT: ret <4 x float> %2
+
+; Input: bitcast of <4 x float> to <4 x i32>.
+; Per the expectations below, the function is expected to come out with the same
+; signature and body as the input.
+define <4 x i32> @bitcast_cast_4xfloat_to_4xi32(<4 x float>) {
+ %2 = bitcast <4 x float> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_4xfloat_to_4xi32(<4 x float>)
+; CHECK-NEXT: %2 = bitcast <4 x float> %0 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+; Input: bitcast of a 4-lane i64 vector to a 4-lane double vector.
+; Per the expectations below, the operand arrives as two <2 x i64> pieces, each
+; bitcast separately; the first result is returned and the second is stored
+; through the leading pointer parameter.
+define <4 x double> @bitcast_cast_4xi64_to_4xdouble(<4 x i64>) {
+ %2 = bitcast <4 x i64> %0 to <4 x double>
+ ret <4 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_4xi64_to_4xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %4 = bitcast <2 x i64> %1 to <2 x double>
+; CHECK-NEXT: %5 = bitcast <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: store <2 x double> %5, <2 x double>* %0, align 16
+; CHECK-NEXT: ret <2 x double> %4
+
+; Input: bitcast of a 4-lane double vector to a 4-lane i64 vector.
+; Per the expectations below, the operand arrives as two <2 x double> pieces,
+; each bitcast separately; the first result is returned and the second is stored
+; through the leading pointer parameter.
+define <4 x i64> @bitcast_cast_4xdouble_to_4xi64(<4 x double>) {
+ %2 = bitcast <4 x double> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_4xdouble_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>)
+; CHECK-NEXT: %4 = bitcast <2 x double> %1 to <2 x i64>
+; CHECK-NEXT: %5 = bitcast <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %5, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %4
+
+; Input: ptrtoint on a 6-lane vector of i8*.
+; Per the expectations below, the operand arrives as two <4 x i8*> pieces, each
+; cast by one vector ptrtoint; the first <4 x i32> is returned and the second is
+; stored through the leading pointer parameter.
+define <6 x i32> @ptrtoint_cast_6xi8ptr_to_6xi32(<6 x i8*>) {
+ %2 = ptrtoint <6 x i8*> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_6xi8ptr_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>)
+; CHECK-NEXT: %4 = ptrtoint <4 x i8*> %1 to <4 x i32>
+; CHECK-NEXT: %5 = ptrtoint <4 x i8*> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Input: inttoptr on a 6-lane i32 vector.
+; Per the expectations below, the operand arrives as two <4 x i32> pieces, each
+; cast by one vector inttoptr; the first <4 x i8*> is returned and the second is
+; stored through the leading pointer parameter.
+define <6 x i8*> @inttoptr_cast_6xi32_to_6xi8ptr(<6 x i32>) {
+ %2 = inttoptr <6 x i32> %0 to <6 x i8*>
+ ret <6 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_6xi32_to_6xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = inttoptr <4 x i32> %1 to <4 x i8*>
+; CHECK-NEXT: %5 = inttoptr <4 x i32> %2 to <4 x i8*>
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %4
+
+; Input: bitcast of a 6-lane i32 vector to a 6-lane float vector.
+; Per the expectations below, the operand arrives as two <4 x i32> pieces, each
+; bitcast separately; the first <4 x float> is returned and the second is stored
+; through the leading pointer parameter.
+define <6 x float> @bitcast_cast_6xi32_to_6xfloat(<6 x i32>) {
+ %2 = bitcast <6 x i32> %0 to <6 x float>
+ ret <6 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_6xi32_to_6xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = bitcast <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = bitcast <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+; Input: bitcast of a 6-lane float vector to a 6-lane i32 vector.
+; Per the expectations below, the operand arrives as two <4 x float> pieces, each
+; bitcast separately; the first <4 x i32> is returned and the second is stored
+; through the leading pointer parameter.
+define <6 x i32> @bitcast_cast_6xfloat_to_6xi32(<6 x float>) {
+ %2 = bitcast <6 x float> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_6xfloat_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = bitcast <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = bitcast <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Input: bitcast of a 6-lane i64 vector to a 6-lane double vector.
+; Per the expectations below, the operand arrives as three <2 x i64> pieces, each
+; bitcast separately; the first <2 x double> is returned and the other two are
+; stored through the two leading pointer parameters.
+define <6 x double> @bitcast_cast_6xi64_to_6xdouble(<6 x i64>) {
+ %2 = bitcast <6 x i64> %0 to <6 x double>
+ ret <6 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_6xi64_to_6xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = bitcast <2 x i64> %2 to <2 x double>
+; CHECK-NEXT: %7 = bitcast <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %8 = bitcast <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: store <2 x double> %7, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %8, <2 x double>* %1, align 16
+; CHECK-NEXT: ret <2 x double> %6
+
+; Input: bitcast of a 6-lane double vector to a 6-lane i64 vector.
+; Per the expectations below, the operand arrives as three <2 x double> pieces,
+; each bitcast separately; the first <2 x i64> is returned and the other two are
+; stored through the two leading pointer parameters.
+define <6 x i64> @bitcast_cast_6xdouble_to_6xi64(<6 x double>) {
+ %2 = bitcast <6 x double> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_6xdouble_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %6 = bitcast <2 x double> %2 to <2 x i64>
+; CHECK-NEXT: %7 = bitcast <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %8 = bitcast <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %7, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %8, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %6
+
+; Input: ptrtoint on an 8-lane vector of i8*.
+; Per the expectations below, the operand arrives as two <4 x i8*> pieces, each
+; cast by one vector ptrtoint; the first <4 x i32> is returned and the second is
+; stored through the leading pointer parameter.
+define <8 x i32> @ptrtoint_cast_8xi8ptr_to_8xi32(<8 x i8*>) {
+ %2 = ptrtoint <8 x i8*> %0 to <8 x i32>
+ ret <8 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_8xi8ptr_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>)
+; CHECK-NEXT: %4 = ptrtoint <4 x i8*> %1 to <4 x i32>
+; CHECK-NEXT: %5 = ptrtoint <4 x i8*> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Input: inttoptr on an 8-lane i32 vector.
+; Per the expectations below, the operand arrives as two <4 x i32> pieces, each
+; cast by one vector inttoptr; the first <4 x i8*> is returned and the second is
+; stored through the leading pointer parameter.
+define <8 x i8*> @inttoptr_cast_8xi32_to_8xi8ptr(<8 x i32>) {
+ %2 = inttoptr <8 x i32> %0 to <8 x i8*>
+ ret <8 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_8xi32_to_8xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = inttoptr <4 x i32> %1 to <4 x i8*>
+; CHECK-NEXT: %5 = inttoptr <4 x i32> %2 to <4 x i8*>
+; CHECK-NEXT: store <4 x i8*> %5, <4 x i8*>* %0, align 16
+; CHECK-NEXT: ret <4 x i8*> %4
+
+; Input: bitcast of an 8-lane i32 vector to an 8-lane float vector.
+; Per the expectations below, the operand arrives as two <4 x i32> pieces, each
+; bitcast separately; the first <4 x float> is returned and the second is stored
+; through the leading pointer parameter.
+define <8 x float> @bitcast_cast_8xi32_to_8xfloat(<8 x i32>) {
+ %2 = bitcast <8 x i32> %0 to <8 x float>
+ ret <8 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_8xi32_to_8xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = bitcast <4 x i32> %1 to <4 x float>
+; CHECK-NEXT: %5 = bitcast <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: store <4 x float> %5, <4 x float>* %0, align 16
+; CHECK-NEXT: ret <4 x float> %4
+
+; Input: bitcast of an 8-lane float vector to an 8-lane i32 vector.
+; Per the expectations below, the operand arrives as two <4 x float> pieces, each
+; bitcast separately; the first <4 x i32> is returned and the second is stored
+; through the leading pointer parameter.
+define <8 x i32> @bitcast_cast_8xfloat_to_8xi32(<8 x float>) {
+ %2 = bitcast <8 x float> %0 to <8 x i32>
+ ret <8 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_8xfloat_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>)
+; CHECK-NEXT: %4 = bitcast <4 x float> %1 to <4 x i32>
+; CHECK-NEXT: %5 = bitcast <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %5, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %4
+
+; Input: bitcast of an 8-lane i64 vector to an 8-lane double vector.
+; Per the expectations below, the operand arrives as four <2 x i64> pieces, each
+; bitcast separately; the first <2 x double> is returned and the other three are
+; stored through the three leading pointer parameters.
+define <8 x double> @bitcast_cast_8xi64_to_8xdouble(<8 x i64>) {
+ %2 = bitcast <8 x i64> %0 to <8 x double>
+ ret <8 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_8xi64_to_8xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %8 = bitcast <2 x i64> %3 to <2 x double>
+; CHECK-NEXT: %9 = bitcast <2 x i64> %4 to <2 x double>
+; CHECK-NEXT: %10 = bitcast <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %11 = bitcast <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: store <2 x double> %9, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %10, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %11, <2 x double>* %2, align 16
+; CHECK-NEXT: ret <2 x double> %8
+
+; Input: bitcast of an 8-lane double vector to an 8-lane i64 vector.
+; Per the expectations below, the operand arrives as four <2 x double> pieces,
+; each bitcast separately; the first <2 x i64> is returned and the other three
+; are stored through the three leading pointer parameters.
+define <8 x i64> @bitcast_cast_8xdouble_to_8xi64(<8 x double>) {
+ %2 = bitcast <8 x double> %0 to <8 x i64>
+ ret <8 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_8xdouble_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %8 = bitcast <2 x double> %3 to <2 x i64>
+; CHECK-NEXT: %9 = bitcast <2 x double> %4 to <2 x i64>
+; CHECK-NEXT: %10 = bitcast <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %11 = bitcast <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %9, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %10, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %11, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test: ptrtoint <12 x i8*> to <12 x i32>.
+; Expected output below: the argument arrives as three <4 x i8*> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-2 are stored
+; (align 16) through the two leading <4 x i32>* parameters.
+define <12 x i32> @ptrtoint_cast_12xi8ptr_to_12xi32(<12 x i8*>) {
+ %2 = ptrtoint <12 x i8*> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_12xi8ptr_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>)
+; CHECK-NEXT: %6 = ptrtoint <4 x i8*> %2 to <4 x i32>
+; CHECK-NEXT: %7 = ptrtoint <4 x i8*> %3 to <4 x i32>
+; CHECK-NEXT: %8 = ptrtoint <4 x i8*> %4 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Test: inttoptr <12 x i32> to <12 x i8*>.
+; Expected output below: the argument arrives as three <4 x i32> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-2 are stored
+; (align 16) through the two leading <4 x i8*>* parameters.
+define <12 x i8*> @inttoptr_cast_12xi32_to_12xi8ptr(<12 x i32>) {
+ %2 = inttoptr <12 x i32> %0 to <12 x i8*>
+ ret <12 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_12xi32_to_12xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = inttoptr <4 x i32> %2 to <4 x i8*>
+; CHECK-NEXT: %7 = inttoptr <4 x i32> %3 to <4 x i8*>
+; CHECK-NEXT: %8 = inttoptr <4 x i32> %4 to <4 x i8*>
+; CHECK-NEXT: store <4 x i8*> %7, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %8, <4 x i8*>* %1, align 16
+; CHECK-NEXT: ret <4 x i8*> %6
+
+; Test: bitcast <12 x i32> to <12 x float>.
+; Expected output below: the argument arrives as three <4 x i32> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-2 are stored
+; (align 16) through the two leading <4 x float>* parameters.
+define <12 x float> @bitcast_cast_12xi32_to_12xfloat(<12 x i32>) {
+ %2 = bitcast <12 x i32> %0 to <12 x float>
+ ret <12 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_12xi32_to_12xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = bitcast <4 x i32> %2 to <4 x float>
+; CHECK-NEXT: %7 = bitcast <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %8 = bitcast <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: store <4 x float> %7, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %8, <4 x float>* %1, align 16
+; CHECK-NEXT: ret <4 x float> %6
+
+; Test: bitcast <12 x float> to <12 x i32>.
+; Expected output below: the argument arrives as three <4 x float> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-2 are stored
+; (align 16) through the two leading <4 x i32>* parameters.
+define <12 x i32> @bitcast_cast_12xfloat_to_12xi32(<12 x float>) {
+ %2 = bitcast <12 x float> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_12xfloat_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %6 = bitcast <4 x float> %2 to <4 x i32>
+; CHECK-NEXT: %7 = bitcast <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %8 = bitcast <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %7, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %8, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %6
+
+; Test: bitcast <12 x i64> to <12 x double>.
+; Expected output below: the argument arrives as six <2 x i64> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-5 are stored
+; (align 16) through the five leading <2 x double>* parameters.
+define <12 x double> @bitcast_cast_12xi64_to_12xdouble(<12 x i64>) {
+ %2 = bitcast <12 x i64> %0 to <12 x double>
+ ret <12 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_12xi64_to_12xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = bitcast <2 x i64> %5 to <2 x double>
+; CHECK-NEXT: %13 = bitcast <2 x i64> %6 to <2 x double>
+; CHECK-NEXT: %14 = bitcast <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %15 = bitcast <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %16 = bitcast <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %17 = bitcast <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: store <2 x double> %13, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %14, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %15, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %16, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %4, align 16
+; CHECK-NEXT: ret <2 x double> %12
+
+; Test: bitcast <12 x double> to <12 x i64>.
+; Expected output below: the argument arrives as six <2 x double> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-5 are stored
+; (align 16) through the five leading <2 x i64>* parameters.
+define <12 x i64> @bitcast_cast_12xdouble_to_12xi64(<12 x double>) {
+ %2 = bitcast <12 x double> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_12xdouble_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %12 = bitcast <2 x double> %5 to <2 x i64>
+; CHECK-NEXT: %13 = bitcast <2 x double> %6 to <2 x i64>
+; CHECK-NEXT: %14 = bitcast <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %15 = bitcast <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %16 = bitcast <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %17 = bitcast <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %13, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+
+; Test: ptrtoint <16 x i8*> to <16 x i32>.
+; Expected output below: the argument arrives as four <4 x i8*> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-3 are stored
+; (align 16) through the three leading <4 x i32>* parameters.
+define <16 x i32> @ptrtoint_cast_16xi8ptr_to_16xi32(<16 x i8*>) {
+ %2 = ptrtoint <16 x i8*> %0 to <16 x i32>
+ ret <16 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_16xi8ptr_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)
+; CHECK-NEXT: %8 = ptrtoint <4 x i8*> %3 to <4 x i32>
+; CHECK-NEXT: %9 = ptrtoint <4 x i8*> %4 to <4 x i32>
+; CHECK-NEXT: %10 = ptrtoint <4 x i8*> %5 to <4 x i32>
+; CHECK-NEXT: %11 = ptrtoint <4 x i8*> %6 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+; Test: inttoptr <16 x i32> to <16 x i8*>.
+; Expected output below: the argument arrives as four <4 x i32> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-3 are stored
+; (align 16) through the three leading <4 x i8*>* parameters.
+define <16 x i8*> @inttoptr_cast_16xi32_to_16xi8ptr(<16 x i32>) {
+ %2 = inttoptr <16 x i32> %0 to <16 x i8*>
+ ret <16 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_16xi32_to_16xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = inttoptr <4 x i32> %3 to <4 x i8*>
+; CHECK-NEXT: %9 = inttoptr <4 x i32> %4 to <4 x i8*>
+; CHECK-NEXT: %10 = inttoptr <4 x i32> %5 to <4 x i8*>
+; CHECK-NEXT: %11 = inttoptr <4 x i32> %6 to <4 x i8*>
+; CHECK-NEXT: store <4 x i8*> %9, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %10, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %11, <4 x i8*>* %2, align 16
+; CHECK-NEXT: ret <4 x i8*> %8
+
+; Test: bitcast <16 x i32> to <16 x float>.
+; Expected output below: the argument arrives as four <4 x i32> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-3 are stored
+; (align 16) through the three leading <4 x float>* parameters.
+define <16 x float> @bitcast_cast_16xi32_to_16xfloat(<16 x i32>) {
+ %2 = bitcast <16 x i32> %0 to <16 x float>
+ ret <16 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_16xi32_to_16xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = bitcast <4 x i32> %3 to <4 x float>
+; CHECK-NEXT: %9 = bitcast <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %10 = bitcast <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %11 = bitcast <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: store <4 x float> %9, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %10, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %2, align 16
+; CHECK-NEXT: ret <4 x float> %8
+
+; Test: bitcast <16 x float> to <16 x i32>.
+; Expected output below: the argument arrives as four <4 x float> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-3 are stored
+; (align 16) through the three leading <4 x i32>* parameters.
+define <16 x i32> @bitcast_cast_16xfloat_to_16xi32(<16 x float>) {
+ %2 = bitcast <16 x float> %0 to <16 x i32>
+ ret <16 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_16xfloat_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %8 = bitcast <4 x float> %3 to <4 x i32>
+; CHECK-NEXT: %9 = bitcast <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %10 = bitcast <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %11 = bitcast <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %9, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %10, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %8
+
+; Test: bitcast <16 x i64> to <16 x double>.
+; Expected output below: the argument arrives as eight <2 x i64> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-7 are stored
+; (align 16) through the seven leading <2 x double>* parameters.
+define <16 x double> @bitcast_cast_16xi64_to_16xdouble(<16 x i64>) {
+ %2 = bitcast <16 x i64> %0 to <16 x double>
+ ret <16 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_16xi64_to_16xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %16 = bitcast <2 x i64> %7 to <2 x double>
+; CHECK-NEXT: %17 = bitcast <2 x i64> %8 to <2 x double>
+; CHECK-NEXT: %18 = bitcast <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %19 = bitcast <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %20 = bitcast <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %21 = bitcast <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %22 = bitcast <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %23 = bitcast <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: store <2 x double> %17, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %18, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %19, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %20, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %6, align 16
+; CHECK-NEXT: ret <2 x double> %16
+
+; Test: bitcast <16 x double> to <16 x i64>.
+; Expected output below: the argument arrives as eight <2 x double> pieces,
+; each piece is bitcast on its own, piece 0 is returned and pieces 1-7 are
+; stored (align 16) through the seven leading <2 x i64>* parameters.
+define <16 x i64> @bitcast_cast_16xdouble_to_16xi64(<16 x double>) {
+ %2 = bitcast <16 x double> %0 to <16 x i64>
+ ret <16 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_16xdouble_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %16 = bitcast <2 x double> %7 to <2 x i64>
+; CHECK-NEXT: %17 = bitcast <2 x double> %8 to <2 x i64>
+; CHECK-NEXT: %18 = bitcast <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %19 = bitcast <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %20 = bitcast <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %21 = bitcast <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %22 = bitcast <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %23 = bitcast <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %16
+
+; Test: ptrtoint <20 x i8*> to <20 x i32>.
+; Expected output below: the argument arrives as five <4 x i8*> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-4 are stored
+; (align 16) through the four leading <4 x i32>* parameters.
+define <20 x i32> @ptrtoint_cast_20xi8ptr_to_20xi32(<20 x i8*>) {
+ %2 = ptrtoint <20 x i8*> %0 to <20 x i32>
+ ret <20 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @ptrtoint_cast_20xi8ptr_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>, <4 x i8*>)
+; CHECK-NEXT: %10 = ptrtoint <4 x i8*> %4 to <4 x i32>
+; CHECK-NEXT: %11 = ptrtoint <4 x i8*> %5 to <4 x i32>
+; CHECK-NEXT: %12 = ptrtoint <4 x i8*> %6 to <4 x i32>
+; CHECK-NEXT: %13 = ptrtoint <4 x i8*> %7 to <4 x i32>
+; CHECK-NEXT: %14 = ptrtoint <4 x i8*> %8 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+; Test: inttoptr <20 x i32> to <20 x i8*>.
+; Expected output below: the argument arrives as five <4 x i32> pieces, each
+; piece is converted on its own, piece 0 is returned and pieces 1-4 are stored
+; (align 16) through the four leading <4 x i8*>* parameters.
+define <20 x i8*> @inttoptr_cast_20xi32_to_20xi8ptr(<20 x i32>) {
+ %2 = inttoptr <20 x i32> %0 to <20 x i8*>
+ ret <20 x i8*> %2
+}
+; CHECK-LABEL: define <4 x i8*> @inttoptr_cast_20xi32_to_20xi8ptr(<4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i8*>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = inttoptr <4 x i32> %4 to <4 x i8*>
+; CHECK-NEXT: %11 = inttoptr <4 x i32> %5 to <4 x i8*>
+; CHECK-NEXT: %12 = inttoptr <4 x i32> %6 to <4 x i8*>
+; CHECK-NEXT: %13 = inttoptr <4 x i32> %7 to <4 x i8*>
+; CHECK-NEXT: %14 = inttoptr <4 x i32> %8 to <4 x i8*>
+; CHECK-NEXT: store <4 x i8*> %11, <4 x i8*>* %0, align 16
+; CHECK-NEXT: store <4 x i8*> %12, <4 x i8*>* %1, align 16
+; CHECK-NEXT: store <4 x i8*> %13, <4 x i8*>* %2, align 16
+; CHECK-NEXT: store <4 x i8*> %14, <4 x i8*>* %3, align 16
+; CHECK-NEXT: ret <4 x i8*> %10
+
+; Test: bitcast <20 x i32> to <20 x float>.
+; Expected output below: the argument arrives as five <4 x i32> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-4 are stored
+; (align 16) through the four leading <4 x float>* parameters.
+define <20 x float> @bitcast_cast_20xi32_to_20xfloat(<20 x i32>) {
+ %2 = bitcast <20 x i32> %0 to <20 x float>
+ ret <20 x float> %2
+}
+; CHECK-LABEL: define <4 x float> @bitcast_cast_20xi32_to_20xfloat(<4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x float>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %10 = bitcast <4 x i32> %4 to <4 x float>
+; CHECK-NEXT: %11 = bitcast <4 x i32> %5 to <4 x float>
+; CHECK-NEXT: %12 = bitcast <4 x i32> %6 to <4 x float>
+; CHECK-NEXT: %13 = bitcast <4 x i32> %7 to <4 x float>
+; CHECK-NEXT: %14 = bitcast <4 x i32> %8 to <4 x float>
+; CHECK-NEXT: store <4 x float> %11, <4 x float>* %0, align 16
+; CHECK-NEXT: store <4 x float> %12, <4 x float>* %1, align 16
+; CHECK-NEXT: store <4 x float> %13, <4 x float>* %2, align 16
+; CHECK-NEXT: store <4 x float> %14, <4 x float>* %3, align 16
+; CHECK-NEXT: ret <4 x float> %10
+
+; Test: bitcast <20 x float> to <20 x i32>.
+; Expected output below: the argument arrives as five <4 x float> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-4 are stored
+; (align 16) through the four leading <4 x i32>* parameters.
+define <20 x i32> @bitcast_cast_20xfloat_to_20xi32(<20 x float>) {
+ %2 = bitcast <20 x float> %0 to <20 x i32>
+ ret <20 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @bitcast_cast_20xfloat_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
+; CHECK-NEXT: %10 = bitcast <4 x float> %4 to <4 x i32>
+; CHECK-NEXT: %11 = bitcast <4 x float> %5 to <4 x i32>
+; CHECK-NEXT: %12 = bitcast <4 x float> %6 to <4 x i32>
+; CHECK-NEXT: %13 = bitcast <4 x float> %7 to <4 x i32>
+; CHECK-NEXT: %14 = bitcast <4 x float> %8 to <4 x i32>
+; CHECK-NEXT: store <4 x i32> %11, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %12, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %13, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %14, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %10
+
+; Test: bitcast <20 x i64> to <20 x double>.
+; Expected output below: the argument arrives as ten <2 x i64> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-9 are stored
+; (align 16) through the nine leading <2 x double>* parameters.
+define <20 x double> @bitcast_cast_20xi64_to_20xdouble(<20 x i64>) {
+ %2 = bitcast <20 x i64> %0 to <20 x double>
+ ret <20 x double> %2
+}
+; CHECK-LABEL: define <2 x double> @bitcast_cast_20xi64_to_20xdouble(<2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x double>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %20 = bitcast <2 x i64> %9 to <2 x double>
+; CHECK-NEXT: %21 = bitcast <2 x i64> %10 to <2 x double>
+; CHECK-NEXT: %22 = bitcast <2 x i64> %11 to <2 x double>
+; CHECK-NEXT: %23 = bitcast <2 x i64> %12 to <2 x double>
+; CHECK-NEXT: %24 = bitcast <2 x i64> %13 to <2 x double>
+; CHECK-NEXT: %25 = bitcast <2 x i64> %14 to <2 x double>
+; CHECK-NEXT: %26 = bitcast <2 x i64> %15 to <2 x double>
+; CHECK-NEXT: %27 = bitcast <2 x i64> %16 to <2 x double>
+; CHECK-NEXT: %28 = bitcast <2 x i64> %17 to <2 x double>
+; CHECK-NEXT: %29 = bitcast <2 x i64> %18 to <2 x double>
+; CHECK-NEXT: store <2 x double> %21, <2 x double>* %0, align 16
+; CHECK-NEXT: store <2 x double> %22, <2 x double>* %1, align 16
+; CHECK-NEXT: store <2 x double> %23, <2 x double>* %2, align 16
+; CHECK-NEXT: store <2 x double> %24, <2 x double>* %3, align 16
+; CHECK-NEXT: store <2 x double> %25, <2 x double>* %4, align 16
+; CHECK-NEXT: store <2 x double> %26, <2 x double>* %5, align 16
+; CHECK-NEXT: store <2 x double> %27, <2 x double>* %6, align 16
+; CHECK-NEXT: store <2 x double> %28, <2 x double>* %7, align 16
+; CHECK-NEXT: store <2 x double> %29, <2 x double>* %8, align 16
+; CHECK-NEXT: ret <2 x double> %20
+
+; Test: bitcast <20 x double> to <20 x i64>.
+; Expected output below: the argument arrives as ten <2 x double> pieces, each
+; piece is bitcast on its own, piece 0 is returned and pieces 1-9 are stored
+; (align 16) through the nine leading <2 x i64>* parameters.
+define <20 x i64> @bitcast_cast_20xdouble_to_20xi64(<20 x double>) {
+ %2 = bitcast <20 x double> %0 to <20 x i64>
+ ret <20 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @bitcast_cast_20xdouble_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>)
+; CHECK-NEXT: %20 = bitcast <2 x double> %9 to <2 x i64>
+; CHECK-NEXT: %21 = bitcast <2 x double> %10 to <2 x i64>
+; CHECK-NEXT: %22 = bitcast <2 x double> %11 to <2 x i64>
+; CHECK-NEXT: %23 = bitcast <2 x double> %12 to <2 x i64>
+; CHECK-NEXT: %24 = bitcast <2 x double> %13 to <2 x i64>
+; CHECK-NEXT: %25 = bitcast <2 x double> %14 to <2 x i64>
+; CHECK-NEXT: %26 = bitcast <2 x double> %15 to <2 x i64>
+; CHECK-NEXT: %27 = bitcast <2 x double> %16 to <2 x i64>
+; CHECK-NEXT: %28 = bitcast <2 x double> %17 to <2 x i64>
+; CHECK-NEXT: %29 = bitcast <2 x double> %18 to <2 x i64>
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+; Test: zext <2 x i8> to <2 x i16>.
+; Expected output below: the signature uses <16 x i8> in and <8 x i16> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; zext, insertelement starting from undef).
+define <2 x i16> @zext_cast_2xi8_to_2xi16(<2 x i8>) {
+ %2 = zext <2 x i8> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_2xi8_to_2xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+; Test: zext <2 x i8> to <2 x i32>.
+; Expected output below: the signature uses <16 x i8> in and <4 x i32> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; zext, insertelement starting from undef).
+define <2 x i32> @zext_cast_2xi8_to_2xi32(<2 x i8>) {
+ %2 = zext <2 x i8> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_2xi8_to_2xi32(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Test: zext <2 x i8> to <2 x i64>.
+; Expected output below: the argument type becomes <16 x i8> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar zext, insertelement starting from undef).
+define <2 x i64> @zext_cast_2xi8_to_2xi64(<2 x i8>) {
+ %2 = zext <2 x i8> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_2xi8_to_2xi64(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: zext <2 x i16> to <2 x i32>.
+; Expected output below: the signature uses <8 x i16> in and <4 x i32> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; zext, insertelement starting from undef).
+define <2 x i32> @zext_cast_2xi16_to_2xi32(<2 x i16>) {
+ %2 = zext <2 x i16> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_2xi16_to_2xi32(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = zext i16 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = zext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Test: zext <2 x i16> to <2 x i64>.
+; Expected output below: the argument type becomes <8 x i16> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar zext, insertelement starting from undef).
+define <2 x i64> @zext_cast_2xi16_to_2xi64(<2 x i16>) {
+ %2 = zext <2 x i16> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_2xi16_to_2xi64(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = zext i16 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = zext i16 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: zext <2 x i32> to <2 x i64>.
+; Expected output below: the argument type becomes <4 x i32> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar zext, insertelement starting from undef).
+define <2 x i64> @zext_cast_2xi32_to_2xi64(<2 x i32>) {
+ %2 = zext <2 x i32> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_2xi32_to_2xi64(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = zext i32 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = zext i32 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: sext <2 x i8> to <2 x i16>.
+; Expected output below: the signature uses <16 x i8> in and <8 x i16> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; sext, insertelement starting from undef).
+define <2 x i16> @sext_cast_2xi8_to_2xi16(<2 x i8>) {
+ %2 = sext <2 x i8> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_2xi8_to_2xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+; Test: sext <2 x i8> to <2 x i32>.
+; Expected output below: the signature uses <16 x i8> in and <4 x i32> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; sext, insertelement starting from undef).
+define <2 x i32> @sext_cast_2xi8_to_2xi32(<2 x i8>) {
+ %2 = sext <2 x i8> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_2xi8_to_2xi32(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Test: sext <2 x i8> to <2 x i64>.
+; Expected output below: the argument type becomes <16 x i8> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar sext, insertelement starting from undef).
+define <2 x i64> @sext_cast_2xi8_to_2xi64(<2 x i8>) {
+ %2 = sext <2 x i8> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_2xi8_to_2xi64(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: sext <2 x i16> to <2 x i32>.
+; Expected output below: the signature uses <8 x i16> in and <4 x i32> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; sext, insertelement starting from undef).
+define <2 x i32> @sext_cast_2xi16_to_2xi32(<2 x i16>) {
+ %2 = sext <2 x i16> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_2xi16_to_2xi32(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sext i16 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Test: sext <2 x i16> to <2 x i64>.
+; Expected output below: the argument type becomes <8 x i16> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar sext, insertelement starting from undef).
+define <2 x i64> @sext_cast_2xi16_to_2xi64(<2 x i16>) {
+ %2 = sext <2 x i16> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_2xi16_to_2xi64(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sext i16 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sext i16 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: sext <2 x i32> to <2 x i64>.
+; Expected output below: the argument type becomes <4 x i32> while the
+; <2 x i64> return type is kept; lanes 0-1 are converted one element at a time
+; (extractelement, scalar sext, insertelement starting from undef).
+define <2 x i64> @sext_cast_2xi32_to_2xi64(<2 x i32>) {
+ %2 = sext <2 x i32> %0 to <2 x i64>
+ ret <2 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_2xi32_to_2xi64(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = sext i32 %2 to i64
+; CHECK-NEXT: %4 = insertelement <2 x i64> undef, i64 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = sext i32 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> %4, i64 %6, i32 1
+; CHECK-NEXT: ret <2 x i64> %7
+
+; Test: trunc <2 x i64> to <2 x i32>.
+; Expected output below: the <2 x i64> argument type is kept while the return
+; type becomes <4 x i32>; lanes 0-1 are converted one element at a time
+; (extractelement, scalar trunc, insertelement starting from undef).
+define <2 x i32> @trunc_cast_2xi64_to_2xi32(<2 x i64>) {
+ %2 = trunc <2 x i64> %0 to <2 x i32>
+ ret <2 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_2xi64_to_2xi32(<2 x i64>)
+; CHECK-NEXT: %2 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %3 = trunc i64 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %6 = trunc i64 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: ret <4 x i32> %7
+
+; Test: trunc <2 x i64> to <2 x i16>.
+; Expected output below: the <2 x i64> argument type is kept while the return
+; type becomes <8 x i16>; lanes 0-1 are converted one element at a time
+; (extractelement, scalar trunc, insertelement starting from undef).
+define <2 x i16> @trunc_cast_2xi64_to_2xi16(<2 x i64>) {
+ %2 = trunc <2 x i64> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_2xi64_to_2xi16(<2 x i64>)
+; CHECK-NEXT: %2 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %3 = trunc i64 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %6 = trunc i64 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+; Test: trunc <2 x i64> to <2 x i8>.
+; Expected output below: the <2 x i64> argument type is kept while the return
+; type becomes <16 x i8>; lanes 0-1 are converted one element at a time
+; (extractelement, scalar trunc, insertelement starting from undef).
+define <2 x i8> @trunc_cast_2xi64_to_2xi8(<2 x i64>) {
+ %2 = trunc <2 x i64> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_2xi64_to_2xi8(<2 x i64>)
+; CHECK-NEXT: %2 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %3 = trunc i64 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %6 = trunc i64 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+; Test: trunc <2 x i32> to <2 x i16>.
+; Expected output below: the signature uses <4 x i32> in and <8 x i16> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; trunc, insertelement starting from undef).
+define <2 x i16> @trunc_cast_2xi32_to_2xi16(<2 x i32>) {
+ %2 = trunc <2 x i32> %0 to <2 x i16>
+ ret <2 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_2xi32_to_2xi16(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = trunc i32 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = trunc i32 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: ret <8 x i16> %7
+
+; Test: trunc <2 x i32> to <2 x i8>.
+; Expected output below: the signature uses <4 x i32> in and <16 x i8> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; trunc, insertelement starting from undef).
+define <2 x i8> @trunc_cast_2xi32_to_2xi8(<2 x i32>) {
+ %2 = trunc <2 x i32> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_2xi32_to_2xi8(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = trunc i32 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = trunc i32 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+; Test: trunc <2 x i16> to <2 x i8>.
+; Expected output below: the signature uses <8 x i16> in and <16 x i8> out, and
+; only lanes 0-1 are converted, one element at a time (extractelement, scalar
+; trunc, insertelement starting from undef).
+define <2 x i8> @trunc_cast_2xi16_to_2xi8(<2 x i16>) {
+ %2 = trunc <2 x i16> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_2xi16_to_2xi8(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = trunc i16 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = trunc i16 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: ret <16 x i8> %7
+
+; Test: zext <4 x i8> to <4 x i16>.
+; Expected output below: the signature uses <16 x i8> in and <8 x i16> out, and
+; only lanes 0-3 are converted, one element at a time (extractelement, scalar
+; zext, insertelement starting from undef).
+define <4 x i16> @zext_cast_4xi8_to_4xi16(<4 x i8>) {
+ %2 = zext <4 x i8> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_4xi8_to_4xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = zext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = zext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: ret <8 x i16> %13
+
+; Test: zext <4 x i8> to <4 x i32>.
+; Expected output below: the argument type becomes <16 x i8> while the
+; <4 x i32> return type is kept; lanes 0-3 are converted one element at a time
+; (extractelement, scalar zext, insertelement starting from undef).
+define <4 x i32> @zext_cast_4xi8_to_4xi32(<4 x i8>) {
+ %2 = zext <4 x i8> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_4xi8_to_4xi32(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = zext i8 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = zext i8 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 3
+; CHECK-NEXT: ret <4 x i32> %13
+
+; Test: zext <4 x i8> to <4 x i64>.
+; Expected output below: the argument type becomes <16 x i8> and the result is
+; built element by element as two <2 x i64> halves. Source lanes 0-1 form the
+; returned half; source lanes 2-3 form a second half (again starting from
+; undef) that is stored (align 16) through the leading <2 x i64>* parameter.
+define <4 x i64> @zext_cast_4xi8_to_4xi64(<4 x i8>) {
+ %2 = zext <4 x i8> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_4xi8_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = zext i8 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = zext i8 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = zext i8 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = zext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: zero-extend a <4 x i16> argument to <4 x i32>.
+; Expected output (below): the argument is widened to <8 x i16>; lanes 0-3 are
+; extended one at a time into a single <4 x i32> result.
+define <4 x i32> @zext_cast_4xi16_to_4xi32(<4 x i16>) {
+ %2 = zext <4 x i16> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_4xi16_to_4xi32(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = zext i16 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = zext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = zext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = zext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 3
+; CHECK-NEXT: ret <4 x i32> %13
+
+; Test input: zero-extend a <4 x i16> argument to <4 x i64>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into two <2 x i64> halves. Lanes 0-1 are returned; lanes 2-3 are
+; stored through the leading <2 x i64>* parameter.
+define <4 x i64> @zext_cast_4xi16_to_4xi64(<4 x i16>) {
+ %2 = zext <4 x i16> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_4xi16_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = zext i16 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = zext i16 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = zext i16 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = zext i16 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: zero-extend a <4 x i32> argument to <4 x i64>.
+; Expected output (below): the argument type is kept as <4 x i32>; the result is
+; split into two <2 x i64> halves. Lanes 0-1 are returned; lanes 2-3 are stored
+; through the leading <2 x i64>* parameter.
+define <4 x i64> @zext_cast_4xi32_to_4xi64(<4 x i32>) {
+ %2 = zext <4 x i32> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_4xi32_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %4 = zext i32 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %7 = zext i32 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %10 = zext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %13 = zext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: sign-extend a <4 x i8> argument to <4 x i16>.
+; Expected output (below): the argument is widened to <16 x i8> and the result to
+; <8 x i16>; only lanes 0-3 of the result are populated.
+define <4 x i16> @sext_cast_4xi8_to_4xi16(<4 x i8>) {
+ %2 = sext <4 x i8> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_4xi8_to_4xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = sext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = sext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: ret <8 x i16> %13
+
+; Test input: sign-extend a <4 x i8> argument to <4 x i32>.
+; Expected output (below): the argument is widened to <16 x i8>; lanes 0-3 are
+; extended one at a time into a single <4 x i32> result.
+define <4 x i32> @sext_cast_4xi8_to_4xi32(<4 x i8>) {
+ %2 = sext <4 x i8> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_4xi8_to_4xi32(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = sext i8 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = sext i8 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 3
+; CHECK-NEXT: ret <4 x i32> %13
+
+; Test input: sign-extend a <4 x i8> argument to <4 x i64>.
+; Expected output (below): the argument is widened to <16 x i8> and the result
+; is split into two <2 x i64> halves. Lanes 0-1 are returned; lanes 2-3 are
+; stored through the leading <2 x i64>* parameter.
+define <4 x i64> @sext_cast_4xi8_to_4xi64(<4 x i8>) {
+ %2 = sext <4 x i8> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_4xi8_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sext i8 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sext i8 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sext i8 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: sign-extend a <4 x i16> argument to <4 x i32>.
+; Expected output (below): the argument is widened to <8 x i16>; lanes 0-3 are
+; extended one at a time into a single <4 x i32> result.
+define <4 x i32> @sext_cast_4xi16_to_4xi32(<4 x i16>) {
+ %2 = sext <4 x i16> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_4xi16_to_4xi32(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = sext i16 %2 to i32
+; CHECK-NEXT: %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = sext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = sext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = sext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 3
+; CHECK-NEXT: ret <4 x i32> %13
+
+; Test input: sign-extend a <4 x i16> argument to <4 x i64>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into two <2 x i64> halves. Lanes 0-1 are returned; lanes 2-3 are
+; stored through the leading <2 x i64>* parameter.
+define <4 x i64> @sext_cast_4xi16_to_4xi64(<4 x i16>) {
+ %2 = sext <4 x i16> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_4xi16_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sext i16 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sext i16 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sext i16 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sext i16 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: sign-extend a <4 x i32> argument to <4 x i64>.
+; Expected output (below): the argument type is kept as <4 x i32>; the result is
+; split into two <2 x i64> halves. Lanes 0-1 are returned; lanes 2-3 are stored
+; through the leading <2 x i64>* parameter.
+define <4 x i64> @sext_cast_4xi32_to_4xi64(<4 x i32>) {
+ %2 = sext <4 x i32> %0 to <4 x i64>
+ ret <4 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_4xi32_to_4xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %4 = sext i32 %3 to i64
+; CHECK-NEXT: %5 = insertelement <2 x i64> undef, i64 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %7 = sext i32 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> %5, i64 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %10 = sext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %13 = sext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: store <2 x i64> %14, <2 x i64>* %0, align 16
+; CHECK-NEXT: ret <2 x i64> %8
+
+; Test input: truncate a <4 x i64> argument to <4 x i32>.
+; Expected output (below): the argument arrives as two <2 x i64> parameters;
+; their four lanes are truncated one at a time into a single <4 x i32> result.
+define <4 x i32> @trunc_cast_4xi64_to_4xi32(<4 x i64>) {
+ %2 = trunc <4 x i64> %0 to <4 x i32>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_4xi64_to_4xi32(<2 x i64>, <2 x i64>)
+; CHECK-NEXT: %3 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %4 = trunc i64 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %7 = trunc i64 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %10 = trunc i64 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %13 = trunc i64 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: ret <4 x i32> %14
+
+; Test input: truncate a <4 x i64> argument to <4 x i16>.
+; Expected output (below): the argument arrives as two <2 x i64> parameters and
+; the result is widened to <8 x i16>; only lanes 0-3 of the result are populated.
+define <4 x i16> @trunc_cast_4xi64_to_4xi16(<4 x i64>) {
+ %2 = trunc <4 x i64> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_4xi64_to_4xi16(<2 x i64>, <2 x i64>)
+; CHECK-NEXT: %3 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %4 = trunc i64 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %7 = trunc i64 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %10 = trunc i64 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %13 = trunc i64 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: ret <8 x i16> %14
+
+; Test input: truncate a <4 x i64> argument to <4 x i8>.
+; Expected output (below): the argument arrives as two <2 x i64> parameters and
+; the result is widened to <16 x i8>; only lanes 0-3 of the result are populated.
+define <4 x i8> @trunc_cast_4xi64_to_4xi8(<4 x i64>) {
+ %2 = trunc <4 x i64> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_4xi64_to_4xi8(<2 x i64>, <2 x i64>)
+; CHECK-NEXT: %3 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %4 = trunc i64 %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %7 = trunc i64 %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %10 = trunc i64 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %13 = trunc i64 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: ret <16 x i8> %14
+
+; Test input: truncate a <4 x i32> argument to <4 x i16>.
+; Expected output (below): the argument type is kept as <4 x i32> and the result
+; is widened to <8 x i16>; only lanes 0-3 of the result are populated.
+define <4 x i16> @trunc_cast_4xi32_to_4xi16(<4 x i32>) {
+ %2 = trunc <4 x i32> %0 to <4 x i16>
+ ret <4 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_4xi32_to_4xi16(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = trunc i32 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = trunc i32 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %9 = trunc i32 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %12 = trunc i32 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: ret <8 x i16> %13
+
+; Test input: truncate a <4 x i32> argument to <4 x i8>.
+; Expected output (below): the argument type is kept as <4 x i32> and the result
+; is widened to <16 x i8>; only lanes 0-3 of the result are populated.
+define <4 x i8> @trunc_cast_4xi32_to_4xi8(<4 x i32>) {
+ %2 = trunc <4 x i32> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_4xi32_to_4xi8(<4 x i32>)
+; CHECK-NEXT: %2 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %3 = trunc i32 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %6 = trunc i32 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %9 = trunc i32 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %12 = trunc i32 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: ret <16 x i8> %13
+
+; Test input: truncate a <4 x i16> argument to <4 x i8>.
+; Expected output (below): the argument is widened to <8 x i16> and the result to
+; <16 x i8>; only lanes 0-3 of the result are populated.
+define <4 x i8> @trunc_cast_4xi16_to_4xi8(<4 x i16>) {
+ %2 = trunc <4 x i16> %0 to <4 x i8>
+ ret <4 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_4xi16_to_4xi8(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = trunc i16 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = trunc i16 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = trunc i16 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = trunc i16 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: ret <16 x i8> %13
+
+; Test input: zero-extend a <6 x i8> argument to <6 x i16>.
+; Expected output (below): the argument is widened to <16 x i8> and the result to
+; <8 x i16>; only lanes 0-5 of the result are populated.
+define <6 x i16> @zext_cast_6xi8_to_6xi16(<6 x i8>) {
+ %2 = zext <6 x i8> %0 to <6 x i16>
+ ret <6 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_6xi8_to_6xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = zext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = zext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <16 x i8> %0, i32 4
+; CHECK-NEXT: %15 = zext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <16 x i8> %0, i32 5
+; CHECK-NEXT: %18 = zext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 5
+; CHECK-NEXT: ret <8 x i16> %19
+
+; Test input: zero-extend a <6 x i8> argument to <6 x i32>.
+; Expected output (below): the argument is widened to <16 x i8> and the result
+; is split into two <4 x i32> parts. Lanes 0-3 are returned; lanes 4-5 occupy
+; lanes 0-1 of a second vector stored through the leading <4 x i32>* parameter.
+define <6 x i32> @zext_cast_6xi8_to_6xi32(<6 x i8>) {
+ %2 = zext <6 x i8> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_6xi8_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = zext i8 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = zext i8 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = zext i8 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = zext i8 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = zext i8 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = zext i8 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+; Test input: zero-extend a <6 x i8> argument to <6 x i64>.
+; Expected output (below): the argument is widened to <16 x i8> and the result
+; is split into three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5
+; are stored through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @zext_cast_6xi8_to_6xi64(<6 x i8>) {
+ %2 = zext <6 x i8> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_6xi8_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = zext i8 %4 to i64
+; CHECK-NEXT: %6 = insertelement <2 x i64> undef, i64 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = zext i8 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> %6, i64 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = zext i8 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = zext i8 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = zext i8 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = zext i8 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %9
+
+; Test input: zero-extend a <6 x i16> argument to <6 x i32>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into two <4 x i32> parts. Lanes 0-3 are returned; lanes 4-5 occupy
+; lanes 0-1 of a second vector stored through the leading <4 x i32>* parameter.
+define <6 x i32> @zext_cast_6xi16_to_6xi32(<6 x i16>) {
+ %2 = zext <6 x i16> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_6xi16_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = zext i16 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = zext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = zext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = zext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = zext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = zext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+; Test input: zero-extend a <6 x i16> argument to <6 x i64>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5
+; are stored through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @zext_cast_6xi16_to_6xi64(<6 x i16>) {
+ %2 = zext <6 x i16> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_6xi16_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %4 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %5 = zext i16 %4 to i64
+; CHECK-NEXT: %6 = insertelement <2 x i64> undef, i64 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %8 = zext i16 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> %6, i64 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %11 = zext i16 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %14 = zext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %17 = zext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %20 = zext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %9
+
+; Test input: zero-extend a <6 x i32> argument to <6 x i64>.
+; Expected output (below): the argument arrives as two <4 x i32> parameters
+; (source lanes 4-5 are lanes 0-1 of the second) and the result is split into
+; three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5 are stored
+; through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @zext_cast_6xi32_to_6xi64(<6 x i32>) {
+ %2 = zext <6 x i32> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_6xi32_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %6 = zext i32 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %9 = zext i32 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %12 = zext i32 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %15 = zext i32 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %18 = zext i32 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %21 = zext i32 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+; Test input: sign-extend a <6 x i8> argument to <6 x i16>.
+; Expected output (below): the argument is widened to <16 x i8> and the result to
+; <8 x i16>; only lanes 0-5 of the result are populated.
+define <6 x i16> @sext_cast_6xi8_to_6xi16(<6 x i8>) {
+ %2 = sext <6 x i8> %0 to <6 x i16>
+ ret <6 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_6xi8_to_6xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = sext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = sext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <16 x i8> %0, i32 4
+; CHECK-NEXT: %15 = sext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <16 x i8> %0, i32 5
+; CHECK-NEXT: %18 = sext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 5
+; CHECK-NEXT: ret <8 x i16> %19
+
+; Test input: sign-extend a <6 x i8> argument to <6 x i32>.
+; Expected output (below): the argument is widened to <16 x i8> and the result
+; is split into two <4 x i32> parts. Lanes 0-3 are returned; lanes 4-5 occupy
+; lanes 0-1 of a second vector stored through the leading <4 x i32>* parameter.
+define <6 x i32> @sext_cast_6xi8_to_6xi32(<6 x i8>) {
+ %2 = sext <6 x i8> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_6xi8_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sext i8 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sext i8 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sext i8 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sext i8 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sext i8 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sext i8 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+; Test input: sign-extend a <6 x i8> argument to <6 x i64>.
+; Expected output (below): the argument is widened to <16 x i8> and the result
+; is split into three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5
+; are stored through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @sext_cast_6xi8_to_6xi64(<6 x i8>) {
+ %2 = sext <6 x i8> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_6xi8_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = sext i8 %4 to i64
+; CHECK-NEXT: %6 = insertelement <2 x i64> undef, i64 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = sext i8 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> %6, i64 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = sext i8 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = sext i8 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = sext i8 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = sext i8 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %9
+
+; Test input: sign-extend a <6 x i16> argument to <6 x i32>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into two <4 x i32> parts. Lanes 0-3 are returned; lanes 4-5 occupy
+; lanes 0-1 of a second vector stored through the leading <4 x i32>* parameter.
+define <6 x i32> @sext_cast_6xi16_to_6xi32(<6 x i16>) {
+ %2 = sext <6 x i16> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_6xi16_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sext i16 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = sext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = sext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: store <4 x i32> %20, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+; Test input: sign-extend a <6 x i16> argument to <6 x i64>.
+; Expected output (below): the argument is widened to <8 x i16> and the result
+; is split into three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5
+; are stored through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @sext_cast_6xi16_to_6xi64(<6 x i16>) {
+ %2 = sext <6 x i16> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_6xi16_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %4 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %5 = sext i16 %4 to i64
+; CHECK-NEXT: %6 = insertelement <2 x i64> undef, i64 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %8 = sext i16 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> %6, i64 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %11 = sext i16 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %14 = sext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %17 = sext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %20 = sext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: store <2 x i64> %15, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %9
+
+; Test input: sign-extend a <6 x i32> argument to <6 x i64>.
+; Expected output (below): the argument arrives as two <4 x i32> parameters
+; (source lanes 4-5 are lanes 0-1 of the second) and the result is split into
+; three <2 x i64> parts. Lanes 0-1 are returned; lanes 2-3 and 4-5 are stored
+; through the first and second <2 x i64>* parameters respectively.
+define <6 x i64> @sext_cast_6xi32_to_6xi64(<6 x i32>) {
+ %2 = sext <6 x i32> %0 to <6 x i64>
+ ret <6 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_6xi32_to_6xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %6 = sext i32 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %9 = sext i32 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %12 = sext i32 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %15 = sext i32 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %18 = sext i32 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %21 = sext i32 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+; Test input: truncate a <6 x i64> argument to <6 x i32>.
+; Expected output (below): the argument arrives as three <2 x i64> parameters and
+; the result is split into two <4 x i32> parts. Lanes 0-3 are returned; lanes
+; 4-5 occupy lanes 0-1 of a second vector stored through the leading <4 x i32>*
+; parameter.
+define <6 x i32> @trunc_cast_6xi64_to_6xi32(<6 x i64>) {
+ %2 = trunc <6 x i64> %0 to <6 x i32>
+ ret <6 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_6xi64_to_6xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %5 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %6 = trunc i64 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %9 = trunc i64 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %12 = trunc i64 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %15 = trunc i64 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %18 = trunc i64 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %21 = trunc i64 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: store <4 x i32> %22, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+; Test input: truncate a <6 x i64> argument to <6 x i16>.
+; Expected output (below): the argument arrives as three <2 x i64> parameters and
+; the result is widened to <8 x i16>; only lanes 0-5 of the result are populated.
+define <6 x i16> @trunc_cast_6xi64_to_6xi16(<6 x i64>) {
+ %2 = trunc <6 x i64> %0 to <6 x i16>
+ ret <6 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_6xi64_to_6xi16(<2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %4 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %5 = trunc i64 %4 to i16
+; CHECK-NEXT: %6 = insertelement <8 x i16> undef, i16 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %8 = trunc i64 %7 to i16
+; CHECK-NEXT: %9 = insertelement <8 x i16> %6, i16 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %11 = trunc i64 %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> %9, i16 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %14 = trunc i64 %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %17 = trunc i64 %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %20 = trunc i64 %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 5
+; CHECK-NEXT: ret <8 x i16> %21
+
+define <6 x i8> @trunc_cast_6xi64_to_6xi8(<6 x i64>) { ; Test input: trunc <6 x i64> -> <6 x i8>; expected signature after the pass is <16 x i8> (<2 x i64>, <2 x i64>, <2 x i64>).
+ %2 = trunc <6 x i64> %0 to <6 x i8> ; Expected to be rewritten as one extractelement / scalar trunc / insertelement triple per lane.
+ ret <6 x i8> %2 ; Expected to return a <16 x i8> chain started from undef, so only lanes 0-5 are written.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_6xi64_to_6xi8(<2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %4 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %5 = trunc i64 %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %8 = trunc i64 %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %11 = trunc i64 %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %14 = trunc i64 %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %17 = trunc i64 %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %20 = trunc i64 %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: ret <16 x i8> %21
+
+define <6 x i16> @trunc_cast_6xi32_to_6xi16(<6 x i32>) { ; Test input: trunc <6 x i32> -> <6 x i16>; expected signature after the pass is <8 x i16> (<4 x i32>, <4 x i32>).
+ %2 = trunc <6 x i32> %0 to <6 x i16> ; Expected to be rewritten lane by lane, reading lanes 0-3 of the first argument and lanes 0-1 of the second.
+ ret <6 x i16> %2 ; Expected to return an <8 x i16> chain started from undef, so only lanes 0-5 are written.
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_6xi32_to_6xi16(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %4 = trunc i32 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %7 = trunc i32 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %10 = trunc i32 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %13 = trunc i32 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %16 = trunc i32 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %19 = trunc i32 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: ret <8 x i16> %20
+
+define <6 x i8> @trunc_cast_6xi32_to_6xi8(<6 x i32>) { ; Test input: trunc <6 x i32> -> <6 x i8>; expected signature after the pass is <16 x i8> (<4 x i32>, <4 x i32>).
+ %2 = trunc <6 x i32> %0 to <6 x i8> ; Expected to be rewritten lane by lane, reading lanes 0-3 of the first argument and lanes 0-1 of the second.
+ ret <6 x i8> %2 ; Expected to return a <16 x i8> chain started from undef, so only lanes 0-5 are written.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_6xi32_to_6xi8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %4 = trunc i32 %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %7 = trunc i32 %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %10 = trunc i32 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %13 = trunc i32 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %16 = trunc i32 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %19 = trunc i32 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: ret <16 x i8> %20
+
+define <6 x i8> @trunc_cast_6xi16_to_6xi8(<6 x i16>) { ; Test input: trunc <6 x i16> -> <6 x i8>; expected signature after the pass is <16 x i8> (<8 x i16>).
+ %2 = trunc <6 x i16> %0 to <6 x i8> ; Expected to be rewritten lane by lane, reading lanes 0-5 of the <8 x i16> argument.
+ ret <6 x i8> %2 ; Expected to return a <16 x i8> chain started from undef, so only lanes 0-5 are written.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_6xi16_to_6xi8(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = trunc i16 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = trunc i16 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = trunc i16 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = trunc i16 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <8 x i16> %0, i32 4
+; CHECK-NEXT: %15 = trunc i16 %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <8 x i16> %0, i32 5
+; CHECK-NEXT: %18 = trunc i16 %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 5
+; CHECK-NEXT: ret <16 x i8> %19
+
+define <8 x i16> @zext_cast_8xi8_to_8xi16(<8 x i8>) { ; Test input: zext <8 x i8> -> <8 x i16>; expected signature after the pass is <8 x i16> (<16 x i8>).
+ %2 = zext <8 x i8> %0 to <8 x i16> ; Expected to be rewritten lane by lane (extractelement / scalar zext / insertelement), reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i16> %2 ; Expected to return the <8 x i16> chain with all eight lanes written.
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_8xi8_to_8xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = zext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = zext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = zext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = zext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <16 x i8> %0, i32 4
+; CHECK-NEXT: %15 = zext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <16 x i8> %0, i32 5
+; CHECK-NEXT: %18 = zext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 5
+; CHECK-NEXT: %20 = extractelement <16 x i8> %0, i32 6
+; CHECK-NEXT: %21 = zext i8 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 6
+; CHECK-NEXT: %23 = extractelement <16 x i8> %0, i32 7
+; CHECK-NEXT: %24 = zext i8 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 7
+; CHECK-NEXT: ret <8 x i16> %25
+
+define <8 x i32> @zext_cast_8xi8_to_8xi32(<8 x i8>) { ; Test input: zext <8 x i8> -> <8 x i32>; expected signature after the pass is <4 x i32> (<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>).
+ %2 = zext <8 x i8> %0 to <8 x i32> ; Expected to be rewritten lane by lane into two <4 x i32> values, reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i32> %2 ; Expected to return the <4 x i32> holding elements 0-3; elements 4-7 are stored through the leading pointer argument with align 16.
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_8xi8_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = zext i8 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = zext i8 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = zext i8 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = zext i8 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = zext i8 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = zext i8 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = zext i8 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = zext i8 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i64> @zext_cast_8xi8_to_8xi64(<8 x i8>) { ; Test input: zext <8 x i8> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by the <16 x i8> source.
+ %2 = zext <8 x i8> %0 to <8 x i64> ; Expected to be rewritten lane by lane into four <2 x i64> values, reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_8xi8_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = zext i8 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = zext i8 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = zext i8 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = zext i8 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = zext i8 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = zext i8 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = zext i8 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> undef, i64 %24, i32 0
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = zext i8 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> %25, i64 %27, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <8 x i32> @zext_cast_8xi16_to_8xi32(<8 x i16>) { ; Test input: zext <8 x i16> -> <8 x i32>; expected signature after the pass is <4 x i32> (<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>).
+ %2 = zext <8 x i16> %0 to <8 x i32> ; Expected to be rewritten lane by lane (extractelement / scalar zext / insertelement) into two <4 x i32> values.
+ ret <8 x i32> %2 ; Expected to return the <4 x i32> holding elements 0-3; elements 4-7 are stored through the leading pointer argument with align 16.
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_8xi16_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = zext i16 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = zext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = zext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = zext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = zext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = zext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %22 = zext i16 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %25 = zext i16 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i64> @zext_cast_8xi16_to_8xi64(<8 x i16>) { ; Test input: zext <8 x i16> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by the <8 x i16> source.
+ %2 = zext <8 x i16> %0 to <8 x i64> ; Expected to be rewritten lane by lane (extractelement / scalar zext / insertelement) into four <2 x i64> values.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_8xi16_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %6 = zext i16 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %9 = zext i16 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %12 = zext i16 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %15 = zext i16 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %18 = zext i16 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %21 = zext i16 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %24 = zext i16 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> undef, i64 %24, i32 0
+; CHECK-NEXT: %26 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %27 = zext i16 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> %25, i64 %27, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <8 x i64> @zext_cast_8xi32_to_8xi64(<8 x i32>) { ; Test input: zext <8 x i32> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by two <4 x i32> sources.
+ %2 = zext <8 x i32> %0 to <8 x i64> ; Expected to be rewritten lane by lane into four <2 x i64> values, reading all four lanes of each <4 x i32> argument.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_8xi32_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %7 = zext i32 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> undef, i64 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %10 = zext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> %8, i64 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %13 = zext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %16 = zext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %19 = zext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %22 = zext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %25 = zext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %28 = zext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %11
+
+define <8 x i16> @sext_cast_8xi8_to_8xi16(<8 x i8>) { ; Test input: sext <8 x i8> -> <8 x i16>; expected signature after the pass is <8 x i16> (<16 x i8>).
+ %2 = sext <8 x i8> %0 to <8 x i16> ; Expected to be rewritten lane by lane (extractelement / scalar sext / insertelement), reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i16> %2 ; Expected to return the <8 x i16> chain with all eight lanes written.
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_8xi8_to_8xi16(<16 x i8>)
+; CHECK-NEXT: %2 = extractelement <16 x i8> %0, i32 0
+; CHECK-NEXT: %3 = sext i8 %2 to i16
+; CHECK-NEXT: %4 = insertelement <8 x i16> undef, i16 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <16 x i8> %0, i32 1
+; CHECK-NEXT: %6 = sext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> %4, i16 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <16 x i8> %0, i32 2
+; CHECK-NEXT: %9 = sext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <16 x i8> %0, i32 3
+; CHECK-NEXT: %12 = sext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <16 x i8> %0, i32 4
+; CHECK-NEXT: %15 = sext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <16 x i8> %0, i32 5
+; CHECK-NEXT: %18 = sext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 5
+; CHECK-NEXT: %20 = extractelement <16 x i8> %0, i32 6
+; CHECK-NEXT: %21 = sext i8 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 6
+; CHECK-NEXT: %23 = extractelement <16 x i8> %0, i32 7
+; CHECK-NEXT: %24 = sext i8 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 7
+; CHECK-NEXT: ret <8 x i16> %25
+
+define <8 x i32> @sext_cast_8xi8_to_8xi32(<8 x i8>) { ; Test input: sext <8 x i8> -> <8 x i32>; expected signature after the pass is <4 x i32> (<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>).
+ %2 = sext <8 x i8> %0 to <8 x i32> ; Expected to be rewritten lane by lane into two <4 x i32> values, reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i32> %2 ; Expected to return the <4 x i32> holding elements 0-3; elements 4-7 are stored through the leading pointer argument with align 16.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_8xi8_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sext i8 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sext i8 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sext i8 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sext i8 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sext i8 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sext i8 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = sext i8 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = sext i8 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i64> @sext_cast_8xi8_to_8xi64(<8 x i8>) { ; Test input: sext <8 x i8> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by the <16 x i8> source.
+ %2 = sext <8 x i8> %0 to <8 x i64> ; Expected to be rewritten lane by lane into four <2 x i64> values, reading lanes 0-7 of the <16 x i8> argument.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_8xi8_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = sext i8 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = sext i8 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = sext i8 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = sext i8 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = sext i8 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = sext i8 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = sext i8 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> undef, i64 %24, i32 0
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = sext i8 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> %25, i64 %27, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <8 x i32> @sext_cast_8xi16_to_8xi32(<8 x i16>) { ; Test input: sext <8 x i16> -> <8 x i32>; expected signature after the pass is <4 x i32> (<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>).
+ %2 = sext <8 x i16> %0 to <8 x i32> ; Expected to be rewritten lane by lane (extractelement / scalar sext / insertelement) into two <4 x i32> values.
+ ret <8 x i32> %2 ; Expected to return the <4 x i32> holding elements 0-3; elements 4-7 are stored through the leading pointer argument with align 16.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_8xi16_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %4 = sext i16 %3 to i32
+; CHECK-NEXT: %5 = insertelement <4 x i32> undef, i32 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %7 = sext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> %5, i32 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %10 = sext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %13 = sext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %16 = sext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %19 = sext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %22 = sext i16 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %25 = sext i16 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: store <4 x i32> %26, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %14
+
+define <8 x i64> @sext_cast_8xi16_to_8xi64(<8 x i16>) { ; Test input: sext <8 x i16> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by the <8 x i16> source.
+ %2 = sext <8 x i16> %0 to <8 x i64> ; Expected to be rewritten lane by lane (extractelement / scalar sext / insertelement) into four <2 x i64> values.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_8xi16_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %6 = sext i16 %5 to i64
+; CHECK-NEXT: %7 = insertelement <2 x i64> undef, i64 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %9 = sext i16 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> %7, i64 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %12 = sext i16 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> undef, i64 %12, i32 0
+; CHECK-NEXT: %14 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %15 = sext i16 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> %13, i64 %15, i32 1
+; CHECK-NEXT: %17 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %18 = sext i16 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> undef, i64 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %21 = sext i16 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> %19, i64 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %24 = sext i16 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> undef, i64 %24, i32 0
+; CHECK-NEXT: %26 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %27 = sext i16 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> %25, i64 %27, i32 1
+; CHECK-NEXT: store <2 x i64> %16, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %22, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %28, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %10
+
+define <8 x i64> @sext_cast_8xi32_to_8xi64(<8 x i32>) { ; Test input: sext <8 x i32> -> <8 x i64>; expected signature after the pass is <2 x i64> with three leading <2 x i64>* arguments followed by two <4 x i32> sources.
+ %2 = sext <8 x i32> %0 to <8 x i64> ; Expected to be rewritten lane by lane into four <2 x i64> values, reading all four lanes of each <4 x i32> argument.
+ ret <8 x i64> %2 ; Expected to return the <2 x i64> holding elements 0-1; elements 2-3, 4-5 and 6-7 are stored through pointer arguments %0, %1 and %2 with align 16.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_8xi32_to_8xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %7 = sext i32 %6 to i64
+; CHECK-NEXT: %8 = insertelement <2 x i64> undef, i64 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %10 = sext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> %8, i64 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %13 = sext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %16 = sext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %19 = sext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %22 = sext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %25 = sext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %28 = sext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: store <2 x i64> %17, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %2, align 16
+; CHECK-NEXT: ret <2 x i64> %11
+
+define <8 x i32> @trunc_cast_8xi64_to_8xi32(<8 x i64>) { ; Test input: trunc <8 x i64> -> <8 x i32>; expected signature after the pass is <4 x i32> with one leading <4 x i32>* argument followed by four <2 x i64> sources.
+ %2 = trunc <8 x i64> %0 to <8 x i32> ; Expected to be rewritten lane by lane into two <4 x i32> values, each filled from two of the <2 x i64> arguments.
+ ret <8 x i32> %2 ; Expected to return the <4 x i32> holding elements 0-3; elements 4-7 are stored through the leading pointer argument with align 16.
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_8xi64_to_8xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %6 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %7 = trunc i64 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> undef, i32 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %10 = trunc i64 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %13 = trunc i64 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %16 = trunc i64 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %19 = trunc i64 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> undef, i32 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %22 = trunc i64 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %25 = trunc i64 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 2
+; CHECK-NEXT: %27 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %28 = trunc i64 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 3
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %0, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <8 x i16> @trunc_cast_8xi64_to_8xi16(<8 x i64>) { ; Test input: trunc <8 x i64> -> <8 x i16>; expected signature after the pass is <8 x i16> (<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>).
+ %2 = trunc <8 x i64> %0 to <8 x i16> ; Expected to be rewritten as one extractelement / scalar trunc / insertelement triple per lane, two lanes per argument.
+ ret <8 x i16> %2 ; Expected to return the <8 x i16> chain with all eight lanes written.
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_8xi64_to_8xi16(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %6 = trunc i64 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %9 = trunc i64 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %12 = trunc i64 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %15 = trunc i64 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %18 = trunc i64 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %21 = trunc i64 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %24 = trunc i64 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %27 = trunc i64 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <8 x i8> @trunc_cast_8xi64_to_8xi8(<8 x i64>) { ; Test input: trunc <8 x i64> -> <8 x i8>; expected signature after the pass is <16 x i8> (<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>).
+ %2 = trunc <8 x i64> %0 to <8 x i8> ; Expected to be rewritten as one extractelement / scalar trunc / insertelement triple per lane, two lanes per argument.
+ ret <8 x i8> %2 ; Expected to return a <16 x i8> chain started from undef, so only lanes 0-7 are written.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_8xi64_to_8xi8(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %5 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %6 = trunc i64 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %9 = trunc i64 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %12 = trunc i64 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %15 = trunc i64 %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %18 = trunc i64 %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %21 = trunc i64 %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %24 = trunc i64 %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %27 = trunc i64 %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: ret <16 x i8> %28
+
+define <8 x i16> @trunc_cast_8xi32_to_8xi16(<8 x i32>) { ; Test input: trunc <8 x i32> -> <8 x i16>; expected signature after the pass is <8 x i16> (<4 x i32>, <4 x i32>).
+ %2 = trunc <8 x i32> %0 to <8 x i16> ; Expected to be rewritten as one extractelement / scalar trunc / insertelement triple per lane, four lanes per argument.
+ ret <8 x i16> %2 ; Expected to return the <8 x i16> chain with all eight lanes written.
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_8xi32_to_8xi16(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %4 = trunc i32 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %7 = trunc i32 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %10 = trunc i32 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %13 = trunc i32 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %16 = trunc i32 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %19 = trunc i32 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %22 = trunc i32 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %25 = trunc i32 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: ret <8 x i16> %26
+
+define <8 x i8> @trunc_cast_8xi32_to_8xi8(<8 x i32>) { ; Test input: trunc <8 x i32> -> <8 x i8>; expected signature after the pass is <16 x i8> (<4 x i32>, <4 x i32>).
+ %2 = trunc <8 x i32> %0 to <8 x i8> ; Expected to be rewritten as one extractelement / scalar trunc / insertelement triple per lane, four lanes per argument.
+ ret <8 x i8> %2 ; Expected to return a <16 x i8> chain started from undef, so only lanes 0-7 are written.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_8xi32_to_8xi8(<4 x i32>, <4 x i32>)
+; CHECK-NEXT: %3 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %4 = trunc i32 %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %7 = trunc i32 %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %10 = trunc i32 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %13 = trunc i32 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %16 = trunc i32 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %19 = trunc i32 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %22 = trunc i32 %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %25 = trunc i32 %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 7
+; CHECK-NEXT: ret <16 x i8> %26
+
+; Truncate <8 x i16> to <8 x i8>. In the expected output below the argument
+; type stays <8 x i16>, while the return type becomes <16 x i8> whose lanes
+; 0-7 are filled one scalar trunc at a time starting from undef.
+define <8 x i8> @trunc_cast_8xi16_to_8xi8(<8 x i16>) {
+ %2 = trunc <8 x i16> %0 to <8 x i8>
+ ret <8 x i8> %2
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_8xi16_to_8xi8(<8 x i16>)
+; CHECK-NEXT: %2 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %3 = trunc i16 %2 to i8
+; CHECK-NEXT: %4 = insertelement <16 x i8> undef, i8 %3, i32 0
+; CHECK-NEXT: %5 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %6 = trunc i16 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> %4, i8 %6, i32 1
+; CHECK-NEXT: %8 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %9 = trunc i16 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 2
+; CHECK-NEXT: %11 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %12 = trunc i16 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 3
+; CHECK-NEXT: %14 = extractelement <8 x i16> %0, i32 4
+; CHECK-NEXT: %15 = trunc i16 %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 4
+; CHECK-NEXT: %17 = extractelement <8 x i16> %0, i32 5
+; CHECK-NEXT: %18 = trunc i16 %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 5
+; CHECK-NEXT: %20 = extractelement <8 x i16> %0, i32 6
+; CHECK-NEXT: %21 = trunc i16 %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 6
+; CHECK-NEXT: %23 = extractelement <8 x i16> %0, i32 7
+; CHECK-NEXT: %24 = trunc i16 %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 7
+; CHECK-NEXT: ret <16 x i8> %25
+
+; Zero-extend <12 x i8> to <12 x i16>. In the expected output below the source
+; becomes a <16 x i8> argument of which elements 0-11 are read. Elements 0-7
+; form the returned <8 x i16>; elements 8-11 fill lanes 0-3 of a second
+; <8 x i16> that is stored through the leading pointer argument.
+define <12 x i16> @zext_cast_12xi8_to_12xi16(<12 x i8>) {
+ %2 = zext <12 x i8> %0 to <12 x i16>
+ ret <12 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_12xi8_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = zext i8 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = zext i8 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = zext i8 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = zext i8 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = zext i8 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = zext i8 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = zext i8 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = zext i8 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: %28 = zext i8 %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> undef, i16 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: %31 = zext i8 %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> %29, i16 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: %34 = zext i8 %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: %37 = zext i8 %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 3
+; CHECK-NEXT: store <8 x i16> %38, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %26
+
+; Zero-extend <12 x i8> to <12 x i32>. In the expected output below the source
+; is a single <16 x i8> argument (elements 0-11 read) and the result is built
+; as three <4 x i32> pieces: the first is returned, the other two are stored
+; through the two leading pointer arguments.
+define <12 x i32> @zext_cast_12xi8_to_12xi32(<12 x i8>) {
+ %2 = zext <12 x i8> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_12xi8_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = zext i8 %4 to i32
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = zext i8 %7 to i32
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = zext i8 %10 to i32
+; CHECK-NEXT: %12 = insertelement <4 x i32> %9, i32 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = zext i8 %13 to i32
+; CHECK-NEXT: %15 = insertelement <4 x i32> %12, i32 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = zext i8 %16 to i32
+; CHECK-NEXT: %18 = insertelement <4 x i32> undef, i32 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = zext i8 %19 to i32
+; CHECK-NEXT: %21 = insertelement <4 x i32> %18, i32 %20, i32 1
+; CHECK-NEXT: %22 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %23 = zext i8 %22 to i32
+; CHECK-NEXT: %24 = insertelement <4 x i32> %21, i32 %23, i32 2
+; CHECK-NEXT: %25 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %26 = zext i8 %25 to i32
+; CHECK-NEXT: %27 = insertelement <4 x i32> %24, i32 %26, i32 3
+; CHECK-NEXT: %28 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %29 = zext i8 %28 to i32
+; CHECK-NEXT: %30 = insertelement <4 x i32> undef, i32 %29, i32 0
+; CHECK-NEXT: %31 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %32 = zext i8 %31 to i32
+; CHECK-NEXT: %33 = insertelement <4 x i32> %30, i32 %32, i32 1
+; CHECK-NEXT: %34 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %35 = zext i8 %34 to i32
+; CHECK-NEXT: %36 = insertelement <4 x i32> %33, i32 %35, i32 2
+; CHECK-NEXT: %37 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %38 = zext i8 %37 to i32
+; CHECK-NEXT: %39 = insertelement <4 x i32> %36, i32 %38, i32 3
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %39, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+; Zero-extend <12 x i8> to <12 x i64>. In the expected output below the source
+; is a single <16 x i8> argument (elements 0-11 read) and the result is built
+; as six <2 x i64> pieces: the first is returned, the remaining five are
+; stored through the five leading pointer arguments.
+define <12 x i64> @zext_cast_12xi8_to_12xi64(<12 x i8>) {
+ %2 = zext <12 x i8> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_12xi8_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %8 = zext i8 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> undef, i64 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %11 = zext i8 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> %9, i64 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %14 = zext i8 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> undef, i64 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %17 = zext i8 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> %15, i64 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <16 x i8> %5, i32 4
+; CHECK-NEXT: %20 = zext i8 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> undef, i64 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %5, i32 5
+; CHECK-NEXT: %23 = zext i8 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> %21, i64 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %5, i32 6
+; CHECK-NEXT: %26 = zext i8 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> undef, i64 %26, i32 0
+; CHECK-NEXT: %28 = extractelement <16 x i8> %5, i32 7
+; CHECK-NEXT: %29 = zext i8 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> %27, i64 %29, i32 1
+; CHECK-NEXT: %31 = extractelement <16 x i8> %5, i32 8
+; CHECK-NEXT: %32 = zext i8 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %5, i32 9
+; CHECK-NEXT: %35 = zext i8 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> %33, i64 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %5, i32 10
+; CHECK-NEXT: %38 = zext i8 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> undef, i64 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <16 x i8> %5, i32 11
+; CHECK-NEXT: %41 = zext i8 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> %39, i64 %41, i32 1
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %30, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %42, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+
+; Zero-extend <12 x i16> to <12 x i32>. In the expected output below the source
+; arrives as two <8 x i16> arguments (all of the first, lanes 0-3 of the
+; second are read) and the result is built as three <4 x i32> pieces: the
+; first is returned, the other two are stored through the two leading pointer
+; arguments.
+define <12 x i32> @zext_cast_12xi16_to_12xi32(<12 x i16>) {
+ %2 = zext <12 x i16> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_12xi16_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %6 = zext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %9 = zext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %12 = zext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %15 = zext i16 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %18 = zext i16 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %21 = zext i16 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %2, i32 6
+; CHECK-NEXT: %24 = zext i16 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 2
+; CHECK-NEXT: %26 = extractelement <8 x i16> %2, i32 7
+; CHECK-NEXT: %27 = zext i16 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 3
+; CHECK-NEXT: %29 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %30 = zext i16 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> undef, i32 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %33 = zext i16 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> %31, i32 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %36 = zext i16 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %39 = zext i16 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 3
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %40, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+; Zero-extend <12 x i16> to <12 x i64>. In the expected output below the source
+; arrives as two <8 x i16> arguments (all of the first, lanes 0-3 of the
+; second are read) and the result is built as six <2 x i64> pieces: the first
+; is returned, the remaining five are stored through the five leading pointer
+; arguments.
+define <12 x i64> @zext_cast_12xi16_to_12xi64(<12 x i16>) {
+ %2 = zext <12 x i16> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_12xi16_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %9 = zext i16 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> undef, i64 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %12 = zext i16 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> %10, i64 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %15 = zext i16 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> undef, i64 %15, i32 0
+; CHECK-NEXT: %17 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %18 = zext i16 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> %16, i64 %18, i32 1
+; CHECK-NEXT: %20 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %21 = zext i16 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> undef, i64 %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %24 = zext i16 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> %22, i64 %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %27 = zext i16 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> undef, i64 %27, i32 0
+; CHECK-NEXT: %29 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %30 = zext i16 %29 to i64
+; CHECK-NEXT: %31 = insertelement <2 x i64> %28, i64 %30, i32 1
+; CHECK-NEXT: %32 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %33 = zext i16 %32 to i64
+; CHECK-NEXT: %34 = insertelement <2 x i64> undef, i64 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %36 = zext i16 %35 to i64
+; CHECK-NEXT: %37 = insertelement <2 x i64> %34, i64 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %39 = zext i16 %38 to i64
+; CHECK-NEXT: %40 = insertelement <2 x i64> undef, i64 %39, i32 0
+; CHECK-NEXT: %41 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %42 = zext i16 %41 to i64
+; CHECK-NEXT: %43 = insertelement <2 x i64> %40, i64 %42, i32 1
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %31, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %37, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %43, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %13
+
+; Zero-extend <12 x i32> to <12 x i64>. In the expected output below the source
+; arrives as three <4 x i32> arguments and the result is built as six
+; <2 x i64> pieces: the first is returned, the remaining five are stored
+; through the five leading pointer arguments.
+define <12 x i64> @zext_cast_12xi32_to_12xi64(<12 x i32>) {
+ %2 = zext <12 x i32> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_12xi32_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %10 = zext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %13 = zext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %16 = zext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %19 = zext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %6, i32 0
+; CHECK-NEXT: %22 = zext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %6, i32 1
+; CHECK-NEXT: %25 = zext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %6, i32 2
+; CHECK-NEXT: %28 = zext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %6, i32 3
+; CHECK-NEXT: %31 = zext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %34 = zext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %37 = zext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %40 = zext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %43 = zext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+; Sign-extend <12 x i8> to <12 x i16>. In the expected output below the source
+; becomes a <16 x i8> argument of which elements 0-11 are read. Elements 0-7
+; form the returned <8 x i16>; elements 8-11 fill lanes 0-3 of a second
+; <8 x i16> that is stored through the leading pointer argument.
+define <12 x i16> @sext_cast_12xi8_to_12xi16(<12 x i8>) {
+ %2 = sext <12 x i8> %0 to <12 x i16>
+ ret <12 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_12xi8_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sext i8 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sext i8 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sext i8 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sext i8 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sext i8 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sext i8 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = sext i8 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = sext i8 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: %28 = sext i8 %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> undef, i16 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: %31 = sext i8 %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> %29, i16 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: %34 = sext i8 %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: %37 = sext i8 %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 3
+; CHECK-NEXT: store <8 x i16> %38, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %26
+
+; Sign-extend <12 x i8> to <12 x i32>. In the expected output below the source
+; is a single <16 x i8> argument (elements 0-11 read) and the result is built
+; as three <4 x i32> pieces: the first is returned, the other two are stored
+; through the two leading pointer arguments.
+define <12 x i32> @sext_cast_12xi8_to_12xi32(<12 x i8>) {
+ %2 = sext <12 x i8> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_12xi8_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %4 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %5 = sext i8 %4 to i32
+; CHECK-NEXT: %6 = insertelement <4 x i32> undef, i32 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %8 = sext i8 %7 to i32
+; CHECK-NEXT: %9 = insertelement <4 x i32> %6, i32 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %11 = sext i8 %10 to i32
+; CHECK-NEXT: %12 = insertelement <4 x i32> %9, i32 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %14 = sext i8 %13 to i32
+; CHECK-NEXT: %15 = insertelement <4 x i32> %12, i32 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %17 = sext i8 %16 to i32
+; CHECK-NEXT: %18 = insertelement <4 x i32> undef, i32 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %20 = sext i8 %19 to i32
+; CHECK-NEXT: %21 = insertelement <4 x i32> %18, i32 %20, i32 1
+; CHECK-NEXT: %22 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %23 = sext i8 %22 to i32
+; CHECK-NEXT: %24 = insertelement <4 x i32> %21, i32 %23, i32 2
+; CHECK-NEXT: %25 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %26 = sext i8 %25 to i32
+; CHECK-NEXT: %27 = insertelement <4 x i32> %24, i32 %26, i32 3
+; CHECK-NEXT: %28 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %29 = sext i8 %28 to i32
+; CHECK-NEXT: %30 = insertelement <4 x i32> undef, i32 %29, i32 0
+; CHECK-NEXT: %31 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %32 = sext i8 %31 to i32
+; CHECK-NEXT: %33 = insertelement <4 x i32> %30, i32 %32, i32 1
+; CHECK-NEXT: %34 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %35 = sext i8 %34 to i32
+; CHECK-NEXT: %36 = insertelement <4 x i32> %33, i32 %35, i32 2
+; CHECK-NEXT: %37 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %38 = sext i8 %37 to i32
+; CHECK-NEXT: %39 = insertelement <4 x i32> %36, i32 %38, i32 3
+; CHECK-NEXT: store <4 x i32> %27, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %39, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %15
+
+; Sign-extend <12 x i8> to <12 x i64>. In the expected output below the source
+; is a single <16 x i8> argument (elements 0-11 read) and the result is built
+; as six <2 x i64> pieces: the first is returned, the remaining five are
+; stored through the five leading pointer arguments.
+define <12 x i64> @sext_cast_12xi8_to_12xi64(<12 x i8>) {
+ %2 = sext <12 x i8> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_12xi8_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %8 = sext i8 %7 to i64
+; CHECK-NEXT: %9 = insertelement <2 x i64> undef, i64 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %11 = sext i8 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> %9, i64 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %14 = sext i8 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> undef, i64 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %17 = sext i8 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> %15, i64 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <16 x i8> %5, i32 4
+; CHECK-NEXT: %20 = sext i8 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> undef, i64 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %5, i32 5
+; CHECK-NEXT: %23 = sext i8 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> %21, i64 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %5, i32 6
+; CHECK-NEXT: %26 = sext i8 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> undef, i64 %26, i32 0
+; CHECK-NEXT: %28 = extractelement <16 x i8> %5, i32 7
+; CHECK-NEXT: %29 = sext i8 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> %27, i64 %29, i32 1
+; CHECK-NEXT: %31 = extractelement <16 x i8> %5, i32 8
+; CHECK-NEXT: %32 = sext i8 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %5, i32 9
+; CHECK-NEXT: %35 = sext i8 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> %33, i64 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %5, i32 10
+; CHECK-NEXT: %38 = sext i8 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> undef, i64 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <16 x i8> %5, i32 11
+; CHECK-NEXT: %41 = sext i8 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> %39, i64 %41, i32 1
+; CHECK-NEXT: store <2 x i64> %18, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %30, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %42, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %12
+
+; Sign-extend <12 x i16> to <12 x i32>. In the expected output below the source
+; arrives as two <8 x i16> arguments (all of the first, lanes 0-3 of the
+; second are read) and the result is built as three <4 x i32> pieces: the
+; first is returned, the other two are stored through the two leading pointer
+; arguments.
+define <12 x i32> @sext_cast_12xi16_to_12xi32(<12 x i16>) {
+ %2 = sext <12 x i16> %0 to <12 x i32>
+ ret <12 x i32> %2
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_12xi16_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %6 = sext i16 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %9 = sext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %12 = sext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %15 = sext i16 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %18 = sext i16 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %21 = sext i16 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <8 x i16> %2, i32 6
+; CHECK-NEXT: %24 = sext i16 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 2
+; CHECK-NEXT: %26 = extractelement <8 x i16> %2, i32 7
+; CHECK-NEXT: %27 = sext i16 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 3
+; CHECK-NEXT: %29 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %30 = sext i16 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> undef, i32 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %33 = sext i16 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> %31, i32 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %36 = sext i16 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %39 = sext i16 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 3
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %40, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+; Sign-extend <12 x i16> to <12 x i64>. In the expected output below the source
+; arrives as two <8 x i16> arguments (all of the first, lanes 0-3 of the
+; second are read) and the result is built as six <2 x i64> pieces: the first
+; is returned, the remaining five are stored through the five leading pointer
+; arguments.
+define <12 x i64> @sext_cast_12xi16_to_12xi64(<12 x i16>) {
+ %2 = sext <12 x i16> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_12xi16_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %9 = sext i16 %8 to i64
+; CHECK-NEXT: %10 = insertelement <2 x i64> undef, i64 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %12 = sext i16 %11 to i64
+; CHECK-NEXT: %13 = insertelement <2 x i64> %10, i64 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %15 = sext i16 %14 to i64
+; CHECK-NEXT: %16 = insertelement <2 x i64> undef, i64 %15, i32 0
+; CHECK-NEXT: %17 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %18 = sext i16 %17 to i64
+; CHECK-NEXT: %19 = insertelement <2 x i64> %16, i64 %18, i32 1
+; CHECK-NEXT: %20 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %21 = sext i16 %20 to i64
+; CHECK-NEXT: %22 = insertelement <2 x i64> undef, i64 %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %24 = sext i16 %23 to i64
+; CHECK-NEXT: %25 = insertelement <2 x i64> %22, i64 %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %27 = sext i16 %26 to i64
+; CHECK-NEXT: %28 = insertelement <2 x i64> undef, i64 %27, i32 0
+; CHECK-NEXT: %29 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %30 = sext i16 %29 to i64
+; CHECK-NEXT: %31 = insertelement <2 x i64> %28, i64 %30, i32 1
+; CHECK-NEXT: %32 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %33 = sext i16 %32 to i64
+; CHECK-NEXT: %34 = insertelement <2 x i64> undef, i64 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %36 = sext i16 %35 to i64
+; CHECK-NEXT: %37 = insertelement <2 x i64> %34, i64 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %39 = sext i16 %38 to i64
+; CHECK-NEXT: %40 = insertelement <2 x i64> undef, i64 %39, i32 0
+; CHECK-NEXT: %41 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %42 = sext i16 %41 to i64
+; CHECK-NEXT: %43 = insertelement <2 x i64> %40, i64 %42, i32 1
+; CHECK-NEXT: store <2 x i64> %19, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %25, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %31, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %37, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %43, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %13
+
+; Sign-extend <12 x i32> to <12 x i64>. In the expected output below the source
+; arrives as three <4 x i32> arguments and the result is built as six
+; <2 x i64> pieces: the first is returned, the remaining five are stored
+; through the five leading pointer arguments.
+define <12 x i64> @sext_cast_12xi32_to_12xi64(<12 x i32>) {
+ %2 = sext <12 x i32> %0 to <12 x i64>
+ ret <12 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_12xi32_to_12xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %9 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %10 = sext i32 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %13 = sext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %16 = sext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %19 = sext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %6, i32 0
+; CHECK-NEXT: %22 = sext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %6, i32 1
+; CHECK-NEXT: %25 = sext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %6, i32 2
+; CHECK-NEXT: %28 = sext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %6, i32 3
+; CHECK-NEXT: %31 = sext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %34 = sext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %37 = sext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %40 = sext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %43 = sext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+define <12 x i32> @trunc_cast_12xi64_to_12xi32(<12 x i64>) { ; test input: truncate every lane of a 12-lane i64 vector to i32
+ %2 = trunc <12 x i64> %0 to <12 x i32> ; %0 is the unnamed <12 x i64> argument
+ ret <12 x i32> %2 ; expectations below: six <2 x i64> inputs, three <4 x i32> pieces; the first piece is returned, the other two are stored through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_12xi64_to_12xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %9 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %10 = trunc i64 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> undef, i32 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %13 = trunc i64 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %16 = trunc i64 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %19 = trunc i64 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %22 = trunc i64 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> undef, i32 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %25 = trunc i64 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %28 = trunc i64 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 2
+; CHECK-NEXT: %30 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %31 = trunc i64 %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 3
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %34 = trunc i64 %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> undef, i32 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %37 = trunc i64 %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %40 = trunc i64 %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 2
+; CHECK-NEXT: %42 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %43 = trunc i64 %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 3
+; CHECK-NEXT: store <4 x i32> %32, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %44, <4 x i32>* %1, align 16
+; CHECK-NEXT: ret <4 x i32> %20
+
+define <12 x i16> @trunc_cast_12xi64_to_12xi16(<12 x i64>) { ; test input: truncate every lane of a 12-lane i64 vector to i16
+ %2 = trunc <12 x i64> %0 to <12 x i16> ; %0 is the unnamed <12 x i64> argument
+ ret <12 x i16> %2 ; expectations below: six <2 x i64> inputs, two <8 x i16> pieces; the full first piece is returned, the second (only lanes 0-3 written) is stored through the pointer argument
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_12xi64_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %8 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %9 = trunc i64 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %12 = trunc i64 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %15 = trunc i64 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %18 = trunc i64 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %21 = trunc i64 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %24 = trunc i64 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %27 = trunc i64 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %30 = trunc i64 %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %33 = trunc i64 %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %36 = trunc i64 %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %39 = trunc i64 %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %42 = trunc i64 %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: store <8 x i16> %43, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+define <12 x i8> @trunc_cast_12xi64_to_12xi8(<12 x i64>) { ; test input: truncate every lane of a 12-lane i64 vector to i8
+ %2 = trunc <12 x i64> %0 to <12 x i8> ; %0 is the unnamed <12 x i64> argument
+ ret <12 x i8> %2 ; expectations below: six <2 x i64> inputs, one <16 x i8> result with lanes 0-11 written, returned directly (no pointer arguments)
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_12xi64_to_12xi8(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %7 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %8 = trunc i64 %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %11 = trunc i64 %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %14 = trunc i64 %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %17 = trunc i64 %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %20 = trunc i64 %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %23 = trunc i64 %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %26 = trunc i64 %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %29 = trunc i64 %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %32 = trunc i64 %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %35 = trunc i64 %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %38 = trunc i64 %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %41 = trunc i64 %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: ret <16 x i8> %42
+
+define <12 x i16> @trunc_cast_12xi32_to_12xi16(<12 x i32>) { ; test input: truncate every lane of a 12-lane i32 vector to i16
+ %2 = trunc <12 x i32> %0 to <12 x i16> ; %0 is the unnamed <12 x i32> argument
+ ret <12 x i16> %2 ; expectations below: three <4 x i32> inputs, two <8 x i16> pieces; the full first piece is returned, the second (only lanes 0-3 written) is stored through the pointer argument
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_12xi32_to_12xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %6 = trunc i32 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %9 = trunc i32 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %12 = trunc i32 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %15 = trunc i32 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %18 = trunc i32 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %21 = trunc i32 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %24 = trunc i32 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %27 = trunc i32 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %30 = trunc i32 %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> undef, i16 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %33 = trunc i32 %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> %31, i16 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %36 = trunc i32 %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %39 = trunc i32 %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 3
+; CHECK-NEXT: store <8 x i16> %40, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <12 x i8> @trunc_cast_12xi32_to_12xi8(<12 x i32>) { ; test input: truncate every lane of a 12-lane i32 vector to i8
+ %2 = trunc <12 x i32> %0 to <12 x i8> ; %0 is the unnamed <12 x i32> argument
+ ret <12 x i8> %2 ; expectations below: three <4 x i32> inputs, one <16 x i8> result with lanes 0-11 written, returned directly (no pointer arguments)
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_12xi32_to_12xi8(<4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %4 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %5 = trunc i32 %4 to i8
+; CHECK-NEXT: %6 = insertelement <16 x i8> undef, i8 %5, i32 0
+; CHECK-NEXT: %7 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %8 = trunc i32 %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> %6, i8 %8, i32 1
+; CHECK-NEXT: %10 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %11 = trunc i32 %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 2
+; CHECK-NEXT: %13 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %14 = trunc i32 %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 3
+; CHECK-NEXT: %16 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %17 = trunc i32 %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 4
+; CHECK-NEXT: %19 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %20 = trunc i32 %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 5
+; CHECK-NEXT: %22 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %23 = trunc i32 %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 6
+; CHECK-NEXT: %25 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %26 = trunc i32 %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 7
+; CHECK-NEXT: %28 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %29 = trunc i32 %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 8
+; CHECK-NEXT: %31 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %32 = trunc i32 %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 9
+; CHECK-NEXT: %34 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %35 = trunc i32 %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 10
+; CHECK-NEXT: %37 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %38 = trunc i32 %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 11
+; CHECK-NEXT: ret <16 x i8> %39
+
+define <12 x i8> @trunc_cast_12xi16_to_12xi8(<12 x i16>) { ; test input: truncate every lane of a 12-lane i16 vector to i8
+ %2 = trunc <12 x i16> %0 to <12 x i8> ; %0 is the unnamed <12 x i16> argument
+ ret <12 x i8> %2 ; expectations below: two <8 x i16> inputs (all 8 lanes of the first, lanes 0-3 of the second), one <16 x i8> result with lanes 0-11 written, returned directly
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_12xi16_to_12xi8(<8 x i16>, <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %4 = trunc i16 %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %7 = trunc i16 %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %10 = trunc i16 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %13 = trunc i16 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %0, i32 4
+; CHECK-NEXT: %16 = trunc i16 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <8 x i16> %0, i32 5
+; CHECK-NEXT: %19 = trunc i16 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <8 x i16> %0, i32 6
+; CHECK-NEXT: %22 = trunc i16 %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <8 x i16> %0, i32 7
+; CHECK-NEXT: %25 = trunc i16 %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %28 = trunc i16 %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 8
+; CHECK-NEXT: %30 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %31 = trunc i16 %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 9
+; CHECK-NEXT: %33 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %34 = trunc i16 %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 10
+; CHECK-NEXT: %36 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %37 = trunc i16 %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 11
+; CHECK-NEXT: ret <16 x i8> %38
+
+define <16 x i16> @zext_cast_16xi8_to_16xi16(<16 x i8>) { ; test input: zero-extend every lane of a 16-lane i8 vector to i16
+ %2 = zext <16 x i8> %0 to <16 x i16> ; %0 is the unnamed <16 x i8> argument
+ ret <16 x i16> %2 ; expectations below: one <16 x i8> input, two <8 x i16> pieces; the first piece is returned, the second is stored through the pointer argument
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_16xi8_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = zext i8 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = zext i8 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = zext i8 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = zext i8 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = zext i8 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = zext i8 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = zext i8 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = zext i8 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: %28 = zext i8 %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> undef, i16 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: %31 = zext i8 %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> %29, i16 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: %34 = zext i8 %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: %37 = zext i8 %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 3
+; CHECK-NEXT: %39 = extractelement <16 x i8> %1, i32 12
+; CHECK-NEXT: %40 = zext i8 %39 to i16
+; CHECK-NEXT: %41 = insertelement <8 x i16> %38, i16 %40, i32 4
+; CHECK-NEXT: %42 = extractelement <16 x i8> %1, i32 13
+; CHECK-NEXT: %43 = zext i8 %42 to i16
+; CHECK-NEXT: %44 = insertelement <8 x i16> %41, i16 %43, i32 5
+; CHECK-NEXT: %45 = extractelement <16 x i8> %1, i32 14
+; CHECK-NEXT: %46 = zext i8 %45 to i16
+; CHECK-NEXT: %47 = insertelement <8 x i16> %44, i16 %46, i32 6
+; CHECK-NEXT: %48 = extractelement <16 x i8> %1, i32 15
+; CHECK-NEXT: %49 = zext i8 %48 to i16
+; CHECK-NEXT: %50 = insertelement <8 x i16> %47, i16 %49, i32 7
+; CHECK-NEXT: store <8 x i16> %50, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %26
+
+define <16 x i32> @zext_cast_16xi8_to_16xi32(<16 x i8>) { ; test input: zero-extend every lane of a 16-lane i8 vector to i32
+ %2 = zext <16 x i8> %0 to <16 x i32> ; %0 is the unnamed <16 x i8> argument
+ ret <16 x i32> %2 ; expectations below: one <16 x i8> input, four <4 x i32> pieces; the first piece is returned, the other three are stored through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_16xi8_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = zext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = zext i8 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = zext i8 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = zext i8 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = zext i8 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = zext i8 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = zext i8 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 2
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = zext i8 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 3
+; CHECK-NEXT: %29 = extractelement <16 x i8> %3, i32 8
+; CHECK-NEXT: %30 = zext i8 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> undef, i32 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %3, i32 9
+; CHECK-NEXT: %33 = zext i8 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> %31, i32 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %3, i32 10
+; CHECK-NEXT: %36 = zext i8 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %3, i32 11
+; CHECK-NEXT: %39 = zext i8 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %3, i32 12
+; CHECK-NEXT: %42 = zext i8 %41 to i32
+; CHECK-NEXT: %43 = insertelement <4 x i32> undef, i32 %42, i32 0
+; CHECK-NEXT: %44 = extractelement <16 x i8> %3, i32 13
+; CHECK-NEXT: %45 = zext i8 %44 to i32
+; CHECK-NEXT: %46 = insertelement <4 x i32> %43, i32 %45, i32 1
+; CHECK-NEXT: %47 = extractelement <16 x i8> %3, i32 14
+; CHECK-NEXT: %48 = zext i8 %47 to i32
+; CHECK-NEXT: %49 = insertelement <4 x i32> %46, i32 %48, i32 2
+; CHECK-NEXT: %50 = extractelement <16 x i8> %3, i32 15
+; CHECK-NEXT: %51 = zext i8 %50 to i32
+; CHECK-NEXT: %52 = insertelement <4 x i32> %49, i32 %51, i32 3
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %40, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %52, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i64> @zext_cast_16xi8_to_16xi64(<16 x i8>) { ; test input: zero-extend every lane of a 16-lane i8 vector to i64
+ %2 = zext <16 x i8> %0 to <16 x i64> ; %0 is the unnamed <16 x i8> argument
+ ret <16 x i64> %2 ; expectations below: one <16 x i8> input, eight <2 x i64> pieces; the first piece is returned, the other seven are stored through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_16xi8_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %9 = extractelement <16 x i8> %7, i32 0
+; CHECK-NEXT: %10 = zext i8 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %7, i32 1
+; CHECK-NEXT: %13 = zext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <16 x i8> %7, i32 2
+; CHECK-NEXT: %16 = zext i8 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %7, i32 3
+; CHECK-NEXT: %19 = zext i8 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %7, i32 4
+; CHECK-NEXT: %22 = zext i8 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <16 x i8> %7, i32 5
+; CHECK-NEXT: %25 = zext i8 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <16 x i8> %7, i32 6
+; CHECK-NEXT: %28 = zext i8 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %7, i32 7
+; CHECK-NEXT: %31 = zext i8 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %7, i32 8
+; CHECK-NEXT: %34 = zext i8 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <16 x i8> %7, i32 9
+; CHECK-NEXT: %37 = zext i8 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <16 x i8> %7, i32 10
+; CHECK-NEXT: %40 = zext i8 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <16 x i8> %7, i32 11
+; CHECK-NEXT: %43 = zext i8 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <16 x i8> %7, i32 12
+; CHECK-NEXT: %46 = zext i8 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <16 x i8> %7, i32 13
+; CHECK-NEXT: %49 = zext i8 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <16 x i8> %7, i32 14
+; CHECK-NEXT: %52 = zext i8 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <16 x i8> %7, i32 15
+; CHECK-NEXT: %55 = zext i8 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+define <16 x i32> @zext_cast_16xi16_to_16xi32(<16 x i16>) { ; test input: zero-extend every lane of a 16-lane i16 vector to i32
+ %2 = zext <16 x i16> %0 to <16 x i32> ; %0 is the unnamed <16 x i16> argument
+ ret <16 x i32> %2 ; expectations below: two <8 x i16> inputs, four <4 x i32> pieces; the first piece is returned, the other three are stored through the leading pointer arguments
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_16xi16_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %6 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %7 = zext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> undef, i32 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %10 = zext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %13 = zext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %16 = zext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %19 = zext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> undef, i32 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %22 = zext i16 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %25 = zext i16 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 2
+; CHECK-NEXT: %27 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %28 = zext i16 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 3
+; CHECK-NEXT: %30 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %31 = zext i16 %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> undef, i32 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %34 = zext i16 %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %37 = zext i16 %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 2
+; CHECK-NEXT: %39 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %40 = zext i16 %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 3
+; CHECK-NEXT: %42 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %43 = zext i16 %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> undef, i32 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %46 = zext i16 %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %49 = zext i16 %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> %47, i32 %49, i32 2
+; CHECK-NEXT: %51 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %52 = zext i16 %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> %50, i32 %52, i32 3
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %41, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %53, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <16 x i64> @zext_cast_16xi16_to_16xi64(<16 x i16>) { ; test input: zero-extend every lane of a 16-lane i16 vector to i64
+ %2 = zext <16 x i16> %0 to <16 x i64> ; %0 is the unnamed <16 x i16> argument
+ ret <16 x i64> %2 ; expectations below: two <8 x i16> inputs, eight <2 x i64> pieces; the first piece is returned, the other seven are stored through the leading pointer arguments
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_16xi16_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %10 = extractelement <8 x i16> %7, i32 0
+; CHECK-NEXT: %11 = zext i16 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %7, i32 1
+; CHECK-NEXT: %14 = zext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %7, i32 2
+; CHECK-NEXT: %17 = zext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %7, i32 3
+; CHECK-NEXT: %20 = zext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: %22 = extractelement <8 x i16> %7, i32 4
+; CHECK-NEXT: %23 = zext i16 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> undef, i64 %23, i32 0
+; CHECK-NEXT: %25 = extractelement <8 x i16> %7, i32 5
+; CHECK-NEXT: %26 = zext i16 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> %24, i64 %26, i32 1
+; CHECK-NEXT: %28 = extractelement <8 x i16> %7, i32 6
+; CHECK-NEXT: %29 = zext i16 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> undef, i64 %29, i32 0
+; CHECK-NEXT: %31 = extractelement <8 x i16> %7, i32 7
+; CHECK-NEXT: %32 = zext i16 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> %30, i64 %32, i32 1
+; CHECK-NEXT: %34 = extractelement <8 x i16> %8, i32 0
+; CHECK-NEXT: %35 = zext i16 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> undef, i64 %35, i32 0
+; CHECK-NEXT: %37 = extractelement <8 x i16> %8, i32 1
+; CHECK-NEXT: %38 = zext i16 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> %36, i64 %38, i32 1
+; CHECK-NEXT: %40 = extractelement <8 x i16> %8, i32 2
+; CHECK-NEXT: %41 = zext i16 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> undef, i64 %41, i32 0
+; CHECK-NEXT: %43 = extractelement <8 x i16> %8, i32 3
+; CHECK-NEXT: %44 = zext i16 %43 to i64
+; CHECK-NEXT: %45 = insertelement <2 x i64> %42, i64 %44, i32 1
+; CHECK-NEXT: %46 = extractelement <8 x i16> %8, i32 4
+; CHECK-NEXT: %47 = zext i16 %46 to i64
+; CHECK-NEXT: %48 = insertelement <2 x i64> undef, i64 %47, i32 0
+; CHECK-NEXT: %49 = extractelement <8 x i16> %8, i32 5
+; CHECK-NEXT: %50 = zext i16 %49 to i64
+; CHECK-NEXT: %51 = insertelement <2 x i64> %48, i64 %50, i32 1
+; CHECK-NEXT: %52 = extractelement <8 x i16> %8, i32 6
+; CHECK-NEXT: %53 = zext i16 %52 to i64
+; CHECK-NEXT: %54 = insertelement <2 x i64> undef, i64 %53, i32 0
+; CHECK-NEXT: %55 = extractelement <8 x i16> %8, i32 7
+; CHECK-NEXT: %56 = zext i16 %55 to i64
+; CHECK-NEXT: %57 = insertelement <2 x i64> %54, i64 %56, i32 1
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %33, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %39, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %45, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %51, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %57, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %15
+
+define <16 x i64> @zext_cast_16xi32_to_16xi64(<16 x i32>) {
+ %2 = zext <16 x i32> %0 to <16 x i64>
+ ret <16 x i64> %2
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_16xi32_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %13 = zext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %16 = zext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %19 = zext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %22 = zext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %8, i32 0
+; CHECK-NEXT: %25 = zext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %8, i32 1
+; CHECK-NEXT: %28 = zext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x i32> %8, i32 2
+; CHECK-NEXT: %31 = zext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x i32> %8, i32 3
+; CHECK-NEXT: %34 = zext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %37 = zext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %40 = zext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %43 = zext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %46 = zext i32 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %49 = zext i32 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %52 = zext i32 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %55 = zext i32 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %58 = zext i32 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+define <16 x i16> @sext_cast_16xi8_to_16xi16(<16 x i8>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i8 to 16 x i16; the single unnamed argument is %0.
+ %2 = sext <16 x i8> %0 to <16 x i16> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i16> %2 ; The expectations that follow return the first <8 x i16> half and store the second half through one pointer argument.
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_16xi8_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %3 = extractelement <16 x i8> %1, i32 0
+; CHECK-NEXT: %4 = sext i8 %3 to i16
+; CHECK-NEXT: %5 = insertelement <8 x i16> undef, i16 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <16 x i8> %1, i32 1
+; CHECK-NEXT: %7 = sext i8 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> %5, i16 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <16 x i8> %1, i32 2
+; CHECK-NEXT: %10 = sext i8 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <16 x i8> %1, i32 3
+; CHECK-NEXT: %13 = sext i8 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <16 x i8> %1, i32 4
+; CHECK-NEXT: %16 = sext i8 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <16 x i8> %1, i32 5
+; CHECK-NEXT: %19 = sext i8 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <16 x i8> %1, i32 6
+; CHECK-NEXT: %22 = sext i8 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <16 x i8> %1, i32 7
+; CHECK-NEXT: %25 = sext i8 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <16 x i8> %1, i32 8
+; CHECK-NEXT: %28 = sext i8 %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> undef, i16 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %1, i32 9
+; CHECK-NEXT: %31 = sext i8 %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> %29, i16 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %1, i32 10
+; CHECK-NEXT: %34 = sext i8 %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <16 x i8> %1, i32 11
+; CHECK-NEXT: %37 = sext i8 %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 3
+; CHECK-NEXT: %39 = extractelement <16 x i8> %1, i32 12
+; CHECK-NEXT: %40 = sext i8 %39 to i16
+; CHECK-NEXT: %41 = insertelement <8 x i16> %38, i16 %40, i32 4
+; CHECK-NEXT: %42 = extractelement <16 x i8> %1, i32 13
+; CHECK-NEXT: %43 = sext i8 %42 to i16
+; CHECK-NEXT: %44 = insertelement <8 x i16> %41, i16 %43, i32 5
+; CHECK-NEXT: %45 = extractelement <16 x i8> %1, i32 14
+; CHECK-NEXT: %46 = sext i8 %45 to i16
+; CHECK-NEXT: %47 = insertelement <8 x i16> %44, i16 %46, i32 6
+; CHECK-NEXT: %48 = extractelement <16 x i8> %1, i32 15
+; CHECK-NEXT: %49 = sext i8 %48 to i16
+; CHECK-NEXT: %50 = insertelement <8 x i16> %47, i16 %49, i32 7
+; CHECK-NEXT: store <8 x i16> %50, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %26
+
+define <16 x i32> @sext_cast_16xi8_to_16xi32(<16 x i8>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i8 to 16 x i32; the single unnamed argument is %0.
+ %2 = sext <16 x i8> %0 to <16 x i32> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i32> %2 ; The expectations that follow return the first <4 x i32> piece and store the other three through pointer arguments.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_16xi8_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %6 = sext i8 %5 to i32
+; CHECK-NEXT: %7 = insertelement <4 x i32> undef, i32 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %9 = sext i8 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> %7, i32 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %12 = sext i8 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %15 = sext i8 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %3, i32 4
+; CHECK-NEXT: %18 = sext i8 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> undef, i32 %18, i32 0
+; CHECK-NEXT: %20 = extractelement <16 x i8> %3, i32 5
+; CHECK-NEXT: %21 = sext i8 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> %19, i32 %21, i32 1
+; CHECK-NEXT: %23 = extractelement <16 x i8> %3, i32 6
+; CHECK-NEXT: %24 = sext i8 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 2
+; CHECK-NEXT: %26 = extractelement <16 x i8> %3, i32 7
+; CHECK-NEXT: %27 = sext i8 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 3
+; CHECK-NEXT: %29 = extractelement <16 x i8> %3, i32 8
+; CHECK-NEXT: %30 = sext i8 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> undef, i32 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %3, i32 9
+; CHECK-NEXT: %33 = sext i8 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> %31, i32 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %3, i32 10
+; CHECK-NEXT: %36 = sext i8 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %3, i32 11
+; CHECK-NEXT: %39 = sext i8 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %3, i32 12
+; CHECK-NEXT: %42 = sext i8 %41 to i32
+; CHECK-NEXT: %43 = insertelement <4 x i32> undef, i32 %42, i32 0
+; CHECK-NEXT: %44 = extractelement <16 x i8> %3, i32 13
+; CHECK-NEXT: %45 = sext i8 %44 to i32
+; CHECK-NEXT: %46 = insertelement <4 x i32> %43, i32 %45, i32 1
+; CHECK-NEXT: %47 = extractelement <16 x i8> %3, i32 14
+; CHECK-NEXT: %48 = sext i8 %47 to i32
+; CHECK-NEXT: %49 = insertelement <4 x i32> %46, i32 %48, i32 2
+; CHECK-NEXT: %50 = extractelement <16 x i8> %3, i32 15
+; CHECK-NEXT: %51 = sext i8 %50 to i32
+; CHECK-NEXT: %52 = insertelement <4 x i32> %49, i32 %51, i32 3
+; CHECK-NEXT: store <4 x i32> %28, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %40, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %52, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %16
+
+define <16 x i64> @sext_cast_16xi8_to_16xi64(<16 x i8>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i8 to 16 x i64; the single unnamed argument is %0.
+ %2 = sext <16 x i8> %0 to <16 x i64> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i64> %2 ; The expectations that follow return the first <2 x i64> piece and store the other seven through pointer arguments.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_16xi8_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>)
+; CHECK-NEXT: %9 = extractelement <16 x i8> %7, i32 0
+; CHECK-NEXT: %10 = sext i8 %9 to i64
+; CHECK-NEXT: %11 = insertelement <2 x i64> undef, i64 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <16 x i8> %7, i32 1
+; CHECK-NEXT: %13 = sext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> %11, i64 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <16 x i8> %7, i32 2
+; CHECK-NEXT: %16 = sext i8 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <16 x i8> %7, i32 3
+; CHECK-NEXT: %19 = sext i8 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <16 x i8> %7, i32 4
+; CHECK-NEXT: %22 = sext i8 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <16 x i8> %7, i32 5
+; CHECK-NEXT: %25 = sext i8 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <16 x i8> %7, i32 6
+; CHECK-NEXT: %28 = sext i8 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <16 x i8> %7, i32 7
+; CHECK-NEXT: %31 = sext i8 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <16 x i8> %7, i32 8
+; CHECK-NEXT: %34 = sext i8 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <16 x i8> %7, i32 9
+; CHECK-NEXT: %37 = sext i8 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <16 x i8> %7, i32 10
+; CHECK-NEXT: %40 = sext i8 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <16 x i8> %7, i32 11
+; CHECK-NEXT: %43 = sext i8 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <16 x i8> %7, i32 12
+; CHECK-NEXT: %46 = sext i8 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <16 x i8> %7, i32 13
+; CHECK-NEXT: %49 = sext i8 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <16 x i8> %7, i32 14
+; CHECK-NEXT: %52 = sext i8 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <16 x i8> %7, i32 15
+; CHECK-NEXT: %55 = sext i8 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: store <2 x i64> %20, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %14
+
+define <16 x i32> @sext_cast_16xi16_to_16xi32(<16 x i16>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i16 to 16 x i32; the single unnamed argument is %0.
+ %2 = sext <16 x i16> %0 to <16 x i32> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i32> %2 ; The expectations that follow take the source as two <8 x i16> arguments, return the first <4 x i32> piece and store the other three through pointers.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_16xi16_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %6 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %7 = sext i16 %6 to i32
+; CHECK-NEXT: %8 = insertelement <4 x i32> undef, i32 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %10 = sext i16 %9 to i32
+; CHECK-NEXT: %11 = insertelement <4 x i32> %8, i32 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %13 = sext i16 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> %11, i32 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %16 = sext i16 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <8 x i16> %3, i32 4
+; CHECK-NEXT: %19 = sext i16 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> undef, i32 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <8 x i16> %3, i32 5
+; CHECK-NEXT: %22 = sext i16 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <8 x i16> %3, i32 6
+; CHECK-NEXT: %25 = sext i16 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 2
+; CHECK-NEXT: %27 = extractelement <8 x i16> %3, i32 7
+; CHECK-NEXT: %28 = sext i16 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 3
+; CHECK-NEXT: %30 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %31 = sext i16 %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> undef, i32 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %34 = sext i16 %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %37 = sext i16 %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 2
+; CHECK-NEXT: %39 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %40 = sext i16 %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 3
+; CHECK-NEXT: %42 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %43 = sext i16 %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> undef, i32 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %46 = sext i16 %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %49 = sext i16 %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> %47, i32 %49, i32 2
+; CHECK-NEXT: %51 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %52 = sext i16 %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> %50, i32 %52, i32 3
+; CHECK-NEXT: store <4 x i32> %29, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %41, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %53, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %17
+
+define <16 x i64> @sext_cast_16xi16_to_16xi64(<16 x i16>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i16 to 16 x i64; the single unnamed argument is %0.
+ %2 = sext <16 x i16> %0 to <16 x i64> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i64> %2 ; The expectations that follow take the source as two <8 x i16> arguments, return the first <2 x i64> piece and store the other seven through pointers.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_16xi16_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %10 = extractelement <8 x i16> %7, i32 0
+; CHECK-NEXT: %11 = sext i16 %10 to i64
+; CHECK-NEXT: %12 = insertelement <2 x i64> undef, i64 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <8 x i16> %7, i32 1
+; CHECK-NEXT: %14 = sext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> %12, i64 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <8 x i16> %7, i32 2
+; CHECK-NEXT: %17 = sext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> undef, i64 %17, i32 0
+; CHECK-NEXT: %19 = extractelement <8 x i16> %7, i32 3
+; CHECK-NEXT: %20 = sext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> %18, i64 %20, i32 1
+; CHECK-NEXT: %22 = extractelement <8 x i16> %7, i32 4
+; CHECK-NEXT: %23 = sext i16 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> undef, i64 %23, i32 0
+; CHECK-NEXT: %25 = extractelement <8 x i16> %7, i32 5
+; CHECK-NEXT: %26 = sext i16 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> %24, i64 %26, i32 1
+; CHECK-NEXT: %28 = extractelement <8 x i16> %7, i32 6
+; CHECK-NEXT: %29 = sext i16 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> undef, i64 %29, i32 0
+; CHECK-NEXT: %31 = extractelement <8 x i16> %7, i32 7
+; CHECK-NEXT: %32 = sext i16 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> %30, i64 %32, i32 1
+; CHECK-NEXT: %34 = extractelement <8 x i16> %8, i32 0
+; CHECK-NEXT: %35 = sext i16 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> undef, i64 %35, i32 0
+; CHECK-NEXT: %37 = extractelement <8 x i16> %8, i32 1
+; CHECK-NEXT: %38 = sext i16 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> %36, i64 %38, i32 1
+; CHECK-NEXT: %40 = extractelement <8 x i16> %8, i32 2
+; CHECK-NEXT: %41 = sext i16 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> undef, i64 %41, i32 0
+; CHECK-NEXT: %43 = extractelement <8 x i16> %8, i32 3
+; CHECK-NEXT: %44 = sext i16 %43 to i64
+; CHECK-NEXT: %45 = insertelement <2 x i64> %42, i64 %44, i32 1
+; CHECK-NEXT: %46 = extractelement <8 x i16> %8, i32 4
+; CHECK-NEXT: %47 = sext i16 %46 to i64
+; CHECK-NEXT: %48 = insertelement <2 x i64> undef, i64 %47, i32 0
+; CHECK-NEXT: %49 = extractelement <8 x i16> %8, i32 5
+; CHECK-NEXT: %50 = sext i16 %49 to i64
+; CHECK-NEXT: %51 = insertelement <2 x i64> %48, i64 %50, i32 1
+; CHECK-NEXT: %52 = extractelement <8 x i16> %8, i32 6
+; CHECK-NEXT: %53 = sext i16 %52 to i64
+; CHECK-NEXT: %54 = insertelement <2 x i64> undef, i64 %53, i32 0
+; CHECK-NEXT: %55 = extractelement <8 x i16> %8, i32 7
+; CHECK-NEXT: %56 = sext i16 %55 to i64
+; CHECK-NEXT: %57 = insertelement <2 x i64> %54, i64 %56, i32 1
+; CHECK-NEXT: store <2 x i64> %21, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %27, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %33, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %39, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %45, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %51, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %57, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %15
+
+define <16 x i64> @sext_cast_16xi32_to_16xi64(<16 x i32>) { ; Test input for -pnacl-vector-canonicalization: widen 16 x i32 to 16 x i64; the single unnamed argument is %0.
+ %2 = sext <16 x i32> %0 to <16 x i64> ; Sign-extend every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i64> %2 ; The expectations that follow take the source as four <4 x i32> arguments, return the first <2 x i64> piece and store the other seven through pointers.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_16xi32_to_16xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %12 = extractelement <4 x i32> %7, i32 0
+; CHECK-NEXT: %13 = sext i32 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <4 x i32> %7, i32 1
+; CHECK-NEXT: %16 = sext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <4 x i32> %7, i32 2
+; CHECK-NEXT: %19 = sext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <4 x i32> %7, i32 3
+; CHECK-NEXT: %22 = sext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <4 x i32> %8, i32 0
+; CHECK-NEXT: %25 = sext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <4 x i32> %8, i32 1
+; CHECK-NEXT: %28 = sext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <4 x i32> %8, i32 2
+; CHECK-NEXT: %31 = sext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x i32> %8, i32 3
+; CHECK-NEXT: %34 = sext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %37 = sext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %40 = sext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %43 = sext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %46 = sext i32 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %49 = sext i32 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %52 = sext i32 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %55 = sext i32 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %58 = sext i32 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+define <16 x i32> @trunc_cast_16xi64_to_16xi32(<16 x i64>) { ; Test input for -pnacl-vector-canonicalization: narrow 16 x i64 to 16 x i32; the single unnamed argument is %0.
+ %2 = trunc <16 x i64> %0 to <16 x i32> ; Truncate every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i32> %2 ; The expectations that follow take the source as eight <2 x i64> arguments, return the first <4 x i32> piece and store the other three through pointers.
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_16xi64_to_16xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %13 = trunc i64 %12 to i32
+; CHECK-NEXT: %14 = insertelement <4 x i32> undef, i32 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %16 = trunc i64 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> %14, i32 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %19 = trunc i64 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %22 = trunc i64 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %25 = trunc i64 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> undef, i32 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %28 = trunc i64 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> %26, i32 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %31 = trunc i64 %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 2
+; CHECK-NEXT: %33 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %34 = trunc i64 %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 3
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %37 = trunc i64 %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> undef, i32 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %40 = trunc i64 %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> %38, i32 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %43 = trunc i64 %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 2
+; CHECK-NEXT: %45 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %46 = trunc i64 %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 3
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %49 = trunc i64 %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> undef, i32 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %52 = trunc i64 %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> %50, i32 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %55 = trunc i64 %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 2
+; CHECK-NEXT: %57 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %58 = trunc i64 %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 3
+; CHECK-NEXT: store <4 x i32> %35, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %47, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %59, <4 x i32>* %2, align 16
+; CHECK-NEXT: ret <4 x i32> %23
+
+define <16 x i16> @trunc_cast_16xi64_to_16xi16(<16 x i64>) { ; Test input for -pnacl-vector-canonicalization: narrow 16 x i64 to 16 x i16; the single unnamed argument is %0.
+ %2 = trunc <16 x i64> %0 to <16 x i16> ; Truncate every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i16> %2 ; The expectations that follow take the source as eight <2 x i64> arguments, return the first <8 x i16> half and store the second half through one pointer.
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_16xi64_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %10 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %11 = trunc i64 %10 to i16
+; CHECK-NEXT: %12 = insertelement <8 x i16> undef, i16 %11, i32 0
+; CHECK-NEXT: %13 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %14 = trunc i64 %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> %12, i16 %14, i32 1
+; CHECK-NEXT: %16 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %17 = trunc i64 %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 2
+; CHECK-NEXT: %19 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %20 = trunc i64 %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 3
+; CHECK-NEXT: %22 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %23 = trunc i64 %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 4
+; CHECK-NEXT: %25 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %26 = trunc i64 %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 5
+; CHECK-NEXT: %28 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %29 = trunc i64 %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 6
+; CHECK-NEXT: %31 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %32 = trunc i64 %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 7
+; CHECK-NEXT: %34 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %35 = trunc i64 %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> undef, i16 %35, i32 0
+; CHECK-NEXT: %37 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %38 = trunc i64 %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> %36, i16 %38, i32 1
+; CHECK-NEXT: %40 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %41 = trunc i64 %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 2
+; CHECK-NEXT: %43 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %44 = trunc i64 %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 3
+; CHECK-NEXT: %46 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %47 = trunc i64 %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 4
+; CHECK-NEXT: %49 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %50 = trunc i64 %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 5
+; CHECK-NEXT: %52 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %53 = trunc i64 %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 6
+; CHECK-NEXT: %55 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %56 = trunc i64 %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 7
+; CHECK-NEXT: store <8 x i16> %57, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %33
+
+define <16 x i8> @trunc_cast_16xi64_to_16xi8(<16 x i64>) { ; Test input for -pnacl-vector-canonicalization: narrow 16 x i64 to 16 x i8; the single unnamed argument is %0.
+ %2 = trunc <16 x i64> %0 to <16 x i8> ; Truncate every lane; the result is %2 because the unnamed entry block consumes %1.
+ ret <16 x i8> %2 ; The expectations that follow take the source as eight <2 x i64> arguments and return one <16 x i8> directly, with no pointer arguments.
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_16xi64_to_16xi8(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %9 = extractelement <2 x i64> %0, i32 0
+; CHECK-NEXT: %10 = trunc i64 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> undef, i8 %10, i32 0
+; CHECK-NEXT: %12 = extractelement <2 x i64> %0, i32 1
+; CHECK-NEXT: %13 = trunc i64 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 1
+; CHECK-NEXT: %15 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %16 = trunc i64 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 2
+; CHECK-NEXT: %18 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %19 = trunc i64 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 3
+; CHECK-NEXT: %21 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %22 = trunc i64 %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 4
+; CHECK-NEXT: %24 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %25 = trunc i64 %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 5
+; CHECK-NEXT: %27 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %28 = trunc i64 %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 6
+; CHECK-NEXT: %30 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %31 = trunc i64 %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 7
+; CHECK-NEXT: %33 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %34 = trunc i64 %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 8
+; CHECK-NEXT: %36 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %37 = trunc i64 %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 9
+; CHECK-NEXT: %39 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %40 = trunc i64 %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 10
+; CHECK-NEXT: %42 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %43 = trunc i64 %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 11
+; CHECK-NEXT: %45 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %46 = trunc i64 %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 12
+; CHECK-NEXT: %48 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %49 = trunc i64 %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 13
+; CHECK-NEXT: %51 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %52 = trunc i64 %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 14
+; CHECK-NEXT: %54 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %55 = trunc i64 %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 15
+; CHECK-NEXT: ret <16 x i8> %56
+
+define <16 x i16> @trunc_cast_16xi32_to_16xi16(<16 x i32>) { ; test input: truncate 16 lanes of i32 to i16; expected signature takes four <4 x i32> pieces plus one <8 x i16>* out-pointer
+ %2 = trunc <16 x i32> %0 to <16 x i16> ; the one vector cast under test; expected output does extractelement / scalar trunc / insertelement per lane
+ ret <16 x i16> %2 ; expected output returns the low <8 x i16> by value and stores the high <8 x i16> through the added pointer
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_16xi32_to_16xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %6 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %7 = trunc i32 %6 to i16
+; CHECK-NEXT: %8 = insertelement <8 x i16> undef, i16 %7, i32 0
+; CHECK-NEXT: %9 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %10 = trunc i32 %9 to i16
+; CHECK-NEXT: %11 = insertelement <8 x i16> %8, i16 %10, i32 1
+; CHECK-NEXT: %12 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %13 = trunc i32 %12 to i16
+; CHECK-NEXT: %14 = insertelement <8 x i16> %11, i16 %13, i32 2
+; CHECK-NEXT: %15 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %16 = trunc i32 %15 to i16
+; CHECK-NEXT: %17 = insertelement <8 x i16> %14, i16 %16, i32 3
+; CHECK-NEXT: %18 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %19 = trunc i32 %18 to i16
+; CHECK-NEXT: %20 = insertelement <8 x i16> %17, i16 %19, i32 4
+; CHECK-NEXT: %21 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %22 = trunc i32 %21 to i16
+; CHECK-NEXT: %23 = insertelement <8 x i16> %20, i16 %22, i32 5
+; CHECK-NEXT: %24 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %25 = trunc i32 %24 to i16
+; CHECK-NEXT: %26 = insertelement <8 x i16> %23, i16 %25, i32 6
+; CHECK-NEXT: %27 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %28 = trunc i32 %27 to i16
+; CHECK-NEXT: %29 = insertelement <8 x i16> %26, i16 %28, i32 7
+; CHECK-NEXT: %30 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %31 = trunc i32 %30 to i16
+; CHECK-NEXT: %32 = insertelement <8 x i16> undef, i16 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %34 = trunc i32 %33 to i16
+; CHECK-NEXT: %35 = insertelement <8 x i16> %32, i16 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %37 = trunc i32 %36 to i16
+; CHECK-NEXT: %38 = insertelement <8 x i16> %35, i16 %37, i32 2
+; CHECK-NEXT: %39 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %40 = trunc i32 %39 to i16
+; CHECK-NEXT: %41 = insertelement <8 x i16> %38, i16 %40, i32 3
+; CHECK-NEXT: %42 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %43 = trunc i32 %42 to i16
+; CHECK-NEXT: %44 = insertelement <8 x i16> %41, i16 %43, i32 4
+; CHECK-NEXT: %45 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %46 = trunc i32 %45 to i16
+; CHECK-NEXT: %47 = insertelement <8 x i16> %44, i16 %46, i32 5
+; CHECK-NEXT: %48 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %49 = trunc i32 %48 to i16
+; CHECK-NEXT: %50 = insertelement <8 x i16> %47, i16 %49, i32 6
+; CHECK-NEXT: %51 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %52 = trunc i32 %51 to i16
+; CHECK-NEXT: %53 = insertelement <8 x i16> %50, i16 %52, i32 7
+; CHECK-NEXT: store <8 x i16> %53, <8 x i16>* %0, align 16
+; CHECK-NEXT: ret <8 x i16> %29
+
+define <16 x i8> @trunc_cast_16xi32_to_16xi8(<16 x i32>) { ; test input: truncate 16 lanes of i32 to i8; expected signature takes four <4 x i32> pieces and no out-pointers
+ %2 = trunc <16 x i32> %0 to <16 x i8> ; the one vector cast under test; expected output rebuilds a single <16 x i8> lane by lane
+ ret <16 x i8> %2 ; the result type is kept as <16 x i8> in the expected output, so it is returned directly
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_16xi32_to_16xi8(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %5 = extractelement <4 x i32> %0, i32 0
+; CHECK-NEXT: %6 = trunc i32 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <4 x i32> %0, i32 1
+; CHECK-NEXT: %9 = trunc i32 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <4 x i32> %0, i32 2
+; CHECK-NEXT: %12 = trunc i32 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <4 x i32> %0, i32 3
+; CHECK-NEXT: %15 = trunc i32 %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %18 = trunc i32 %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %21 = trunc i32 %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %24 = trunc i32 %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %27 = trunc i32 %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %30 = trunc i32 %29 to i8
+; CHECK-NEXT: %31 = insertelement <16 x i8> %28, i8 %30, i32 8
+; CHECK-NEXT: %32 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %33 = trunc i32 %32 to i8
+; CHECK-NEXT: %34 = insertelement <16 x i8> %31, i8 %33, i32 9
+; CHECK-NEXT: %35 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %36 = trunc i32 %35 to i8
+; CHECK-NEXT: %37 = insertelement <16 x i8> %34, i8 %36, i32 10
+; CHECK-NEXT: %38 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %39 = trunc i32 %38 to i8
+; CHECK-NEXT: %40 = insertelement <16 x i8> %37, i8 %39, i32 11
+; CHECK-NEXT: %41 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %42 = trunc i32 %41 to i8
+; CHECK-NEXT: %43 = insertelement <16 x i8> %40, i8 %42, i32 12
+; CHECK-NEXT: %44 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %45 = trunc i32 %44 to i8
+; CHECK-NEXT: %46 = insertelement <16 x i8> %43, i8 %45, i32 13
+; CHECK-NEXT: %47 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %48 = trunc i32 %47 to i8
+; CHECK-NEXT: %49 = insertelement <16 x i8> %46, i8 %48, i32 14
+; CHECK-NEXT: %50 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %51 = trunc i32 %50 to i8
+; CHECK-NEXT: %52 = insertelement <16 x i8> %49, i8 %51, i32 15
+; CHECK-NEXT: ret <16 x i8> %52
+
+define <16 x i8> @trunc_cast_16xi16_to_16xi8(<16 x i16>) { ; test input: truncate 16 lanes of i16 to i8; expected signature takes two <8 x i16> pieces
+ %2 = trunc <16 x i16> %0 to <16 x i8> ; the one vector cast under test; lanes 0-7 come from the first piece, lanes 8-15 from the second
+ ret <16 x i8> %2 ; the result type is kept as <16 x i8> in the expected output, so it is returned directly
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_16xi16_to_16xi8(<8 x i16>, <8 x i16>)
+; CHECK-NEXT: %3 = extractelement <8 x i16> %0, i32 0
+; CHECK-NEXT: %4 = trunc i16 %3 to i8
+; CHECK-NEXT: %5 = insertelement <16 x i8> undef, i8 %4, i32 0
+; CHECK-NEXT: %6 = extractelement <8 x i16> %0, i32 1
+; CHECK-NEXT: %7 = trunc i16 %6 to i8
+; CHECK-NEXT: %8 = insertelement <16 x i8> %5, i8 %7, i32 1
+; CHECK-NEXT: %9 = extractelement <8 x i16> %0, i32 2
+; CHECK-NEXT: %10 = trunc i16 %9 to i8
+; CHECK-NEXT: %11 = insertelement <16 x i8> %8, i8 %10, i32 2
+; CHECK-NEXT: %12 = extractelement <8 x i16> %0, i32 3
+; CHECK-NEXT: %13 = trunc i16 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> %11, i8 %13, i32 3
+; CHECK-NEXT: %15 = extractelement <8 x i16> %0, i32 4
+; CHECK-NEXT: %16 = trunc i16 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 4
+; CHECK-NEXT: %18 = extractelement <8 x i16> %0, i32 5
+; CHECK-NEXT: %19 = trunc i16 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 5
+; CHECK-NEXT: %21 = extractelement <8 x i16> %0, i32 6
+; CHECK-NEXT: %22 = trunc i16 %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 6
+; CHECK-NEXT: %24 = extractelement <8 x i16> %0, i32 7
+; CHECK-NEXT: %25 = trunc i16 %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 7
+; CHECK-NEXT: %27 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %28 = trunc i16 %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 8
+; CHECK-NEXT: %30 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %31 = trunc i16 %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 9
+; CHECK-NEXT: %33 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %34 = trunc i16 %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 10
+; CHECK-NEXT: %36 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %37 = trunc i16 %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 11
+; CHECK-NEXT: %39 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %40 = trunc i16 %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 12
+; CHECK-NEXT: %42 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %43 = trunc i16 %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 13
+; CHECK-NEXT: %45 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %46 = trunc i16 %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 14
+; CHECK-NEXT: %48 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %49 = trunc i16 %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 15
+; CHECK-NEXT: ret <16 x i8> %50
+
+define <20 x i16> @zext_cast_20xi8_to_20xi16(<20 x i8>) { ; test input: zero-extend 20 lanes of i8 to i16; expected signature takes two <16 x i8> pieces plus two <8 x i16>* out-pointers
+ %2 = zext <20 x i8> %0 to <20 x i16> ; the one vector cast under test; only lanes 0-3 of the second <16 x i8> piece are read in the expected output
+ ret <20 x i16> %2 ; expected output returns the first <8 x i16> and stores the other two; the last one has only lanes 0-3 populated
+}
+; CHECK-LABEL: define <8 x i16> @zext_cast_20xi8_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %6 = zext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %9 = zext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %12 = zext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %15 = zext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %18 = zext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %21 = zext i8 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %24 = zext i8 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %27 = zext i8 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %30 = zext i8 %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> undef, i16 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %33 = zext i8 %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> %31, i16 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %36 = zext i8 %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %39 = zext i8 %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %2, i32 12
+; CHECK-NEXT: %42 = zext i8 %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 4
+; CHECK-NEXT: %44 = extractelement <16 x i8> %2, i32 13
+; CHECK-NEXT: %45 = zext i8 %44 to i16
+; CHECK-NEXT: %46 = insertelement <8 x i16> %43, i16 %45, i32 5
+; CHECK-NEXT: %47 = extractelement <16 x i8> %2, i32 14
+; CHECK-NEXT: %48 = zext i8 %47 to i16
+; CHECK-NEXT: %49 = insertelement <8 x i16> %46, i16 %48, i32 6
+; CHECK-NEXT: %50 = extractelement <16 x i8> %2, i32 15
+; CHECK-NEXT: %51 = zext i8 %50 to i16
+; CHECK-NEXT: %52 = insertelement <8 x i16> %49, i16 %51, i32 7
+; CHECK-NEXT: %53 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %54 = zext i8 %53 to i16
+; CHECK-NEXT: %55 = insertelement <8 x i16> undef, i16 %54, i32 0
+; CHECK-NEXT: %56 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %57 = zext i8 %56 to i16
+; CHECK-NEXT: %58 = insertelement <8 x i16> %55, i16 %57, i32 1
+; CHECK-NEXT: %59 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %60 = zext i8 %59 to i16
+; CHECK-NEXT: %61 = insertelement <8 x i16> %58, i16 %60, i32 2
+; CHECK-NEXT: %62 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %63 = zext i8 %62 to i16
+; CHECK-NEXT: %64 = insertelement <8 x i16> %61, i16 %63, i32 3
+; CHECK-NEXT: store <8 x i16> %52, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %64, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <20 x i32> @zext_cast_20xi8_to_20xi32(<20 x i8>) { ; test input: zero-extend 20 lanes of i8 to i32; expected signature takes two <16 x i8> pieces plus four <4 x i32>* out-pointers
+ %2 = zext <20 x i8> %0 to <20 x i32> ; the one vector cast under test; only lanes 0-3 of the second <16 x i8> piece are read in the expected output
+ ret <20 x i32> %2 ; expected output returns the first <4 x i32> and stores the remaining four through the pointers, in order
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_20xi8_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %4, i32 0
+; CHECK-NEXT: %8 = zext i8 %7 to i32
+; CHECK-NEXT: %9 = insertelement <4 x i32> undef, i32 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %4, i32 1
+; CHECK-NEXT: %11 = zext i8 %10 to i32
+; CHECK-NEXT: %12 = insertelement <4 x i32> %9, i32 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %4, i32 2
+; CHECK-NEXT: %14 = zext i8 %13 to i32
+; CHECK-NEXT: %15 = insertelement <4 x i32> %12, i32 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <16 x i8> %4, i32 3
+; CHECK-NEXT: %17 = zext i8 %16 to i32
+; CHECK-NEXT: %18 = insertelement <4 x i32> %15, i32 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <16 x i8> %4, i32 4
+; CHECK-NEXT: %20 = zext i8 %19 to i32
+; CHECK-NEXT: %21 = insertelement <4 x i32> undef, i32 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %4, i32 5
+; CHECK-NEXT: %23 = zext i8 %22 to i32
+; CHECK-NEXT: %24 = insertelement <4 x i32> %21, i32 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %4, i32 6
+; CHECK-NEXT: %26 = zext i8 %25 to i32
+; CHECK-NEXT: %27 = insertelement <4 x i32> %24, i32 %26, i32 2
+; CHECK-NEXT: %28 = extractelement <16 x i8> %4, i32 7
+; CHECK-NEXT: %29 = zext i8 %28 to i32
+; CHECK-NEXT: %30 = insertelement <4 x i32> %27, i32 %29, i32 3
+; CHECK-NEXT: %31 = extractelement <16 x i8> %4, i32 8
+; CHECK-NEXT: %32 = zext i8 %31 to i32
+; CHECK-NEXT: %33 = insertelement <4 x i32> undef, i32 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %4, i32 9
+; CHECK-NEXT: %35 = zext i8 %34 to i32
+; CHECK-NEXT: %36 = insertelement <4 x i32> %33, i32 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %4, i32 10
+; CHECK-NEXT: %38 = zext i8 %37 to i32
+; CHECK-NEXT: %39 = insertelement <4 x i32> %36, i32 %38, i32 2
+; CHECK-NEXT: %40 = extractelement <16 x i8> %4, i32 11
+; CHECK-NEXT: %41 = zext i8 %40 to i32
+; CHECK-NEXT: %42 = insertelement <4 x i32> %39, i32 %41, i32 3
+; CHECK-NEXT: %43 = extractelement <16 x i8> %4, i32 12
+; CHECK-NEXT: %44 = zext i8 %43 to i32
+; CHECK-NEXT: %45 = insertelement <4 x i32> undef, i32 %44, i32 0
+; CHECK-NEXT: %46 = extractelement <16 x i8> %4, i32 13
+; CHECK-NEXT: %47 = zext i8 %46 to i32
+; CHECK-NEXT: %48 = insertelement <4 x i32> %45, i32 %47, i32 1
+; CHECK-NEXT: %49 = extractelement <16 x i8> %4, i32 14
+; CHECK-NEXT: %50 = zext i8 %49 to i32
+; CHECK-NEXT: %51 = insertelement <4 x i32> %48, i32 %50, i32 2
+; CHECK-NEXT: %52 = extractelement <16 x i8> %4, i32 15
+; CHECK-NEXT: %53 = zext i8 %52 to i32
+; CHECK-NEXT: %54 = insertelement <4 x i32> %51, i32 %53, i32 3
+; CHECK-NEXT: %55 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %56 = zext i8 %55 to i32
+; CHECK-NEXT: %57 = insertelement <4 x i32> undef, i32 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %59 = zext i8 %58 to i32
+; CHECK-NEXT: %60 = insertelement <4 x i32> %57, i32 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %62 = zext i8 %61 to i32
+; CHECK-NEXT: %63 = insertelement <4 x i32> %60, i32 %62, i32 2
+; CHECK-NEXT: %64 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %65 = zext i8 %64 to i32
+; CHECK-NEXT: %66 = insertelement <4 x i32> %63, i32 %65, i32 3
+; CHECK-NEXT: store <4 x i32> %30, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %42, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %54, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %66, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %18
+
+define <20 x i64> @zext_cast_20xi8_to_20xi64(<20 x i8>) { ; test input: zero-extend 20 lanes of i8 to i64; expected signature takes two <16 x i8> pieces plus nine <2 x i64>* out-pointers
+ %2 = zext <20 x i8> %0 to <20 x i64> ; the one vector cast under test; each pair of source lanes builds one fresh <2 x i64> in the expected output
+ ret <20 x i64> %2 ; expected output returns the first <2 x i64> and stores the remaining nine through the pointers, in order
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_20xi8_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %12 = extractelement <16 x i8> %9, i32 0
+; CHECK-NEXT: %13 = zext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <16 x i8> %9, i32 1
+; CHECK-NEXT: %16 = zext i8 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <16 x i8> %9, i32 2
+; CHECK-NEXT: %19 = zext i8 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <16 x i8> %9, i32 3
+; CHECK-NEXT: %22 = zext i8 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <16 x i8> %9, i32 4
+; CHECK-NEXT: %25 = zext i8 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <16 x i8> %9, i32 5
+; CHECK-NEXT: %28 = zext i8 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <16 x i8> %9, i32 6
+; CHECK-NEXT: %31 = zext i8 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <16 x i8> %9, i32 7
+; CHECK-NEXT: %34 = zext i8 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <16 x i8> %9, i32 8
+; CHECK-NEXT: %37 = zext i8 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <16 x i8> %9, i32 9
+; CHECK-NEXT: %40 = zext i8 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <16 x i8> %9, i32 10
+; CHECK-NEXT: %43 = zext i8 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <16 x i8> %9, i32 11
+; CHECK-NEXT: %46 = zext i8 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <16 x i8> %9, i32 12
+; CHECK-NEXT: %49 = zext i8 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <16 x i8> %9, i32 13
+; CHECK-NEXT: %52 = zext i8 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <16 x i8> %9, i32 14
+; CHECK-NEXT: %55 = zext i8 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <16 x i8> %9, i32 15
+; CHECK-NEXT: %58 = zext i8 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: %60 = extractelement <16 x i8> %10, i32 0
+; CHECK-NEXT: %61 = zext i8 %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> undef, i64 %61, i32 0
+; CHECK-NEXT: %63 = extractelement <16 x i8> %10, i32 1
+; CHECK-NEXT: %64 = zext i8 %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> %62, i64 %64, i32 1
+; CHECK-NEXT: %66 = extractelement <16 x i8> %10, i32 2
+; CHECK-NEXT: %67 = zext i8 %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> undef, i64 %67, i32 0
+; CHECK-NEXT: %69 = extractelement <16 x i8> %10, i32 3
+; CHECK-NEXT: %70 = zext i8 %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> %68, i64 %70, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %65, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %71, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+define <20 x i32> @zext_cast_20xi16_to_20xi32(<20 x i16>) { ; test input: zero-extend 20 lanes of i16 to i32; expected signature takes three <8 x i16> pieces plus four <4 x i32>* out-pointers
+ %2 = zext <20 x i16> %0 to <20 x i32> ; the one vector cast under test; only lanes 0-3 of the third <8 x i16> piece are read in the expected output
+ ret <20 x i32> %2 ; expected output returns the first <4 x i32> and stores the remaining four through the pointers, in order
+}
+; CHECK-LABEL: define <4 x i32> @zext_cast_20xi16_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %9 = zext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> undef, i32 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %12 = zext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %15 = zext i16 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %18 = zext i16 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> %16, i32 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %21 = zext i16 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> undef, i32 %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %24 = zext i16 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %27 = zext i16 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 2
+; CHECK-NEXT: %29 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %30 = zext i16 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> %28, i32 %30, i32 3
+; CHECK-NEXT: %32 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %33 = zext i16 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> undef, i32 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %36 = zext i16 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %39 = zext i16 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %42 = zext i16 %41 to i32
+; CHECK-NEXT: %43 = insertelement <4 x i32> %40, i32 %42, i32 3
+; CHECK-NEXT: %44 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %45 = zext i16 %44 to i32
+; CHECK-NEXT: %46 = insertelement <4 x i32> undef, i32 %45, i32 0
+; CHECK-NEXT: %47 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %48 = zext i16 %47 to i32
+; CHECK-NEXT: %49 = insertelement <4 x i32> %46, i32 %48, i32 1
+; CHECK-NEXT: %50 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %51 = zext i16 %50 to i32
+; CHECK-NEXT: %52 = insertelement <4 x i32> %49, i32 %51, i32 2
+; CHECK-NEXT: %53 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %54 = zext i16 %53 to i32
+; CHECK-NEXT: %55 = insertelement <4 x i32> %52, i32 %54, i32 3
+; CHECK-NEXT: %56 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %57 = zext i16 %56 to i32
+; CHECK-NEXT: %58 = insertelement <4 x i32> undef, i32 %57, i32 0
+; CHECK-NEXT: %59 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %60 = zext i16 %59 to i32
+; CHECK-NEXT: %61 = insertelement <4 x i32> %58, i32 %60, i32 1
+; CHECK-NEXT: %62 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %63 = zext i16 %62 to i32
+; CHECK-NEXT: %64 = insertelement <4 x i32> %61, i32 %63, i32 2
+; CHECK-NEXT: %65 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %66 = zext i16 %65 to i32
+; CHECK-NEXT: %67 = insertelement <4 x i32> %64, i32 %66, i32 3
+; CHECK-NEXT: store <4 x i32> %31, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %43, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %55, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %67, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %19
+
+define <20 x i64> @zext_cast_20xi16_to_20xi64(<20 x i16>) { ; test input: zero-extend 20 lanes of i16 to i64; expected signature takes three <8 x i16> pieces plus nine <2 x i64>* out-pointers
+ %2 = zext <20 x i16> %0 to <20 x i64> ; the one vector cast under test; only lanes 0-3 of the third <8 x i16> piece are read in the expected output
+ ret <20 x i64> %2 ; expected output returns the first <2 x i64> and stores the remaining nine through the pointers, in order
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_20xi16_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %13 = extractelement <8 x i16> %9, i32 0
+; CHECK-NEXT: %14 = zext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> undef, i64 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <8 x i16> %9, i32 1
+; CHECK-NEXT: %17 = zext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> %15, i64 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <8 x i16> %9, i32 2
+; CHECK-NEXT: %20 = zext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> undef, i64 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <8 x i16> %9, i32 3
+; CHECK-NEXT: %23 = zext i16 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> %21, i64 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <8 x i16> %9, i32 4
+; CHECK-NEXT: %26 = zext i16 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> undef, i64 %26, i32 0
+; CHECK-NEXT: %28 = extractelement <8 x i16> %9, i32 5
+; CHECK-NEXT: %29 = zext i16 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> %27, i64 %29, i32 1
+; CHECK-NEXT: %31 = extractelement <8 x i16> %9, i32 6
+; CHECK-NEXT: %32 = zext i16 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <8 x i16> %9, i32 7
+; CHECK-NEXT: %35 = zext i16 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> %33, i64 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <8 x i16> %10, i32 0
+; CHECK-NEXT: %38 = zext i16 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> undef, i64 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <8 x i16> %10, i32 1
+; CHECK-NEXT: %41 = zext i16 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> %39, i64 %41, i32 1
+; CHECK-NEXT: %43 = extractelement <8 x i16> %10, i32 2
+; CHECK-NEXT: %44 = zext i16 %43 to i64
+; CHECK-NEXT: %45 = insertelement <2 x i64> undef, i64 %44, i32 0
+; CHECK-NEXT: %46 = extractelement <8 x i16> %10, i32 3
+; CHECK-NEXT: %47 = zext i16 %46 to i64
+; CHECK-NEXT: %48 = insertelement <2 x i64> %45, i64 %47, i32 1
+; CHECK-NEXT: %49 = extractelement <8 x i16> %10, i32 4
+; CHECK-NEXT: %50 = zext i16 %49 to i64
+; CHECK-NEXT: %51 = insertelement <2 x i64> undef, i64 %50, i32 0
+; CHECK-NEXT: %52 = extractelement <8 x i16> %10, i32 5
+; CHECK-NEXT: %53 = zext i16 %52 to i64
+; CHECK-NEXT: %54 = insertelement <2 x i64> %51, i64 %53, i32 1
+; CHECK-NEXT: %55 = extractelement <8 x i16> %10, i32 6
+; CHECK-NEXT: %56 = zext i16 %55 to i64
+; CHECK-NEXT: %57 = insertelement <2 x i64> undef, i64 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <8 x i16> %10, i32 7
+; CHECK-NEXT: %59 = zext i16 %58 to i64
+; CHECK-NEXT: %60 = insertelement <2 x i64> %57, i64 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <8 x i16> %11, i32 0
+; CHECK-NEXT: %62 = zext i16 %61 to i64
+; CHECK-NEXT: %63 = insertelement <2 x i64> undef, i64 %62, i32 0
+; CHECK-NEXT: %64 = extractelement <8 x i16> %11, i32 1
+; CHECK-NEXT: %65 = zext i16 %64 to i64
+; CHECK-NEXT: %66 = insertelement <2 x i64> %63, i64 %65, i32 1
+; CHECK-NEXT: %67 = extractelement <8 x i16> %11, i32 2
+; CHECK-NEXT: %68 = zext i16 %67 to i64
+; CHECK-NEXT: %69 = insertelement <2 x i64> undef, i64 %68, i32 0
+; CHECK-NEXT: %70 = extractelement <8 x i16> %11, i32 3
+; CHECK-NEXT: %71 = zext i16 %70 to i64
+; CHECK-NEXT: %72 = insertelement <2 x i64> %69, i64 %71, i32 1
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %30, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %42, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %48, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %54, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %60, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %66, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %72, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %18
+
+define <20 x i64> @zext_cast_20xi32_to_20xi64(<20 x i32>) { ; test input: zero-extend 20 lanes of i32 to i64; expected signature takes five <4 x i32> pieces plus nine <2 x i64>* out-pointers
+ %2 = zext <20 x i32> %0 to <20 x i64> ; the one vector cast under test; each <4 x i32> piece yields two <2 x i64> results in the expected output
+ ret <20 x i64> %2 ; expected output returns the first <2 x i64> and stores the remaining nine through the pointers, in order
+}
+; CHECK-LABEL: define <2 x i64> @zext_cast_20xi32_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %16 = zext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %19 = zext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %22 = zext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %25 = zext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %28 = zext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %31 = zext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %34 = zext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %37 = zext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %11, i32 0
+; CHECK-NEXT: %40 = zext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %11, i32 1
+; CHECK-NEXT: %43 = zext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x i32> %11, i32 2
+; CHECK-NEXT: %46 = zext i32 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x i32> %11, i32 3
+; CHECK-NEXT: %49 = zext i32 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x i32> %12, i32 0
+; CHECK-NEXT: %52 = zext i32 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x i32> %12, i32 1
+; CHECK-NEXT: %55 = zext i32 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x i32> %12, i32 2
+; CHECK-NEXT: %58 = zext i32 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> undef, i64 %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x i32> %12, i32 3
+; CHECK-NEXT: %61 = zext i32 %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> %59, i64 %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x i32> %13, i32 0
+; CHECK-NEXT: %64 = zext i32 %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> undef, i64 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x i32> %13, i32 1
+; CHECK-NEXT: %67 = zext i32 %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> %65, i64 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x i32> %13, i32 2
+; CHECK-NEXT: %70 = zext i32 %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> undef, i64 %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x i32> %13, i32 3
+; CHECK-NEXT: %73 = zext i32 %72 to i64
+; CHECK-NEXT: %74 = insertelement <2 x i64> %71, i64 %73, i32 1
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %62, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %68, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %74, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+define <20 x i16> @sext_cast_20xi8_to_20xi16(<20 x i8>) { ; Input IR: sign-extends a 20-lane i8 vector; the expected output takes it as two <16 x i8> arguments.
+ %2 = sext <20 x i8> %0 to <20 x i16> ; Expected lowering is per lane: extractelement, scalar sext i8 to i16, insertelement.
+ ret <20 x i16> %2 ; Expected result is three <8 x i16> chunks: lanes 0-7 returned, lanes 8-15 and 16-19 stored through pointer arguments %0 and %1 (the last chunk fills only indices 0-3).
+}
+; CHECK-LABEL: define <8 x i16> @sext_cast_20xi8_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %5 = extractelement <16 x i8> %2, i32 0
+; CHECK-NEXT: %6 = sext i8 %5 to i16
+; CHECK-NEXT: %7 = insertelement <8 x i16> undef, i16 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <16 x i8> %2, i32 1
+; CHECK-NEXT: %9 = sext i8 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> %7, i16 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <16 x i8> %2, i32 2
+; CHECK-NEXT: %12 = sext i8 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <16 x i8> %2, i32 3
+; CHECK-NEXT: %15 = sext i8 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <16 x i8> %2, i32 4
+; CHECK-NEXT: %18 = sext i8 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <16 x i8> %2, i32 5
+; CHECK-NEXT: %21 = sext i8 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <16 x i8> %2, i32 6
+; CHECK-NEXT: %24 = sext i8 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <16 x i8> %2, i32 7
+; CHECK-NEXT: %27 = sext i8 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <16 x i8> %2, i32 8
+; CHECK-NEXT: %30 = sext i8 %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> undef, i16 %30, i32 0
+; CHECK-NEXT: %32 = extractelement <16 x i8> %2, i32 9
+; CHECK-NEXT: %33 = sext i8 %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> %31, i16 %33, i32 1
+; CHECK-NEXT: %35 = extractelement <16 x i8> %2, i32 10
+; CHECK-NEXT: %36 = sext i8 %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 2
+; CHECK-NEXT: %38 = extractelement <16 x i8> %2, i32 11
+; CHECK-NEXT: %39 = sext i8 %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 3
+; CHECK-NEXT: %41 = extractelement <16 x i8> %2, i32 12
+; CHECK-NEXT: %42 = sext i8 %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 4
+; CHECK-NEXT: %44 = extractelement <16 x i8> %2, i32 13
+; CHECK-NEXT: %45 = sext i8 %44 to i16
+; CHECK-NEXT: %46 = insertelement <8 x i16> %43, i16 %45, i32 5
+; CHECK-NEXT: %47 = extractelement <16 x i8> %2, i32 14
+; CHECK-NEXT: %48 = sext i8 %47 to i16
+; CHECK-NEXT: %49 = insertelement <8 x i16> %46, i16 %48, i32 6
+; CHECK-NEXT: %50 = extractelement <16 x i8> %2, i32 15
+; CHECK-NEXT: %51 = sext i8 %50 to i16
+; CHECK-NEXT: %52 = insertelement <8 x i16> %49, i16 %51, i32 7
+; CHECK-NEXT: %53 = extractelement <16 x i8> %3, i32 0
+; CHECK-NEXT: %54 = sext i8 %53 to i16
+; CHECK-NEXT: %55 = insertelement <8 x i16> undef, i16 %54, i32 0
+; CHECK-NEXT: %56 = extractelement <16 x i8> %3, i32 1
+; CHECK-NEXT: %57 = sext i8 %56 to i16
+; CHECK-NEXT: %58 = insertelement <8 x i16> %55, i16 %57, i32 1
+; CHECK-NEXT: %59 = extractelement <16 x i8> %3, i32 2
+; CHECK-NEXT: %60 = sext i8 %59 to i16
+; CHECK-NEXT: %61 = insertelement <8 x i16> %58, i16 %60, i32 2
+; CHECK-NEXT: %62 = extractelement <16 x i8> %3, i32 3
+; CHECK-NEXT: %63 = sext i8 %62 to i16
+; CHECK-NEXT: %64 = insertelement <8 x i16> %61, i16 %63, i32 3
+; CHECK-NEXT: store <8 x i16> %52, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %64, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %28
+
+define <20 x i32> @sext_cast_20xi8_to_20xi32(<20 x i8>) { ; Input IR: sign-extends a 20-lane i8 vector; the expected output takes it as two <16 x i8> arguments.
+ %2 = sext <20 x i8> %0 to <20 x i32> ; Expected lowering is per lane: extractelement, scalar sext i8 to i32, insertelement.
+ ret <20 x i32> %2 ; Expected result is five <4 x i32> chunks: lanes 0-3 returned, the remaining four chunks stored through pointer arguments %0-%3.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_20xi8_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %7 = extractelement <16 x i8> %4, i32 0
+; CHECK-NEXT: %8 = sext i8 %7 to i32
+; CHECK-NEXT: %9 = insertelement <4 x i32> undef, i32 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <16 x i8> %4, i32 1
+; CHECK-NEXT: %11 = sext i8 %10 to i32
+; CHECK-NEXT: %12 = insertelement <4 x i32> %9, i32 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <16 x i8> %4, i32 2
+; CHECK-NEXT: %14 = sext i8 %13 to i32
+; CHECK-NEXT: %15 = insertelement <4 x i32> %12, i32 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <16 x i8> %4, i32 3
+; CHECK-NEXT: %17 = sext i8 %16 to i32
+; CHECK-NEXT: %18 = insertelement <4 x i32> %15, i32 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <16 x i8> %4, i32 4
+; CHECK-NEXT: %20 = sext i8 %19 to i32
+; CHECK-NEXT: %21 = insertelement <4 x i32> undef, i32 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <16 x i8> %4, i32 5
+; CHECK-NEXT: %23 = sext i8 %22 to i32
+; CHECK-NEXT: %24 = insertelement <4 x i32> %21, i32 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <16 x i8> %4, i32 6
+; CHECK-NEXT: %26 = sext i8 %25 to i32
+; CHECK-NEXT: %27 = insertelement <4 x i32> %24, i32 %26, i32 2
+; CHECK-NEXT: %28 = extractelement <16 x i8> %4, i32 7
+; CHECK-NEXT: %29 = sext i8 %28 to i32
+; CHECK-NEXT: %30 = insertelement <4 x i32> %27, i32 %29, i32 3
+; CHECK-NEXT: %31 = extractelement <16 x i8> %4, i32 8
+; CHECK-NEXT: %32 = sext i8 %31 to i32
+; CHECK-NEXT: %33 = insertelement <4 x i32> undef, i32 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <16 x i8> %4, i32 9
+; CHECK-NEXT: %35 = sext i8 %34 to i32
+; CHECK-NEXT: %36 = insertelement <4 x i32> %33, i32 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <16 x i8> %4, i32 10
+; CHECK-NEXT: %38 = sext i8 %37 to i32
+; CHECK-NEXT: %39 = insertelement <4 x i32> %36, i32 %38, i32 2
+; CHECK-NEXT: %40 = extractelement <16 x i8> %4, i32 11
+; CHECK-NEXT: %41 = sext i8 %40 to i32
+; CHECK-NEXT: %42 = insertelement <4 x i32> %39, i32 %41, i32 3
+; CHECK-NEXT: %43 = extractelement <16 x i8> %4, i32 12
+; CHECK-NEXT: %44 = sext i8 %43 to i32
+; CHECK-NEXT: %45 = insertelement <4 x i32> undef, i32 %44, i32 0
+; CHECK-NEXT: %46 = extractelement <16 x i8> %4, i32 13
+; CHECK-NEXT: %47 = sext i8 %46 to i32
+; CHECK-NEXT: %48 = insertelement <4 x i32> %45, i32 %47, i32 1
+; CHECK-NEXT: %49 = extractelement <16 x i8> %4, i32 14
+; CHECK-NEXT: %50 = sext i8 %49 to i32
+; CHECK-NEXT: %51 = insertelement <4 x i32> %48, i32 %50, i32 2
+; CHECK-NEXT: %52 = extractelement <16 x i8> %4, i32 15
+; CHECK-NEXT: %53 = sext i8 %52 to i32
+; CHECK-NEXT: %54 = insertelement <4 x i32> %51, i32 %53, i32 3
+; CHECK-NEXT: %55 = extractelement <16 x i8> %5, i32 0
+; CHECK-NEXT: %56 = sext i8 %55 to i32
+; CHECK-NEXT: %57 = insertelement <4 x i32> undef, i32 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <16 x i8> %5, i32 1
+; CHECK-NEXT: %59 = sext i8 %58 to i32
+; CHECK-NEXT: %60 = insertelement <4 x i32> %57, i32 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <16 x i8> %5, i32 2
+; CHECK-NEXT: %62 = sext i8 %61 to i32
+; CHECK-NEXT: %63 = insertelement <4 x i32> %60, i32 %62, i32 2
+; CHECK-NEXT: %64 = extractelement <16 x i8> %5, i32 3
+; CHECK-NEXT: %65 = sext i8 %64 to i32
+; CHECK-NEXT: %66 = insertelement <4 x i32> %63, i32 %65, i32 3
+; CHECK-NEXT: store <4 x i32> %30, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %42, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %54, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %66, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %18
+
+define <20 x i64> @sext_cast_20xi8_to_20xi64(<20 x i8>) { ; Input IR: sign-extends a 20-lane i8 vector; the expected output takes it as two <16 x i8> arguments.
+ %2 = sext <20 x i8> %0 to <20 x i64> ; Expected lowering is per lane: extractelement, scalar sext i8 to i64, insertelement.
+ ret <20 x i64> %2 ; Expected result is ten <2 x i64> chunks: lanes 0-1 returned, the remaining nine chunks stored through pointer arguments %0-%8.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_20xi8_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <16 x i8>, <16 x i8>)
+; CHECK-NEXT: %12 = extractelement <16 x i8> %9, i32 0
+; CHECK-NEXT: %13 = sext i8 %12 to i64
+; CHECK-NEXT: %14 = insertelement <2 x i64> undef, i64 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <16 x i8> %9, i32 1
+; CHECK-NEXT: %16 = sext i8 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> %14, i64 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <16 x i8> %9, i32 2
+; CHECK-NEXT: %19 = sext i8 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> undef, i64 %19, i32 0
+; CHECK-NEXT: %21 = extractelement <16 x i8> %9, i32 3
+; CHECK-NEXT: %22 = sext i8 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> %20, i64 %22, i32 1
+; CHECK-NEXT: %24 = extractelement <16 x i8> %9, i32 4
+; CHECK-NEXT: %25 = sext i8 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> undef, i64 %25, i32 0
+; CHECK-NEXT: %27 = extractelement <16 x i8> %9, i32 5
+; CHECK-NEXT: %28 = sext i8 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> %26, i64 %28, i32 1
+; CHECK-NEXT: %30 = extractelement <16 x i8> %9, i32 6
+; CHECK-NEXT: %31 = sext i8 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> undef, i64 %31, i32 0
+; CHECK-NEXT: %33 = extractelement <16 x i8> %9, i32 7
+; CHECK-NEXT: %34 = sext i8 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> %32, i64 %34, i32 1
+; CHECK-NEXT: %36 = extractelement <16 x i8> %9, i32 8
+; CHECK-NEXT: %37 = sext i8 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+; CHECK-NEXT: %39 = extractelement <16 x i8> %9, i32 9
+; CHECK-NEXT: %40 = sext i8 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> %38, i64 %40, i32 1
+; CHECK-NEXT: %42 = extractelement <16 x i8> %9, i32 10
+; CHECK-NEXT: %43 = sext i8 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> undef, i64 %43, i32 0
+; CHECK-NEXT: %45 = extractelement <16 x i8> %9, i32 11
+; CHECK-NEXT: %46 = sext i8 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> %44, i64 %46, i32 1
+; CHECK-NEXT: %48 = extractelement <16 x i8> %9, i32 12
+; CHECK-NEXT: %49 = sext i8 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> undef, i64 %49, i32 0
+; CHECK-NEXT: %51 = extractelement <16 x i8> %9, i32 13
+; CHECK-NEXT: %52 = sext i8 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> %50, i64 %52, i32 1
+; CHECK-NEXT: %54 = extractelement <16 x i8> %9, i32 14
+; CHECK-NEXT: %55 = sext i8 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> undef, i64 %55, i32 0
+; CHECK-NEXT: %57 = extractelement <16 x i8> %9, i32 15
+; CHECK-NEXT: %58 = sext i8 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> %56, i64 %58, i32 1
+; CHECK-NEXT: %60 = extractelement <16 x i8> %10, i32 0
+; CHECK-NEXT: %61 = sext i8 %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> undef, i64 %61, i32 0
+; CHECK-NEXT: %63 = extractelement <16 x i8> %10, i32 1
+; CHECK-NEXT: %64 = sext i8 %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> %62, i64 %64, i32 1
+; CHECK-NEXT: %66 = extractelement <16 x i8> %10, i32 2
+; CHECK-NEXT: %67 = sext i8 %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> undef, i64 %67, i32 0
+; CHECK-NEXT: %69 = extractelement <16 x i8> %10, i32 3
+; CHECK-NEXT: %70 = sext i8 %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> %68, i64 %70, i32 1
+; CHECK-NEXT: store <2 x i64> %23, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %29, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %35, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %41, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %47, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %53, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %59, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %65, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %71, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %17
+
+define <20 x i32> @sext_cast_20xi16_to_20xi32(<20 x i16>) { ; Input IR: sign-extends a 20-lane i16 vector; the expected output takes it as three <8 x i16> arguments (only indices 0-3 of the third are read).
+ %2 = sext <20 x i16> %0 to <20 x i32> ; Expected lowering is per lane: extractelement, scalar sext i16 to i32, insertelement.
+ ret <20 x i32> %2 ; Expected result is five <4 x i32> chunks: lanes 0-3 returned, the remaining four chunks stored through pointer arguments %0-%3.
+}
+; CHECK-LABEL: define <4 x i32> @sext_cast_20xi16_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %8 = extractelement <8 x i16> %4, i32 0
+; CHECK-NEXT: %9 = sext i16 %8 to i32
+; CHECK-NEXT: %10 = insertelement <4 x i32> undef, i32 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <8 x i16> %4, i32 1
+; CHECK-NEXT: %12 = sext i16 %11 to i32
+; CHECK-NEXT: %13 = insertelement <4 x i32> %10, i32 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <8 x i16> %4, i32 2
+; CHECK-NEXT: %15 = sext i16 %14 to i32
+; CHECK-NEXT: %16 = insertelement <4 x i32> %13, i32 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <8 x i16> %4, i32 3
+; CHECK-NEXT: %18 = sext i16 %17 to i32
+; CHECK-NEXT: %19 = insertelement <4 x i32> %16, i32 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <8 x i16> %4, i32 4
+; CHECK-NEXT: %21 = sext i16 %20 to i32
+; CHECK-NEXT: %22 = insertelement <4 x i32> undef, i32 %21, i32 0
+; CHECK-NEXT: %23 = extractelement <8 x i16> %4, i32 5
+; CHECK-NEXT: %24 = sext i16 %23 to i32
+; CHECK-NEXT: %25 = insertelement <4 x i32> %22, i32 %24, i32 1
+; CHECK-NEXT: %26 = extractelement <8 x i16> %4, i32 6
+; CHECK-NEXT: %27 = sext i16 %26 to i32
+; CHECK-NEXT: %28 = insertelement <4 x i32> %25, i32 %27, i32 2
+; CHECK-NEXT: %29 = extractelement <8 x i16> %4, i32 7
+; CHECK-NEXT: %30 = sext i16 %29 to i32
+; CHECK-NEXT: %31 = insertelement <4 x i32> %28, i32 %30, i32 3
+; CHECK-NEXT: %32 = extractelement <8 x i16> %5, i32 0
+; CHECK-NEXT: %33 = sext i16 %32 to i32
+; CHECK-NEXT: %34 = insertelement <4 x i32> undef, i32 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <8 x i16> %5, i32 1
+; CHECK-NEXT: %36 = sext i16 %35 to i32
+; CHECK-NEXT: %37 = insertelement <4 x i32> %34, i32 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <8 x i16> %5, i32 2
+; CHECK-NEXT: %39 = sext i16 %38 to i32
+; CHECK-NEXT: %40 = insertelement <4 x i32> %37, i32 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <8 x i16> %5, i32 3
+; CHECK-NEXT: %42 = sext i16 %41 to i32
+; CHECK-NEXT: %43 = insertelement <4 x i32> %40, i32 %42, i32 3
+; CHECK-NEXT: %44 = extractelement <8 x i16> %5, i32 4
+; CHECK-NEXT: %45 = sext i16 %44 to i32
+; CHECK-NEXT: %46 = insertelement <4 x i32> undef, i32 %45, i32 0
+; CHECK-NEXT: %47 = extractelement <8 x i16> %5, i32 5
+; CHECK-NEXT: %48 = sext i16 %47 to i32
+; CHECK-NEXT: %49 = insertelement <4 x i32> %46, i32 %48, i32 1
+; CHECK-NEXT: %50 = extractelement <8 x i16> %5, i32 6
+; CHECK-NEXT: %51 = sext i16 %50 to i32
+; CHECK-NEXT: %52 = insertelement <4 x i32> %49, i32 %51, i32 2
+; CHECK-NEXT: %53 = extractelement <8 x i16> %5, i32 7
+; CHECK-NEXT: %54 = sext i16 %53 to i32
+; CHECK-NEXT: %55 = insertelement <4 x i32> %52, i32 %54, i32 3
+; CHECK-NEXT: %56 = extractelement <8 x i16> %6, i32 0
+; CHECK-NEXT: %57 = sext i16 %56 to i32
+; CHECK-NEXT: %58 = insertelement <4 x i32> undef, i32 %57, i32 0
+; CHECK-NEXT: %59 = extractelement <8 x i16> %6, i32 1
+; CHECK-NEXT: %60 = sext i16 %59 to i32
+; CHECK-NEXT: %61 = insertelement <4 x i32> %58, i32 %60, i32 1
+; CHECK-NEXT: %62 = extractelement <8 x i16> %6, i32 2
+; CHECK-NEXT: %63 = sext i16 %62 to i32
+; CHECK-NEXT: %64 = insertelement <4 x i32> %61, i32 %63, i32 2
+; CHECK-NEXT: %65 = extractelement <8 x i16> %6, i32 3
+; CHECK-NEXT: %66 = sext i16 %65 to i32
+; CHECK-NEXT: %67 = insertelement <4 x i32> %64, i32 %66, i32 3
+; CHECK-NEXT: store <4 x i32> %31, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %43, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %55, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %67, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %19
+
+define <20 x i64> @sext_cast_20xi16_to_20xi64(<20 x i16>) { ; Input IR: sign-extends a 20-lane i16 vector; the expected output takes it as three <8 x i16> arguments (only indices 0-3 of the third are read).
+ %2 = sext <20 x i16> %0 to <20 x i64> ; Expected lowering is per lane: extractelement, scalar sext i16 to i64, insertelement.
+ ret <20 x i64> %2 ; Expected result is ten <2 x i64> chunks: lanes 0-1 returned, the remaining nine chunks stored through pointer arguments %0-%8.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_20xi16_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %13 = extractelement <8 x i16> %9, i32 0
+; CHECK-NEXT: %14 = sext i16 %13 to i64
+; CHECK-NEXT: %15 = insertelement <2 x i64> undef, i64 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <8 x i16> %9, i32 1
+; CHECK-NEXT: %17 = sext i16 %16 to i64
+; CHECK-NEXT: %18 = insertelement <2 x i64> %15, i64 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <8 x i16> %9, i32 2
+; CHECK-NEXT: %20 = sext i16 %19 to i64
+; CHECK-NEXT: %21 = insertelement <2 x i64> undef, i64 %20, i32 0
+; CHECK-NEXT: %22 = extractelement <8 x i16> %9, i32 3
+; CHECK-NEXT: %23 = sext i16 %22 to i64
+; CHECK-NEXT: %24 = insertelement <2 x i64> %21, i64 %23, i32 1
+; CHECK-NEXT: %25 = extractelement <8 x i16> %9, i32 4
+; CHECK-NEXT: %26 = sext i16 %25 to i64
+; CHECK-NEXT: %27 = insertelement <2 x i64> undef, i64 %26, i32 0
+; CHECK-NEXT: %28 = extractelement <8 x i16> %9, i32 5
+; CHECK-NEXT: %29 = sext i16 %28 to i64
+; CHECK-NEXT: %30 = insertelement <2 x i64> %27, i64 %29, i32 1
+; CHECK-NEXT: %31 = extractelement <8 x i16> %9, i32 6
+; CHECK-NEXT: %32 = sext i16 %31 to i64
+; CHECK-NEXT: %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+; CHECK-NEXT: %34 = extractelement <8 x i16> %9, i32 7
+; CHECK-NEXT: %35 = sext i16 %34 to i64
+; CHECK-NEXT: %36 = insertelement <2 x i64> %33, i64 %35, i32 1
+; CHECK-NEXT: %37 = extractelement <8 x i16> %10, i32 0
+; CHECK-NEXT: %38 = sext i16 %37 to i64
+; CHECK-NEXT: %39 = insertelement <2 x i64> undef, i64 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <8 x i16> %10, i32 1
+; CHECK-NEXT: %41 = sext i16 %40 to i64
+; CHECK-NEXT: %42 = insertelement <2 x i64> %39, i64 %41, i32 1
+; CHECK-NEXT: %43 = extractelement <8 x i16> %10, i32 2
+; CHECK-NEXT: %44 = sext i16 %43 to i64
+; CHECK-NEXT: %45 = insertelement <2 x i64> undef, i64 %44, i32 0
+; CHECK-NEXT: %46 = extractelement <8 x i16> %10, i32 3
+; CHECK-NEXT: %47 = sext i16 %46 to i64
+; CHECK-NEXT: %48 = insertelement <2 x i64> %45, i64 %47, i32 1
+; CHECK-NEXT: %49 = extractelement <8 x i16> %10, i32 4
+; CHECK-NEXT: %50 = sext i16 %49 to i64
+; CHECK-NEXT: %51 = insertelement <2 x i64> undef, i64 %50, i32 0
+; CHECK-NEXT: %52 = extractelement <8 x i16> %10, i32 5
+; CHECK-NEXT: %53 = sext i16 %52 to i64
+; CHECK-NEXT: %54 = insertelement <2 x i64> %51, i64 %53, i32 1
+; CHECK-NEXT: %55 = extractelement <8 x i16> %10, i32 6
+; CHECK-NEXT: %56 = sext i16 %55 to i64
+; CHECK-NEXT: %57 = insertelement <2 x i64> undef, i64 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <8 x i16> %10, i32 7
+; CHECK-NEXT: %59 = sext i16 %58 to i64
+; CHECK-NEXT: %60 = insertelement <2 x i64> %57, i64 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <8 x i16> %11, i32 0
+; CHECK-NEXT: %62 = sext i16 %61 to i64
+; CHECK-NEXT: %63 = insertelement <2 x i64> undef, i64 %62, i32 0
+; CHECK-NEXT: %64 = extractelement <8 x i16> %11, i32 1
+; CHECK-NEXT: %65 = sext i16 %64 to i64
+; CHECK-NEXT: %66 = insertelement <2 x i64> %63, i64 %65, i32 1
+; CHECK-NEXT: %67 = extractelement <8 x i16> %11, i32 2
+; CHECK-NEXT: %68 = sext i16 %67 to i64
+; CHECK-NEXT: %69 = insertelement <2 x i64> undef, i64 %68, i32 0
+; CHECK-NEXT: %70 = extractelement <8 x i16> %11, i32 3
+; CHECK-NEXT: %71 = sext i16 %70 to i64
+; CHECK-NEXT: %72 = insertelement <2 x i64> %69, i64 %71, i32 1
+; CHECK-NEXT: store <2 x i64> %24, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %30, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %36, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %42, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %48, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %54, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %60, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %66, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %72, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %18
+
+define <20 x i64> @sext_cast_20xi32_to_20xi64(<20 x i32>) { ; Input IR: sign-extends a 20-lane i32 vector; the expected output takes it as five <4 x i32> arguments.
+ %2 = sext <20 x i32> %0 to <20 x i64> ; Expected lowering is per lane: extractelement, scalar sext i32 to i64, insertelement.
+ ret <20 x i64> %2 ; Expected result is ten <2 x i64> chunks: lanes 0-1 returned, the remaining nine chunks stored through pointer arguments %0-%8.
+}
+; CHECK-LABEL: define <2 x i64> @sext_cast_20xi32_to_20xi64(<2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <2 x i64>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %15 = extractelement <4 x i32> %9, i32 0
+; CHECK-NEXT: %16 = sext i32 %15 to i64
+; CHECK-NEXT: %17 = insertelement <2 x i64> undef, i64 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <4 x i32> %9, i32 1
+; CHECK-NEXT: %19 = sext i32 %18 to i64
+; CHECK-NEXT: %20 = insertelement <2 x i64> %17, i64 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <4 x i32> %9, i32 2
+; CHECK-NEXT: %22 = sext i32 %21 to i64
+; CHECK-NEXT: %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+; CHECK-NEXT: %24 = extractelement <4 x i32> %9, i32 3
+; CHECK-NEXT: %25 = sext i32 %24 to i64
+; CHECK-NEXT: %26 = insertelement <2 x i64> %23, i64 %25, i32 1
+; CHECK-NEXT: %27 = extractelement <4 x i32> %10, i32 0
+; CHECK-NEXT: %28 = sext i32 %27 to i64
+; CHECK-NEXT: %29 = insertelement <2 x i64> undef, i64 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <4 x i32> %10, i32 1
+; CHECK-NEXT: %31 = sext i32 %30 to i64
+; CHECK-NEXT: %32 = insertelement <2 x i64> %29, i64 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <4 x i32> %10, i32 2
+; CHECK-NEXT: %34 = sext i32 %33 to i64
+; CHECK-NEXT: %35 = insertelement <2 x i64> undef, i64 %34, i32 0
+; CHECK-NEXT: %36 = extractelement <4 x i32> %10, i32 3
+; CHECK-NEXT: %37 = sext i32 %36 to i64
+; CHECK-NEXT: %38 = insertelement <2 x i64> %35, i64 %37, i32 1
+; CHECK-NEXT: %39 = extractelement <4 x i32> %11, i32 0
+; CHECK-NEXT: %40 = sext i32 %39 to i64
+; CHECK-NEXT: %41 = insertelement <2 x i64> undef, i64 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <4 x i32> %11, i32 1
+; CHECK-NEXT: %43 = sext i32 %42 to i64
+; CHECK-NEXT: %44 = insertelement <2 x i64> %41, i64 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <4 x i32> %11, i32 2
+; CHECK-NEXT: %46 = sext i32 %45 to i64
+; CHECK-NEXT: %47 = insertelement <2 x i64> undef, i64 %46, i32 0
+; CHECK-NEXT: %48 = extractelement <4 x i32> %11, i32 3
+; CHECK-NEXT: %49 = sext i32 %48 to i64
+; CHECK-NEXT: %50 = insertelement <2 x i64> %47, i64 %49, i32 1
+; CHECK-NEXT: %51 = extractelement <4 x i32> %12, i32 0
+; CHECK-NEXT: %52 = sext i32 %51 to i64
+; CHECK-NEXT: %53 = insertelement <2 x i64> undef, i64 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <4 x i32> %12, i32 1
+; CHECK-NEXT: %55 = sext i32 %54 to i64
+; CHECK-NEXT: %56 = insertelement <2 x i64> %53, i64 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <4 x i32> %12, i32 2
+; CHECK-NEXT: %58 = sext i32 %57 to i64
+; CHECK-NEXT: %59 = insertelement <2 x i64> undef, i64 %58, i32 0
+; CHECK-NEXT: %60 = extractelement <4 x i32> %12, i32 3
+; CHECK-NEXT: %61 = sext i32 %60 to i64
+; CHECK-NEXT: %62 = insertelement <2 x i64> %59, i64 %61, i32 1
+; CHECK-NEXT: %63 = extractelement <4 x i32> %13, i32 0
+; CHECK-NEXT: %64 = sext i32 %63 to i64
+; CHECK-NEXT: %65 = insertelement <2 x i64> undef, i64 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <4 x i32> %13, i32 1
+; CHECK-NEXT: %67 = sext i32 %66 to i64
+; CHECK-NEXT: %68 = insertelement <2 x i64> %65, i64 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <4 x i32> %13, i32 2
+; CHECK-NEXT: %70 = sext i32 %69 to i64
+; CHECK-NEXT: %71 = insertelement <2 x i64> undef, i64 %70, i32 0
+; CHECK-NEXT: %72 = extractelement <4 x i32> %13, i32 3
+; CHECK-NEXT: %73 = sext i32 %72 to i64
+; CHECK-NEXT: %74 = insertelement <2 x i64> %71, i64 %73, i32 1
+; CHECK-NEXT: store <2 x i64> %26, <2 x i64>* %0, align 16
+; CHECK-NEXT: store <2 x i64> %32, <2 x i64>* %1, align 16
+; CHECK-NEXT: store <2 x i64> %38, <2 x i64>* %2, align 16
+; CHECK-NEXT: store <2 x i64> %44, <2 x i64>* %3, align 16
+; CHECK-NEXT: store <2 x i64> %50, <2 x i64>* %4, align 16
+; CHECK-NEXT: store <2 x i64> %56, <2 x i64>* %5, align 16
+; CHECK-NEXT: store <2 x i64> %62, <2 x i64>* %6, align 16
+; CHECK-NEXT: store <2 x i64> %68, <2 x i64>* %7, align 16
+; CHECK-NEXT: store <2 x i64> %74, <2 x i64>* %8, align 16
+; CHECK-NEXT: ret <2 x i64> %20
+
+define <20 x i32> @trunc_cast_20xi64_to_20xi32(<20 x i64>) { ; Input IR: truncates a 20-lane i64 vector; the expected output takes it as ten <2 x i64> arguments.
+ %2 = trunc <20 x i64> %0 to <20 x i32> ; Expected lowering is per lane: extractelement, scalar trunc i64 to i32, insertelement (two inputs feed each output chunk).
+ ret <20 x i32> %2 ; Expected result is five <4 x i32> chunks: lanes 0-3 returned, the remaining four chunks stored through pointer arguments %0-%3.
+}
+; CHECK-LABEL: define <4 x i32> @trunc_cast_20xi64_to_20xi32(<4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <4 x i32>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %15 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %16 = trunc i64 %15 to i32
+; CHECK-NEXT: %17 = insertelement <4 x i32> undef, i32 %16, i32 0
+; CHECK-NEXT: %18 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %19 = trunc i64 %18 to i32
+; CHECK-NEXT: %20 = insertelement <4 x i32> %17, i32 %19, i32 1
+; CHECK-NEXT: %21 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %22 = trunc i64 %21 to i32
+; CHECK-NEXT: %23 = insertelement <4 x i32> %20, i32 %22, i32 2
+; CHECK-NEXT: %24 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %25 = trunc i64 %24 to i32
+; CHECK-NEXT: %26 = insertelement <4 x i32> %23, i32 %25, i32 3
+; CHECK-NEXT: %27 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %28 = trunc i64 %27 to i32
+; CHECK-NEXT: %29 = insertelement <4 x i32> undef, i32 %28, i32 0
+; CHECK-NEXT: %30 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %31 = trunc i64 %30 to i32
+; CHECK-NEXT: %32 = insertelement <4 x i32> %29, i32 %31, i32 1
+; CHECK-NEXT: %33 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %34 = trunc i64 %33 to i32
+; CHECK-NEXT: %35 = insertelement <4 x i32> %32, i32 %34, i32 2
+; CHECK-NEXT: %36 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %37 = trunc i64 %36 to i32
+; CHECK-NEXT: %38 = insertelement <4 x i32> %35, i32 %37, i32 3
+; CHECK-NEXT: %39 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %40 = trunc i64 %39 to i32
+; CHECK-NEXT: %41 = insertelement <4 x i32> undef, i32 %40, i32 0
+; CHECK-NEXT: %42 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %43 = trunc i64 %42 to i32
+; CHECK-NEXT: %44 = insertelement <4 x i32> %41, i32 %43, i32 1
+; CHECK-NEXT: %45 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %46 = trunc i64 %45 to i32
+; CHECK-NEXT: %47 = insertelement <4 x i32> %44, i32 %46, i32 2
+; CHECK-NEXT: %48 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %49 = trunc i64 %48 to i32
+; CHECK-NEXT: %50 = insertelement <4 x i32> %47, i32 %49, i32 3
+; CHECK-NEXT: %51 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %52 = trunc i64 %51 to i32
+; CHECK-NEXT: %53 = insertelement <4 x i32> undef, i32 %52, i32 0
+; CHECK-NEXT: %54 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %55 = trunc i64 %54 to i32
+; CHECK-NEXT: %56 = insertelement <4 x i32> %53, i32 %55, i32 1
+; CHECK-NEXT: %57 = extractelement <2 x i64> %11, i32 0
+; CHECK-NEXT: %58 = trunc i64 %57 to i32
+; CHECK-NEXT: %59 = insertelement <4 x i32> %56, i32 %58, i32 2
+; CHECK-NEXT: %60 = extractelement <2 x i64> %11, i32 1
+; CHECK-NEXT: %61 = trunc i64 %60 to i32
+; CHECK-NEXT: %62 = insertelement <4 x i32> %59, i32 %61, i32 3
+; CHECK-NEXT: %63 = extractelement <2 x i64> %12, i32 0
+; CHECK-NEXT: %64 = trunc i64 %63 to i32
+; CHECK-NEXT: %65 = insertelement <4 x i32> undef, i32 %64, i32 0
+; CHECK-NEXT: %66 = extractelement <2 x i64> %12, i32 1
+; CHECK-NEXT: %67 = trunc i64 %66 to i32
+; CHECK-NEXT: %68 = insertelement <4 x i32> %65, i32 %67, i32 1
+; CHECK-NEXT: %69 = extractelement <2 x i64> %13, i32 0
+; CHECK-NEXT: %70 = trunc i64 %69 to i32
+; CHECK-NEXT: %71 = insertelement <4 x i32> %68, i32 %70, i32 2
+; CHECK-NEXT: %72 = extractelement <2 x i64> %13, i32 1
+; CHECK-NEXT: %73 = trunc i64 %72 to i32
+; CHECK-NEXT: %74 = insertelement <4 x i32> %71, i32 %73, i32 3
+; CHECK-NEXT: store <4 x i32> %38, <4 x i32>* %0, align 16
+; CHECK-NEXT: store <4 x i32> %50, <4 x i32>* %1, align 16
+; CHECK-NEXT: store <4 x i32> %62, <4 x i32>* %2, align 16
+; CHECK-NEXT: store <4 x i32> %74, <4 x i32>* %3, align 16
+; CHECK-NEXT: ret <4 x i32> %26
+
+define <20 x i16> @trunc_cast_20xi64_to_20xi16(<20 x i64>) {
+ %2 = trunc <20 x i64> %0 to <20 x i16>
+ ret <20 x i16> %2
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_20xi64_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %13 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %14 = trunc i64 %13 to i16
+; CHECK-NEXT: %15 = insertelement <8 x i16> undef, i16 %14, i32 0
+; CHECK-NEXT: %16 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %17 = trunc i64 %16 to i16
+; CHECK-NEXT: %18 = insertelement <8 x i16> %15, i16 %17, i32 1
+; CHECK-NEXT: %19 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %20 = trunc i64 %19 to i16
+; CHECK-NEXT: %21 = insertelement <8 x i16> %18, i16 %20, i32 2
+; CHECK-NEXT: %22 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %23 = trunc i64 %22 to i16
+; CHECK-NEXT: %24 = insertelement <8 x i16> %21, i16 %23, i32 3
+; CHECK-NEXT: %25 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %26 = trunc i64 %25 to i16
+; CHECK-NEXT: %27 = insertelement <8 x i16> %24, i16 %26, i32 4
+; CHECK-NEXT: %28 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %29 = trunc i64 %28 to i16
+; CHECK-NEXT: %30 = insertelement <8 x i16> %27, i16 %29, i32 5
+; CHECK-NEXT: %31 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %32 = trunc i64 %31 to i16
+; CHECK-NEXT: %33 = insertelement <8 x i16> %30, i16 %32, i32 6
+; CHECK-NEXT: %34 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %35 = trunc i64 %34 to i16
+; CHECK-NEXT: %36 = insertelement <8 x i16> %33, i16 %35, i32 7
+; CHECK-NEXT: %37 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %38 = trunc i64 %37 to i16
+; CHECK-NEXT: %39 = insertelement <8 x i16> undef, i16 %38, i32 0
+; CHECK-NEXT: %40 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %41 = trunc i64 %40 to i16
+; CHECK-NEXT: %42 = insertelement <8 x i16> %39, i16 %41, i32 1
+; CHECK-NEXT: %43 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %44 = trunc i64 %43 to i16
+; CHECK-NEXT: %45 = insertelement <8 x i16> %42, i16 %44, i32 2
+; CHECK-NEXT: %46 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %47 = trunc i64 %46 to i16
+; CHECK-NEXT: %48 = insertelement <8 x i16> %45, i16 %47, i32 3
+; CHECK-NEXT: %49 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %50 = trunc i64 %49 to i16
+; CHECK-NEXT: %51 = insertelement <8 x i16> %48, i16 %50, i32 4
+; CHECK-NEXT: %52 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %53 = trunc i64 %52 to i16
+; CHECK-NEXT: %54 = insertelement <8 x i16> %51, i16 %53, i32 5
+; CHECK-NEXT: %55 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %56 = trunc i64 %55 to i16
+; CHECK-NEXT: %57 = insertelement <8 x i16> %54, i16 %56, i32 6
+; CHECK-NEXT: %58 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %59 = trunc i64 %58 to i16
+; CHECK-NEXT: %60 = insertelement <8 x i16> %57, i16 %59, i32 7
+; CHECK-NEXT: %61 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %62 = trunc i64 %61 to i16
+; CHECK-NEXT: %63 = insertelement <8 x i16> undef, i16 %62, i32 0
+; CHECK-NEXT: %64 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %65 = trunc i64 %64 to i16
+; CHECK-NEXT: %66 = insertelement <8 x i16> %63, i16 %65, i32 1
+; CHECK-NEXT: %67 = extractelement <2 x i64> %11, i32 0
+; CHECK-NEXT: %68 = trunc i64 %67 to i16
+; CHECK-NEXT: %69 = insertelement <8 x i16> %66, i16 %68, i32 2
+; CHECK-NEXT: %70 = extractelement <2 x i64> %11, i32 1
+; CHECK-NEXT: %71 = trunc i64 %70 to i16
+; CHECK-NEXT: %72 = insertelement <8 x i16> %69, i16 %71, i32 3
+; CHECK-NEXT: store <8 x i16> %60, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %72, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %36
+
+define <20 x i8> @trunc_cast_20xi64_to_20xi8(<20 x i64>) { ; test input: illegal-width <20 x i64> -> <20 x i8> truncation
+ %2 = trunc <20 x i64> %0 to <20 x i8> ; lane-wise i64 -> i8 trunc; expectations below read the lanes from ten <2 x i64> arguments
+ ret <20 x i8> %2 ; expectations below: lanes 0-15 returned as <16 x i8>, lanes 16-19 stored through the single <16 x i8>* argument
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_20xi64_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+; CHECK-NEXT: %12 = extractelement <2 x i64> %1, i32 0
+; CHECK-NEXT: %13 = trunc i64 %12 to i8
+; CHECK-NEXT: %14 = insertelement <16 x i8> undef, i8 %13, i32 0
+; CHECK-NEXT: %15 = extractelement <2 x i64> %1, i32 1
+; CHECK-NEXT: %16 = trunc i64 %15 to i8
+; CHECK-NEXT: %17 = insertelement <16 x i8> %14, i8 %16, i32 1
+; CHECK-NEXT: %18 = extractelement <2 x i64> %2, i32 0
+; CHECK-NEXT: %19 = trunc i64 %18 to i8
+; CHECK-NEXT: %20 = insertelement <16 x i8> %17, i8 %19, i32 2
+; CHECK-NEXT: %21 = extractelement <2 x i64> %2, i32 1
+; CHECK-NEXT: %22 = trunc i64 %21 to i8
+; CHECK-NEXT: %23 = insertelement <16 x i8> %20, i8 %22, i32 3
+; CHECK-NEXT: %24 = extractelement <2 x i64> %3, i32 0
+; CHECK-NEXT: %25 = trunc i64 %24 to i8
+; CHECK-NEXT: %26 = insertelement <16 x i8> %23, i8 %25, i32 4
+; CHECK-NEXT: %27 = extractelement <2 x i64> %3, i32 1
+; CHECK-NEXT: %28 = trunc i64 %27 to i8
+; CHECK-NEXT: %29 = insertelement <16 x i8> %26, i8 %28, i32 5
+; CHECK-NEXT: %30 = extractelement <2 x i64> %4, i32 0
+; CHECK-NEXT: %31 = trunc i64 %30 to i8
+; CHECK-NEXT: %32 = insertelement <16 x i8> %29, i8 %31, i32 6
+; CHECK-NEXT: %33 = extractelement <2 x i64> %4, i32 1
+; CHECK-NEXT: %34 = trunc i64 %33 to i8
+; CHECK-NEXT: %35 = insertelement <16 x i8> %32, i8 %34, i32 7
+; CHECK-NEXT: %36 = extractelement <2 x i64> %5, i32 0
+; CHECK-NEXT: %37 = trunc i64 %36 to i8
+; CHECK-NEXT: %38 = insertelement <16 x i8> %35, i8 %37, i32 8
+; CHECK-NEXT: %39 = extractelement <2 x i64> %5, i32 1
+; CHECK-NEXT: %40 = trunc i64 %39 to i8
+; CHECK-NEXT: %41 = insertelement <16 x i8> %38, i8 %40, i32 9
+; CHECK-NEXT: %42 = extractelement <2 x i64> %6, i32 0
+; CHECK-NEXT: %43 = trunc i64 %42 to i8
+; CHECK-NEXT: %44 = insertelement <16 x i8> %41, i8 %43, i32 10
+; CHECK-NEXT: %45 = extractelement <2 x i64> %6, i32 1
+; CHECK-NEXT: %46 = trunc i64 %45 to i8
+; CHECK-NEXT: %47 = insertelement <16 x i8> %44, i8 %46, i32 11
+; CHECK-NEXT: %48 = extractelement <2 x i64> %7, i32 0
+; CHECK-NEXT: %49 = trunc i64 %48 to i8
+; CHECK-NEXT: %50 = insertelement <16 x i8> %47, i8 %49, i32 12
+; CHECK-NEXT: %51 = extractelement <2 x i64> %7, i32 1
+; CHECK-NEXT: %52 = trunc i64 %51 to i8
+; CHECK-NEXT: %53 = insertelement <16 x i8> %50, i8 %52, i32 13
+; CHECK-NEXT: %54 = extractelement <2 x i64> %8, i32 0
+; CHECK-NEXT: %55 = trunc i64 %54 to i8
+; CHECK-NEXT: %56 = insertelement <16 x i8> %53, i8 %55, i32 14
+; CHECK-NEXT: %57 = extractelement <2 x i64> %8, i32 1
+; CHECK-NEXT: %58 = trunc i64 %57 to i8
+; CHECK-NEXT: %59 = insertelement <16 x i8> %56, i8 %58, i32 15
+; CHECK-NEXT: %60 = extractelement <2 x i64> %9, i32 0
+; CHECK-NEXT: %61 = trunc i64 %60 to i8
+; CHECK-NEXT: %62 = insertelement <16 x i8> undef, i8 %61, i32 0
+; CHECK-NEXT: %63 = extractelement <2 x i64> %9, i32 1
+; CHECK-NEXT: %64 = trunc i64 %63 to i8
+; CHECK-NEXT: %65 = insertelement <16 x i8> %62, i8 %64, i32 1
+; CHECK-NEXT: %66 = extractelement <2 x i64> %10, i32 0
+; CHECK-NEXT: %67 = trunc i64 %66 to i8
+; CHECK-NEXT: %68 = insertelement <16 x i8> %65, i8 %67, i32 2
+; CHECK-NEXT: %69 = extractelement <2 x i64> %10, i32 1
+; CHECK-NEXT: %70 = trunc i64 %69 to i8
+; CHECK-NEXT: %71 = insertelement <16 x i8> %68, i8 %70, i32 3
+; CHECK-NEXT: store <16 x i8> %71, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %59
+
+define <20 x i16> @trunc_cast_20xi32_to_20xi16(<20 x i32>) { ; test input: illegal-width <20 x i32> -> <20 x i16> truncation
+ %2 = trunc <20 x i32> %0 to <20 x i16> ; lane-wise i32 -> i16 trunc; expectations below read the lanes from five <4 x i32> arguments
+ ret <20 x i16> %2 ; expectations below: lanes 0-7 returned as <8 x i16>, lanes 8-15 and 16-19 stored through the two <8 x i16>* arguments
+}
+; CHECK-LABEL: define <8 x i16> @trunc_cast_20xi32_to_20xi16(<8 x i16>* nocapture nonnull dereferenceable(16), <8 x i16>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %8 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %9 = trunc i32 %8 to i16
+; CHECK-NEXT: %10 = insertelement <8 x i16> undef, i16 %9, i32 0
+; CHECK-NEXT: %11 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %12 = trunc i32 %11 to i16
+; CHECK-NEXT: %13 = insertelement <8 x i16> %10, i16 %12, i32 1
+; CHECK-NEXT: %14 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %15 = trunc i32 %14 to i16
+; CHECK-NEXT: %16 = insertelement <8 x i16> %13, i16 %15, i32 2
+; CHECK-NEXT: %17 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %18 = trunc i32 %17 to i16
+; CHECK-NEXT: %19 = insertelement <8 x i16> %16, i16 %18, i32 3
+; CHECK-NEXT: %20 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %21 = trunc i32 %20 to i16
+; CHECK-NEXT: %22 = insertelement <8 x i16> %19, i16 %21, i32 4
+; CHECK-NEXT: %23 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %24 = trunc i32 %23 to i16
+; CHECK-NEXT: %25 = insertelement <8 x i16> %22, i16 %24, i32 5
+; CHECK-NEXT: %26 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %27 = trunc i32 %26 to i16
+; CHECK-NEXT: %28 = insertelement <8 x i16> %25, i16 %27, i32 6
+; CHECK-NEXT: %29 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %30 = trunc i32 %29 to i16
+; CHECK-NEXT: %31 = insertelement <8 x i16> %28, i16 %30, i32 7
+; CHECK-NEXT: %32 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %33 = trunc i32 %32 to i16
+; CHECK-NEXT: %34 = insertelement <8 x i16> undef, i16 %33, i32 0
+; CHECK-NEXT: %35 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %36 = trunc i32 %35 to i16
+; CHECK-NEXT: %37 = insertelement <8 x i16> %34, i16 %36, i32 1
+; CHECK-NEXT: %38 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %39 = trunc i32 %38 to i16
+; CHECK-NEXT: %40 = insertelement <8 x i16> %37, i16 %39, i32 2
+; CHECK-NEXT: %41 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %42 = trunc i32 %41 to i16
+; CHECK-NEXT: %43 = insertelement <8 x i16> %40, i16 %42, i32 3
+; CHECK-NEXT: %44 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %45 = trunc i32 %44 to i16
+; CHECK-NEXT: %46 = insertelement <8 x i16> %43, i16 %45, i32 4
+; CHECK-NEXT: %47 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %48 = trunc i32 %47 to i16
+; CHECK-NEXT: %49 = insertelement <8 x i16> %46, i16 %48, i32 5
+; CHECK-NEXT: %50 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %51 = trunc i32 %50 to i16
+; CHECK-NEXT: %52 = insertelement <8 x i16> %49, i16 %51, i32 6
+; CHECK-NEXT: %53 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %54 = trunc i32 %53 to i16
+; CHECK-NEXT: %55 = insertelement <8 x i16> %52, i16 %54, i32 7
+; CHECK-NEXT: %56 = extractelement <4 x i32> %6, i32 0
+; CHECK-NEXT: %57 = trunc i32 %56 to i16
+; CHECK-NEXT: %58 = insertelement <8 x i16> undef, i16 %57, i32 0
+; CHECK-NEXT: %59 = extractelement <4 x i32> %6, i32 1
+; CHECK-NEXT: %60 = trunc i32 %59 to i16
+; CHECK-NEXT: %61 = insertelement <8 x i16> %58, i16 %60, i32 1
+; CHECK-NEXT: %62 = extractelement <4 x i32> %6, i32 2
+; CHECK-NEXT: %63 = trunc i32 %62 to i16
+; CHECK-NEXT: %64 = insertelement <8 x i16> %61, i16 %63, i32 2
+; CHECK-NEXT: %65 = extractelement <4 x i32> %6, i32 3
+; CHECK-NEXT: %66 = trunc i32 %65 to i16
+; CHECK-NEXT: %67 = insertelement <8 x i16> %64, i16 %66, i32 3
+; CHECK-NEXT: store <8 x i16> %55, <8 x i16>* %0, align 16
+; CHECK-NEXT: store <8 x i16> %67, <8 x i16>* %1, align 16
+; CHECK-NEXT: ret <8 x i16> %31
+
+define <20 x i8> @trunc_cast_20xi32_to_20xi8(<20 x i32>) { ; test input: illegal-width <20 x i32> -> <20 x i8> truncation
+ %2 = trunc <20 x i32> %0 to <20 x i8> ; lane-wise i32 -> i8 trunc; expectations below read the lanes from five <4 x i32> arguments
+ ret <20 x i8> %2 ; expectations below: lanes 0-15 returned as <16 x i8>, lanes 16-19 stored through the single <16 x i8>* argument
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_20xi32_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)
+; CHECK-NEXT: %7 = extractelement <4 x i32> %1, i32 0
+; CHECK-NEXT: %8 = trunc i32 %7 to i8
+; CHECK-NEXT: %9 = insertelement <16 x i8> undef, i8 %8, i32 0
+; CHECK-NEXT: %10 = extractelement <4 x i32> %1, i32 1
+; CHECK-NEXT: %11 = trunc i32 %10 to i8
+; CHECK-NEXT: %12 = insertelement <16 x i8> %9, i8 %11, i32 1
+; CHECK-NEXT: %13 = extractelement <4 x i32> %1, i32 2
+; CHECK-NEXT: %14 = trunc i32 %13 to i8
+; CHECK-NEXT: %15 = insertelement <16 x i8> %12, i8 %14, i32 2
+; CHECK-NEXT: %16 = extractelement <4 x i32> %1, i32 3
+; CHECK-NEXT: %17 = trunc i32 %16 to i8
+; CHECK-NEXT: %18 = insertelement <16 x i8> %15, i8 %17, i32 3
+; CHECK-NEXT: %19 = extractelement <4 x i32> %2, i32 0
+; CHECK-NEXT: %20 = trunc i32 %19 to i8
+; CHECK-NEXT: %21 = insertelement <16 x i8> %18, i8 %20, i32 4
+; CHECK-NEXT: %22 = extractelement <4 x i32> %2, i32 1
+; CHECK-NEXT: %23 = trunc i32 %22 to i8
+; CHECK-NEXT: %24 = insertelement <16 x i8> %21, i8 %23, i32 5
+; CHECK-NEXT: %25 = extractelement <4 x i32> %2, i32 2
+; CHECK-NEXT: %26 = trunc i32 %25 to i8
+; CHECK-NEXT: %27 = insertelement <16 x i8> %24, i8 %26, i32 6
+; CHECK-NEXT: %28 = extractelement <4 x i32> %2, i32 3
+; CHECK-NEXT: %29 = trunc i32 %28 to i8
+; CHECK-NEXT: %30 = insertelement <16 x i8> %27, i8 %29, i32 7
+; CHECK-NEXT: %31 = extractelement <4 x i32> %3, i32 0
+; CHECK-NEXT: %32 = trunc i32 %31 to i8
+; CHECK-NEXT: %33 = insertelement <16 x i8> %30, i8 %32, i32 8
+; CHECK-NEXT: %34 = extractelement <4 x i32> %3, i32 1
+; CHECK-NEXT: %35 = trunc i32 %34 to i8
+; CHECK-NEXT: %36 = insertelement <16 x i8> %33, i8 %35, i32 9
+; CHECK-NEXT: %37 = extractelement <4 x i32> %3, i32 2
+; CHECK-NEXT: %38 = trunc i32 %37 to i8
+; CHECK-NEXT: %39 = insertelement <16 x i8> %36, i8 %38, i32 10
+; CHECK-NEXT: %40 = extractelement <4 x i32> %3, i32 3
+; CHECK-NEXT: %41 = trunc i32 %40 to i8
+; CHECK-NEXT: %42 = insertelement <16 x i8> %39, i8 %41, i32 11
+; CHECK-NEXT: %43 = extractelement <4 x i32> %4, i32 0
+; CHECK-NEXT: %44 = trunc i32 %43 to i8
+; CHECK-NEXT: %45 = insertelement <16 x i8> %42, i8 %44, i32 12
+; CHECK-NEXT: %46 = extractelement <4 x i32> %4, i32 1
+; CHECK-NEXT: %47 = trunc i32 %46 to i8
+; CHECK-NEXT: %48 = insertelement <16 x i8> %45, i8 %47, i32 13
+; CHECK-NEXT: %49 = extractelement <4 x i32> %4, i32 2
+; CHECK-NEXT: %50 = trunc i32 %49 to i8
+; CHECK-NEXT: %51 = insertelement <16 x i8> %48, i8 %50, i32 14
+; CHECK-NEXT: %52 = extractelement <4 x i32> %4, i32 3
+; CHECK-NEXT: %53 = trunc i32 %52 to i8
+; CHECK-NEXT: %54 = insertelement <16 x i8> %51, i8 %53, i32 15
+; CHECK-NEXT: %55 = extractelement <4 x i32> %5, i32 0
+; CHECK-NEXT: %56 = trunc i32 %55 to i8
+; CHECK-NEXT: %57 = insertelement <16 x i8> undef, i8 %56, i32 0
+; CHECK-NEXT: %58 = extractelement <4 x i32> %5, i32 1
+; CHECK-NEXT: %59 = trunc i32 %58 to i8
+; CHECK-NEXT: %60 = insertelement <16 x i8> %57, i8 %59, i32 1
+; CHECK-NEXT: %61 = extractelement <4 x i32> %5, i32 2
+; CHECK-NEXT: %62 = trunc i32 %61 to i8
+; CHECK-NEXT: %63 = insertelement <16 x i8> %60, i8 %62, i32 2
+; CHECK-NEXT: %64 = extractelement <4 x i32> %5, i32 3
+; CHECK-NEXT: %65 = trunc i32 %64 to i8
+; CHECK-NEXT: %66 = insertelement <16 x i8> %63, i8 %65, i32 3
+; CHECK-NEXT: store <16 x i8> %66, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %54
+
+define <20 x i8> @trunc_cast_20xi16_to_20xi8(<20 x i16>) { ; test input: illegal-width <20 x i16> -> <20 x i8> truncation
+ %2 = trunc <20 x i16> %0 to <20 x i8> ; lane-wise i16 -> i8 trunc; expectations below read the lanes from three <8 x i16> arguments
+ ret <20 x i8> %2 ; expectations below: lanes 0-15 returned as <16 x i8>, lanes 16-19 stored through the single <16 x i8>* argument
+}
+; CHECK-LABEL: define <16 x i8> @trunc_cast_20xi16_to_20xi8(<16 x i8>* nocapture nonnull dereferenceable(16), <8 x i16>, <8 x i16>, <8 x i16>)
+; CHECK-NEXT: %5 = extractelement <8 x i16> %1, i32 0
+; CHECK-NEXT: %6 = trunc i16 %5 to i8
+; CHECK-NEXT: %7 = insertelement <16 x i8> undef, i8 %6, i32 0
+; CHECK-NEXT: %8 = extractelement <8 x i16> %1, i32 1
+; CHECK-NEXT: %9 = trunc i16 %8 to i8
+; CHECK-NEXT: %10 = insertelement <16 x i8> %7, i8 %9, i32 1
+; CHECK-NEXT: %11 = extractelement <8 x i16> %1, i32 2
+; CHECK-NEXT: %12 = trunc i16 %11 to i8
+; CHECK-NEXT: %13 = insertelement <16 x i8> %10, i8 %12, i32 2
+; CHECK-NEXT: %14 = extractelement <8 x i16> %1, i32 3
+; CHECK-NEXT: %15 = trunc i16 %14 to i8
+; CHECK-NEXT: %16 = insertelement <16 x i8> %13, i8 %15, i32 3
+; CHECK-NEXT: %17 = extractelement <8 x i16> %1, i32 4
+; CHECK-NEXT: %18 = trunc i16 %17 to i8
+; CHECK-NEXT: %19 = insertelement <16 x i8> %16, i8 %18, i32 4
+; CHECK-NEXT: %20 = extractelement <8 x i16> %1, i32 5
+; CHECK-NEXT: %21 = trunc i16 %20 to i8
+; CHECK-NEXT: %22 = insertelement <16 x i8> %19, i8 %21, i32 5
+; CHECK-NEXT: %23 = extractelement <8 x i16> %1, i32 6
+; CHECK-NEXT: %24 = trunc i16 %23 to i8
+; CHECK-NEXT: %25 = insertelement <16 x i8> %22, i8 %24, i32 6
+; CHECK-NEXT: %26 = extractelement <8 x i16> %1, i32 7
+; CHECK-NEXT: %27 = trunc i16 %26 to i8
+; CHECK-NEXT: %28 = insertelement <16 x i8> %25, i8 %27, i32 7
+; CHECK-NEXT: %29 = extractelement <8 x i16> %2, i32 0
+; CHECK-NEXT: %30 = trunc i16 %29 to i8
+; CHECK-NEXT: %31 = insertelement <16 x i8> %28, i8 %30, i32 8
+; CHECK-NEXT: %32 = extractelement <8 x i16> %2, i32 1
+; CHECK-NEXT: %33 = trunc i16 %32 to i8
+; CHECK-NEXT: %34 = insertelement <16 x i8> %31, i8 %33, i32 9
+; CHECK-NEXT: %35 = extractelement <8 x i16> %2, i32 2
+; CHECK-NEXT: %36 = trunc i16 %35 to i8
+; CHECK-NEXT: %37 = insertelement <16 x i8> %34, i8 %36, i32 10
+; CHECK-NEXT: %38 = extractelement <8 x i16> %2, i32 3
+; CHECK-NEXT: %39 = trunc i16 %38 to i8
+; CHECK-NEXT: %40 = insertelement <16 x i8> %37, i8 %39, i32 11
+; CHECK-NEXT: %41 = extractelement <8 x i16> %2, i32 4
+; CHECK-NEXT: %42 = trunc i16 %41 to i8
+; CHECK-NEXT: %43 = insertelement <16 x i8> %40, i8 %42, i32 12
+; CHECK-NEXT: %44 = extractelement <8 x i16> %2, i32 5
+; CHECK-NEXT: %45 = trunc i16 %44 to i8
+; CHECK-NEXT: %46 = insertelement <16 x i8> %43, i8 %45, i32 13
+; CHECK-NEXT: %47 = extractelement <8 x i16> %2, i32 6
+; CHECK-NEXT: %48 = trunc i16 %47 to i8
+; CHECK-NEXT: %49 = insertelement <16 x i8> %46, i8 %48, i32 14
+; CHECK-NEXT: %50 = extractelement <8 x i16> %2, i32 7
+; CHECK-NEXT: %51 = trunc i16 %50 to i8
+; CHECK-NEXT: %52 = insertelement <16 x i8> %49, i8 %51, i32 15
+; CHECK-NEXT: %53 = extractelement <8 x i16> %3, i32 0
+; CHECK-NEXT: %54 = trunc i16 %53 to i8
+; CHECK-NEXT: %55 = insertelement <16 x i8> undef, i8 %54, i32 0
+; CHECK-NEXT: %56 = extractelement <8 x i16> %3, i32 1
+; CHECK-NEXT: %57 = trunc i16 %56 to i8
+; CHECK-NEXT: %58 = insertelement <16 x i8> %55, i8 %57, i32 1
+; CHECK-NEXT: %59 = extractelement <8 x i16> %3, i32 2
+; CHECK-NEXT: %60 = trunc i16 %59 to i8
+; CHECK-NEXT: %61 = insertelement <16 x i8> %58, i8 %60, i32 2
+; CHECK-NEXT: %62 = extractelement <8 x i16> %3, i32 3
+; CHECK-NEXT: %63 = trunc i16 %62 to i8
+; CHECK-NEXT: %64 = insertelement <16 x i8> %61, i8 %63, i32 3
+; CHECK-NEXT: store <16 x i8> %64, <16 x i8>* %0, align 16
+; CHECK-NEXT: ret <16 x i8> %52
+
« no previous file with comments | « test/Transforms/NaCl/vector-canonicalization-calls.ll ('k') | test/Transforms/NaCl/vector-canonicalization-cmps.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698