OLD | NEW |
1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like | 1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like |
2 ; those for pmull vary more wildly depending on operand size (rather than | 2 ; those for pmull vary more wildly depending on operand size (rather than |
3 ; follow a usual pattern). | 3 ; follow a usual pattern). |
4 | 4 |
5 ; RUN: %p2i -i %s --args -O2 -mattr=sse4.1 -sandbox --verbose none \ | 5 ; RUN: %p2i --assemble --disassemble -i %s --args -O2 -mattr=sse4.1 -sandbox \ |
6 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | 6 ; RUN: --verbose none | FileCheck %s |
7 ; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s | |
8 | 7 |
9 define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { | 8 define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { |
10 entry: | 9 entry: |
11 %res = mul <8 x i16> %arg0, %arg1 | 10 %res = mul <8 x i16> %arg0, %arg1 |
12 ret <8 x i16> %res | 11 ret <8 x i16> %res |
13 ; CHECK-LABEL: test_mul_v8i16 | 12 ; CHECK-LABEL: test_mul_v8i16 |
14 ; CHECK: 66 0f d5 c1 pmullw xmm0, xmm1 | 13 ; CHECK: 66 0f d5 c1 pmullw xmm0,xmm1 |
15 } | 14 } |
16 | 15 |
17 ; Test register and address mode encoding. | 16 ; Test register and address mode encoding. |
18 define <8 x i16> @test_mul_v8i16_more_regs(<8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, <8 x i16> %arg7, <8 x i16> %arg8) { | 17 define <8 x i16> @test_mul_v8i16_more_regs(<8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, <8 x i16> %arg7, <8 x i16> %arg8) { |
19 entry: | 18 entry: |
20 %res1 = mul <8 x i16> %arg0, %arg1 | 19 %res1 = mul <8 x i16> %arg0, %arg1 |
21 %res2 = mul <8 x i16> %arg0, %arg2 | 20 %res2 = mul <8 x i16> %arg0, %arg2 |
22 %res3 = mul <8 x i16> %arg0, %arg3 | 21 %res3 = mul <8 x i16> %arg0, %arg3 |
23 %res4 = mul <8 x i16> %arg0, %arg4 | 22 %res4 = mul <8 x i16> %arg0, %arg4 |
24 %res5 = mul <8 x i16> %arg0, %arg5 | 23 %res5 = mul <8 x i16> %arg0, %arg5 |
25 %res6 = mul <8 x i16> %arg0, %arg6 | 24 %res6 = mul <8 x i16> %arg0, %arg6 |
26 %res7 = mul <8 x i16> %arg0, %arg7 | 25 %res7 = mul <8 x i16> %arg0, %arg7 |
27 %res8 = mul <8 x i16> %arg0, %arg8 | 26 %res8 = mul <8 x i16> %arg0, %arg8 |
28 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 | 27 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 |
29 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 | 28 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 |
30 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 | 29 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 |
31 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 | 30 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 |
32 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 | 31 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 |
33 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 | 32 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 |
34 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 | 33 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 |
35 ret <8 x i16> %res | 34 ret <8 x i16> %res |
36 ; CHECK-LABEL: test_mul_v8i16_more_regs | 35 ; CHECK-LABEL: test_mul_v8i16_more_regs |
37 ; CHECK-DAG: 66 0f d5 c2 pmullw xmm0, xmm2 | 36 ; CHECK-DAG: 66 0f d5 c2 pmullw xmm0,xmm2 |
38 ; CHECK-DAG: 66 0f d5 c3 pmullw xmm0, xmm3 | 37 ; CHECK-DAG: 66 0f d5 c3 pmullw xmm0,xmm3 |
39 ; CHECK-DAG: 66 0f d5 c4 pmullw xmm0, xmm4 | 38 ; CHECK-DAG: 66 0f d5 c4 pmullw xmm0,xmm4 |
40 ; CHECK-DAG: 66 0f d5 c5 pmullw xmm0, xmm5 | 39 ; CHECK-DAG: 66 0f d5 c5 pmullw xmm0,xmm5 |
41 ; CHECK-DAG: 66 0f d5 c6 pmullw xmm0, xmm6 | 40 ; CHECK-DAG: 66 0f d5 c6 pmullw xmm0,xmm6 |
42 ; CHECK-DAG: 66 0f d5 c7 pmullw xmm0, xmm7 | 41 ; CHECK-DAG: 66 0f d5 c7 pmullw xmm0,xmm7 |
43 ; CHECK-DAG: 66 0f d5 44 24 70 pmullw xmm0, xmmword ptr [esp + 112] | 42 ; CHECK-DAG: 66 0f d5 44 24 70 pmullw xmm0,XMMWORD PTR [esp |
44 ; CHECK-DAG: 66 0f d5 8c 24 80 00 00 00 pmullw xmm1, xmmword ptr [esp + 128] | 43 ; CHECK-DAG: 66 0f d5 8c 24 80 00 00 00 pmullw xmm1,XMMWORD PTR [esp |
45 } | 44 } |
46 | 45 |
47 define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { | 46 define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { |
48 entry: | 47 entry: |
49 %res = mul <4 x i32> %arg0, %arg1 | 48 %res = mul <4 x i32> %arg0, %arg1 |
50 ret <4 x i32> %res | 49 ret <4 x i32> %res |
51 ; CHECK-LABEL: test_mul_v4i32 | 50 ; CHECK-LABEL: test_mul_v4i32 |
52 ; CHECK: 66 0f 38 40 c1 pmulld xmm0, xmm1 | 51 ; CHECK: 66 0f 38 40 c1 pmulld xmm0,xmm1 |
53 } | 52 } |
54 | 53 |
55 define <4 x i32> @test_mul_v4i32_more_regs(<4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, <4 x i32> %arg7, <4 x i32> %arg8) { | 54 define <4 x i32> @test_mul_v4i32_more_regs(<4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, <4 x i32> %arg7, <4 x i32> %arg8) { |
56 entry: | 55 entry: |
57 %res1 = mul <4 x i32> %arg0, %arg1 | 56 %res1 = mul <4 x i32> %arg0, %arg1 |
58 %res2 = mul <4 x i32> %arg0, %arg2 | 57 %res2 = mul <4 x i32> %arg0, %arg2 |
59 %res3 = mul <4 x i32> %arg0, %arg3 | 58 %res3 = mul <4 x i32> %arg0, %arg3 |
60 %res4 = mul <4 x i32> %arg0, %arg4 | 59 %res4 = mul <4 x i32> %arg0, %arg4 |
61 %res5 = mul <4 x i32> %arg0, %arg5 | 60 %res5 = mul <4 x i32> %arg0, %arg5 |
62 %res6 = mul <4 x i32> %arg0, %arg6 | 61 %res6 = mul <4 x i32> %arg0, %arg6 |
63 %res7 = mul <4 x i32> %arg0, %arg7 | 62 %res7 = mul <4 x i32> %arg0, %arg7 |
64 %res8 = mul <4 x i32> %arg0, %arg8 | 63 %res8 = mul <4 x i32> %arg0, %arg8 |
65 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 | 64 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 |
66 %res_acc2 = select <4 x i1> %cond, <4 x i32> %res3, <4 x i32> %res4 | 65 %res_acc2 = select <4 x i1> %cond, <4 x i32> %res3, <4 x i32> %res4 |
67 %res_acc3 = select <4 x i1> %cond, <4 x i32> %res5, <4 x i32> %res6 | 66 %res_acc3 = select <4 x i1> %cond, <4 x i32> %res5, <4 x i32> %res6 |
68 %res_acc4 = select <4 x i1> %cond, <4 x i32> %res7, <4 x i32> %res8 | 67 %res_acc4 = select <4 x i1> %cond, <4 x i32> %res7, <4 x i32> %res8 |
69 %res_acc1_3 = select <4 x i1> %cond, <4 x i32> %res_acc1, <4 x i32> %res_acc3 | 68 %res_acc1_3 = select <4 x i1> %cond, <4 x i32> %res_acc1, <4 x i32> %res_acc3 |
70 %res_acc2_4 = select <4 x i1> %cond, <4 x i32> %res_acc2, <4 x i32> %res_acc4 | 69 %res_acc2_4 = select <4 x i1> %cond, <4 x i32> %res_acc2, <4 x i32> %res_acc4 |
71 %res = select <4 x i1> %cond, <4 x i32> %res_acc1_3, <4 x i32> %res_acc2_4 | 70 %res = select <4 x i1> %cond, <4 x i32> %res_acc1_3, <4 x i32> %res_acc2_4 |
72 ret <4 x i32> %res | 71 ret <4 x i32> %res |
73 ; CHECK-LABEL: test_mul_v4i32_more_regs | 72 ; CHECK-LABEL: test_mul_v4i32_more_regs |
74 ; CHECK-DAG: 66 0f 38 40 c2 pmulld xmm0, xmm2 | 73 ; CHECK-DAG: 66 0f 38 40 c2 pmulld xmm0,xmm2 |
75 ; CHECK-DAG: 66 0f 38 40 c3 pmulld xmm0, xmm3 | 74 ; CHECK-DAG: 66 0f 38 40 c3 pmulld xmm0,xmm3 |
76 ; CHECK-DAG: 66 0f 38 40 c4 pmulld xmm0, xmm4 | 75 ; CHECK-DAG: 66 0f 38 40 c4 pmulld xmm0,xmm4 |
77 ; CHECK-DAG: 66 0f 38 40 c5 pmulld xmm0, xmm5 | 76 ; CHECK-DAG: 66 0f 38 40 c5 pmulld xmm0,xmm5 |
78 ; CHECK-DAG: 66 0f 38 40 c6 pmulld xmm0, xmm6 | 77 ; CHECK-DAG: 66 0f 38 40 c6 pmulld xmm0,xmm6 |
79 ; CHECK-DAG: 66 0f 38 40 c7 pmulld xmm0, xmm7 | 78 ; CHECK-DAG: 66 0f 38 40 c7 pmulld xmm0,xmm7 |
80 ; CHECK-DAG: 66 0f 38 40 44 24 70 pmulld xmm0, xmmword ptr [esp + 112] | 79 ; CHECK-DAG: 66 0f 38 40 44 24 70 pmulld xmm0,XMMWORD PTR [esp |
81 ; CHECK-DAG: 66 0f 38 40 8c 24 80 00 00 00 pmulld xmm1, xmmword ptr [esp + 128] | 80 ; CHECK-DAG: 66 0f 38 40 8c 24 80 00 00 00 pmulld xmm1,XMMWORD PTR [esp |
82 } | 81 } |
83 | 82 |
84 ; Test movq, which is used by atomic stores. | 83 ; Test movq, which is used by atomic stores. |
85 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) | 84 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) |
86 | 85 |
87 define void @test_atomic_store_64(i32 %iptr, i32 %iptr2, i32 %iptr3, i64 %v) { | 86 define void @test_atomic_store_64(i32 %iptr, i32 %iptr2, i32 %iptr3, i64 %v) { |
88 entry: | 87 entry: |
89 %ptr = inttoptr i32 %iptr to i64* | 88 %ptr = inttoptr i32 %iptr to i64* |
90 %ptr2 = inttoptr i32 %iptr2 to i64* | 89 %ptr2 = inttoptr i32 %iptr2 to i64* |
91 %ptr3 = inttoptr i32 %iptr3 to i64* | 90 %ptr3 = inttoptr i32 %iptr3 to i64* |
92 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) | 91 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) |
93 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) | 92 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) |
94 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) | 93 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) |
95 ret void | 94 ret void |
96 } | 95 } |
97 ; CHECK-LABEL: test_atomic_store_64 | 96 ; CHECK-LABEL: test_atomic_store_64 |
98 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0, qword ptr [esp] | 97 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0,QWORD PTR [esp] |
99 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0, qword ptr [esp + 8] | 98 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0,QWORD PTR [esp |
100 ; CHECK-DAG: 66 0f d6 0{{.*}} movq qword ptr [e{{.*}}], xmm0 | 99 ; CHECK-DAG: 66 0f d6 0{{.*}} movq QWORD PTR [e{{.*}}],xmm0 |
101 | 100 |
102 ; Test "movups" via vector stores and loads. | 101 ; Test "movups" via vector stores and loads. |
103 define void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, <16 x i8> %v) { | 102 define void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, <16 x i8> %v) { |
104 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 103 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
105 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 104 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
106 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 105 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
107 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 | 106 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 |
108 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 | 107 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 |
109 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 | 108 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 |
110 ret void | 109 ret void |
111 } | 110 } |
112 ; CHECK-LABEL: store_v16xI8 | 111 ; CHECK-LABEL: store_v16xI8 |
113 ; CHECK: 0f 11 0{{.*}} movups xmmword ptr [e{{.*}}], xmm0 | 112 ; CHECK: 0f 11 0{{.*}} movups XMMWORD PTR [e{{.*}}],xmm0 |
114 | 113 |
115 define <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { | 114 define <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { |
116 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 115 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
117 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 116 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
118 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 117 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
119 %res1 = load <16 x i8>* %addr2_v16xI8, align 1 | 118 %res1 = load <16 x i8>* %addr2_v16xI8, align 1 |
120 %res2 = load <16 x i8>* %addr_v16xI8, align 1 | 119 %res2 = load <16 x i8>* %addr_v16xI8, align 1 |
121 %res3 = load <16 x i8>* %addr3_v16xI8, align 1 | 120 %res3 = load <16 x i8>* %addr3_v16xI8, align 1 |
122 %res12 = add <16 x i8> %res1, %res2 | 121 %res12 = add <16 x i8> %res1, %res2 |
123 %res123 = add <16 x i8> %res12, %res3 | 122 %res123 = add <16 x i8> %res12, %res3 |
124 ret <16 x i8> %res123 | 123 ret <16 x i8> %res123 |
125 } | 124 } |
126 ; CHECK-LABEL: load_v16xI8 | 125 ; CHECK-LABEL: load_v16xI8 |
127 ; CHECK: 0f 10 0{{.*}} movups xmm0, xmmword ptr [e{{.*}}] | 126 ; CHECK: 0f 10 0{{.*}} movups xmm0,XMMWORD PTR [e{{.*}}] |
128 | 127 |
129 ; Test segment override prefix. This happens w/ nacl.read.tp. | 128 ; Test segment override prefix. This happens w/ nacl.read.tp. |
130 declare i8* @llvm.nacl.read.tp() | 129 declare i8* @llvm.nacl.read.tp() |
131 | 130 |
132 ; Also test more complex address operands via address-mode-optimization. | 131 ; Also test more complex address operands via address-mode-optimization. |
133 define i32 @test_nacl_read_tp_more_addressing() { | 132 define i32 @test_nacl_read_tp_more_addressing() { |
134 entry: | 133 entry: |
135 %ptr = call i8* @llvm.nacl.read.tp() | 134 %ptr = call i8* @llvm.nacl.read.tp() |
136 %__1 = ptrtoint i8* %ptr to i32 | 135 %__1 = ptrtoint i8* %ptr to i32 |
137 %x = add i32 %__1, %__1 | 136 %x = add i32 %__1, %__1 |
138 %__3 = inttoptr i32 %x to i32* | 137 %__3 = inttoptr i32 %x to i32* |
139 %v = load i32* %__3, align 1 | 138 %v = load i32* %__3, align 1 |
140 %v_add = add i32 %v, 1 | 139 %v_add = add i32 %v, 1 |
141 | 140 |
142 %ptr2 = call i8* @llvm.nacl.read.tp() | 141 %ptr2 = call i8* @llvm.nacl.read.tp() |
143 %__6 = ptrtoint i8* %ptr2 to i32 | 142 %__6 = ptrtoint i8* %ptr2 to i32 |
144 %y = add i32 %__6, -128 | 143 %y = add i32 %__6, -128 |
145 %__8 = inttoptr i32 %y to i32* | 144 %__8 = inttoptr i32 %y to i32* |
146 %v_add2 = add i32 %v, 4 | 145 %v_add2 = add i32 %v, 4 |
147 store i32 %v_add2, i32* %__8, align 1 | 146 store i32 %v_add2, i32* %__8, align 1 |
148 | 147 |
149 %z = add i32 %__6, 256 | 148 %z = add i32 %__6, 256 |
150 %__9 = inttoptr i32 %z to i32* | 149 %__9 = inttoptr i32 %z to i32* |
151 %v_add3 = add i32 %v, 91 | 150 %v_add3 = add i32 %v, 91 |
152 store i32 %v_add2, i32* %__9, align 1 | 151 store i32 %v_add2, i32* %__9, align 1 |
153 | 152 |
154 ret i32 %v | 153 ret i32 %v |
155 } | 154 } |
156 ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 155 ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
157 ; CHECK: 65 8b 05 00 00 00 00 mov eax, dword ptr gs:[0] | 156 ; CHECK: 65 8b 05 00 00 00 00 mov eax,DWORD PTR gs:0x0 |
158 ; CHECK: 8b 04 00 mov eax, dword ptr [eax + eax] | 157 ; CHECK: 8b 04 00 mov eax,DWORD PTR [eax+eax*1] |
159 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx, dword ptr gs:[0] | 158 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx,DWORD PTR gs:0x0 |
160 ; CHECK: 89 51 80 mov dword ptr [ecx - 128], edx | 159 ; CHECK: 89 51 80 mov DWORD PTR [ecx-0x80],edx |
161 ; CHECK: 89 91 00 01 00 00 mov dword ptr [ecx + 256], edx | 160 ; CHECK: 89 91 00 01 00 00 mov DWORD PTR [ecx+0x100],edx |
162 | 161 |
163 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from | 162 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from |
164 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). | 163 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). |
165 | 164 |
166 define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i32 %elt4) { | 165 define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i32 %elt4) { |
167 entry: | 166 entry: |
168 %elt12 = add i32 %elt1, %elt2 | 167 %elt12 = add i32 %elt1, %elt2 |
169 %elt34 = add i32 %elt3, %elt4 | 168 %elt34 = add i32 %elt3, %elt4 |
170 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 | 169 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 |
171 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 | 170 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 |
172 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 | 171 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 |
173 ret <4 x i32> %res3 | 172 ret <4 x i32> %res3 |
174 } | 173 } |
175 ; CHECK-LABEL: test_pinsrd: | 174 ; CHECK-LABEL: test_pinsrd |
176 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0, e{{.*}}, 1 | 175 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0,e{{.*}} |
177 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0, e{{.*}}, 2 | 176 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0,e{{.*}} |
178 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0, e{{.*}}, 3 | 177 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0,e{{.*}} |
179 | 178 |
180 define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { | 179 define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { |
181 entry: | 180 entry: |
182 %elt1 = trunc i32 %elt1_w to i8 | 181 %elt1 = trunc i32 %elt1_w to i8 |
183 %elt2 = trunc i32 %elt2_w to i8 | 182 %elt2 = trunc i32 %elt2_w to i8 |
184 %elt3 = trunc i32 %elt3_w to i8 | 183 %elt3 = trunc i32 %elt3_w to i8 |
185 %elt4 = trunc i32 %elt4_w to i8 | 184 %elt4 = trunc i32 %elt4_w to i8 |
186 %elt12 = add i8 %elt1, %elt2 | 185 %elt12 = add i8 %elt1, %elt2 |
187 %elt34 = add i8 %elt3, %elt4 | 186 %elt34 = add i8 %elt3, %elt4 |
188 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 | 187 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 |
189 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 | 188 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 |
190 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 | 189 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 |
191 ret <16 x i8> %res3 | 190 ret <16 x i8> %res3 |
192 } | 191 } |
193 ; CHECK-LABEL: test_pinsrb: | 192 ; CHECK-LABEL: test_pinsrb |
194 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0, e{{.*}}, 1 | 193 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0,e{{.*}} |
195 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0, e{{.*}}, 7 | 194 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0,e{{.*}} |
196 ; CHECK-DAG: 66 0f 3a 20 {{.*}} 0f pinsrb xmm0, byte ptr {{.*}}, 15 | 195 ; CHECK-DAG: 66 0f 3a 20 {{.*}} 0f pinsrb xmm0,BYTE PTR {{.*}} |
197 | 196 |
198 define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { | 197 define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { |
199 entry: | 198 entry: |
200 %elt1 = trunc i32 %elt1_w to i16 | 199 %elt1 = trunc i32 %elt1_w to i16 |
201 %elt2 = trunc i32 %elt2_w to i16 | 200 %elt2 = trunc i32 %elt2_w to i16 |
202 %elt3 = trunc i32 %elt3_w to i16 | 201 %elt3 = trunc i32 %elt3_w to i16 |
203 %elt4 = trunc i32 %elt4_w to i16 | 202 %elt4 = trunc i32 %elt4_w to i16 |
204 %elt12 = add i16 %elt1, %elt2 | 203 %elt12 = add i16 %elt1, %elt2 |
205 %elt34 = add i16 %elt3, %elt4 | 204 %elt34 = add i16 %elt3, %elt4 |
206 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 | 205 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 |
207 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 | 206 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 |
208 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 | 207 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 |
209 ret <8 x i16> %res3 | 208 ret <8 x i16> %res3 |
210 } | 209 } |
211 ; CHECK-LABEL: test_pinsrw: | 210 ; CHECK-LABEL: test_pinsrw |
212 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0, e{{.*}}, 1 | 211 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0,e{{.*}} |
213 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0, e{{.*}}, 4 | 212 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0,e{{.*}} |
214 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0, e{{.*}}, 7 | 213 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0,e{{.*}} |
215 | 214 |
216 define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4) { | 215 define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4) { |
217 entry: | 216 entry: |
218 switch i32 %c, label %three [i32 0, label %zero | 217 switch i32 %c, label %three [i32 0, label %zero |
219 i32 1, label %one | 218 i32 1, label %one |
220 i32 2, label %two] | 219 i32 2, label %two] |
221 zero: | 220 zero: |
222 %res0 = extractelement <4 x i32> %vec1, i32 0 | 221 %res0 = extractelement <4 x i32> %vec1, i32 0 |
223 ret i32 %res0 | 222 ret i32 %res0 |
224 one: | 223 one: |
225 %res1 = extractelement <4 x i32> %vec2, i32 1 | 224 %res1 = extractelement <4 x i32> %vec2, i32 1 |
226 ret i32 %res1 | 225 ret i32 %res1 |
227 two: | 226 two: |
228 %res2 = extractelement <4 x i32> %vec3, i32 2 | 227 %res2 = extractelement <4 x i32> %vec3, i32 2 |
229 ret i32 %res2 | 228 ret i32 %res2 |
230 three: | 229 three: |
231 %res3 = extractelement <4 x i32> %vec4, i32 3 | 230 %res3 = extractelement <4 x i32> %vec4, i32 3 |
232 ret i32 %res3 | 231 ret i32 %res3 |
233 } | 232 } |
234 ; CHECK-LABEL: test_pextrd | 233 ; CHECK-LABEL: test_pextrd |
235 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax, xmm0, 0 | 234 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax,xmm0 |
236 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax, xmm1, 1 | 235 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax,xmm1 |
237 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax, xmm2, 2 | 236 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax,xmm2 |
238 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax, xmm3, 3 | 237 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax,xmm3 |
239 | 238 |
240 define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, <16 x i8> %vec4) { | 239 define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, <16 x i8> %vec4) { |
241 entry: | 240 entry: |
242 switch i32 %c, label %three [i32 0, label %zero | 241 switch i32 %c, label %three [i32 0, label %zero |
243 i32 1, label %one | 242 i32 1, label %one |
244 i32 2, label %two] | 243 i32 2, label %two] |
245 zero: | 244 zero: |
246 %res0 = extractelement <16 x i8> %vec1, i32 0 | 245 %res0 = extractelement <16 x i8> %vec1, i32 0 |
247 %res0_ext = zext i8 %res0 to i32 | 246 %res0_ext = zext i8 %res0 to i32 |
248 ret i32 %res0_ext | 247 ret i32 %res0_ext |
249 one: | 248 one: |
250 %res1 = extractelement <16 x i8> %vec2, i32 6 | 249 %res1 = extractelement <16 x i8> %vec2, i32 6 |
251 %res1_ext = zext i8 %res1 to i32 | 250 %res1_ext = zext i8 %res1 to i32 |
252 ret i32 %res1_ext | 251 ret i32 %res1_ext |
253 two: | 252 two: |
254 %res2 = extractelement <16 x i8> %vec3, i32 12 | 253 %res2 = extractelement <16 x i8> %vec3, i32 12 |
255 %res2_ext = zext i8 %res2 to i32 | 254 %res2_ext = zext i8 %res2 to i32 |
256 ret i32 %res2_ext | 255 ret i32 %res2_ext |
257 three: | 256 three: |
258 %res3 = extractelement <16 x i8> %vec4, i32 15 | 257 %res3 = extractelement <16 x i8> %vec4, i32 15 |
259 %res3_ext = zext i8 %res3 to i32 | 258 %res3_ext = zext i8 %res3 to i32 |
260 ret i32 %res3_ext | 259 ret i32 %res3_ext |
261 } | 260 } |
262 ; CHECK-LABEL: test_pextrb | 261 ; CHECK-LABEL: test_pextrb |
263 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax, xmm0, 0 | 262 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax,xmm0 |
264 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax, xmm1, 6 | 263 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax,xmm1 |
265 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax, xmm2, 12 | 264 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax,xmm2 |
266 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax, xmm3, 15 | 265 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax,xmm3 |
267 | 266 |
268 define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, <8 x i16> %vec4) { | 267 define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, <8 x i16> %vec4) { |
269 entry: | 268 entry: |
270 switch i32 %c, label %three [i32 0, label %zero | 269 switch i32 %c, label %three [i32 0, label %zero |
271 i32 1, label %one | 270 i32 1, label %one |
272 i32 2, label %two] | 271 i32 2, label %two] |
273 zero: | 272 zero: |
274 %res0 = extractelement <8 x i16> %vec1, i32 0 | 273 %res0 = extractelement <8 x i16> %vec1, i32 0 |
275 %res0_ext = zext i16 %res0 to i32 | 274 %res0_ext = zext i16 %res0 to i32 |
276 ret i32 %res0_ext | 275 ret i32 %res0_ext |
277 one: | 276 one: |
278 %res1 = extractelement <8 x i16> %vec2, i32 2 | 277 %res1 = extractelement <8 x i16> %vec2, i32 2 |
279 %res1_ext = zext i16 %res1 to i32 | 278 %res1_ext = zext i16 %res1 to i32 |
280 ret i32 %res1_ext | 279 ret i32 %res1_ext |
281 two: | 280 two: |
282 %res2 = extractelement <8 x i16> %vec3, i32 5 | 281 %res2 = extractelement <8 x i16> %vec3, i32 5 |
283 %res2_ext = zext i16 %res2 to i32 | 282 %res2_ext = zext i16 %res2 to i32 |
284 ret i32 %res2_ext | 283 ret i32 %res2_ext |
285 three: | 284 three: |
286 %res3 = extractelement <8 x i16> %vec4, i32 7 | 285 %res3 = extractelement <8 x i16> %vec4, i32 7 |
287 %res3_ext = zext i16 %res3 to i32 | 286 %res3_ext = zext i16 %res3 to i32 |
288 ret i32 %res3_ext | 287 ret i32 %res3_ext |
289 } | 288 } |
290 ; CHECK-LABEL: test_pextrw | 289 ; CHECK-LABEL: test_pextrw |
291 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax, xmm0, 0 | 290 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax,xmm0 |
292 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax, xmm1, 2 | 291 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax,xmm1 |
293 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax, xmm2, 5 | 292 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax,xmm2 |
294 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax, xmm3, 7 | 293 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax,xmm3 |
OLD | NEW |