| OLD | NEW |
| 1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like | 1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like |
| 2 ; those for pmull vary more wildly depending on operand size (rather than | 2 ; those for pmull vary more wildly depending on operand size (rather than |
| 3 ; follow a usual pattern). | 3 ; follow a usual pattern). |
| 4 | 4 |
| 5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 -mattr=sse4.1 \ | 5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 -mattr=sse4.1 \ |
| 6 ; RUN: -sandbox | FileCheck %s | 6 ; RUN: -sandbox | FileCheck %s |
| 7 | 7 |
| 8 define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { | 8 define internal <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { |
| 9 entry: | 9 entry: |
| 10 %res = mul <8 x i16> %arg0, %arg1 | 10 %res = mul <8 x i16> %arg0, %arg1 |
| 11 ret <8 x i16> %res | 11 ret <8 x i16> %res |
| 12 ; CHECK-LABEL: test_mul_v8i16 | 12 ; CHECK-LABEL: test_mul_v8i16 |
| 13 ; CHECK: 66 0f d5 c1 pmullw xmm0,xmm1 | 13 ; CHECK: 66 0f d5 c1 pmullw xmm0,xmm1 |
| 14 } | 14 } |
| 15 | 15 |
| 16 ; Test register and address mode encoding. | 16 ; Test register and address mode encoding. |
| 17 define <8 x i16> @test_mul_v8i16_more_regs(<8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, <8 x i16> %arg7, <8 x i16> %arg8) { | 17 define internal <8 x i16> @test_mul_v8i16_more_regs( |
| 18 <8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, |
| 19 <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, |
| 20 <8 x i16> %arg7, <8 x i16> %arg8) { |
| 18 entry: | 21 entry: |
| 19 %res1 = sub <8 x i16> %arg0, %arg1 | 22 %res1 = sub <8 x i16> %arg0, %arg1 |
| 20 %res2 = sub <8 x i16> %arg0, %arg2 | 23 %res2 = sub <8 x i16> %arg0, %arg2 |
| 21 %res3 = sub <8 x i16> %arg0, %arg3 | 24 %res3 = sub <8 x i16> %arg0, %arg3 |
| 22 %res4 = sub <8 x i16> %arg0, %arg4 | 25 %res4 = sub <8 x i16> %arg0, %arg4 |
| 23 %res5 = sub <8 x i16> %arg0, %arg5 | 26 %res5 = sub <8 x i16> %arg0, %arg5 |
| 24 %res6 = sub <8 x i16> %arg0, %arg6 | 27 %res6 = sub <8 x i16> %arg0, %arg6 |
| 25 %res7 = sub <8 x i16> %arg0, %arg7 | 28 %res7 = sub <8 x i16> %arg0, %arg7 |
| 26 %res8 = sub <8 x i16> %arg0, %arg8 | 29 %res8 = sub <8 x i16> %arg0, %arg8 |
| 27 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 | 30 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 |
| 28 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 | 31 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 |
| 29 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 | 32 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 |
| 30 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 | 33 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 |
| 31 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 | 34 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 |
| 32 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 | 35 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 |
| 33 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 | 36 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 |
| 34 ret <8 x i16> %res | 37 ret <8 x i16> %res |
| 35 ; CHECK-LABEL: test_mul_v8i16_more_regs | 38 ; CHECK-LABEL: test_mul_v8i16_more_regs |
| 36 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 39 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 37 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 40 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 38 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 41 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 39 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 42 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 40 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 43 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 41 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 44 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 42 ; CHECK-DAG: psubw xmm0,XMMWORD PTR [esp | 45 ; CHECK-DAG: psubw xmm0,XMMWORD PTR [esp |
| 43 ; CHECK-DAG: psubw xmm1,XMMWORD PTR [esp | 46 ; CHECK-DAG: psubw xmm1,XMMWORD PTR [esp |
| 44 } | 47 } |
| 45 | 48 |
| 46 define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { | 49 define internal <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { |
| 47 entry: | 50 entry: |
| 48 %res = mul <4 x i32> %arg0, %arg1 | 51 %res = mul <4 x i32> %arg0, %arg1 |
| 49 ret <4 x i32> %res | 52 ret <4 x i32> %res |
| 50 ; CHECK-LABEL: test_mul_v4i32 | 53 ; CHECK-LABEL: test_mul_v4i32 |
| 51 ; CHECK: 66 0f 38 40 c1 pmulld xmm0,xmm1 | 54 ; CHECK: 66 0f 38 40 c1 pmulld xmm0,xmm1 |
| 52 } | 55 } |
| 53 | 56 |
| 54 define <4 x i32> @test_mul_v4i32_more_regs(<4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, <4 x i32> %arg7, <4 x i32> %arg8) { | 57 define internal <4 x i32> @test_mul_v4i32_more_regs( |
| 58 <4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, |
| 59 <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, |
| 60 <4 x i32> %arg7, <4 x i32> %arg8) { |
| 55 entry: | 61 entry: |
| 56 %res1 = sub <4 x i32> %arg0, %arg1 | 62 %res1 = sub <4 x i32> %arg0, %arg1 |
| 57 %res2 = sub <4 x i32> %arg0, %arg2 | 63 %res2 = sub <4 x i32> %arg0, %arg2 |
| 58 %res3 = sub <4 x i32> %arg0, %arg3 | 64 %res3 = sub <4 x i32> %arg0, %arg3 |
| 59 %res4 = sub <4 x i32> %arg0, %arg4 | 65 %res4 = sub <4 x i32> %arg0, %arg4 |
| 60 %res5 = sub <4 x i32> %arg0, %arg5 | 66 %res5 = sub <4 x i32> %arg0, %arg5 |
| 61 %res6 = sub <4 x i32> %arg0, %arg6 | 67 %res6 = sub <4 x i32> %arg0, %arg6 |
| 62 %res7 = sub <4 x i32> %arg0, %arg7 | 68 %res7 = sub <4 x i32> %arg0, %arg7 |
| 63 %res8 = sub <4 x i32> %arg0, %arg8 | 69 %res8 = sub <4 x i32> %arg0, %arg8 |
| 64 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 | 70 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 |
| (...skipping 11 matching lines...) |
| 76 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 82 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 77 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 83 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 78 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 84 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
| 79 ; CHECK-DAG: psubd xmm0,XMMWORD PTR [esp | 85 ; CHECK-DAG: psubd xmm0,XMMWORD PTR [esp |
| 80 ; CHECK-DAG: psubd xmm1,XMMWORD PTR [esp | 86 ; CHECK-DAG: psubd xmm1,XMMWORD PTR [esp |
| 81 } | 87 } |
| 82 | 88 |
| 83 ; Test movq, which is used by atomic stores. | 89 ; Test movq, which is used by atomic stores. |
| 84 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) | 90 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) |
| 85 | 91 |
| 86 define void @test_atomic_store_64(i32 %iptr, i32 %iptr2, i32 %iptr3, i64 %v) { | 92 define internal void @test_atomic_store_64(i32 %iptr, i32 %iptr2, |
| 93 i32 %iptr3, i64 %v) { |
| 87 entry: | 94 entry: |
| 88 %ptr = inttoptr i32 %iptr to i64* | 95 %ptr = inttoptr i32 %iptr to i64* |
| 89 %ptr2 = inttoptr i32 %iptr2 to i64* | 96 %ptr2 = inttoptr i32 %iptr2 to i64* |
| 90 %ptr3 = inttoptr i32 %iptr3 to i64* | 97 %ptr3 = inttoptr i32 %iptr3 to i64* |
| 91 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) | 98 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) |
| 92 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) | 99 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) |
| 93 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) | 100 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) |
| 94 ret void | 101 ret void |
| 95 } | 102 } |
| 96 ; CHECK-LABEL: test_atomic_store_64 | 103 ; CHECK-LABEL: test_atomic_store_64 |
| 97 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0,QWORD PTR [esp] | 104 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0,QWORD PTR [esp] |
| 98 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0,QWORD PTR [esp | 105 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0,QWORD PTR [esp |
| 99 ; CHECK-DAG: 66 0f d6 0{{.*}} movq QWORD PTR [e{{.*}}],xmm0 | 106 ; CHECK-DAG: 66 0f d6 0{{.*}} movq QWORD PTR [e{{.*}}],xmm0 |
| 100 | 107 |
| 101 ; Test "movups" via vector stores and loads. | 108 ; Test "movups" via vector stores and loads. |
| 102 define void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, <16 x i8> %v) { | 109 define internal void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, |
| 110 <16 x i8> %v) { |
| 103 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 111 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
| 104 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 112 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
| 105 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 113 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
| 106 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 | 114 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 |
| 107 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 | 115 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 |
| 108 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 | 116 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 |
| 109 ret void | 117 ret void |
| 110 } | 118 } |
| 111 ; CHECK-LABEL: store_v16xI8 | 119 ; CHECK-LABEL: store_v16xI8 |
| 112 ; CHECK: 0f 11 0{{.*}} movups XMMWORD PTR [e{{.*}}],xmm0 | 120 ; CHECK: 0f 11 0{{.*}} movups XMMWORD PTR [e{{.*}}],xmm0 |
| 113 | 121 |
| 114 define <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { | 122 define internal <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { |
| 115 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 123 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
| 116 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 124 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
| 117 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 125 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
| 118 %res1 = load <16 x i8>, <16 x i8>* %addr2_v16xI8, align 1 | 126 %res1 = load <16 x i8>, <16 x i8>* %addr2_v16xI8, align 1 |
| 119 %res2 = load <16 x i8>, <16 x i8>* %addr_v16xI8, align 1 | 127 %res2 = load <16 x i8>, <16 x i8>* %addr_v16xI8, align 1 |
| 120 %res3 = load <16 x i8>, <16 x i8>* %addr3_v16xI8, align 1 | 128 %res3 = load <16 x i8>, <16 x i8>* %addr3_v16xI8, align 1 |
| 121 %res12 = add <16 x i8> %res1, %res2 | 129 %res12 = add <16 x i8> %res1, %res2 |
| 122 %res123 = add <16 x i8> %res12, %res3 | 130 %res123 = add <16 x i8> %res12, %res3 |
| 123 ret <16 x i8> %res123 | 131 ret <16 x i8> %res123 |
| 124 } | 132 } |
| 125 ; CHECK-LABEL: load_v16xI8 | 133 ; CHECK-LABEL: load_v16xI8 |
| 126 ; CHECK: 0f 10 0{{.*}} movups xmm0,XMMWORD PTR [e{{.*}}] | 134 ; CHECK: 0f 10 0{{.*}} movups xmm0,XMMWORD PTR [e{{.*}}] |
| 127 | 135 |
| 128 ; Test segment override prefix. This happens w/ nacl.read.tp. | 136 ; Test segment override prefix. This happens w/ nacl.read.tp. |
| 129 declare i8* @llvm.nacl.read.tp() | 137 declare i8* @llvm.nacl.read.tp() |
| 130 | 138 |
| 131 ; Also test more complex address operands via address-mode optimization. | 139 ; Also test more complex address operands via address-mode optimization. |
| 132 define i32 @test_nacl_read_tp_more_addressing() { | 140 define internal i32 @test_nacl_read_tp_more_addressing() { |
| 133 entry: | 141 entry: |
| 134 %ptr = call i8* @llvm.nacl.read.tp() | 142 %ptr = call i8* @llvm.nacl.read.tp() |
| 135 %__1 = ptrtoint i8* %ptr to i32 | 143 %__1 = ptrtoint i8* %ptr to i32 |
| 136 %x = add i32 %__1, %__1 | 144 %x = add i32 %__1, %__1 |
| 137 %__3 = inttoptr i32 %x to i32* | 145 %__3 = inttoptr i32 %x to i32* |
| 138 %v = load i32, i32* %__3, align 1 | 146 %v = load i32, i32* %__3, align 1 |
| 139 %v_add = add i32 %v, 1 | 147 %v_add = add i32 %v, 1 |
| 140 | 148 |
| 141 %ptr2 = call i8* @llvm.nacl.read.tp() | 149 %ptr2 = call i8* @llvm.nacl.read.tp() |
| 142 %__6 = ptrtoint i8* %ptr2 to i32 | 150 %__6 = ptrtoint i8* %ptr2 to i32 |
| (...skipping 12 matching lines...) |
| 155 ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 163 ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
| 156 ; CHECK: 65 8b 05 00 00 00 00 mov eax,DWORD PTR gs:0x0 | 164 ; CHECK: 65 8b 05 00 00 00 00 mov eax,DWORD PTR gs:0x0 |
| 157 ; CHECK: 8b 04 00 mov eax,DWORD PTR [eax+eax*1] | 165 ; CHECK: 8b 04 00 mov eax,DWORD PTR [eax+eax*1] |
| 158 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx,DWORD PTR gs:0x0 | 166 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx,DWORD PTR gs:0x0 |
| 159 ; CHECK: 89 51 80 mov DWORD PTR [ecx-0x80],edx | 167 ; CHECK: 89 51 80 mov DWORD PTR [ecx-0x80],edx |
| 160 ; CHECK: 89 91 00 01 00 00 mov DWORD PTR [ecx+0x100],edx | 168 ; CHECK: 89 91 00 01 00 00 mov DWORD PTR [ecx+0x100],edx |
| 161 | 169 |
| 162 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from | 170 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from |
| 163 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). | 171 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). |
| 164 | 172 |
| 165 define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i32 %elt4) { | 173 define internal <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, |
| 174 i32 %elt3, i32 %elt4) { |
| 166 entry: | 175 entry: |
| 167 %elt12 = add i32 %elt1, %elt2 | 176 %elt12 = add i32 %elt1, %elt2 |
| 168 %elt34 = add i32 %elt3, %elt4 | 177 %elt34 = add i32 %elt3, %elt4 |
| 169 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 | 178 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 |
| 170 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 | 179 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 |
| 171 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 | 180 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 |
| 172 ret <4 x i32> %res3 | 181 ret <4 x i32> %res3 |
| 173 } | 182 } |
| 174 ; CHECK-LABEL: test_pinsrd | 183 ; CHECK-LABEL: test_pinsrd |
| 175 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0,e{{.*}} | 184 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0,e{{.*}} |
| 176 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0,e{{.*}} | 185 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0,e{{.*}} |
| 177 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0,e{{.*}} | 186 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0,e{{.*}} |
| 178 | 187 |
| 179 define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { | 188 define internal <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, |
| 189 i32 %elt3_w, i32 %elt4_w) { |
| 180 entry: | 190 entry: |
| 181 %elt1 = trunc i32 %elt1_w to i8 | 191 %elt1 = trunc i32 %elt1_w to i8 |
| 182 %elt2 = trunc i32 %elt2_w to i8 | 192 %elt2 = trunc i32 %elt2_w to i8 |
| 183 %elt3 = trunc i32 %elt3_w to i8 | 193 %elt3 = trunc i32 %elt3_w to i8 |
| 184 %elt4 = trunc i32 %elt4_w to i8 | 194 %elt4 = trunc i32 %elt4_w to i8 |
| 185 %elt12 = add i8 %elt1, %elt2 | 195 %elt12 = add i8 %elt1, %elt2 |
| 186 %elt34 = add i8 %elt3, %elt4 | 196 %elt34 = add i8 %elt3, %elt4 |
| 187 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 | 197 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 |
| 188 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 | 198 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 |
| 189 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 | 199 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 |
| 190 ret <16 x i8> %res3 | 200 ret <16 x i8> %res3 |
| 191 } | 201 } |
| 192 ; CHECK-LABEL: test_pinsrb | 202 ; CHECK-LABEL: test_pinsrb |
| 193 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0,e{{.*}} | 203 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0,e{{.*}} |
| 194 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0,e{{.*}} | 204 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0,e{{.*}} |
| 195 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 0f pinsrb xmm0,e{{.*}} | 205 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 0f pinsrb xmm0,e{{.*}} |
| 196 | 206 |
| 197 define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) { | 207 define internal <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, |
| 208 i32 %elt3_w, i32 %elt4_w) { |
| 198 entry: | 209 entry: |
| 199 %elt1 = trunc i32 %elt1_w to i16 | 210 %elt1 = trunc i32 %elt1_w to i16 |
| 200 %elt2 = trunc i32 %elt2_w to i16 | 211 %elt2 = trunc i32 %elt2_w to i16 |
| 201 %elt3 = trunc i32 %elt3_w to i16 | 212 %elt3 = trunc i32 %elt3_w to i16 |
| 202 %elt4 = trunc i32 %elt4_w to i16 | 213 %elt4 = trunc i32 %elt4_w to i16 |
| 203 %elt12 = add i16 %elt1, %elt2 | 214 %elt12 = add i16 %elt1, %elt2 |
| 204 %elt34 = add i16 %elt3, %elt4 | 215 %elt34 = add i16 %elt3, %elt4 |
| 205 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 | 216 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 |
| 206 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 | 217 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 |
| 207 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 | 218 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 |
| 208 ret <8 x i16> %res3 | 219 ret <8 x i16> %res3 |
| 209 } | 220 } |
| 210 ; CHECK-LABEL: test_pinsrw | 221 ; CHECK-LABEL: test_pinsrw |
| 211 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0,e{{.*}} | 222 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0,e{{.*}} |
| 212 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0,e{{.*}} | 223 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0,e{{.*}} |
| 213 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0,e{{.*}} | 224 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0,e{{.*}} |
| 214 | 225 |
| 215 define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4) { | 226 define internal i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, |
| 227 <4 x i32> %vec3, <4 x i32> %vec4) { |
| 216 entry: | 228 entry: |
| 217 switch i32 %c, label %three [i32 0, label %zero | 229 switch i32 %c, label %three [i32 0, label %zero |
| 218 i32 1, label %one | 230 i32 1, label %one |
| 219 i32 2, label %two] | 231 i32 2, label %two] |
| 220 zero: | 232 zero: |
| 221 %res0 = extractelement <4 x i32> %vec1, i32 0 | 233 %res0 = extractelement <4 x i32> %vec1, i32 0 |
| 222 ret i32 %res0 | 234 ret i32 %res0 |
| 223 one: | 235 one: |
| 224 %res1 = extractelement <4 x i32> %vec2, i32 1 | 236 %res1 = extractelement <4 x i32> %vec2, i32 1 |
| 225 ret i32 %res1 | 237 ret i32 %res1 |
| 226 two: | 238 two: |
| 227 %res2 = extractelement <4 x i32> %vec3, i32 2 | 239 %res2 = extractelement <4 x i32> %vec3, i32 2 |
| 228 ret i32 %res2 | 240 ret i32 %res2 |
| 229 three: | 241 three: |
| 230 %res3 = extractelement <4 x i32> %vec4, i32 3 | 242 %res3 = extractelement <4 x i32> %vec4, i32 3 |
| 231 ret i32 %res3 | 243 ret i32 %res3 |
| 232 } | 244 } |
| 233 ; CHECK-LABEL: test_pextrd | 245 ; CHECK-LABEL: test_pextrd |
| 234 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax,xmm0 | 246 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax,xmm0 |
| 235 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax,xmm1 | 247 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax,xmm1 |
| 236 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax,xmm2 | 248 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax,xmm2 |
| 237 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax,xmm3 | 249 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax,xmm3 |
| 238 | 250 |
| 239 define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, <16 x i8> %vec4) { | 251 define internal i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, |
| 252 <16 x i8> %vec3, <16 x i8> %vec4) { |
| 240 entry: | 253 entry: |
| 241 switch i32 %c, label %three [i32 0, label %zero | 254 switch i32 %c, label %three [i32 0, label %zero |
| 242 i32 1, label %one | 255 i32 1, label %one |
| 243 i32 2, label %two] | 256 i32 2, label %two] |
| 244 zero: | 257 zero: |
| 245 %res0 = extractelement <16 x i8> %vec1, i32 0 | 258 %res0 = extractelement <16 x i8> %vec1, i32 0 |
| 246 %res0_ext = zext i8 %res0 to i32 | 259 %res0_ext = zext i8 %res0 to i32 |
| 247 ret i32 %res0_ext | 260 ret i32 %res0_ext |
| 248 one: | 261 one: |
| 249 %res1 = extractelement <16 x i8> %vec2, i32 6 | 262 %res1 = extractelement <16 x i8> %vec2, i32 6 |
| 250 %res1_ext = zext i8 %res1 to i32 | 263 %res1_ext = zext i8 %res1 to i32 |
| 251 ret i32 %res1_ext | 264 ret i32 %res1_ext |
| 252 two: | 265 two: |
| 253 %res2 = extractelement <16 x i8> %vec3, i32 12 | 266 %res2 = extractelement <16 x i8> %vec3, i32 12 |
| 254 %res2_ext = zext i8 %res2 to i32 | 267 %res2_ext = zext i8 %res2 to i32 |
| 255 ret i32 %res2_ext | 268 ret i32 %res2_ext |
| 256 three: | 269 three: |
| 257 %res3 = extractelement <16 x i8> %vec4, i32 15 | 270 %res3 = extractelement <16 x i8> %vec4, i32 15 |
| 258 %res3_ext = zext i8 %res3 to i32 | 271 %res3_ext = zext i8 %res3 to i32 |
| 259 ret i32 %res3_ext | 272 ret i32 %res3_ext |
| 260 } | 273 } |
| 261 ; CHECK-LABEL: test_pextrb | 274 ; CHECK-LABEL: test_pextrb |
| 262 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax,xmm0 | 275 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax,xmm0 |
| 263 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax,xmm1 | 276 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax,xmm1 |
| 264 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax,xmm2 | 277 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax,xmm2 |
| 265 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax,xmm3 | 278 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax,xmm3 |
| 266 | 279 |
| 267 define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, <8 x i16> %vec4) { | 280 define internal i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, |
| 281 <8 x i16> %vec3, <8 x i16> %vec4) { |
| 268 entry: | 282 entry: |
| 269 switch i32 %c, label %three [i32 0, label %zero | 283 switch i32 %c, label %three [i32 0, label %zero |
| 270 i32 1, label %one | 284 i32 1, label %one |
| 271 i32 2, label %two] | 285 i32 2, label %two] |
| 272 zero: | 286 zero: |
| 273 %res0 = extractelement <8 x i16> %vec1, i32 0 | 287 %res0 = extractelement <8 x i16> %vec1, i32 0 |
| 274 %res0_ext = zext i16 %res0 to i32 | 288 %res0_ext = zext i16 %res0 to i32 |
| 275 ret i32 %res0_ext | 289 ret i32 %res0_ext |
| 276 one: | 290 one: |
| 277 %res1 = extractelement <8 x i16> %vec2, i32 2 | 291 %res1 = extractelement <8 x i16> %vec2, i32 2 |
| 278 %res1_ext = zext i16 %res1 to i32 | 292 %res1_ext = zext i16 %res1 to i32 |
| 279 ret i32 %res1_ext | 293 ret i32 %res1_ext |
| 280 two: | 294 two: |
| 281 %res2 = extractelement <8 x i16> %vec3, i32 5 | 295 %res2 = extractelement <8 x i16> %vec3, i32 5 |
| 282 %res2_ext = zext i16 %res2 to i32 | 296 %res2_ext = zext i16 %res2 to i32 |
| 283 ret i32 %res2_ext | 297 ret i32 %res2_ext |
| 284 three: | 298 three: |
| 285 %res3 = extractelement <8 x i16> %vec4, i32 7 | 299 %res3 = extractelement <8 x i16> %vec4, i32 7 |
| 286 %res3_ext = zext i16 %res3 to i32 | 300 %res3_ext = zext i16 %res3 to i32 |
| 287 ret i32 %res3_ext | 301 ret i32 %res3_ext |
| 288 } | 302 } |
| 289 ; CHECK-LABEL: test_pextrw | 303 ; CHECK-LABEL: test_pextrw |
| 290 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax,xmm0 | 304 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax,xmm0 |
| 291 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax,xmm1 | 305 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax,xmm1 |
| 292 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax,xmm2 | 306 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax,xmm2 |
| 293 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax,xmm3 | 307 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax,xmm3 |
| OLD | NEW |