OLD | NEW |
1 ; Test the a=b*b lowering sequence which can use a single temporary register | 1 ; Test the a=b*b lowering sequence which can use a single temporary register |
2 ; instead of two registers. | 2 ; instead of two registers. |
3 | 3 |
4 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 4 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
5 ; RUN: --target x8632 -i %s --args -O2 -mattr=sse4.1 \ | 5 ; RUN: --target x8632 -i %s --args -O2 -mattr=sse4.1 \ |
6 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 6 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
7 | 7 |
8 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 8 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
9 ; RUN: --target x8632 -i %s --args -Om1 -mattr=sse4.1 \ | 9 ; RUN: --target x8632 -i %s --args -Om1 -mattr=sse4.1 \ |
10 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 10 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
(...skipping 15 matching lines...) Expand all Loading... |
26 ; CHECK: mulsd [[REG:xmm.]],[[REG]] | 26 ; CHECK: mulsd [[REG:xmm.]],[[REG]] |
27 | 27 |
28 define internal i32 @Square_i32(i32 %a) { | 28 define internal i32 @Square_i32(i32 %a) { |
29 entry: | 29 entry: |
30 %result = mul i32 %a, %a | 30 %result = mul i32 %a, %a |
31 ret i32 %result | 31 ret i32 %result |
32 } | 32 } |
33 ; CHECK-LABEL: Square_i32 | 33 ; CHECK-LABEL: Square_i32 |
34 ; CHECK: imul [[REG:e..]],[[REG]] | 34 ; CHECK: imul [[REG:e..]],[[REG]] |
35 | 35 |
36 define internal i16 @Square_i16(i16 %a) { | 36 define internal i32 @Square_i16(i32 %a) { |
37 entry: | 37 entry: |
38 %result = mul i16 %a, %a | 38 %a.16 = trunc i32 %a to i16 |
39 ret i16 %result | 39 %result = mul i16 %a.16, %a.16 |
| 40 %result.i32 = sext i16 %result to i32 |
| 41 ret i32 %result.i32 |
40 } | 42 } |
41 ; CHECK-LABEL: Square_i16 | 43 ; CHECK-LABEL: Square_i16 |
42 ; CHECK: imul [[REG:..]],[[REG]] | 44 ; CHECK: imul [[REG:..]],[[REG]] |
43 | 45 |
44 define internal i8 @Square_i8(i8 %a) { | 46 define internal i32 @Square_i8(i32 %a) { |
45 entry: | 47 entry: |
46 %result = mul i8 %a, %a | 48 %a.8 = trunc i32 %a to i8 |
47 ret i8 %result | 49 %result = mul i8 %a.8, %a.8 |
| 50 %result.i32 = sext i8 %result to i32 |
| 51 ret i32 %result.i32 |
48 } | 52 } |
49 ; CHECK-LABEL: Square_i8 | 53 ; CHECK-LABEL: Square_i8 |
50 ; CHECK: imul al | 54 ; CHECK: imul al |
51 | 55 |
52 define internal <4 x float> @Square_v4f32(<4 x float> %a) { | 56 define internal <4 x float> @Square_v4f32(<4 x float> %a) { |
53 entry: | 57 entry: |
54 %result = fmul <4 x float> %a, %a | 58 %result = fmul <4 x float> %a, %a |
55 ret <4 x float> %result | 59 ret <4 x float> %result |
56 } | 60 } |
57 ; CHECK-LABEL: Square_v4f32 | 61 ; CHECK-LABEL: Square_v4f32 |
(...skipping 15 matching lines...) Expand all Loading... |
73 ; CHECK-LABEL: Square_v8i16 | 77 ; CHECK-LABEL: Square_v8i16 |
74 ; CHECK: pmullw [[REG:xmm.]],[[REG]] | 78 ; CHECK: pmullw [[REG:xmm.]],[[REG]] |
75 | 79 |
76 define internal <16 x i8> @Square_v16i8(<16 x i8> %a) { | 80 define internal <16 x i8> @Square_v16i8(<16 x i8> %a) { |
77 entry: | 81 entry: |
78 %result = mul <16 x i8> %a, %a | 82 %result = mul <16 x i8> %a, %a |
79 ret <16 x i8> %result | 83 ret <16 x i8> %result |
80 } | 84 } |
81 ; CHECK-LABEL: Square_v16i8 | 85 ; CHECK-LABEL: Square_v16i8 |
82 ; CHECK-NOT: pmul | 86 ; CHECK-NOT: pmul |
OLD | NEW |