OLD | NEW |
1 ; Test the a=b*b lowering sequence which can use a single temporary register | 1 ; Test the a=b*b lowering sequence which can use a single temporary register |
2 ; instead of two registers. | 2 ; instead of two registers. |
3 | 3 |
4 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 4 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
5 ; RUN: --target x8632 -i %s --args -O2 -mattr=sse4.1 \ | 5 ; RUN: --target x8632 -i %s --args -O2 -mattr=sse4.1 \ |
6 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 6 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
7 | 7 |
8 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 8 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
9 ; RUN: --target x8632 -i %s --args -Om1 -mattr=sse4.1 \ | 9 ; RUN: --target x8632 -i %s --args -Om1 -mattr=sse4.1 \ |
10 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 10 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
11 ; float case: %a is multiplied by itself with one fmul; the CHECK below expects mulss with the same xmm register as both operands. | 11 ; float case: %a is multiplied by itself with one fmul; the CHECK below expects mulss with the same xmm register as both operands. |
12 define float @Square_float(float %a) { | 12 define internal float @Square_float(float %a) { |
13 entry: | 13 entry: |
14 %result = fmul float %a, %a | 14 %result = fmul float %a, %a |
15 ret float %result | 15 ret float %result |
16 } | 16 } |
17 ; CHECK-LABEL: Square_float | 17 ; CHECK-LABEL: Square_float |
18 ; CHECK: mulss [[REG:xmm.]],[[REG]] | 18 ; CHECK: mulss [[REG:xmm.]],[[REG]] |
19 ; double case: %a is multiplied by itself with one fmul; the CHECK below expects mulsd with the same xmm register as both operands. | 19 ; double case: %a is multiplied by itself with one fmul; the CHECK below expects mulsd with the same xmm register as both operands. |
20 define double @Square_double(double %a) { | 20 define internal double @Square_double(double %a) { |
21 entry: | 21 entry: |
22 %result = fmul double %a, %a | 22 %result = fmul double %a, %a |
23 ret double %result | 23 ret double %result |
24 } | 24 } |
25 ; CHECK-LABEL: Square_double | 25 ; CHECK-LABEL: Square_double |
26 ; CHECK: mulsd [[REG:xmm.]],[[REG]] | 26 ; CHECK: mulsd [[REG:xmm.]],[[REG]] |
27 ; i32 case: %a is multiplied by itself with one mul; the CHECK below expects imul with the same register (matching e..) as both operands. | 27 ; i32 case: %a is multiplied by itself with one mul; the CHECK below expects imul with the same register (matching e..) as both operands. |
28 define i32 @Square_i32(i32 %a) { | 28 define internal i32 @Square_i32(i32 %a) { |
29 entry: | 29 entry: |
30 %result = mul i32 %a, %a | 30 %result = mul i32 %a, %a |
31 ret i32 %result | 31 ret i32 %result |
32 } | 32 } |
33 ; CHECK-LABEL: Square_i32 | 33 ; CHECK-LABEL: Square_i32 |
34 ; CHECK: imul [[REG:e..]],[[REG]] | 34 ; CHECK: imul [[REG:e..]],[[REG]] |
35 ; i16 case: %a is multiplied by itself with one mul; the CHECK below expects imul with the same two-character register name as both operands. | 35 ; i16 case: %a is multiplied by itself with one mul; the CHECK below expects imul with the same two-character register name as both operands. |
36 define i16 @Square_i16(i16 %a) { | 36 define internal i16 @Square_i16(i16 %a) { |
37 entry: | 37 entry: |
38 %result = mul i16 %a, %a | 38 %result = mul i16 %a, %a |
39 ret i16 %result | 39 ret i16 %result |
40 } | 40 } |
41 ; CHECK-LABEL: Square_i16 | 41 ; CHECK-LABEL: Square_i16 |
42 ; CHECK: imul [[REG:..]],[[REG]] | 42 ; CHECK: imul [[REG:..]],[[REG]] |
43 ; i8 case: %a is multiplied by itself with one mul; unlike the wider integer cases, the CHECK below expects the literal text "imul al". | 43 ; i8 case: %a is multiplied by itself with one mul; unlike the wider integer cases, the CHECK below expects the literal text "imul al". |
44 define i8 @Square_i8(i8 %a) { | 44 define internal i8 @Square_i8(i8 %a) { |
45 entry: | 45 entry: |
46 %result = mul i8 %a, %a | 46 %result = mul i8 %a, %a |
47 ret i8 %result | 47 ret i8 %result |
48 } | 48 } |
49 ; CHECK-LABEL: Square_i8 | 49 ; CHECK-LABEL: Square_i8 |
50 ; CHECK: imul al | 50 ; CHECK: imul al |
51 ; <4 x float> case: %a is multiplied by itself with one fmul; the CHECK below expects mulps with the same xmm register as both operands. | 51 ; <4 x float> case: %a is multiplied by itself with one fmul; the CHECK below expects mulps with the same xmm register as both operands. |
52 define <4 x float> @Square_v4f32(<4 x float> %a) { | 52 define internal <4 x float> @Square_v4f32(<4 x float> %a) { |
53 entry: | 53 entry: |
54 %result = fmul <4 x float> %a, %a | 54 %result = fmul <4 x float> %a, %a |
55 ret <4 x float> %result | 55 ret <4 x float> %result |
56 } | 56 } |
57 ; CHECK-LABEL: Square_v4f32 | 57 ; CHECK-LABEL: Square_v4f32 |
58 ; CHECK: mulps [[REG:xmm.]],[[REG]] | 58 ; CHECK: mulps [[REG:xmm.]],[[REG]] |
59 ; <4 x i32> case: %a is multiplied by itself with one mul; the CHECK below expects pmulld with the same xmm register as both operands. | 59 ; <4 x i32> case: %a is multiplied by itself with one mul; the CHECK below expects pmulld with the same xmm register as both operands. |
60 define <4 x i32> @Square_v4i32(<4 x i32> %a) { | 60 define internal <4 x i32> @Square_v4i32(<4 x i32> %a) { |
61 entry: | 61 entry: |
62 %result = mul <4 x i32> %a, %a | 62 %result = mul <4 x i32> %a, %a |
63 ret <4 x i32> %result | 63 ret <4 x i32> %result |
64 } | 64 } |
65 ; CHECK-LABEL: Square_v4i32 | 65 ; CHECK-LABEL: Square_v4i32 |
66 ; CHECK: pmulld [[REG:xmm.]],[[REG]] | 66 ; CHECK: pmulld [[REG:xmm.]],[[REG]] |
67 ; <8 x i16> case: %a is multiplied by itself with one mul; the CHECK below expects pmullw with the same xmm register as both operands. | 67 ; <8 x i16> case: %a is multiplied by itself with one mul; the CHECK below expects pmullw with the same xmm register as both operands. |
68 define <8 x i16> @Square_v8i16(<8 x i16> %a) { | 68 define internal <8 x i16> @Square_v8i16(<8 x i16> %a) { |
69 entry: | 69 entry: |
70 %result = mul <8 x i16> %a, %a | 70 %result = mul <8 x i16> %a, %a |
71 ret <8 x i16> %result | 71 ret <8 x i16> %result |
72 } | 72 } |
73 ; CHECK-LABEL: Square_v8i16 | 73 ; CHECK-LABEL: Square_v8i16 |
74 ; CHECK: pmullw [[REG:xmm.]],[[REG]] | 74 ; CHECK: pmullw [[REG:xmm.]],[[REG]] |
75 ; <16 x i8> case: %a is multiplied by itself with one mul; the CHECK-NOT below requires that no "pmul" text follows the label (presumably no packed-byte multiply is selected - TODO confirm). | 75 ; <16 x i8> case: %a is multiplied by itself with one mul; the CHECK-NOT below requires that no "pmul" text follows the label (presumably no packed-byte multiply is selected - TODO confirm). |
76 define <16 x i8> @Square_v16i8(<16 x i8> %a) { | 76 define internal <16 x i8> @Square_v16i8(<16 x i8> %a) { |
77 entry: | 77 entry: |
78 %result = mul <16 x i8> %a, %a | 78 %result = mul <16 x i8> %a, %a |
79 ret <16 x i8> %result | 79 ret <16 x i8> %result |
80 } | 80 } |
81 ; CHECK-LABEL: Square_v16i8 | 81 ; CHECK-LABEL: Square_v16i8 |
82 ; CHECK-NOT: pmul | 82 ; CHECK-NOT: pmul |
OLD | NEW |