OLD | NEW |
1 ; This file checks support for comparing vector values with the fcmp | 1 ; This file checks support for comparing vector values with the fcmp |
2 ; instruction. | 2 ; instruction. |
3 | 3 |
4 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s | 4 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s |
5 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s | 5 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s |
6 | 6 |
7 ; Check that sext elimination occurs when the result of the comparison | 7 ; Check that sext elimination occurs when the result of the comparison |
8 ; instruction is already sign extended. Sign extension to 4 x i32 uses | 8 ; instruction is already sign extended. Sign extension to 4 x i32 uses |
9 ; the pslld instruction. | 9 ; the pslld instruction. |
10 define <4 x i32> @sextElimination(<4 x float> %a, <4 x float> %b) { | 10 define internal <4 x i32> @sextElimination(<4 x float> %a, <4 x float> %b) { |
11 entry: | 11 entry: |
12 %res.trunc = fcmp oeq <4 x float> %a, %b | 12 %res.trunc = fcmp oeq <4 x float> %a, %b |
13 %res = sext <4 x i1> %res.trunc to <4 x i32> | 13 %res = sext <4 x i1> %res.trunc to <4 x i32> |
14 ret <4 x i32> %res | 14 ret <4 x i32> %res |
15 ; CHECK-LABEL: sextElimination | 15 ; CHECK-LABEL: sextElimination |
16 ; CHECK: cmpeqps | 16 ; CHECK: cmpeqps |
17 ; CHECK-NOT: pslld | 17 ; CHECK-NOT: pslld |
18 } | 18 } |
19 | 19 |
20 define <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) { | 20 define internal <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) { |
21 entry: | 21 entry: |
22 %res.trunc = fcmp false <4 x float> %a, %b | 22 %res.trunc = fcmp false <4 x float> %a, %b |
23 %res = sext <4 x i1> %res.trunc to <4 x i32> | 23 %res = sext <4 x i1> %res.trunc to <4 x i32> |
24 ret <4 x i32> %res | 24 ret <4 x i32> %res |
25 ; CHECK-LABEL: fcmpFalseVector | 25 ; CHECK-LABEL: fcmpFalseVector |
26 ; CHECK: pxor | 26 ; CHECK: pxor |
27 } | 27 } |
28 | 28 |
29 define <4 x i32> @fcmpOeqVector(<4 x float> %a, <4 x float> %b) { | 29 define internal <4 x i32> @fcmpOeqVector(<4 x float> %a, <4 x float> %b) { |
30 entry: | 30 entry: |
31 %res.trunc = fcmp oeq <4 x float> %a, %b | 31 %res.trunc = fcmp oeq <4 x float> %a, %b |
32 %res = sext <4 x i1> %res.trunc to <4 x i32> | 32 %res = sext <4 x i1> %res.trunc to <4 x i32> |
33 ret <4 x i32> %res | 33 ret <4 x i32> %res |
34 ; CHECK-LABEL: fcmpOeqVector | 34 ; CHECK-LABEL: fcmpOeqVector |
35 ; CHECK: cmpeqps | 35 ; CHECK: cmpeqps |
36 } | 36 } |
37 | 37 |
38 define <4 x i32> @fcmpOgeVector(<4 x float> %a, <4 x float> %b) { | 38 define internal <4 x i32> @fcmpOgeVector(<4 x float> %a, <4 x float> %b) { |
39 entry: | 39 entry: |
40 %res.trunc = fcmp oge <4 x float> %a, %b | 40 %res.trunc = fcmp oge <4 x float> %a, %b |
41 %res = sext <4 x i1> %res.trunc to <4 x i32> | 41 %res = sext <4 x i1> %res.trunc to <4 x i32> |
42 ret <4 x i32> %res | 42 ret <4 x i32> %res |
43 ; CHECK-LABEL: fcmpOgeVector | 43 ; CHECK-LABEL: fcmpOgeVector |
44 ; CHECK: cmpleps | 44 ; CHECK: cmpleps |
45 } | 45 } |
46 | 46 |
47 define <4 x i32> @fcmpOgtVector(<4 x float> %a, <4 x float> %b) { | 47 define internal <4 x i32> @fcmpOgtVector(<4 x float> %a, <4 x float> %b) { |
48 entry: | 48 entry: |
49 %res.trunc = fcmp ogt <4 x float> %a, %b | 49 %res.trunc = fcmp ogt <4 x float> %a, %b |
50 %res = sext <4 x i1> %res.trunc to <4 x i32> | 50 %res = sext <4 x i1> %res.trunc to <4 x i32> |
51 ret <4 x i32> %res | 51 ret <4 x i32> %res |
52 ; CHECK-LABEL: fcmpOgtVector | 52 ; CHECK-LABEL: fcmpOgtVector |
53 ; CHECK: cmpltps | 53 ; CHECK: cmpltps |
54 } | 54 } |
55 | 55 |
56 define <4 x i32> @fcmpOleVector(<4 x float> %a, <4 x float> %b) { | 56 define internal <4 x i32> @fcmpOleVector(<4 x float> %a, <4 x float> %b) { |
57 entry: | 57 entry: |
58 %res.trunc = fcmp ole <4 x float> %a, %b | 58 %res.trunc = fcmp ole <4 x float> %a, %b |
59 %res = sext <4 x i1> %res.trunc to <4 x i32> | 59 %res = sext <4 x i1> %res.trunc to <4 x i32> |
60 ret <4 x i32> %res | 60 ret <4 x i32> %res |
61 ; CHECK-LABEL: fcmpOleVector | 61 ; CHECK-LABEL: fcmpOleVector |
62 ; CHECK: cmpleps | 62 ; CHECK: cmpleps |
63 } | 63 } |
64 | 64 |
65 define <4 x i32> @fcmpOltVector(<4 x float> %a, <4 x float> %b) { | 65 define internal <4 x i32> @fcmpOltVector(<4 x float> %a, <4 x float> %b) { |
66 entry: | 66 entry: |
67 %res.trunc = fcmp olt <4 x float> %a, %b | 67 %res.trunc = fcmp olt <4 x float> %a, %b |
68 %res = sext <4 x i1> %res.trunc to <4 x i32> | 68 %res = sext <4 x i1> %res.trunc to <4 x i32> |
69 ret <4 x i32> %res | 69 ret <4 x i32> %res |
70 ; CHECK-LABEL: fcmpOltVector | 70 ; CHECK-LABEL: fcmpOltVector |
71 ; CHECK: cmpltps | 71 ; CHECK: cmpltps |
72 } | 72 } |
73 | 73 |
74 define <4 x i32> @fcmpOneVector(<4 x float> %a, <4 x float> %b) { | 74 define internal <4 x i32> @fcmpOneVector(<4 x float> %a, <4 x float> %b) { |
75 entry: | 75 entry: |
76 %res.trunc = fcmp one <4 x float> %a, %b | 76 %res.trunc = fcmp one <4 x float> %a, %b |
77 %res = sext <4 x i1> %res.trunc to <4 x i32> | 77 %res = sext <4 x i1> %res.trunc to <4 x i32> |
78 ret <4 x i32> %res | 78 ret <4 x i32> %res |
79 ; CHECK-LABEL: fcmpOneVector | 79 ; CHECK-LABEL: fcmpOneVector |
80 ; CHECK: cmpneqps | 80 ; CHECK: cmpneqps |
81 ; CHECK: cmpordps | 81 ; CHECK: cmpordps |
82 ; CHECK: pand | 82 ; CHECK: pand |
83 } | 83 } |
84 | 84 |
85 define <4 x i32> @fcmpOrdVector(<4 x float> %a, <4 x float> %b) { | 85 define internal <4 x i32> @fcmpOrdVector(<4 x float> %a, <4 x float> %b) { |
86 entry: | 86 entry: |
87 %res.trunc = fcmp ord <4 x float> %a, %b | 87 %res.trunc = fcmp ord <4 x float> %a, %b |
88 %res = sext <4 x i1> %res.trunc to <4 x i32> | 88 %res = sext <4 x i1> %res.trunc to <4 x i32> |
89 ret <4 x i32> %res | 89 ret <4 x i32> %res |
90 ; CHECK-LABEL: fcmpOrdVector | 90 ; CHECK-LABEL: fcmpOrdVector |
91 ; CHECK: cmpordps | 91 ; CHECK: cmpordps |
92 } | 92 } |
93 | 93 |
94 define <4 x i32> @fcmpTrueVector(<4 x float> %a, <4 x float> %b) { | 94 define internal <4 x i32> @fcmpTrueVector(<4 x float> %a, <4 x float> %b) { |
95 entry: | 95 entry: |
96 %res.trunc = fcmp true <4 x float> %a, %b | 96 %res.trunc = fcmp true <4 x float> %a, %b |
97 %res = sext <4 x i1> %res.trunc to <4 x i32> | 97 %res = sext <4 x i1> %res.trunc to <4 x i32> |
98 ret <4 x i32> %res | 98 ret <4 x i32> %res |
99 ; CHECK-LABEL: fcmpTrueVector | 99 ; CHECK-LABEL: fcmpTrueVector |
100 ; CHECK: pcmpeqd | 100 ; CHECK: pcmpeqd |
101 } | 101 } |
102 | 102 |
103 define <4 x i32> @fcmpUeqVector(<4 x float> %a, <4 x float> %b) { | 103 define internal <4 x i32> @fcmpUeqVector(<4 x float> %a, <4 x float> %b) { |
104 entry: | 104 entry: |
105 %res.trunc = fcmp ueq <4 x float> %a, %b | 105 %res.trunc = fcmp ueq <4 x float> %a, %b |
106 %res = sext <4 x i1> %res.trunc to <4 x i32> | 106 %res = sext <4 x i1> %res.trunc to <4 x i32> |
107 ret <4 x i32> %res | 107 ret <4 x i32> %res |
108 ; CHECK-LABEL: fcmpUeqVector | 108 ; CHECK-LABEL: fcmpUeqVector |
109 ; CHECK: cmpeqps | 109 ; CHECK: cmpeqps |
110 ; CHECK: cmpunordps | 110 ; CHECK: cmpunordps |
111 ; CHECK: por | 111 ; CHECK: por |
112 } | 112 } |
113 | 113 |
114 define <4 x i32> @fcmpUgeVector(<4 x float> %a, <4 x float> %b) { | 114 define internal <4 x i32> @fcmpUgeVector(<4 x float> %a, <4 x float> %b) { |
115 entry: | 115 entry: |
116 %res.trunc = fcmp uge <4 x float> %a, %b | 116 %res.trunc = fcmp uge <4 x float> %a, %b |
117 %res = sext <4 x i1> %res.trunc to <4 x i32> | 117 %res = sext <4 x i1> %res.trunc to <4 x i32> |
118 ret <4 x i32> %res | 118 ret <4 x i32> %res |
119 ; CHECK-LABEL: fcmpUgeVector | 119 ; CHECK-LABEL: fcmpUgeVector |
120 ; CHECK: cmpnltps | 120 ; CHECK: cmpnltps |
121 } | 121 } |
122 | 122 |
123 define <4 x i32> @fcmpUgtVector(<4 x float> %a, <4 x float> %b) { | 123 define internal <4 x i32> @fcmpUgtVector(<4 x float> %a, <4 x float> %b) { |
124 entry: | 124 entry: |
125 %res.trunc = fcmp ugt <4 x float> %a, %b | 125 %res.trunc = fcmp ugt <4 x float> %a, %b |
126 %res = sext <4 x i1> %res.trunc to <4 x i32> | 126 %res = sext <4 x i1> %res.trunc to <4 x i32> |
127 ret <4 x i32> %res | 127 ret <4 x i32> %res |
128 ; CHECK-LABEL: fcmpUgtVector | 128 ; CHECK-LABEL: fcmpUgtVector |
129 ; CHECK: cmpnleps | 129 ; CHECK: cmpnleps |
130 } | 130 } |
131 | 131 |
132 define <4 x i32> @fcmpUleVector(<4 x float> %a, <4 x float> %b) { | 132 define internal <4 x i32> @fcmpUleVector(<4 x float> %a, <4 x float> %b) { |
133 entry: | 133 entry: |
134 %res.trunc = fcmp ule <4 x float> %a, %b | 134 %res.trunc = fcmp ule <4 x float> %a, %b |
135 %res = sext <4 x i1> %res.trunc to <4 x i32> | 135 %res = sext <4 x i1> %res.trunc to <4 x i32> |
136 ret <4 x i32> %res | 136 ret <4 x i32> %res |
137 ; CHECK-LABEL: fcmpUleVector | 137 ; CHECK-LABEL: fcmpUleVector |
138 ; CHECK: cmpnltps | 138 ; CHECK: cmpnltps |
139 } | 139 } |
140 | 140 |
141 define <4 x i32> @fcmpUltVector(<4 x float> %a, <4 x float> %b) { | 141 define internal <4 x i32> @fcmpUltVector(<4 x float> %a, <4 x float> %b) { |
142 entry: | 142 entry: |
143 %res.trunc = fcmp ult <4 x float> %a, %b | 143 %res.trunc = fcmp ult <4 x float> %a, %b |
144 %res = sext <4 x i1> %res.trunc to <4 x i32> | 144 %res = sext <4 x i1> %res.trunc to <4 x i32> |
145 ret <4 x i32> %res | 145 ret <4 x i32> %res |
146 ; CHECK-LABEL: fcmpUltVector | 146 ; CHECK-LABEL: fcmpUltVector |
147 ; CHECK: cmpnleps | 147 ; CHECK: cmpnleps |
148 } | 148 } |
149 | 149 |
150 define <4 x i32> @fcmpUneVector(<4 x float> %a, <4 x float> %b) { | 150 define internal <4 x i32> @fcmpUneVector(<4 x float> %a, <4 x float> %b) { |
151 entry: | 151 entry: |
152 %res.trunc = fcmp une <4 x float> %a, %b | 152 %res.trunc = fcmp une <4 x float> %a, %b |
153 %res = sext <4 x i1> %res.trunc to <4 x i32> | 153 %res = sext <4 x i1> %res.trunc to <4 x i32> |
154 ret <4 x i32> %res | 154 ret <4 x i32> %res |
155 ; CHECK-LABEL: fcmpUneVector | 155 ; CHECK-LABEL: fcmpUneVector |
156 ; CHECK: cmpneqps | 156 ; CHECK: cmpneqps |
157 } | 157 } |
158 | 158 |
159 define <4 x i32> @fcmpUnoVector(<4 x float> %a, <4 x float> %b) { | 159 define internal <4 x i32> @fcmpUnoVector(<4 x float> %a, <4 x float> %b) { |
160 entry: | 160 entry: |
161 %res.trunc = fcmp uno <4 x float> %a, %b | 161 %res.trunc = fcmp uno <4 x float> %a, %b |
162 %res = sext <4 x i1> %res.trunc to <4 x i32> | 162 %res = sext <4 x i1> %res.trunc to <4 x i32> |
163 ret <4 x i32> %res | 163 ret <4 x i32> %res |
164 ; CHECK-LABEL: fcmpUnoVector | 164 ; CHECK-LABEL: fcmpUnoVector |
165 ; CHECK: cmpunordps | 165 ; CHECK: cmpunordps |
166 } | 166 } |
OLD | NEW |