| OLD | NEW |
| 1 ; This file checks support for comparing vector values with the fcmp | 1 ; This file checks support for comparing vector values with the fcmp |
| 2 ; instruction. | 2 ; instruction. |
| 3 | 3 |
| 4 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s | 4 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s |
| 5 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s | 5 ; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s |
| 6 | 6 |
| 7 ; Check that sext elimination occurs when the result of the comparison | 7 ; Check that sext elimination occurs when the result of the comparison |
| 8 ; instruction is already sign extended. Sign extension to 4 x i32 uses | 8 ; instruction is already sign extended. Sign extension to 4 x i32 uses |
| 9 ; the pslld instruction. | 9 ; the pslld instruction. |
| 10 define <4 x i32> @sextElimination(<4 x float> %a, <4 x float> %b) { | 10 define internal <4 x i32> @sextElimination(<4 x float> %a, <4 x float> %b) { |
| 11 entry: | 11 entry: |
| 12 %res.trunc = fcmp oeq <4 x float> %a, %b | 12 %res.trunc = fcmp oeq <4 x float> %a, %b |
| 13 %res = sext <4 x i1> %res.trunc to <4 x i32> | 13 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 14 ret <4 x i32> %res | 14 ret <4 x i32> %res |
| 15 ; CHECK-LABEL: sextElimination | 15 ; CHECK-LABEL: sextElimination |
| 16 ; CHECK: cmpeqps | 16 ; CHECK: cmpeqps |
| 17 ; CHECK-NOT: pslld | 17 ; CHECK-NOT: pslld |
| 18 } | 18 } |
| 19 | 19 |
| 20 define <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) { | 20 define internal <4 x i32> @fcmpFalseVector(<4 x float> %a, <4 x float> %b) { |
| 21 entry: | 21 entry: |
| 22 %res.trunc = fcmp false <4 x float> %a, %b | 22 %res.trunc = fcmp false <4 x float> %a, %b |
| 23 %res = sext <4 x i1> %res.trunc to <4 x i32> | 23 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 24 ret <4 x i32> %res | 24 ret <4 x i32> %res |
| 25 ; CHECK-LABEL: fcmpFalseVector | 25 ; CHECK-LABEL: fcmpFalseVector |
| 26 ; CHECK: pxor | 26 ; CHECK: pxor |
| 27 } | 27 } |
| 28 | 28 |
| 29 define <4 x i32> @fcmpOeqVector(<4 x float> %a, <4 x float> %b) { | 29 define internal <4 x i32> @fcmpOeqVector(<4 x float> %a, <4 x float> %b) { |
| 30 entry: | 30 entry: |
| 31 %res.trunc = fcmp oeq <4 x float> %a, %b | 31 %res.trunc = fcmp oeq <4 x float> %a, %b |
| 32 %res = sext <4 x i1> %res.trunc to <4 x i32> | 32 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 33 ret <4 x i32> %res | 33 ret <4 x i32> %res |
| 34 ; CHECK-LABEL: fcmpOeqVector | 34 ; CHECK-LABEL: fcmpOeqVector |
| 35 ; CHECK: cmpeqps | 35 ; CHECK: cmpeqps |
| 36 } | 36 } |
| 37 | 37 |
| 38 define <4 x i32> @fcmpOgeVector(<4 x float> %a, <4 x float> %b) { | 38 define internal <4 x i32> @fcmpOgeVector(<4 x float> %a, <4 x float> %b) { |
| 39 entry: | 39 entry: |
| 40 %res.trunc = fcmp oge <4 x float> %a, %b | 40 %res.trunc = fcmp oge <4 x float> %a, %b |
| 41 %res = sext <4 x i1> %res.trunc to <4 x i32> | 41 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 42 ret <4 x i32> %res | 42 ret <4 x i32> %res |
| 43 ; CHECK-LABEL: fcmpOgeVector | 43 ; CHECK-LABEL: fcmpOgeVector |
| 44 ; CHECK: cmpleps | 44 ; CHECK: cmpleps |
| 45 } | 45 } |
| 46 | 46 |
| 47 define <4 x i32> @fcmpOgtVector(<4 x float> %a, <4 x float> %b) { | 47 define internal <4 x i32> @fcmpOgtVector(<4 x float> %a, <4 x float> %b) { |
| 48 entry: | 48 entry: |
| 49 %res.trunc = fcmp ogt <4 x float> %a, %b | 49 %res.trunc = fcmp ogt <4 x float> %a, %b |
| 50 %res = sext <4 x i1> %res.trunc to <4 x i32> | 50 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 51 ret <4 x i32> %res | 51 ret <4 x i32> %res |
| 52 ; CHECK-LABEL: fcmpOgtVector | 52 ; CHECK-LABEL: fcmpOgtVector |
| 53 ; CHECK: cmpltps | 53 ; CHECK: cmpltps |
| 54 } | 54 } |
| 55 | 55 |
| 56 define <4 x i32> @fcmpOleVector(<4 x float> %a, <4 x float> %b) { | 56 define internal <4 x i32> @fcmpOleVector(<4 x float> %a, <4 x float> %b) { |
| 57 entry: | 57 entry: |
| 58 %res.trunc = fcmp ole <4 x float> %a, %b | 58 %res.trunc = fcmp ole <4 x float> %a, %b |
| 59 %res = sext <4 x i1> %res.trunc to <4 x i32> | 59 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 60 ret <4 x i32> %res | 60 ret <4 x i32> %res |
| 61 ; CHECK-LABEL: fcmpOleVector | 61 ; CHECK-LABEL: fcmpOleVector |
| 62 ; CHECK: cmpleps | 62 ; CHECK: cmpleps |
| 63 } | 63 } |
| 64 | 64 |
| 65 define <4 x i32> @fcmpOltVector(<4 x float> %a, <4 x float> %b) { | 65 define internal <4 x i32> @fcmpOltVector(<4 x float> %a, <4 x float> %b) { |
| 66 entry: | 66 entry: |
| 67 %res.trunc = fcmp olt <4 x float> %a, %b | 67 %res.trunc = fcmp olt <4 x float> %a, %b |
| 68 %res = sext <4 x i1> %res.trunc to <4 x i32> | 68 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 69 ret <4 x i32> %res | 69 ret <4 x i32> %res |
| 70 ; CHECK-LABEL: fcmpOltVector | 70 ; CHECK-LABEL: fcmpOltVector |
| 71 ; CHECK: cmpltps | 71 ; CHECK: cmpltps |
| 72 } | 72 } |
| 73 | 73 |
| 74 define <4 x i32> @fcmpOneVector(<4 x float> %a, <4 x float> %b) { | 74 define internal <4 x i32> @fcmpOneVector(<4 x float> %a, <4 x float> %b) { |
| 75 entry: | 75 entry: |
| 76 %res.trunc = fcmp one <4 x float> %a, %b | 76 %res.trunc = fcmp one <4 x float> %a, %b |
| 77 %res = sext <4 x i1> %res.trunc to <4 x i32> | 77 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 78 ret <4 x i32> %res | 78 ret <4 x i32> %res |
| 79 ; CHECK-LABEL: fcmpOneVector | 79 ; CHECK-LABEL: fcmpOneVector |
| 80 ; CHECK: cmpneqps | 80 ; CHECK: cmpneqps |
| 81 ; CHECK: cmpordps | 81 ; CHECK: cmpordps |
| 82 ; CHECK: pand | 82 ; CHECK: pand |
| 83 } | 83 } |
| 84 | 84 |
| 85 define <4 x i32> @fcmpOrdVector(<4 x float> %a, <4 x float> %b) { | 85 define internal <4 x i32> @fcmpOrdVector(<4 x float> %a, <4 x float> %b) { |
| 86 entry: | 86 entry: |
| 87 %res.trunc = fcmp ord <4 x float> %a, %b | 87 %res.trunc = fcmp ord <4 x float> %a, %b |
| 88 %res = sext <4 x i1> %res.trunc to <4 x i32> | 88 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 89 ret <4 x i32> %res | 89 ret <4 x i32> %res |
| 90 ; CHECK-LABEL: fcmpOrdVector | 90 ; CHECK-LABEL: fcmpOrdVector |
| 91 ; CHECK: cmpordps | 91 ; CHECK: cmpordps |
| 92 } | 92 } |
| 93 | 93 |
| 94 define <4 x i32> @fcmpTrueVector(<4 x float> %a, <4 x float> %b) { | 94 define internal <4 x i32> @fcmpTrueVector(<4 x float> %a, <4 x float> %b) { |
| 95 entry: | 95 entry: |
| 96 %res.trunc = fcmp true <4 x float> %a, %b | 96 %res.trunc = fcmp true <4 x float> %a, %b |
| 97 %res = sext <4 x i1> %res.trunc to <4 x i32> | 97 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 98 ret <4 x i32> %res | 98 ret <4 x i32> %res |
| 99 ; CHECK-LABEL: fcmpTrueVector | 99 ; CHECK-LABEL: fcmpTrueVector |
| 100 ; CHECK: pcmpeqd | 100 ; CHECK: pcmpeqd |
| 101 } | 101 } |
| 102 | 102 |
| 103 define <4 x i32> @fcmpUeqVector(<4 x float> %a, <4 x float> %b) { | 103 define internal <4 x i32> @fcmpUeqVector(<4 x float> %a, <4 x float> %b) { |
| 104 entry: | 104 entry: |
| 105 %res.trunc = fcmp ueq <4 x float> %a, %b | 105 %res.trunc = fcmp ueq <4 x float> %a, %b |
| 106 %res = sext <4 x i1> %res.trunc to <4 x i32> | 106 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 107 ret <4 x i32> %res | 107 ret <4 x i32> %res |
| 108 ; CHECK-LABEL: fcmpUeqVector | 108 ; CHECK-LABEL: fcmpUeqVector |
| 109 ; CHECK: cmpeqps | 109 ; CHECK: cmpeqps |
| 110 ; CHECK: cmpunordps | 110 ; CHECK: cmpunordps |
| 111 ; CHECK: por | 111 ; CHECK: por |
| 112 } | 112 } |
| 113 | 113 |
| 114 define <4 x i32> @fcmpUgeVector(<4 x float> %a, <4 x float> %b) { | 114 define internal <4 x i32> @fcmpUgeVector(<4 x float> %a, <4 x float> %b) { |
| 115 entry: | 115 entry: |
| 116 %res.trunc = fcmp uge <4 x float> %a, %b | 116 %res.trunc = fcmp uge <4 x float> %a, %b |
| 117 %res = sext <4 x i1> %res.trunc to <4 x i32> | 117 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 118 ret <4 x i32> %res | 118 ret <4 x i32> %res |
| 119 ; CHECK-LABEL: fcmpUgeVector | 119 ; CHECK-LABEL: fcmpUgeVector |
| 120 ; CHECK: cmpnltps | 120 ; CHECK: cmpnltps |
| 121 } | 121 } |
| 122 | 122 |
| 123 define <4 x i32> @fcmpUgtVector(<4 x float> %a, <4 x float> %b) { | 123 define internal <4 x i32> @fcmpUgtVector(<4 x float> %a, <4 x float> %b) { |
| 124 entry: | 124 entry: |
| 125 %res.trunc = fcmp ugt <4 x float> %a, %b | 125 %res.trunc = fcmp ugt <4 x float> %a, %b |
| 126 %res = sext <4 x i1> %res.trunc to <4 x i32> | 126 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 127 ret <4 x i32> %res | 127 ret <4 x i32> %res |
| 128 ; CHECK-LABEL: fcmpUgtVector | 128 ; CHECK-LABEL: fcmpUgtVector |
| 129 ; CHECK: cmpnleps | 129 ; CHECK: cmpnleps |
| 130 } | 130 } |
| 131 | 131 |
| 132 define <4 x i32> @fcmpUleVector(<4 x float> %a, <4 x float> %b) { | 132 define internal <4 x i32> @fcmpUleVector(<4 x float> %a, <4 x float> %b) { |
| 133 entry: | 133 entry: |
| 134 %res.trunc = fcmp ule <4 x float> %a, %b | 134 %res.trunc = fcmp ule <4 x float> %a, %b |
| 135 %res = sext <4 x i1> %res.trunc to <4 x i32> | 135 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 136 ret <4 x i32> %res | 136 ret <4 x i32> %res |
| 137 ; CHECK-LABEL: fcmpUleVector | 137 ; CHECK-LABEL: fcmpUleVector |
| 138 ; CHECK: cmpnltps | 138 ; CHECK: cmpnltps |
| 139 } | 139 } |
| 140 | 140 |
| 141 define <4 x i32> @fcmpUltVector(<4 x float> %a, <4 x float> %b) { | 141 define internal <4 x i32> @fcmpUltVector(<4 x float> %a, <4 x float> %b) { |
| 142 entry: | 142 entry: |
| 143 %res.trunc = fcmp ult <4 x float> %a, %b | 143 %res.trunc = fcmp ult <4 x float> %a, %b |
| 144 %res = sext <4 x i1> %res.trunc to <4 x i32> | 144 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 145 ret <4 x i32> %res | 145 ret <4 x i32> %res |
| 146 ; CHECK-LABEL: fcmpUltVector | 146 ; CHECK-LABEL: fcmpUltVector |
| 147 ; CHECK: cmpnleps | 147 ; CHECK: cmpnleps |
| 148 } | 148 } |
| 149 | 149 |
| 150 define <4 x i32> @fcmpUneVector(<4 x float> %a, <4 x float> %b) { | 150 define internal <4 x i32> @fcmpUneVector(<4 x float> %a, <4 x float> %b) { |
| 151 entry: | 151 entry: |
| 152 %res.trunc = fcmp une <4 x float> %a, %b | 152 %res.trunc = fcmp une <4 x float> %a, %b |
| 153 %res = sext <4 x i1> %res.trunc to <4 x i32> | 153 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 154 ret <4 x i32> %res | 154 ret <4 x i32> %res |
| 155 ; CHECK-LABEL: fcmpUneVector | 155 ; CHECK-LABEL: fcmpUneVector |
| 156 ; CHECK: cmpneqps | 156 ; CHECK: cmpneqps |
| 157 } | 157 } |
| 158 | 158 |
| 159 define <4 x i32> @fcmpUnoVector(<4 x float> %a, <4 x float> %b) { | 159 define internal <4 x i32> @fcmpUnoVector(<4 x float> %a, <4 x float> %b) { |
| 160 entry: | 160 entry: |
| 161 %res.trunc = fcmp uno <4 x float> %a, %b | 161 %res.trunc = fcmp uno <4 x float> %a, %b |
| 162 %res = sext <4 x i1> %res.trunc to <4 x i32> | 162 %res = sext <4 x i1> %res.trunc to <4 x i32> |
| 163 ret <4 x i32> %res | 163 ret <4 x i32> %res |
| 164 ; CHECK-LABEL: fcmpUnoVector | 164 ; CHECK-LABEL: fcmpUnoVector |
| 165 ; CHECK: cmpunordps | 165 ; CHECK: cmpunordps |
| 166 } | 166 } |
| OLD | NEW |