OLD | NEW |
1 ; RUN: opt -fix-vector-load-store-alignment %s -S | FileCheck %s | 1 ; RUN: opt -fix-vector-load-store-alignment %s -S | FileCheck %s |
2 | 2 |
3 ; Test that vector load/store get converted to memory accesses of the | 3 ; Test that vector load/store are always element-aligned when possible, and get |
4 ; underlying elements, with proper alignment. | 4 ; converted to scalar load/store when not. |
5 | 5 |
6 ; The datalayout is needed to determine the alignment of the load/stores. | 6 ; The datalayout is needed to determine the alignment of the load/stores. |
7 target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64
:64:64-p:32:32:32-v128:32:32" | 7 target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64
:64:64-p:32:32:32-v128:32:32" |
8 | 8 |
9 ; Load ========================================================================= | 9 ; Load ========================================================================= |
10 | 10 |
11 define <4 x i1> @test_load_4xi1(<4 x i1>* %loc) { | 11 define <4 x i1> @test_load_4xi1(<4 x i1>* %loc) { |
12 ; CHECK-LABEL: test_load_4xi1 | 12 ; CHECK-LABEL: test_load_4xi1 |
13 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* | 13 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* |
14 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 0 | 14 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 0 |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
109 ; CHECK-NEXT: %[[LD14:[0-9]+]] = load i1* %[[GEP14]], align 2 | 109 ; CHECK-NEXT: %[[LD14:[0-9]+]] = load i1* %[[GEP14]], align 2 |
110 ; CHECK-NEXT: %[[INS14:[0-9]+]] = insertelement <16 x i1> %[[INS13]], i1 %[[LD
14]], i32 14 | 110 ; CHECK-NEXT: %[[INS14:[0-9]+]] = insertelement <16 x i1> %[[INS13]], i1 %[[LD
14]], i32 14 |
111 ; CHECK-NEXT: %[[GEP15:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 15 | 111 ; CHECK-NEXT: %[[GEP15:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 15 |
112 ; CHECK-NEXT: %[[LD15:[0-9]+]] = load i1* %[[GEP15]], align 1 | 112 ; CHECK-NEXT: %[[LD15:[0-9]+]] = load i1* %[[GEP15]], align 1 |
113 ; CHECK-NEXT: %[[INS15:[0-9]+]] = insertelement <16 x i1> %[[INS14]], i1 %[[LD
15]], i32 15 | 113 ; CHECK-NEXT: %[[INS15:[0-9]+]] = insertelement <16 x i1> %[[INS14]], i1 %[[LD
15]], i32 15 |
114 ; CHECK-NEXT: ret <16 x i1> %[[INS15]] | 114 ; CHECK-NEXT: ret <16 x i1> %[[INS15]] |
115 %loaded = load <16 x i1>* %loc | 115 %loaded = load <16 x i1>* %loc |
116 ret <16 x i1> %loaded | 116 ret <16 x i1> %loaded |
117 } | 117 } |
118 | 118 |
119 define <4 x i32> @test_load_4xi32(<4 x i32>* %loc) { | 119 define <4 x i32> @test_load_4xi32_align0(<4 x i32>* %loc) { |
120 ; CHECK-LABEL: test_load_4xi32 | 120 ; CHECK-LABEL: test_load_4xi32_align0 |
121 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* | 121 ; CHECK-NEXT: %loaded = load <4 x i32>* %loc, align 4 |
122 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 | 122 ; CHECK-NEXT: ret <4 x i32> %loaded |
123 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i32* %[[GEP0]], align 4 | |
124 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]],
i32 0 | |
125 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 | |
126 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i32* %[[GEP1]], align 4 | |
127 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1
]], i32 1 | |
128 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 | |
129 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i32* %[[GEP2]], align 4 | |
130 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> %[[INS1]], i32 %[[LD2
]], i32 2 | |
131 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 | |
132 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i32* %[[GEP3]], align 4 | |
133 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3
]], i32 3 | |
134 ; CHECK-NEXT: ret <4 x i32> %[[INS3]] | |
135 %loaded = load <4 x i32>* %loc | 123 %loaded = load <4 x i32>* %loc |
136 ret <4 x i32> %loaded | 124 ret <4 x i32> %loaded |
137 } | 125 } |
138 | 126 |
139 define <4 x float> @test_load_4xfloat(<4 x float>* %loc) { | 127 define <4 x i32> @test_load_4xi32_align1(<4 x i32>* %loc) { |
140 ; CHECK-LABEL: test_load_4xfloat | 128 ; CHECK-LABEL: test_load_4xi32_align1 |
141 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x float>* %loc to float* | 129 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* |
142 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
0 | 130 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 |
143 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load float* %[[GEP0]], align 4 | 131 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i32* %[[GEP0]], align 1 |
144 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x float> undef, float %[[LD0
]], i32 0 | 132 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]],
i32 0 |
145 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
1 | 133 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 |
146 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load float* %[[GEP1]], align 4 | 134 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i32* %[[GEP1]], align 1 |
147 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x float> %[[INS0]], float %[
[LD1]], i32 1 | 135 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1
]], i32 1 |
148 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
2 | 136 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 |
149 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load float* %[[GEP2]], align 4 | 137 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i32* %[[GEP2]], align 1 |
150 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x float> %[[INS1]], float %[
[LD2]], i32 2 | 138 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> %[[INS1]], i32 %[[LD2
]], i32 2 |
151 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
3 | 139 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 |
152 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load float* %[[GEP3]], align 4 | 140 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i32* %[[GEP3]], align 1 |
153 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x float> %[[INS2]], float %[
[LD3]], i32 3 | 141 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3
]], i32 3 |
154 ; CHECK-NEXT: ret <4 x float> %[[INS3]] | 142 ; CHECK-NEXT: ret <4 x i32> %[[INS3]] |
| 143 %loaded = load <4 x i32>* %loc, align 1 |
| 144 ret <4 x i32> %loaded |
| 145 } |
| 146 |
| 147 define <4 x i32> @test_load_4xi32_align2(<4 x i32>* %loc) { |
| 148 ; CHECK-LABEL: test_load_4xi32_align2 |
| 149 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* |
| 150 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 |
| 151 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i32* %[[GEP0]], align 2 |
| 152 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]],
i32 0 |
| 153 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 |
| 154 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i32* %[[GEP1]], align 2 |
| 155 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1
]], i32 1 |
| 156 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 |
| 157 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i32* %[[GEP2]], align 2 |
| 158 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> %[[INS1]], i32 %[[LD2
]], i32 2 |
| 159 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 |
| 160 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i32* %[[GEP3]], align 2 |
| 161 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3
]], i32 3 |
| 162 ; CHECK-NEXT: ret <4 x i32> %[[INS3]] |
| 163 %loaded = load <4 x i32>* %loc, align 2 |
| 164 ret <4 x i32> %loaded |
| 165 } |
| 166 |
| 167 define <4 x i32> @test_load_4xi32_align4(<4 x i32>* %loc) { |
| 168 ; CHECK-LABEL: test_load_4xi32_align4 |
| 169 ; CHECK-NEXT: %loaded = load <4 x i32>* %loc, align 4 |
| 170 ; CHECK-NEXT: ret <4 x i32> %loaded |
| 171 %loaded = load <4 x i32>* %loc, align 4 |
| 172 ret <4 x i32> %loaded |
| 173 } |
| 174 |
| 175 define <4 x i32> @test_load_4xi32_align8(<4 x i32>* %loc) { |
| 176 ; CHECK-LABEL: test_load_4xi32_align8 |
| 177 ; CHECK-NEXT: %loaded = load <4 x i32>* %loc, align 4 |
| 178 ; CHECK-NEXT: ret <4 x i32> %loaded |
| 179 %loaded = load <4 x i32>* %loc, align 8 |
| 180 ret <4 x i32> %loaded |
| 181 } |
| 182 |
| 183 define <4 x i32> @test_load_4xi32_align16(<4 x i32>* %loc) { |
| 184 ; CHECK-LABEL: test_load_4xi32_align16 |
| 185 ; CHECK-NEXT: %loaded = load <4 x i32>* %loc, align 4 |
| 186 ; CHECK-NEXT: ret <4 x i32> %loaded |
| 187 %loaded = load <4 x i32>* %loc, align 16 |
| 188 ret <4 x i32> %loaded |
| 189 } |
| 190 |
| 191 define <4 x i32> @test_load_4xi32_align32(<4 x i32>* %loc) { |
| 192 ; CHECK-LABEL: test_load_4xi32_align32 |
| 193 ; CHECK-NEXT: %loaded = load <4 x i32>* %loc, align 4 |
| 194 ; CHECK-NEXT: ret <4 x i32> %loaded |
| 195 %loaded = load <4 x i32>* %loc, align 32 |
| 196 ret <4 x i32> %loaded |
| 197 } |
| 198 |
| 199 define <4 x float> @test_load_4xfloat_align0(<4 x float>* %loc) { |
| 200 ; CHECK-LABEL: test_load_4xfloat_align0 |
| 201 ; CHECK-NEXT: %loaded = load <4 x float>* %loc, align 4 |
| 202 ; CHECK-NEXT: ret <4 x float> %loaded |
155 %loaded = load <4 x float>* %loc | 203 %loaded = load <4 x float>* %loc |
156 ret <4 x float> %loaded | 204 ret <4 x float> %loaded |
157 } | 205 } |
158 | 206 |
159 define <8 x i16> @test_load_8xi16(<8 x i16>* %loc) { | 207 define <4 x float> @test_load_4xfloat_align2(<4 x float>* %loc) { |
160 ; CHECK-LABEL: test_load_8xi16 | 208 ; CHECK-LABEL: test_load_4xfloat_align2 |
161 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <8 x i16>* %loc to i16* | 209 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x float>* %loc to float* |
162 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 0 | 210 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
0 |
163 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i16* %[[GEP0]], align 4 | 211 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load float* %[[GEP0]], align 2 |
164 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <8 x i16> undef, i16 %[[LD0]],
i32 0 | 212 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x float> undef, float %[[LD0
]], i32 0 |
165 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 1 | 213 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
1 |
166 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i16* %[[GEP1]], align 2 | 214 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load float* %[[GEP1]], align 2 |
167 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <8 x i16> %[[INS0]], i16 %[[LD1
]], i32 1 | 215 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x float> %[[INS0]], float %[
[LD1]], i32 1 |
168 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 2 | 216 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
2 |
169 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i16* %[[GEP2]], align 4 | 217 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load float* %[[GEP2]], align 2 |
170 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <8 x i16> %[[INS1]], i16 %[[LD2
]], i32 2 | 218 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x float> %[[INS1]], float %[
[LD2]], i32 2 |
171 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 3 | 219 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
3 |
172 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i16* %[[GEP3]], align 2 | 220 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load float* %[[GEP3]], align 2 |
173 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <8 x i16> %[[INS2]], i16 %[[LD3
]], i32 3 | 221 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x float> %[[INS2]], float %[
[LD3]], i32 3 |
174 ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 4 | 222 ; CHECK-NEXT: ret <4 x float> %[[INS3]] |
175 ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i16* %[[GEP4]], align 4 | 223 %loaded = load <4 x float>* %loc, align 2 |
176 ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <8 x i16> %[[INS3]], i16 %[[LD4
]], i32 4 | 224 ret <4 x float> %loaded |
177 ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 5 | 225 } |
178 ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i16* %[[GEP5]], align 2 | 226 |
179 ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <8 x i16> %[[INS4]], i16 %[[LD5
]], i32 5 | 227 define <4 x float> @test_load_4xfloat_align4(<4 x float>* %loc) { |
180 ; CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 6 | 228 ; CHECK-LABEL: test_load_4xfloat_align4 |
181 ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i16* %[[GEP6]], align 4 | 229 ; CHECK-NEXT: %loaded = load <4 x float>* %loc, align 4 |
182 ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <8 x i16> %[[INS5]], i16 %[[LD6
]], i32 6 | 230 ; CHECK-NEXT: ret <4 x float> %loaded |
183 ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 7 | 231 %loaded = load <4 x float>* %loc, align 4 |
184 ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i16* %[[GEP7]], align 2 | 232 ret <4 x float> %loaded |
185 ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <8 x i16> %[[INS6]], i16 %[[LD7
]], i32 7 | 233 } |
186 ; CHECK-NEXT: ret <8 x i16> %[[INS7]] | 234 |
| 235 define <8 x i16> @test_load_8xi16_align0(<8 x i16>* %loc) { |
| 236 ; CHECK-LABEL: test_load_8xi16_align0 |
| 237 ; CHECK-NEXT: %loaded = load <8 x i16>* %loc, align 2 |
| 238 ; CHECK-NEXT: ret <8 x i16> %loaded |
187 %loaded = load <8 x i16>* %loc | 239 %loaded = load <8 x i16>* %loc |
188 ret <8 x i16> %loaded | 240 ret <8 x i16> %loaded |
189 } | 241 } |
190 | 242 |
191 define <16 x i8> @test_load_16xi8(<16 x i8>* %loc) { | 243 define <8 x i16> @test_load_8xi16_align1(<8 x i16>* %loc) { |
192 ; CHECK-LABEL: test_load_16xi8 | 244 ; CHECK-LABEL: test_load_8xi16_align1 |
193 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <16 x i8>* %loc to i8* | 245 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <8 x i16>* %loc to i16* |
194 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 0 | 246 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 0 |
195 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i8* %[[GEP0]], align 4 | 247 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load i16* %[[GEP0]], align 1 |
196 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <16 x i8> undef, i8 %[[LD0]], i
32 0 | 248 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <8 x i16> undef, i16 %[[LD0]],
i32 0 |
197 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 1 | 249 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 1 |
198 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i8* %[[GEP1]], align 1 | 250 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load i16* %[[GEP1]], align 1 |
199 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <16 x i8> %[[INS0]], i8 %[[LD1]
], i32 1 | 251 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <8 x i16> %[[INS0]], i16 %[[LD1
]], i32 1 |
200 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 2 | 252 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 2 |
201 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i8* %[[GEP2]], align 2 | 253 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load i16* %[[GEP2]], align 1 |
202 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <16 x i8> %[[INS1]], i8 %[[LD2]
], i32 2 | 254 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <8 x i16> %[[INS1]], i16 %[[LD2
]], i32 2 |
203 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 3 | 255 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 3 |
204 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i8* %[[GEP3]], align 1 | 256 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load i16* %[[GEP3]], align 1 |
205 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <16 x i8> %[[INS2]], i8 %[[LD3]
], i32 3 | 257 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <8 x i16> %[[INS2]], i16 %[[LD3
]], i32 3 |
206 ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 4 | 258 ; CHECK-NEXT: %[[GEP4:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 4 |
207 ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i8* %[[GEP4]], align 4 | 259 ; CHECK-NEXT: %[[LD4:[0-9]+]] = load i16* %[[GEP4]], align 1 |
208 ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <16 x i8> %[[INS3]], i8 %[[LD4]
], i32 4 | 260 ; CHECK-NEXT: %[[INS4:[0-9]+]] = insertelement <8 x i16> %[[INS3]], i16 %[[LD4
]], i32 4 |
209 ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 5 | 261 ; CHECK-NEXT: %[[GEP5:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 5 |
210 ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i8* %[[GEP5]], align 1 | 262 ; CHECK-NEXT: %[[LD5:[0-9]+]] = load i16* %[[GEP5]], align 1 |
211 ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <16 x i8> %[[INS4]], i8 %[[LD5]
], i32 5 | 263 ; CHECK-NEXT: %[[INS5:[0-9]+]] = insertelement <8 x i16> %[[INS4]], i16 %[[LD5
]], i32 5 |
212 ; CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 6 | 264 ; CHECK-NEXT: %[[GEP6:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 6 |
213 ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i8* %[[GEP6]], align 2 | 265 ; CHECK-NEXT: %[[LD6:[0-9]+]] = load i16* %[[GEP6]], align 1 |
214 ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <16 x i8> %[[INS5]], i8 %[[LD6]
], i32 6 | 266 ; CHECK-NEXT: %[[INS6:[0-9]+]] = insertelement <8 x i16> %[[INS5]], i16 %[[LD6
]], i32 6 |
215 ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 7 | 267 ; CHECK-NEXT: %[[GEP7:[0-9]+]] = getelementptr inbounds i16* %[[BASE]], i32 7 |
216 ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i8* %[[GEP7]], align 1 | 268 ; CHECK-NEXT: %[[LD7:[0-9]+]] = load i16* %[[GEP7]], align 1 |
217 ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <16 x i8> %[[INS6]], i8 %[[LD7]
], i32 7 | 269 ; CHECK-NEXT: %[[INS7:[0-9]+]] = insertelement <8 x i16> %[[INS6]], i16 %[[LD7
]], i32 7 |
218 ; CHECK-NEXT: %[[GEP8:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 8 | 270 ; CHECK-NEXT: ret <8 x i16> %[[INS7]] |
219 ; CHECK-NEXT: %[[LD8:[0-9]+]] = load i8* %[[GEP8]], align 4 | 271 %loaded = load <8 x i16>* %loc, align 1 |
220 ; CHECK-NEXT: %[[INS8:[0-9]+]] = insertelement <16 x i8> %[[INS7]], i8 %[[LD8]
], i32 8 | 272 ret <8 x i16> %loaded |
221 ; CHECK-NEXT: %[[GEP9:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 9 | 273 } |
222 ; CHECK-NEXT: %[[LD9:[0-9]+]] = load i8* %[[GEP9]], align 1 | 274 |
223 ; CHECK-NEXT: %[[INS9:[0-9]+]] = insertelement <16 x i8> %[[INS8]], i8 %[[LD9]
], i32 9 | 275 define <8 x i16> @test_load_8xi16_align2(<8 x i16>* %loc) { |
224 ; CHECK-NEXT: %[[GEP10:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 10 | 276 ; CHECK-LABEL: test_load_8xi16_align2 |
225 ; CHECK-NEXT: %[[LD10:[0-9]+]] = load i8* %[[GEP10]], align 2 | 277 ; CHECK-NEXT: %loaded = load <8 x i16>* %loc, align 2 |
226 ; CHECK-NEXT: %[[INS10:[0-9]+]] = insertelement <16 x i8> %[[INS9]], i8 %[[LD1
0]], i32 10 | 278 ; CHECK-NEXT: ret <8 x i16> %loaded |
227 ; CHECK-NEXT: %[[GEP11:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 11 | 279 %loaded = load <8 x i16>* %loc, align 2 |
228 ; CHECK-NEXT: %[[LD11:[0-9]+]] = load i8* %[[GEP11]], align 1 | 280 ret <8 x i16> %loaded |
229 ; CHECK-NEXT: %[[INS11:[0-9]+]] = insertelement <16 x i8> %[[INS10]], i8 %[[LD
11]], i32 11 | 281 } |
230 ; CHECK-NEXT: %[[GEP12:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 12 | 282 |
231 ; CHECK-NEXT: %[[LD12:[0-9]+]] = load i8* %[[GEP12]], align 4 | 283 define <16 x i8> @test_load_16xi8_align0(<16 x i8>* %loc) { |
232 ; CHECK-NEXT: %[[INS12:[0-9]+]] = insertelement <16 x i8> %[[INS11]], i8 %[[LD
12]], i32 12 | 284 ; CHECK-LABEL: test_load_16xi8_align0 |
233 ; CHECK-NEXT: %[[GEP13:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 13 | 285 ; CHECK-NEXT: %loaded = load <16 x i8>* %loc, align 1 |
234 ; CHECK-NEXT: %[[LD13:[0-9]+]] = load i8* %[[GEP13]], align 1 | 286 ; CHECK-NEXT: ret <16 x i8> %loaded |
235 ; CHECK-NEXT: %[[INS13:[0-9]+]] = insertelement <16 x i8> %[[INS12]], i8 %[[LD
13]], i32 13 | |
236 ; CHECK-NEXT: %[[GEP14:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 14 | |
237 ; CHECK-NEXT: %[[LD14:[0-9]+]] = load i8* %[[GEP14]], align 2 | |
238 ; CHECK-NEXT: %[[INS14:[0-9]+]] = insertelement <16 x i8> %[[INS13]], i8 %[[LD
14]], i32 14 | |
239 ; CHECK-NEXT: %[[GEP15:[0-9]+]] = getelementptr inbounds i8* %[[BASE]], i32 15 | |
240 ; CHECK-NEXT: %[[LD15:[0-9]+]] = load i8* %[[GEP15]], align 1 | |
241 ; CHECK-NEXT: %[[INS15:[0-9]+]] = insertelement <16 x i8> %[[INS14]], i8 %[[LD
15]], i32 15 | |
242 ; CHECK-NEXT: ret <16 x i8> %[[INS15]] | |
243 %loaded = load <16 x i8>* %loc | 287 %loaded = load <16 x i8>* %loc |
244 ret <16 x i8> %loaded | 288 ret <16 x i8> %loaded |
245 } | 289 } |
246 | 290 |
| 291 define <16 x i8> @test_load_16xi8_align1(<16 x i8>* %loc) { |
| 292 ; CHECK-LABEL: test_load_16xi8_align1 |
| 293 ; CHECK-NEXT: %loaded = load <16 x i8>* %loc, align 1 |
| 294 ; CHECK-NEXT: ret <16 x i8> %loaded |
| 295 %loaded = load <16 x i8>* %loc, align 1 |
| 296 ret <16 x i8> %loaded |
| 297 } |
| 298 |
247 | 299 |
248 ; Store ======================================================================== | 300 ; Store ======================================================================== |
249 | 301 |
250 define void @test_store_4xi1(<4 x i1> %val, <4 x i1>* %loc) { | 302 define void @test_store_4xi1(<4 x i1> %val, <4 x i1>* %loc) { |
251 ; CHECK-LABEL: test_store_4xi1 | 303 ; CHECK-LABEL: test_store_4xi1 |
252 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* | 304 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i1>* %loc to i1* |
253 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 0 | 305 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 0 |
254 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i1> %val, i32 0 | 306 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i1> %val, i32 0 |
255 ; CHECK-NEXT: store i1 %[[EXT0]], i1* %[[GEP0]], align 4 | 307 ; CHECK-NEXT: store i1 %[[EXT0]], i1* %[[GEP0]], align 4 |
256 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 1 | 308 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 1 |
257 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i1> %val, i32 1 | 309 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i1> %val, i32 1 |
258 ; CHECK-NEXT: store i1 %[[EXT1]], i1* %[[GEP1]], align 1 | 310 ; CHECK-NEXT: store i1 %[[EXT1]], i1* %[[GEP1]], align 1 |
259 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 2 | 311 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 2 |
260 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i1> %val, i32 2 | 312 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i1> %val, i32 2 |
261 ; CHECK-NEXT: store i1 %[[EXT2]], i1* %[[GEP2]], align 2 | 313 ; CHECK-NEXT: store i1 %[[EXT2]], i1* %[[GEP2]], align 2 |
262 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 3 | 314 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i1* %[[BASE]], i32 3 |
263 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i1> %val, i32 3 | 315 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i1> %val, i32 3 |
264 ; CHECK-NEXT: store i1 %[[EXT3]], i1* %[[GEP3]], align 1 | 316 ; CHECK-NEXT: store i1 %[[EXT3]], i1* %[[GEP3]], align 1 |
265 ; CHECK-NEXT: ret void | 317 ; CHECK-NEXT: ret void |
266 store <4 x i1> %val, <4 x i1>* %loc | 318 store <4 x i1> %val, <4 x i1>* %loc |
267 ret void | 319 ret void |
268 } | 320 } |
269 | 321 |
270 define void @test_store_4xi32(<4 x i32> %val, <4 x i32>* %loc) { | 322 define void @test_store_4xi32_align0(<4 x i32> %val, <4 x i32>* %loc) { |
271 ; CHECK-LABEL: test_store_4xi32 | 323 ; CHECK-LABEL: test_store_4xi32_align0 |
272 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* | 324 ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 |
273 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 | |
274 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 | |
275 ; CHECK-NEXT: store i32 %[[EXT0]], i32* %[[GEP0]], align 4 | |
276 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 | |
277 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 | |
278 ; CHECK-NEXT: store i32 %[[EXT1]], i32* %[[GEP1]], align 4 | |
279 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 | |
280 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 | |
281 ; CHECK-NEXT: store i32 %[[EXT2]], i32* %[[GEP2]], align 4 | |
282 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 | |
283 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 | |
284 ; CHECK-NEXT: store i32 %[[EXT3]], i32* %[[GEP3]], align 4 | |
285 ; CHECK-NEXT: ret void | 325 ; CHECK-NEXT: ret void |
286 store <4 x i32> %val, <4 x i32>* %loc | 326 store <4 x i32> %val, <4 x i32>* %loc |
287 ret void | 327 ret void |
288 } | 328 } |
289 | 329 |
290 define void @test_store_4xfloat(<4 x float> %val, <4 x float>* %loc) { | 330 define void @test_store_4xi32_align1(<4 x i32> %val, <4 x i32>* %loc) { |
291 ; CHECK-LABEL: test_store_4xfloat | 331 ; CHECK-LABEL: test_store_4xi32_align1 |
292 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x float>* %loc to float* | 332 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* |
293 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
0 | 333 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 |
294 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x float> %val, i32 0 | 334 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 |
295 ; CHECK-NEXT: store float %[[EXT0]], float* %[[GEP0]], align 4 | 335 ; CHECK-NEXT: store i32 %[[EXT0]], i32* %[[GEP0]], align 1 |
296 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
1 | 336 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 |
297 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x float> %val, i32 1 | 337 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 |
298 ; CHECK-NEXT: store float %[[EXT1]], float* %[[GEP1]], align 4 | 338 ; CHECK-NEXT: store i32 %[[EXT1]], i32* %[[GEP1]], align 1 |
299 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
2 | 339 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 |
300 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x float> %val, i32 2 | 340 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 |
301 ; CHECK-NEXT: store float %[[EXT2]], float* %[[GEP2]], align 4 | 341 ; CHECK-NEXT: store i32 %[[EXT2]], i32* %[[GEP2]], align 1 |
302 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds float* %[[BASE]], i32
3 | 342 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 |
303 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x float> %val, i32 3 | 343 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 |
304 ; CHECK-NEXT: store float %[[EXT3]], float* %[[GEP3]], align 4 | 344 ; CHECK-NEXT: store i32 %[[EXT3]], i32* %[[GEP3]], align 1 |
| 345 ; CHECK-NEXT: ret void |
| 346 store <4 x i32> %val, <4 x i32>* %loc, align 1 |
| 347 ret void |
| 348 } |
| 349 |
| 350 define void @test_store_4xi32_align2(<4 x i32> %val, <4 x i32>* %loc) { |
| 351 ; CHECK-LABEL: test_store_4xi32_align2 |
| 352 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* |
| 353 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 |
| 354 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 |
| 355 ; CHECK-NEXT: store i32 %[[EXT0]], i32* %[[GEP0]], align 2 |
| 356 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 |
| 357 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 |
| 358 ; CHECK-NEXT: store i32 %[[EXT1]], i32* %[[GEP1]], align 2 |
| 359 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 |
| 360 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 |
| 361 ; CHECK-NEXT: store i32 %[[EXT2]], i32* %[[GEP2]], align 2 |
| 362 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 |
| 363 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 |
| 364 ; CHECK-NEXT: store i32 %[[EXT3]], i32* %[[GEP3]], align 2 |
| 365 ; CHECK-NEXT: ret void |
| 366 store <4 x i32> %val, <4 x i32>* %loc, align 2 |
| 367 ret void |
| 368 } |
| 369 |
| 370 define void @test_store_4xi32_align4(<4 x i32> %val, <4 x i32>* %loc) { |
| 371 ; CHECK-LABEL: test_store_4xi32_align4 |
| 372 ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 |
| 373 ; CHECK-NEXT: ret void |
| 374 store <4 x i32> %val, <4 x i32>* %loc, align 4 |
| 375 ret void |
| 376 } |
| 377 |
| 378 define void @test_store_4xi32_align8(<4 x i32> %val, <4 x i32>* %loc) { |
| 379 ; CHECK-LABEL: test_store_4xi32_align8 |
| 380 ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 |
| 381 ; CHECK-NEXT: ret void |
| 382 store <4 x i32> %val, <4 x i32>* %loc, align 8 |
| 383 ret void |
| 384 } |
| 385 |
| 386 define void @test_store_4xi32_align16(<4 x i32> %val, <4 x i32>* %loc) { |
| 387 ; CHECK-LABEL: test_store_4xi32_align16 |
| 388 ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 |
| 389 ; CHECK-NEXT: ret void |
| 390 store <4 x i32> %val, <4 x i32>* %loc, align 16 |
| 391 ret void |
| 392 } |
| 393 |
| 394 define void @test_store_4xi32_align32(<4 x i32> %val, <4 x i32>* %loc) { |
| 395 ; CHECK-LABEL: test_store_4xi32_align32 |
| 396 ; CHECK-NEXT: store <4 x i32> %val, <4 x i32>* %loc, align 4 |
| 397 ; CHECK-NEXT: ret void |
| 398 store <4 x i32> %val, <4 x i32>* %loc, align 32 |
| 399 ret void |
| 400 } |
| 401 |
| 402 define void @test_store_4xfloat_align0(<4 x float> %val, <4 x float>* %loc) { |
| 403 ; CHECK-LABEL: test_store_4xfloat_align0 |
| 404 ; CHECK-NEXT: store <4 x float> %val, <4 x float>* %loc, align 4 |
305 ; CHECK-NEXT: ret void | 405 ; CHECK-NEXT: ret void |
306 store <4 x float> %val, <4 x float>* %loc | 406 store <4 x float> %val, <4 x float>* %loc |
307 ret void | 407 ret void |
308 } | 408 } |
309 | 409 |
310 | 410 |
311 ; Volatile ===================================================================== | 411 ; Volatile ===================================================================== |
312 | 412 |
313 define <4 x i32> @test_volatile_load_4xi32(<4 x i32>* %loc) { | 413 define <4 x i32> @test_volatile_load_4xi32_align0(<4 x i32>* %loc) { |
314 ; CHECK-LABEL: test_volatile_load_4xi32 | 414 ; CHECK-LABEL: test_volatile_load_4xi32_align0 |
315 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* | 415 ; CHECK-NEXT: %loaded = load volatile <4 x i32>* %loc, align 4 |
316 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 | 416 ; CHECK-NEXT: ret <4 x i32> %loaded |
317 ; CHECK-NEXT: %[[LD0:[0-9]+]] = load volatile i32* %[[GEP0]], align 4 | |
318 ; CHECK-NEXT: %[[INS0:[0-9]+]] = insertelement <4 x i32> undef, i32 %[[LD0]],
i32 0 | |
319 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 | |
320 ; CHECK-NEXT: %[[LD1:[0-9]+]] = load volatile i32* %[[GEP1]], align 4 | |
321 ; CHECK-NEXT: %[[INS1:[0-9]+]] = insertelement <4 x i32> %[[INS0]], i32 %[[LD1
]], i32 1 | |
322 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 | |
323 ; CHECK-NEXT: %[[LD2:[0-9]+]] = load volatile i32* %[[GEP2]], align 4 | |
324 ; CHECK-NEXT: %[[INS2:[0-9]+]] = insertelement <4 x i32> %[[INS1]], i32 %[[LD2
]], i32 2 | |
325 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 | |
326 ; CHECK-NEXT: %[[LD3:[0-9]+]] = load volatile i32* %[[GEP3]], align 4 | |
327 ; CHECK-NEXT: %[[INS3:[0-9]+]] = insertelement <4 x i32> %[[INS2]], i32 %[[LD3
]], i32 3 | |
328 ; CHECK-NEXT: ret <4 x i32> %[[INS3]] | |
329 %loaded = load volatile <4 x i32>* %loc | 417 %loaded = load volatile <4 x i32>* %loc |
330 ret <4 x i32> %loaded | 418 ret <4 x i32> %loaded |
331 } | 419 } |
332 | 420 |
333 define void @test_volatile_store_4xi32(<4 x i32> %val, <4 x i32>* %loc) { | 421 define <4 x i32> @test_volatile_load_4xi32_align4(<4 x i32>* %loc) { |
334 ; CHECK-LABEL: test_volatile_store_4xi32 | 422 ; CHECK-LABEL: test_volatile_load_4xi32_align4 |
335 ; CHECK-NEXT: %[[BASE:[0-9]+]] = bitcast <4 x i32>* %loc to i32* | 423 ; CHECK-NEXT: %loaded = load volatile <4 x i32>* %loc, align 4 |
336 ; CHECK-NEXT: %[[GEP0:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 0 | 424 ; CHECK-NEXT: ret <4 x i32> %loaded |
337 ; CHECK-NEXT: %[[EXT0:[0-9]+]] = extractelement <4 x i32> %val, i32 0 | 425 %loaded = load volatile <4 x i32>* %loc, align 4 |
338 ; CHECK-NEXT: store volatile i32 %[[EXT0]], i32* %[[GEP0]], align 4 | 426 ret <4 x i32> %loaded |
339 ; CHECK-NEXT: %[[GEP1:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 1 | 427 } |
340 ; CHECK-NEXT: %[[EXT1:[0-9]+]] = extractelement <4 x i32> %val, i32 1 | 428 |
341 ; CHECK-NEXT: store volatile i32 %[[EXT1]], i32* %[[GEP1]], align 4 | 429 define void @test_volatile_store_4xi32_align0(<4 x i32> %val, <4 x i32>* %loc) { |
342 ; CHECK-NEXT: %[[GEP2:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 2 | 430 ; CHECK-LABEL: test_volatile_store_4xi32_align0 |
343 ; CHECK-NEXT: %[[EXT2:[0-9]+]] = extractelement <4 x i32> %val, i32 2 | 431 ; CHECK-NEXT: store volatile <4 x i32> %val, <4 x i32>* %loc, align 4 |
344 ; CHECK-NEXT: store volatile i32 %[[EXT2]], i32* %[[GEP2]], align 4 | |
345 ; CHECK-NEXT: %[[GEP3:[0-9]+]] = getelementptr inbounds i32* %[[BASE]], i32 3 | |
346 ; CHECK-NEXT: %[[EXT3:[0-9]+]] = extractelement <4 x i32> %val, i32 3 | |
347 ; CHECK-NEXT: store volatile i32 %[[EXT3]], i32* %[[GEP3]], align 4 | |
348 ; CHECK-NEXT: ret void | 432 ; CHECK-NEXT: ret void |
349 store volatile <4 x i32> %val, <4 x i32>* %loc | 433 store volatile <4 x i32> %val, <4 x i32>* %loc |
350 ret void | 434 ret void |
351 } | 435 } |
OLD | NEW |