OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
131 "vqadd.s16 q9, q9, q10 \n" /* R */ \ | 131 "vqadd.s16 q9, q9, q10 \n" /* R */ \ |
132 "vqsub.s16 q0, q0, q3 \n" /* G */ \ | 132 "vqsub.s16 q0, q0, q3 \n" /* G */ \ |
133 "vqshrun.s16 d20, q8, #6 \n" /* B */ \ | 133 "vqshrun.s16 d20, q8, #6 \n" /* B */ \ |
134 "vqshrun.s16 d22, q9, #6 \n" /* R */ \ | 134 "vqshrun.s16 d22, q9, #6 \n" /* R */ \ |
135 "vqshrun.s16 d21, q0, #6 \n" /* G */ | 135 "vqshrun.s16 d21, q0, #6 \n" /* G */ |
136 | 136 |
137 void I444ToARGBRow_NEON(const uint8* src_y, | 137 void I444ToARGBRow_NEON(const uint8* src_y, |
138 const uint8* src_u, | 138 const uint8* src_u, |
139 const uint8* src_v, | 139 const uint8* src_v, |
140 uint8* dst_argb, | 140 uint8* dst_argb, |
141 struct YuvConstants* yuvconstants, | 141 const struct YuvConstants* yuvconstants, |
142 int width) { | 142 int width) { |
143 asm volatile ( | 143 asm volatile ( |
144 YUVTORGB_SETUP | 144 YUVTORGB_SETUP |
145 "1: \n" | 145 "1: \n" |
146 READYUV444 | 146 READYUV444 |
147 YUVTORGB | 147 YUVTORGB |
148 "subs %4, %4, #8 \n" | 148 "subs %4, %4, #8 \n" |
149 "vmov.u8 d23, #255 \n" | 149 "vmov.u8 d23, #255 \n" |
150 MEMACCESS(3) | 150 MEMACCESS(3) |
151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
152 "bgt 1b \n" | 152 "bgt 1b \n" |
153 : "+r"(src_y), // %0 | 153 : "+r"(src_y), // %0 |
154 "+r"(src_u), // %1 | 154 "+r"(src_u), // %1 |
155 "+r"(src_v), // %2 | 155 "+r"(src_v), // %2 |
156 "+r"(dst_argb), // %3 | 156 "+r"(dst_argb), // %3 |
157 "+r"(width) // %4 | 157 "+r"(width) // %4 |
158 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 158 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
159 [kUVToG]"r"(&yuvconstants->kUVToG), | 159 [kUVToG]"r"(&yuvconstants->kUVToG), |
160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
161 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 161 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
164 ); | 164 ); |
165 } | 165 } |
166 | 166 |
167 void I422ToARGBRow_NEON(const uint8* src_y, | 167 void I422ToARGBRow_NEON(const uint8* src_y, |
168 const uint8* src_u, | 168 const uint8* src_u, |
169 const uint8* src_v, | 169 const uint8* src_v, |
170 uint8* dst_argb, | 170 uint8* dst_argb, |
171 struct YuvConstants* yuvconstants, | 171 const struct YuvConstants* yuvconstants, |
172 int width) { | 172 int width) { |
173 asm volatile ( | 173 asm volatile ( |
174 YUVTORGB_SETUP | 174 YUVTORGB_SETUP |
175 "1: \n" | 175 "1: \n" |
176 READYUV422 | 176 READYUV422 |
177 YUVTORGB | 177 YUVTORGB |
178 "subs %4, %4, #8 \n" | 178 "subs %4, %4, #8 \n" |
179 "vmov.u8 d23, #255 \n" | 179 "vmov.u8 d23, #255 \n" |
180 MEMACCESS(3) | 180 MEMACCESS(3) |
181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
182 "bgt 1b \n" | 182 "bgt 1b \n" |
183 : "+r"(src_y), // %0 | 183 : "+r"(src_y), // %0 |
184 "+r"(src_u), // %1 | 184 "+r"(src_u), // %1 |
185 "+r"(src_v), // %2 | 185 "+r"(src_v), // %2 |
186 "+r"(dst_argb), // %3 | 186 "+r"(dst_argb), // %3 |
187 "+r"(width) // %4 | 187 "+r"(width) // %4 |
188 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 188 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
189 [kUVToG]"r"(&yuvconstants->kUVToG), | 189 [kUVToG]"r"(&yuvconstants->kUVToG), |
190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
191 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 191 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
194 ); | 194 ); |
195 } | 195 } |
196 | 196 |
197 void I411ToARGBRow_NEON(const uint8* src_y, | 197 void I411ToARGBRow_NEON(const uint8* src_y, |
198 const uint8* src_u, | 198 const uint8* src_u, |
199 const uint8* src_v, | 199 const uint8* src_v, |
200 uint8* dst_argb, | 200 uint8* dst_argb, |
201 struct YuvConstants* yuvconstants, | 201 const struct YuvConstants* yuvconstants, |
202 int width) { | 202 int width) { |
203 asm volatile ( | 203 asm volatile ( |
204 YUVTORGB_SETUP | 204 YUVTORGB_SETUP |
205 "1: \n" | 205 "1: \n" |
206 READYUV411 | 206 READYUV411 |
207 YUVTORGB | 207 YUVTORGB |
208 "subs %4, %4, #8 \n" | 208 "subs %4, %4, #8 \n" |
209 "vmov.u8 d23, #255 \n" | 209 "vmov.u8 d23, #255 \n" |
210 MEMACCESS(3) | 210 MEMACCESS(3) |
211 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 211 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
212 "bgt 1b \n" | 212 "bgt 1b \n" |
213 : "+r"(src_y), // %0 | 213 : "+r"(src_y), // %0 |
214 "+r"(src_u), // %1 | 214 "+r"(src_u), // %1 |
215 "+r"(src_v), // %2 | 215 "+r"(src_v), // %2 |
216 "+r"(dst_argb), // %3 | 216 "+r"(dst_argb), // %3 |
217 "+r"(width) // %4 | 217 "+r"(width) // %4 |
218 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 218 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
219 [kUVToG]"r"(&yuvconstants->kUVToG), | 219 [kUVToG]"r"(&yuvconstants->kUVToG), |
220 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 220 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
221 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 221 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
222 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 222 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
223 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 223 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
224 ); | 224 ); |
225 } | 225 } |
226 | 226 |
227 void I422ToBGRARow_NEON(const uint8* src_y, | 227 void I422ToBGRARow_NEON(const uint8* src_y, |
228 const uint8* src_u, | 228 const uint8* src_u, |
229 const uint8* src_v, | 229 const uint8* src_v, |
230 uint8* dst_bgra, | 230 uint8* dst_bgra, |
231 struct YuvConstants* yuvconstants, | 231 const struct YuvConstants* yuvconstants, |
232 int width) { | 232 int width) { |
233 asm volatile ( | 233 asm volatile ( |
234 YUVTORGB_SETUP | 234 YUVTORGB_SETUP |
235 "1: \n" | 235 "1: \n" |
236 READYUV422 | 236 READYUV422 |
237 YUVTORGB | 237 YUVTORGB |
238 "subs %4, %4, #8 \n" | 238 "subs %4, %4, #8 \n" |
239 "vswp.u8 d20, d22 \n" | 239 "vswp.u8 d20, d22 \n" |
240 "vmov.u8 d19, #255 \n" | 240 "vmov.u8 d19, #255 \n" |
241 MEMACCESS(3) | 241 MEMACCESS(3) |
(...skipping 10 matching lines...) Expand all Loading... |
252 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 252 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
253 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 253 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
254 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 254 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
255 ); | 255 ); |
256 } | 256 } |
257 | 257 |
258 void I422ToABGRRow_NEON(const uint8* src_y, | 258 void I422ToABGRRow_NEON(const uint8* src_y, |
259 const uint8* src_u, | 259 const uint8* src_u, |
260 const uint8* src_v, | 260 const uint8* src_v, |
261 uint8* dst_abgr, | 261 uint8* dst_abgr, |
262 struct YuvConstants* yuvconstants, | 262 const struct YuvConstants* yuvconstants, |
263 int width) { | 263 int width) { |
264 asm volatile ( | 264 asm volatile ( |
265 YUVTORGB_SETUP | 265 YUVTORGB_SETUP |
266 "1: \n" | 266 "1: \n" |
267 READYUV422 | 267 READYUV422 |
268 YUVTORGB | 268 YUVTORGB |
269 "subs %4, %4, #8 \n" | 269 "subs %4, %4, #8 \n" |
270 "vswp.u8 d20, d22 \n" | 270 "vswp.u8 d20, d22 \n" |
271 "vmov.u8 d23, #255 \n" | 271 "vmov.u8 d23, #255 \n" |
272 MEMACCESS(3) | 272 MEMACCESS(3) |
(...skipping 10 matching lines...) Expand all Loading... |
283 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 283 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
284 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 284 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
285 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 285 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
286 ); | 286 ); |
287 } | 287 } |
288 | 288 |
289 void I422ToRGBARow_NEON(const uint8* src_y, | 289 void I422ToRGBARow_NEON(const uint8* src_y, |
290 const uint8* src_u, | 290 const uint8* src_u, |
291 const uint8* src_v, | 291 const uint8* src_v, |
292 uint8* dst_rgba, | 292 uint8* dst_rgba, |
293 struct YuvConstants* yuvconstants, | 293 const struct YuvConstants* yuvconstants, |
294 int width) { | 294 int width) { |
295 asm volatile ( | 295 asm volatile ( |
296 YUVTORGB_SETUP | 296 YUVTORGB_SETUP |
297 "1: \n" | 297 "1: \n" |
298 READYUV422 | 298 READYUV422 |
299 YUVTORGB | 299 YUVTORGB |
300 "subs %4, %4, #8 \n" | 300 "subs %4, %4, #8 \n" |
301 "vmov.u8 d19, #255 \n" | 301 "vmov.u8 d19, #255 \n" |
302 MEMACCESS(3) | 302 MEMACCESS(3) |
303 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" | 303 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" |
304 "bgt 1b \n" | 304 "bgt 1b \n" |
305 : "+r"(src_y), // %0 | 305 : "+r"(src_y), // %0 |
306 "+r"(src_u), // %1 | 306 "+r"(src_u), // %1 |
307 "+r"(src_v), // %2 | 307 "+r"(src_v), // %2 |
308 "+r"(dst_rgba), // %3 | 308 "+r"(dst_rgba), // %3 |
309 "+r"(width) // %4 | 309 "+r"(width) // %4 |
310 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 310 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
311 [kUVToG]"r"(&yuvconstants->kUVToG), | 311 [kUVToG]"r"(&yuvconstants->kUVToG), |
312 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 312 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
313 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 313 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
314 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 314 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
315 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 315 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
316 ); | 316 ); |
317 } | 317 } |
318 | 318 |
319 void I422ToRGB24Row_NEON(const uint8* src_y, | 319 void I422ToRGB24Row_NEON(const uint8* src_y, |
320 const uint8* src_u, | 320 const uint8* src_u, |
321 const uint8* src_v, | 321 const uint8* src_v, |
322 uint8* dst_rgb24, | 322 uint8* dst_rgb24, |
323 struct YuvConstants* yuvconstants, | 323 const struct YuvConstants* yuvconstants, |
324 int width) { | 324 int width) { |
325 asm volatile ( | 325 asm volatile ( |
326 YUVTORGB_SETUP | 326 YUVTORGB_SETUP |
327 "1: \n" | 327 "1: \n" |
328 READYUV422 | 328 READYUV422 |
329 YUVTORGB | 329 YUVTORGB |
330 "subs %4, %4, #8 \n" | 330 "subs %4, %4, #8 \n" |
331 MEMACCESS(3) | 331 MEMACCESS(3) |
332 "vst3.8 {d20, d21, d22}, [%3]! \n" | 332 "vst3.8 {d20, d21, d22}, [%3]! \n" |
333 "bgt 1b \n" | 333 "bgt 1b \n" |
334 : "+r"(src_y), // %0 | 334 : "+r"(src_y), // %0 |
335 "+r"(src_u), // %1 | 335 "+r"(src_u), // %1 |
336 "+r"(src_v), // %2 | 336 "+r"(src_v), // %2 |
337 "+r"(dst_rgb24), // %3 | 337 "+r"(dst_rgb24), // %3 |
338 "+r"(width) // %4 | 338 "+r"(width) // %4 |
339 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 339 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
340 [kUVToG]"r"(&yuvconstants->kUVToG), | 340 [kUVToG]"r"(&yuvconstants->kUVToG), |
341 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 341 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
342 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 342 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
343 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 343 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
344 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 344 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
345 ); | 345 ); |
346 } | 346 } |
347 | 347 |
348 void I422ToRAWRow_NEON(const uint8* src_y, | 348 void I422ToRAWRow_NEON(const uint8* src_y, |
349 const uint8* src_u, | 349 const uint8* src_u, |
350 const uint8* src_v, | 350 const uint8* src_v, |
351 uint8* dst_raw, | 351 uint8* dst_raw, |
352 struct YuvConstants* yuvconstants, | 352 const struct YuvConstants* yuvconstants, |
353 int width) { | 353 int width) { |
354 asm volatile ( | 354 asm volatile ( |
355 YUVTORGB_SETUP | 355 YUVTORGB_SETUP |
356 "1: \n" | 356 "1: \n" |
357 READYUV422 | 357 READYUV422 |
358 YUVTORGB | 358 YUVTORGB |
359 "subs %4, %4, #8 \n" | 359 "subs %4, %4, #8 \n" |
360 "vswp.u8 d20, d22 \n" | 360 "vswp.u8 d20, d22 \n" |
361 MEMACCESS(3) | 361 MEMACCESS(3) |
362 "vst3.8 {d20, d21, d22}, [%3]! \n" | 362 "vst3.8 {d20, d21, d22}, [%3]! \n" |
(...skipping 21 matching lines...) Expand all Loading... |
384 "vmovl.u8 q10, d22 \n" /* R */ \ | 384 "vmovl.u8 q10, d22 \n" /* R */ \ |
385 "vshl.u16 q9, q9, #5 \n" /* G */ \ | 385 "vshl.u16 q9, q9, #5 \n" /* G */ \ |
386 "vshl.u16 q10, q10, #11 \n" /* R */ \ | 386 "vshl.u16 q10, q10, #11 \n" /* R */ \ |
387 "vorr q0, q8, q9 \n" /* BG */ \ | 387 "vorr q0, q8, q9 \n" /* BG */ \ |
388 "vorr q0, q0, q10 \n" /* BGR */ | 388 "vorr q0, q0, q10 \n" /* BGR */ |
389 | 389 |
390 void I422ToRGB565Row_NEON(const uint8* src_y, | 390 void I422ToRGB565Row_NEON(const uint8* src_y, |
391 const uint8* src_u, | 391 const uint8* src_u, |
392 const uint8* src_v, | 392 const uint8* src_v, |
393 uint8* dst_rgb565, | 393 uint8* dst_rgb565, |
394 struct YuvConstants* yuvconstants, | 394 const struct YuvConstants* yuvconstants, |
395 int width) { | 395 int width) { |
396 asm volatile ( | 396 asm volatile ( |
397 YUVTORGB_SETUP | 397 YUVTORGB_SETUP |
398 "1: \n" | 398 "1: \n" |
399 READYUV422 | 399 READYUV422 |
400 YUVTORGB | 400 YUVTORGB |
401 "subs %4, %4, #8 \n" | 401 "subs %4, %4, #8 \n" |
402 ARGBTORGB565 | 402 ARGBTORGB565 |
403 MEMACCESS(3) | 403 MEMACCESS(3) |
404 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. | 404 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. |
(...skipping 24 matching lines...) Expand all Loading... |
429 "vshl.u16 q10, q10, #10 \n" /* R */ \ | 429 "vshl.u16 q10, q10, #10 \n" /* R */ \ |
430 "vshl.u16 q11, q11, #15 \n" /* A */ \ | 430 "vshl.u16 q11, q11, #15 \n" /* A */ \ |
431 "vorr q0, q8, q9 \n" /* BG */ \ | 431 "vorr q0, q8, q9 \n" /* BG */ \ |
432 "vorr q1, q10, q11 \n" /* RA */ \ | 432 "vorr q1, q10, q11 \n" /* RA */ \ |
433 "vorr q0, q0, q1 \n" /* BGRA */ | 433 "vorr q0, q0, q1 \n" /* BGRA */ |
434 | 434 |
435 void I422ToARGB1555Row_NEON(const uint8* src_y, | 435 void I422ToARGB1555Row_NEON(const uint8* src_y, |
436 const uint8* src_u, | 436 const uint8* src_u, |
437 const uint8* src_v, | 437 const uint8* src_v, |
438 uint8* dst_argb1555, | 438 uint8* dst_argb1555, |
439 struct YuvConstants* yuvconstants, | 439 const struct YuvConstants* yuvconstants, |
440 int width) { | 440 int width) { |
441 asm volatile ( | 441 asm volatile ( |
442 YUVTORGB_SETUP | 442 YUVTORGB_SETUP |
443 "1: \n" | 443 "1: \n" |
444 READYUV422 | 444 READYUV422 |
445 YUVTORGB | 445 YUVTORGB |
446 "subs %4, %4, #8 \n" | 446 "subs %4, %4, #8 \n" |
447 "vmov.u8 d23, #255 \n" | 447 "vmov.u8 d23, #255 \n" |
448 ARGBTOARGB1555 | 448 ARGBTOARGB1555 |
449 MEMACCESS(3) | 449 MEMACCESS(3) |
(...skipping 19 matching lines...) Expand all Loading... |
469 "vshr.u8 d22, d22, #4 \n" /* R */ \ | 469 "vshr.u8 d22, d22, #4 \n" /* R */ \ |
470 "vbic.32 d23, d23, d4 \n" /* A */ \ | 470 "vbic.32 d23, d23, d4 \n" /* A */ \ |
471 "vorr d0, d20, d21 \n" /* BG */ \ | 471 "vorr d0, d20, d21 \n" /* BG */ \ |
472 "vorr d1, d22, d23 \n" /* RA */ \ | 472 "vorr d1, d22, d23 \n" /* RA */ \ |
473 "vzip.u8 d0, d1 \n" /* BGRA */ | 473 "vzip.u8 d0, d1 \n" /* BGRA */ |
474 | 474 |
475 void I422ToARGB4444Row_NEON(const uint8* src_y, | 475 void I422ToARGB4444Row_NEON(const uint8* src_y, |
476 const uint8* src_u, | 476 const uint8* src_u, |
477 const uint8* src_v, | 477 const uint8* src_v, |
478 uint8* dst_argb4444, | 478 uint8* dst_argb4444, |
479 struct YuvConstants* yuvconstants, | 479 const struct YuvConstants* yuvconstants, |
480 int width) { | 480 int width) { |
481 asm volatile ( | 481 asm volatile ( |
482 YUVTORGB_SETUP | 482 YUVTORGB_SETUP |
483 "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. | 483 "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. |
484 "1: \n" | 484 "1: \n" |
485 READYUV422 | 485 READYUV422 |
486 YUVTORGB | 486 YUVTORGB |
487 "subs %4, %4, #8 \n" | 487 "subs %4, %4, #8 \n" |
488 "vmov.u8 d23, #255 \n" | 488 "vmov.u8 d23, #255 \n" |
489 ARGBTOARGB4444 | 489 ARGBTOARGB4444 |
(...skipping 23 matching lines...) Expand all Loading... |
513 READYUV400 | 513 READYUV400 |
514 YUVTORGB | 514 YUVTORGB |
515 "subs %2, %2, #8 \n" | 515 "subs %2, %2, #8 \n" |
516 "vmov.u8 d23, #255 \n" | 516 "vmov.u8 d23, #255 \n" |
517 MEMACCESS(1) | 517 MEMACCESS(1) |
518 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 518 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
519 "bgt 1b \n" | 519 "bgt 1b \n" |
520 : "+r"(src_y), // %0 | 520 : "+r"(src_y), // %0 |
521 "+r"(dst_argb), // %1 | 521 "+r"(dst_argb), // %1 |
522 "+r"(width) // %2 | 522 "+r"(width) // %2 |
523 : [kUVToRB]"r"(&kYuvConstants.kUVToRB), | 523 : [kUVToRB]"r"(&kYuvIConstants.kUVToRB), |
524 [kUVToG]"r"(&kYuvConstants.kUVToG), | 524 [kUVToG]"r"(&kYuvIConstants.kUVToG), |
525 [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), | 525 [kUVBiasBGR]"r"(&kYuvIConstants.kUVBiasBGR), |
526 [kYToRgb]"r"(&kYuvConstants.kYToRgb) | 526 [kYToRgb]"r"(&kYuvIConstants.kYToRgb) |
527 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 527 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
528 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 528 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
529 ); | 529 ); |
530 } | 530 } |
531 | 531 |
532 void J400ToARGBRow_NEON(const uint8* src_y, | 532 void J400ToARGBRow_NEON(const uint8* src_y, |
533 uint8* dst_argb, | 533 uint8* dst_argb, |
534 int width) { | 534 int width) { |
535 asm volatile ( | 535 asm volatile ( |
536 "vmov.u8 d23, #255 \n" | 536 "vmov.u8 d23, #255 \n" |
(...skipping 10 matching lines...) Expand all Loading... |
547 "+r"(dst_argb), // %1 | 547 "+r"(dst_argb), // %1 |
548 "+r"(width) // %2 | 548 "+r"(width) // %2 |
549 : | 549 : |
550 : "cc", "memory", "d20", "d21", "d22", "d23" | 550 : "cc", "memory", "d20", "d21", "d22", "d23" |
551 ); | 551 ); |
552 } | 552 } |
553 | 553 |
554 void NV12ToARGBRow_NEON(const uint8* src_y, | 554 void NV12ToARGBRow_NEON(const uint8* src_y, |
555 const uint8* src_uv, | 555 const uint8* src_uv, |
556 uint8* dst_argb, | 556 uint8* dst_argb, |
557 struct YuvConstants* yuvconstants, | 557 const struct YuvConstants* yuvconstants, |
558 int width) { | 558 int width) { |
559 asm volatile ( | 559 asm volatile ( |
560 YUVTORGB_SETUP | 560 YUVTORGB_SETUP |
561 "1: \n" | 561 "1: \n" |
562 READNV12 | 562 READNV12 |
563 YUVTORGB | 563 YUVTORGB |
564 "subs %3, %3, #8 \n" | 564 "subs %3, %3, #8 \n" |
565 "vmov.u8 d23, #255 \n" | 565 "vmov.u8 d23, #255 \n" |
566 MEMACCESS(2) | 566 MEMACCESS(2) |
567 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" | 567 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" |
568 "bgt 1b \n" | 568 "bgt 1b \n" |
569 : "+r"(src_y), // %0 | 569 : "+r"(src_y), // %0 |
570 "+r"(src_uv), // %1 | 570 "+r"(src_uv), // %1 |
571 "+r"(dst_argb), // %2 | 571 "+r"(dst_argb), // %2 |
572 "+r"(width) // %3 | 572 "+r"(width) // %3 |
573 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 573 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
574 [kUVToG]"r"(&yuvconstants->kUVToG), | 574 [kUVToG]"r"(&yuvconstants->kUVToG), |
575 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 575 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
576 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 576 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
577 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 577 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
578 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 578 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
579 ); | 579 ); |
580 } | 580 } |
581 | 581 |
582 void NV21ToARGBRow_NEON(const uint8* src_y, | 582 void NV21ToARGBRow_NEON(const uint8* src_y, |
583 const uint8* src_vu, | 583 const uint8* src_vu, |
584 uint8* dst_argb, | 584 uint8* dst_argb, |
585 struct YuvConstants* yuvconstants, | 585 const struct YuvConstants* yuvconstants, |
586 int width) { | 586 int width) { |
587 asm volatile ( | 587 asm volatile ( |
588 YUVTORGB_SETUP | 588 YUVTORGB_SETUP |
589 "1: \n" | 589 "1: \n" |
590 READNV21 | 590 READNV21 |
591 YUVTORGB | 591 YUVTORGB |
592 "subs %3, %3, #8 \n" | 592 "subs %3, %3, #8 \n" |
593 "vmov.u8 d23, #255 \n" | 593 "vmov.u8 d23, #255 \n" |
594 MEMACCESS(2) | 594 MEMACCESS(2) |
595 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" | 595 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" |
596 "bgt 1b \n" | 596 "bgt 1b \n" |
597 : "+r"(src_y), // %0 | 597 : "+r"(src_y), // %0 |
598 "+r"(src_vu), // %1 | 598 "+r"(src_vu), // %1 |
599 "+r"(dst_argb), // %2 | 599 "+r"(dst_argb), // %2 |
600 "+r"(width) // %3 | 600 "+r"(width) // %3 |
601 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 601 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
602 [kUVToG]"r"(&yuvconstants->kUVToG), | 602 [kUVToG]"r"(&yuvconstants->kUVToG), |
603 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 603 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
604 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 604 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
605 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 605 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
606 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 606 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
607 ); | 607 ); |
608 } | 608 } |
609 | 609 |
610 void NV12ToRGB565Row_NEON(const uint8* src_y, | 610 void NV12ToRGB565Row_NEON(const uint8* src_y, |
611 const uint8* src_uv, | 611 const uint8* src_uv, |
612 uint8* dst_rgb565, | 612 uint8* dst_rgb565, |
613 struct YuvConstants* yuvconstants, | 613 const struct YuvConstants* yuvconstants, |
614 int width) { | 614 int width) { |
615 asm volatile ( | 615 asm volatile ( |
616 YUVTORGB_SETUP | 616 YUVTORGB_SETUP |
617 "1: \n" | 617 "1: \n" |
618 READNV12 | 618 READNV12 |
619 YUVTORGB | 619 YUVTORGB |
620 "subs %3, %3, #8 \n" | 620 "subs %3, %3, #8 \n" |
621 ARGBTORGB565 | 621 ARGBTORGB565 |
622 MEMACCESS(2) | 622 MEMACCESS(2) |
623 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. | 623 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. |
624 "bgt 1b \n" | 624 "bgt 1b \n" |
625 : "+r"(src_y), // %0 | 625 : "+r"(src_y), // %0 |
626 "+r"(src_uv), // %1 | 626 "+r"(src_uv), // %1 |
627 "+r"(dst_rgb565), // %2 | 627 "+r"(dst_rgb565), // %2 |
628 "+r"(width) // %3 | 628 "+r"(width) // %3 |
629 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 629 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
630 [kUVToG]"r"(&yuvconstants->kUVToG), | 630 [kUVToG]"r"(&yuvconstants->kUVToG), |
631 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 631 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
632 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 632 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
633 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 633 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
634 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 634 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
635 ); | 635 ); |
636 } | 636 } |
637 | 637 |
638 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 638 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
639 uint8* dst_argb, | 639 uint8* dst_argb, |
640 struct YuvConstants* yuvconstants, | 640 const struct YuvConstants* yuvconstants, |
641 int width) { | 641 int width) { |
642 asm volatile ( | 642 asm volatile ( |
643 YUVTORGB_SETUP | 643 YUVTORGB_SETUP |
644 "1: \n" | 644 "1: \n" |
645 READYUY2 | 645 READYUY2 |
646 YUVTORGB | 646 YUVTORGB |
647 "subs %2, %2, #8 \n" | 647 "subs %2, %2, #8 \n" |
648 "vmov.u8 d23, #255 \n" | 648 "vmov.u8 d23, #255 \n" |
649 MEMACCESS(1) | 649 MEMACCESS(1) |
650 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 650 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
651 "bgt 1b \n" | 651 "bgt 1b \n" |
652 : "+r"(src_yuy2), // %0 | 652 : "+r"(src_yuy2), // %0 |
653 "+r"(dst_argb), // %1 | 653 "+r"(dst_argb), // %1 |
654 "+r"(width) // %2 | 654 "+r"(width) // %2 |
655 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 655 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
656 [kUVToG]"r"(&yuvconstants->kUVToG), | 656 [kUVToG]"r"(&yuvconstants->kUVToG), |
657 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 657 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
658 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 658 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
659 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 659 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
660 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 660 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
661 ); | 661 ); |
662 } | 662 } |
663 | 663 |
664 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 664 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
665 uint8* dst_argb, | 665 uint8* dst_argb, |
666 struct YuvConstants* yuvconstants, | 666 const struct YuvConstants* yuvconstants, |
667 int width) { | 667 int width) { |
668 asm volatile ( | 668 asm volatile ( |
669 YUVTORGB_SETUP | 669 YUVTORGB_SETUP |
670 "1: \n" | 670 "1: \n" |
671 READUYVY | 671 READUYVY |
672 YUVTORGB | 672 YUVTORGB |
673 "subs %2, %2, #8 \n" | 673 "subs %2, %2, #8 \n" |
674 "vmov.u8 d23, #255 \n" | 674 "vmov.u8 d23, #255 \n" |
675 MEMACCESS(1) | 675 MEMACCESS(1) |
676 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 676 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
(...skipping 2274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2951 "r"(6) // %5 | 2951 "r"(6) // %5 |
2952 : "cc", "memory", "q0", "q1" // Clobber List | 2952 : "cc", "memory", "q0", "q1" // Clobber List |
2953 ); | 2953 ); |
2954 } | 2954 } |
2955 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2955 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
2956 | 2956 |
2957 #ifdef __cplusplus | 2957 #ifdef __cplusplus |
2958 } // extern "C" | 2958 } // extern "C" |
2959 } // namespace libyuv | 2959 } // namespace libyuv |
2960 #endif | 2960 #endif |
OLD | NEW |