OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ | 122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ |
123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ | 123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ |
124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ | 124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ |
125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ | 125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ |
126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ | 126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ |
127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ | 127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ |
128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ | 128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ |
129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ | 129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ |
130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ | 130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ |
131 | 131 |
132 // YUV to RGB conversion constants. | 132 // BT.601 YUV to RGB reference |
| 133 // R = (Y - 16) * 1.164 - V * -1.596 |
| 134 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 |
| 135 // B = (Y - 16) * 1.164 - U * -2.018 |
| 136 |
133 // Y contribution to R,G,B. Scale and bias. | 137 // Y contribution to R,G,B. Scale and bias. |
| 138 // TODO(fbarchard): Consider moving constants into a common header. |
134 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ | 139 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ |
135 #define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ | 140 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ |
136 | 141 |
137 // U and V contributions to R,G,B. | 142 // U and V contributions to R,G,B. |
138 #define UB -128 /* -min(128, round(2.018 * 64)) */ | 143 #define UB -128 /* max(-128, round(-2.018 * 64)) */ |
139 #define UG 25 /* -round(-0.391 * 64) */ | 144 #define UG 25 /* round(0.391 * 64) */ |
140 #define VG 52 /* -round(-0.813 * 64) */ | 145 #define VG 52 /* round(0.813 * 64) */ |
141 #define VR -102 /* -round(1.596 * 64) */ | 146 #define VR -102 /* round(-1.596 * 64) */ |
142 | 147 |
143 // Bias values to subtract 16 from Y and 128 from U and V. | 148 // Bias values to subtract 16 from Y and 128 from U and V. |
144 #define BB (UB * 128 - YGB) | 149 #define BB (UB * 128 + YGB) |
145 #define BG (UG * 128 + VG * 128 - YGB) | 150 #define BG (UG * 128 + VG * 128 + YGB) |
146 #define BR (VR * 128 - YGB) | 151 #define BR (VR * 128 + YGB) |
147 | 152 |
148 static vec16 kUVBiasBGR = { BB, BG, BR, 0, 0, 0, 0, 0 }; | 153 YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = { |
149 static vec32 kYToRgb = { 0x0101 * YG, 0, 0, 0 }; | 154 { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, |
| 155 { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, |
| 156 { BB, BG, BR, 0, 0, 0, 0, 0 }, |
| 157 { 0x0101 * YG, 0, 0, 0 } |
| 158 }; |
150 | 159 |
151 #undef YG | 160 #undef YG |
152 #undef YGB | 161 #undef YGB |
153 #undef UB | 162 #undef UB |
154 #undef UG | 163 #undef UG |
155 #undef VG | 164 #undef VG |
156 #undef VR | 165 #undef VR |
157 #undef BB | 166 #undef BB |
158 #undef BG | 167 #undef BG |
159 #undef BR | 168 #undef BR |
160 | 169 |
| 170 // TODO(fbarchard): Use structure for constants like 32 bit code. |
161 #define RGBTOUV_SETUP_REG \ | 171 #define RGBTOUV_SETUP_REG \ |
162 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ | 172 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ |
163 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ | 173 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ |
164 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ | 174 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ |
165 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ | 175 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ |
166 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ | 176 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ |
167 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ | 177 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ |
168 | 178 |
169 | |
170 #ifdef HAS_I444TOARGBROW_NEON | 179 #ifdef HAS_I444TOARGBROW_NEON |
171 void I444ToARGBRow_NEON(const uint8* src_y, | 180 void I444ToARGBRow_NEON(const uint8* src_y, |
172 const uint8* src_u, | 181 const uint8* src_u, |
173 const uint8* src_v, | 182 const uint8* src_v, |
174 uint8* dst_argb, | 183 uint8* dst_argb, |
175 int width) { | 184 int width) { |
176 asm volatile ( | 185 asm volatile ( |
177 YUV422TORGB_SETUP_REG | 186 YUV422TORGB_SETUP_REG |
178 "1: \n" | 187 "1: \n" |
179 READYUV444 | 188 READYUV444 |
180 YUV422TORGB(v22, v21, v20) | 189 YUV422TORGB(v22, v21, v20) |
181 "subs %w4, %w4, #8 \n" | 190 "subs %w4, %w4, #8 \n" |
182 "movi v23.8b, #255 \n" /* A */ | 191 "movi v23.8b, #255 \n" /* A */ |
183 MEMACCESS(3) | 192 MEMACCESS(3) |
184 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 193 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
185 "b.gt 1b \n" | 194 "b.gt 1b \n" |
186 : "+r"(src_y), // %0 | 195 : "+r"(src_y), // %0 |
187 "+r"(src_u), // %1 | 196 "+r"(src_u), // %1 |
188 "+r"(src_v), // %2 | 197 "+r"(src_v), // %2 |
189 "+r"(dst_argb), // %3 | 198 "+r"(dst_argb), // %3 |
190 "+r"(width) // %4 | 199 "+r"(width) // %4 |
191 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 200 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
192 [kYToRgb]"r"(&kYToRgb) | 201 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
193 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 202 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
194 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 203 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
195 ); | 204 ); |
196 } | 205 } |
197 #endif // HAS_I444TOARGBROW_NEON | 206 #endif // HAS_I444TOARGBROW_NEON |
198 | 207 |
199 #ifdef HAS_I422TOARGBROW_NEON | 208 #ifdef HAS_I422TOARGBROW_NEON |
200 void I422ToARGBRow_NEON(const uint8* src_y, | 209 void I422ToARGBRow_NEON(const uint8* src_y, |
201 const uint8* src_u, | 210 const uint8* src_u, |
202 const uint8* src_v, | 211 const uint8* src_v, |
203 uint8* dst_argb, | 212 uint8* dst_argb, |
204 int width) { | 213 int width) { |
205 asm volatile ( | 214 asm volatile ( |
206 YUV422TORGB_SETUP_REG | 215 YUV422TORGB_SETUP_REG |
207 "1: \n" | 216 "1: \n" |
208 READYUV422 | 217 READYUV422 |
209 YUV422TORGB(v22, v21, v20) | 218 YUV422TORGB(v22, v21, v20) |
210 "subs %w4, %w4, #8 \n" | 219 "subs %w4, %w4, #8 \n" |
211 "movi v23.8b, #255 \n" /* A */ | 220 "movi v23.8b, #255 \n" /* A */ |
212 MEMACCESS(3) | 221 MEMACCESS(3) |
213 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 222 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
214 "b.gt 1b \n" | 223 "b.gt 1b \n" |
215 : "+r"(src_y), // %0 | 224 : "+r"(src_y), // %0 |
216 "+r"(src_u), // %1 | 225 "+r"(src_u), // %1 |
217 "+r"(src_v), // %2 | 226 "+r"(src_v), // %2 |
218 "+r"(dst_argb), // %3 | 227 "+r"(dst_argb), // %3 |
219 "+r"(width) // %4 | 228 "+r"(width) // %4 |
220 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 229 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
221 [kYToRgb]"r"(&kYToRgb) | 230 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
222 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 231 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
223 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 232 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
224 ); | 233 ); |
225 } | 234 } |
226 #endif // HAS_I422TOARGBROW_NEON | 235 #endif // HAS_I422TOARGBROW_NEON |
227 | 236 |
228 #ifdef HAS_I411TOARGBROW_NEON | 237 #ifdef HAS_I411TOARGBROW_NEON |
229 void I411ToARGBRow_NEON(const uint8* src_y, | 238 void I411ToARGBRow_NEON(const uint8* src_y, |
230 const uint8* src_u, | 239 const uint8* src_u, |
231 const uint8* src_v, | 240 const uint8* src_v, |
232 uint8* dst_argb, | 241 uint8* dst_argb, |
233 int width) { | 242 int width) { |
234 asm volatile ( | 243 asm volatile ( |
235 YUV422TORGB_SETUP_REG | 244 YUV422TORGB_SETUP_REG |
236 "1: \n" | 245 "1: \n" |
237 READYUV411 | 246 READYUV411 |
238 YUV422TORGB(v22, v21, v20) | 247 YUV422TORGB(v22, v21, v20) |
239 "subs %w4, %w4, #8 \n" | 248 "subs %w4, %w4, #8 \n" |
240 "movi v23.8b, #255 \n" /* A */ | 249 "movi v23.8b, #255 \n" /* A */ |
241 MEMACCESS(3) | 250 MEMACCESS(3) |
242 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
243 "b.gt 1b \n" | 252 "b.gt 1b \n" |
244 : "+r"(src_y), // %0 | 253 : "+r"(src_y), // %0 |
245 "+r"(src_u), // %1 | 254 "+r"(src_u), // %1 |
246 "+r"(src_v), // %2 | 255 "+r"(src_v), // %2 |
247 "+r"(dst_argb), // %3 | 256 "+r"(dst_argb), // %3 |
248 "+r"(width) // %4 | 257 "+r"(width) // %4 |
249 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 258 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
250 [kYToRgb]"r"(&kYToRgb) | 259 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
251 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 260 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
252 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 261 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
253 ); | 262 ); |
254 } | 263 } |
255 #endif // HAS_I411TOARGBROW_NEON | 264 #endif // HAS_I411TOARGBROW_NEON |
256 | 265 |
257 #ifdef HAS_I422TOBGRAROW_NEON | 266 #ifdef HAS_I422TOBGRAROW_NEON |
258 void I422ToBGRARow_NEON(const uint8* src_y, | 267 void I422ToBGRARow_NEON(const uint8* src_y, |
259 const uint8* src_u, | 268 const uint8* src_u, |
260 const uint8* src_v, | 269 const uint8* src_v, |
261 uint8* dst_bgra, | 270 uint8* dst_bgra, |
262 int width) { | 271 int width) { |
263 asm volatile ( | 272 asm volatile ( |
264 YUV422TORGB_SETUP_REG | 273 YUV422TORGB_SETUP_REG |
265 "1: \n" | 274 "1: \n" |
266 READYUV422 | 275 READYUV422 |
267 YUV422TORGB(v21, v22, v23) | 276 YUV422TORGB(v21, v22, v23) |
268 "subs %w4, %w4, #8 \n" | 277 "subs %w4, %w4, #8 \n" |
269 "movi v20.8b, #255 \n" /* A */ | 278 "movi v20.8b, #255 \n" /* A */ |
270 MEMACCESS(3) | 279 MEMACCESS(3) |
271 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 280 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
272 "b.gt 1b \n" | 281 "b.gt 1b \n" |
273 : "+r"(src_y), // %0 | 282 : "+r"(src_y), // %0 |
274 "+r"(src_u), // %1 | 283 "+r"(src_u), // %1 |
275 "+r"(src_v), // %2 | 284 "+r"(src_v), // %2 |
276 "+r"(dst_bgra), // %3 | 285 "+r"(dst_bgra), // %3 |
277 "+r"(width) // %4 | 286 "+r"(width) // %4 |
278 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 287 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
279 [kYToRgb]"r"(&kYToRgb) | 288 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
280 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 289 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
281 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 290 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
282 ); | 291 ); |
283 } | 292 } |
284 #endif // HAS_I422TOBGRAROW_NEON | 293 #endif // HAS_I422TOBGRAROW_NEON |
285 | 294 |
286 #ifdef HAS_I422TOABGRROW_NEON | 295 #ifdef HAS_I422TOABGRROW_NEON |
287 void I422ToABGRRow_NEON(const uint8* src_y, | 296 void I422ToABGRRow_NEON(const uint8* src_y, |
288 const uint8* src_u, | 297 const uint8* src_u, |
289 const uint8* src_v, | 298 const uint8* src_v, |
290 uint8* dst_abgr, | 299 uint8* dst_abgr, |
291 int width) { | 300 int width) { |
292 asm volatile ( | 301 asm volatile ( |
293 YUV422TORGB_SETUP_REG | 302 YUV422TORGB_SETUP_REG |
294 "1: \n" | 303 "1: \n" |
295 READYUV422 | 304 READYUV422 |
296 YUV422TORGB(v20, v21, v22) | 305 YUV422TORGB(v20, v21, v22) |
297 "subs %w4, %w4, #8 \n" | 306 "subs %w4, %w4, #8 \n" |
298 "movi v23.8b, #255 \n" /* A */ | 307 "movi v23.8b, #255 \n" /* A */ |
299 MEMACCESS(3) | 308 MEMACCESS(3) |
300 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 309 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
301 "b.gt 1b \n" | 310 "b.gt 1b \n" |
302 : "+r"(src_y), // %0 | 311 : "+r"(src_y), // %0 |
303 "+r"(src_u), // %1 | 312 "+r"(src_u), // %1 |
304 "+r"(src_v), // %2 | 313 "+r"(src_v), // %2 |
305 "+r"(dst_abgr), // %3 | 314 "+r"(dst_abgr), // %3 |
306 "+r"(width) // %4 | 315 "+r"(width) // %4 |
307 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 316 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
308 [kYToRgb]"r"(&kYToRgb) | 317 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
309 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 318 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
310 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 319 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
311 ); | 320 ); |
312 } | 321 } |
313 #endif // HAS_I422TOABGRROW_NEON | 322 #endif // HAS_I422TOABGRROW_NEON |
314 | 323 |
315 #ifdef HAS_I422TORGBAROW_NEON | 324 #ifdef HAS_I422TORGBAROW_NEON |
316 void I422ToRGBARow_NEON(const uint8* src_y, | 325 void I422ToRGBARow_NEON(const uint8* src_y, |
317 const uint8* src_u, | 326 const uint8* src_u, |
318 const uint8* src_v, | 327 const uint8* src_v, |
319 uint8* dst_rgba, | 328 uint8* dst_rgba, |
320 int width) { | 329 int width) { |
321 asm volatile ( | 330 asm volatile ( |
322 YUV422TORGB_SETUP_REG | 331 YUV422TORGB_SETUP_REG |
323 "1: \n" | 332 "1: \n" |
324 READYUV422 | 333 READYUV422 |
325 YUV422TORGB(v23, v22, v21) | 334 YUV422TORGB(v23, v22, v21) |
326 "subs %w4, %w4, #8 \n" | 335 "subs %w4, %w4, #8 \n" |
327 "movi v20.8b, #255 \n" /* A */ | 336 "movi v20.8b, #255 \n" /* A */ |
328 MEMACCESS(3) | 337 MEMACCESS(3) |
329 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 338 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
330 "b.gt 1b \n" | 339 "b.gt 1b \n" |
331 : "+r"(src_y), // %0 | 340 : "+r"(src_y), // %0 |
332 "+r"(src_u), // %1 | 341 "+r"(src_u), // %1 |
333 "+r"(src_v), // %2 | 342 "+r"(src_v), // %2 |
334 "+r"(dst_rgba), // %3 | 343 "+r"(dst_rgba), // %3 |
335 "+r"(width) // %4 | 344 "+r"(width) // %4 |
336 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 345 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
337 [kYToRgb]"r"(&kYToRgb) | 346 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
338 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 347 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
339 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 348 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
340 ); | 349 ); |
341 } | 350 } |
342 #endif // HAS_I422TORGBAROW_NEON | 351 #endif // HAS_I422TORGBAROW_NEON |
343 | 352 |
344 #ifdef HAS_I422TORGB24ROW_NEON | 353 #ifdef HAS_I422TORGB24ROW_NEON |
345 void I422ToRGB24Row_NEON(const uint8* src_y, | 354 void I422ToRGB24Row_NEON(const uint8* src_y, |
346 const uint8* src_u, | 355 const uint8* src_u, |
347 const uint8* src_v, | 356 const uint8* src_v, |
348 uint8* dst_rgb24, | 357 uint8* dst_rgb24, |
349 int width) { | 358 int width) { |
350 asm volatile ( | 359 asm volatile ( |
351 YUV422TORGB_SETUP_REG | 360 YUV422TORGB_SETUP_REG |
352 "1: \n" | 361 "1: \n" |
353 READYUV422 | 362 READYUV422 |
354 YUV422TORGB(v22, v21, v20) | 363 YUV422TORGB(v22, v21, v20) |
355 "subs %w4, %w4, #8 \n" | 364 "subs %w4, %w4, #8 \n" |
356 MEMACCESS(3) | 365 MEMACCESS(3) |
357 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" | 366 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" |
358 "b.gt 1b \n" | 367 "b.gt 1b \n" |
359 : "+r"(src_y), // %0 | 368 : "+r"(src_y), // %0 |
360 "+r"(src_u), // %1 | 369 "+r"(src_u), // %1 |
361 "+r"(src_v), // %2 | 370 "+r"(src_v), // %2 |
362 "+r"(dst_rgb24), // %3 | 371 "+r"(dst_rgb24), // %3 |
363 "+r"(width) // %4 | 372 "+r"(width) // %4 |
364 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 373 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
365 [kYToRgb]"r"(&kYToRgb) | 374 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
366 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 375 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
367 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 376 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
368 ); | 377 ); |
369 } | 378 } |
370 #endif // HAS_I422TORGB24ROW_NEON | 379 #endif // HAS_I422TORGB24ROW_NEON |
371 | 380 |
372 #ifdef HAS_I422TORAWROW_NEON | 381 #ifdef HAS_I422TORAWROW_NEON |
373 void I422ToRAWRow_NEON(const uint8* src_y, | 382 void I422ToRAWRow_NEON(const uint8* src_y, |
374 const uint8* src_u, | 383 const uint8* src_u, |
375 const uint8* src_v, | 384 const uint8* src_v, |
376 uint8* dst_raw, | 385 uint8* dst_raw, |
377 int width) { | 386 int width) { |
378 asm volatile ( | 387 asm volatile ( |
379 YUV422TORGB_SETUP_REG | 388 YUV422TORGB_SETUP_REG |
380 "1: \n" | 389 "1: \n" |
381 READYUV422 | 390 READYUV422 |
382 YUV422TORGB(v20, v21, v22) | 391 YUV422TORGB(v20, v21, v22) |
383 "subs %w4, %w4, #8 \n" | 392 "subs %w4, %w4, #8 \n" |
384 MEMACCESS(3) | 393 MEMACCESS(3) |
385 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" | 394 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" |
386 "b.gt 1b \n" | 395 "b.gt 1b \n" |
387 : "+r"(src_y), // %0 | 396 : "+r"(src_y), // %0 |
388 "+r"(src_u), // %1 | 397 "+r"(src_u), // %1 |
389 "+r"(src_v), // %2 | 398 "+r"(src_v), // %2 |
390 "+r"(dst_raw), // %3 | 399 "+r"(dst_raw), // %3 |
391 "+r"(width) // %4 | 400 "+r"(width) // %4 |
392 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 401 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
393 [kYToRgb]"r"(&kYToRgb) | 402 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
394 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 403 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
395 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 404 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
396 ); | 405 ); |
397 } | 406 } |
398 #endif // HAS_I422TORAWROW_NEON | 407 #endif // HAS_I422TORAWROW_NEON |
399 | 408 |
400 #define ARGBTORGB565 \ | 409 #define ARGBTORGB565 \ |
401 "shll v0.8h, v22.8b, #8 \n" /* R */ \ | 410 "shll v0.8h, v22.8b, #8 \n" /* R */ \ |
402 "shll v20.8h, v20.8b, #8 \n" /* B */ \ | 411 "shll v20.8h, v20.8b, #8 \n" /* B */ \ |
403 "shll v21.8h, v21.8b, #8 \n" /* G */ \ | 412 "shll v21.8h, v21.8b, #8 \n" /* G */ \ |
(...skipping 14 matching lines...) Expand all Loading... |
418 "subs %w4, %w4, #8 \n" | 427 "subs %w4, %w4, #8 \n" |
419 ARGBTORGB565 | 428 ARGBTORGB565 |
420 MEMACCESS(3) | 429 MEMACCESS(3) |
421 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. | 430 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. |
422 "b.gt 1b \n" | 431 "b.gt 1b \n" |
423 : "+r"(src_y), // %0 | 432 : "+r"(src_y), // %0 |
424 "+r"(src_u), // %1 | 433 "+r"(src_u), // %1 |
425 "+r"(src_v), // %2 | 434 "+r"(src_v), // %2 |
426 "+r"(dst_rgb565), // %3 | 435 "+r"(dst_rgb565), // %3 |
427 "+r"(width) // %4 | 436 "+r"(width) // %4 |
428 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 437 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
429 [kYToRgb]"r"(&kYToRgb) | 438 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
430 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 439 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
431 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 440 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
432 ); | 441 ); |
433 } | 442 } |
434 #endif // HAS_I422TORGB565ROW_NEON | 443 #endif // HAS_I422TORGB565ROW_NEON |
435 | 444 |
436 #define ARGBTOARGB1555 \ | 445 #define ARGBTOARGB1555 \ |
437 "shll v0.8h, v23.8b, #8 \n" /* A */ \ | 446 "shll v0.8h, v23.8b, #8 \n" /* A */ \ |
438 "shll v22.8h, v22.8b, #8 \n" /* R */ \ | 447 "shll v22.8h, v22.8b, #8 \n" /* R */ \ |
439 "shll v20.8h, v20.8b, #8 \n" /* B */ \ | 448 "shll v20.8h, v20.8b, #8 \n" /* B */ \ |
(...skipping 17 matching lines...) Expand all Loading... |
457 "movi v23.8b, #255 \n" | 466 "movi v23.8b, #255 \n" |
458 ARGBTOARGB1555 | 467 ARGBTOARGB1555 |
459 MEMACCESS(3) | 468 MEMACCESS(3) |
460 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. | 469 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. |
461 "b.gt 1b \n" | 470 "b.gt 1b \n" |
462 : "+r"(src_y), // %0 | 471 : "+r"(src_y), // %0 |
463 "+r"(src_u), // %1 | 472 "+r"(src_u), // %1 |
464 "+r"(src_v), // %2 | 473 "+r"(src_v), // %2 |
465 "+r"(dst_argb1555), // %3 | 474 "+r"(dst_argb1555), // %3 |
466 "+r"(width) // %4 | 475 "+r"(width) // %4 |
467 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 476 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
468 [kYToRgb]"r"(&kYToRgb) | 477 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
469 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 478 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
470 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 479 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
471 ); | 480 ); |
472 } | 481 } |
473 #endif // HAS_I422TOARGB1555ROW_NEON | 482 #endif // HAS_I422TOARGB1555ROW_NEON |
474 | 483 |
475 #define ARGBTOARGB4444 \ | 484 #define ARGBTOARGB4444 \ |
476 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ | 485 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ |
477 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ | 486 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ |
478 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ | 487 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ |
(...skipping 19 matching lines...) Expand all Loading... |
498 "movi v23.8b, #255 \n" | 507 "movi v23.8b, #255 \n" |
499 ARGBTOARGB4444 | 508 ARGBTOARGB4444 |
500 MEMACCESS(3) | 509 MEMACCESS(3) |
501 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. | 510 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. |
502 "b.gt 1b \n" | 511 "b.gt 1b \n" |
503 : "+r"(src_y), // %0 | 512 : "+r"(src_y), // %0 |
504 "+r"(src_u), // %1 | 513 "+r"(src_u), // %1 |
505 "+r"(src_v), // %2 | 514 "+r"(src_v), // %2 |
506 "+r"(dst_argb4444), // %3 | 515 "+r"(dst_argb4444), // %3 |
507 "+r"(width) // %4 | 516 "+r"(width) // %4 |
508 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 517 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
509 [kYToRgb]"r"(&kYToRgb) | 518 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
510 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 519 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
511 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 520 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
512 ); | 521 ); |
513 } | 522 } |
514 #endif // HAS_I422TOARGB4444ROW_NEON | 523 #endif // HAS_I422TOARGB4444ROW_NEON |
515 | 524 |
516 #ifdef HAS_I400TOARGBROW_NEON | 525 #ifdef HAS_I400TOARGBROW_NEON |
517 void I400ToARGBRow_NEON(const uint8* src_y, | 526 void I400ToARGBRow_NEON(const uint8* src_y, |
518 uint8* dst_argb, | 527 uint8* dst_argb, |
519 int width) { | 528 int width) { |
520 int64 width64 = (int64)(width); | 529 int64 width64 = (int64)(width); |
521 asm volatile ( | 530 asm volatile ( |
522 YUV422TORGB_SETUP_REG | 531 YUV422TORGB_SETUP_REG |
523 "1: \n" | 532 "1: \n" |
524 READYUV400 | 533 READYUV400 |
525 YUV422TORGB(v22, v21, v20) | 534 YUV422TORGB(v22, v21, v20) |
526 "subs %w2, %w2, #8 \n" | 535 "subs %w2, %w2, #8 \n" |
527 "movi v23.8b, #255 \n" | 536 "movi v23.8b, #255 \n" |
528 MEMACCESS(1) | 537 MEMACCESS(1) |
529 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 538 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
530 "b.gt 1b \n" | 539 "b.gt 1b \n" |
531 : "+r"(src_y), // %0 | 540 : "+r"(src_y), // %0 |
532 "+r"(dst_argb), // %1 | 541 "+r"(dst_argb), // %1 |
533 "+r"(width64) // %2 | 542 "+r"(width64) // %2 |
534 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 543 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
535 [kYToRgb]"r"(&kYToRgb) | 544 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
536 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 545 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
537 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 546 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
538 ); | 547 ); |
539 } | 548 } |
540 #endif // HAS_I400TOARGBROW_NEON | 549 #endif // HAS_I400TOARGBROW_NEON |
541 | 550 |
542 #ifdef HAS_J400TOARGBROW_NEON | 551 #ifdef HAS_J400TOARGBROW_NEON |
543 void J400ToARGBRow_NEON(const uint8* src_y, | 552 void J400ToARGBRow_NEON(const uint8* src_y, |
544 uint8* dst_argb, | 553 uint8* dst_argb, |
545 int width) { | 554 int width) { |
(...skipping 29 matching lines...) Expand all Loading... |
575 YUV422TORGB(v22, v21, v20) | 584 YUV422TORGB(v22, v21, v20) |
576 "subs %w3, %w3, #8 \n" | 585 "subs %w3, %w3, #8 \n" |
577 "movi v23.8b, #255 \n" | 586 "movi v23.8b, #255 \n" |
578 MEMACCESS(2) | 587 MEMACCESS(2) |
579 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 588 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
580 "b.gt 1b \n" | 589 "b.gt 1b \n" |
581 : "+r"(src_y), // %0 | 590 : "+r"(src_y), // %0 |
582 "+r"(src_uv), // %1 | 591 "+r"(src_uv), // %1 |
583 "+r"(dst_argb), // %2 | 592 "+r"(dst_argb), // %2 |
584 "+r"(width) // %3 | 593 "+r"(width) // %3 |
585 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 594 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
586 [kYToRgb]"r"(&kYToRgb) | 595 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
587 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 596 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
588 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 597 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
589 ); | 598 ); |
590 } | 599 } |
591 #endif // HAS_NV12TOARGBROW_NEON | 600 #endif // HAS_NV12TOARGBROW_NEON |
592 | 601 |
593 #ifdef HAS_NV21TOARGBROW_NEON | 602 #ifdef HAS_NV21TOARGBROW_NEON |
594 void NV21ToARGBRow_NEON(const uint8* src_y, | 603 void NV21ToARGBRow_NEON(const uint8* src_y, |
595 const uint8* src_uv, | 604 const uint8* src_uv, |
596 uint8* dst_argb, | 605 uint8* dst_argb, |
597 int width) { | 606 int width) { |
598 asm volatile ( | 607 asm volatile ( |
599 YUV422TORGB_SETUP_REG | 608 YUV422TORGB_SETUP_REG |
600 "1: \n" | 609 "1: \n" |
601 READNV21 | 610 READNV21 |
602 YUV422TORGB(v22, v21, v20) | 611 YUV422TORGB(v22, v21, v20) |
603 "subs %w3, %w3, #8 \n" | 612 "subs %w3, %w3, #8 \n" |
604 "movi v23.8b, #255 \n" | 613 "movi v23.8b, #255 \n" |
605 MEMACCESS(2) | 614 MEMACCESS(2) |
606 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 615 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
607 "b.gt 1b \n" | 616 "b.gt 1b \n" |
608 : "+r"(src_y), // %0 | 617 : "+r"(src_y), // %0 |
609 "+r"(src_uv), // %1 | 618 "+r"(src_uv), // %1 |
610 "+r"(dst_argb), // %2 | 619 "+r"(dst_argb), // %2 |
611 "+r"(width) // %3 | 620 "+r"(width) // %3 |
612 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 621 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
613 [kYToRgb]"r"(&kYToRgb) | 622 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 623 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 624 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
616 ); | 625 ); |
617 } | 626 } |
618 #endif // HAS_NV21TOARGBROW_NEON | 627 #endif // HAS_NV21TOARGBROW_NEON |
619 | 628 |
620 #ifdef HAS_NV12TORGB565ROW_NEON | 629 #ifdef HAS_NV12TORGB565ROW_NEON |
621 void NV12ToRGB565Row_NEON(const uint8* src_y, | 630 void NV12ToRGB565Row_NEON(const uint8* src_y, |
622 const uint8* src_uv, | 631 const uint8* src_uv, |
623 uint8* dst_rgb565, | 632 uint8* dst_rgb565, |
624 int width) { | 633 int width) { |
625 asm volatile ( | 634 asm volatile ( |
626 YUV422TORGB_SETUP_REG | 635 YUV422TORGB_SETUP_REG |
627 "1: \n" | 636 "1: \n" |
628 READNV12 | 637 READNV12 |
629 YUV422TORGB(v22, v21, v20) | 638 YUV422TORGB(v22, v21, v20) |
630 "subs %w3, %w3, #8 \n" | 639 "subs %w3, %w3, #8 \n" |
631 ARGBTORGB565 | 640 ARGBTORGB565 |
632 MEMACCESS(2) | 641 MEMACCESS(2) |
633 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. | 642 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. |
634 "b.gt 1b \n" | 643 "b.gt 1b \n" |
635 : "+r"(src_y), // %0 | 644 : "+r"(src_y), // %0 |
636 "+r"(src_uv), // %1 | 645 "+r"(src_uv), // %1 |
637 "+r"(dst_rgb565), // %2 | 646 "+r"(dst_rgb565), // %2 |
638 "+r"(width) // %3 | 647 "+r"(width) // %3 |
639 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 648 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
640 [kYToRgb]"r"(&kYToRgb) | 649 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
641 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 650 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
642 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 651 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
643 ); | 652 ); |
644 } | 653 } |
645 #endif // HAS_NV12TORGB565ROW_NEON | 654 #endif // HAS_NV12TORGB565ROW_NEON |
646 | 655 |
647 #ifdef HAS_NV21TORGB565ROW_NEON | 656 #ifdef HAS_NV21TORGB565ROW_NEON |
648 void NV21ToRGB565Row_NEON(const uint8* src_y, | 657 void NV21ToRGB565Row_NEON(const uint8* src_y, |
649 const uint8* src_uv, | 658 const uint8* src_uv, |
650 uint8* dst_rgb565, | 659 uint8* dst_rgb565, |
651 int width) { | 660 int width) { |
652 asm volatile ( | 661 asm volatile ( |
653 YUV422TORGB_SETUP_REG | 662 YUV422TORGB_SETUP_REG |
654 "1: \n" | 663 "1: \n" |
655 READNV21 | 664 READNV21 |
656 YUV422TORGB(v22, v21, v20) | 665 YUV422TORGB(v22, v21, v20) |
657 "subs %w3, %w3, #8 \n" | 666 "subs %w3, %w3, #8 \n" |
658 ARGBTORGB565 | 667 ARGBTORGB565 |
659 MEMACCESS(2) | 668 MEMACCESS(2) |
660 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. | 669 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. |
661 "b.gt 1b \n" | 670 "b.gt 1b \n" |
662 : "+r"(src_y), // %0 | 671 : "+r"(src_y), // %0 |
663 "+r"(src_uv), // %1 | 672 "+r"(src_uv), // %1 |
664 "+r"(dst_rgb565), // %2 | 673 "+r"(dst_rgb565), // %2 |
665 "+r"(width) // %3 | 674 "+r"(width) // %3 |
666 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 675 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
667 [kYToRgb]"r"(&kYToRgb) | 676 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
668 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 677 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
669 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 678 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
670 ); | 679 ); |
671 } | 680 } |
672 #endif // HAS_NV21TORGB565ROW_NEON | 681 #endif // HAS_NV21TORGB565ROW_NEON |
673 | 682 |
// YUY2ToARGBRow_NEON: converts one row read from src_yuy2 into 4-byte pixels
// written to dst_argb.
//   src_yuy2 - read/write asm operand %0.
//   dst_argb - operand %1; post-incremented by 32 bytes (8 pixels x 4 bytes)
//              per loop pass.
//   width    - pixel count; copied into a 64-bit local that serves as
//              operand %2 and is decremented by 8 per loop pass.
// The loop is bottom-tested (subs ... b.gt 1b), so the body always executes at
// least once, and every pass stores a full group of 8 pixels regardless of how
// many pixels remain.
// NOTE(review): YUV422TORGB_SETUP_REG, READYUY2 and MEMACCESS are macros
// defined outside this view. Presumably READYUY2 loads and advances src_yuy2
// and the setup macro consumes the [kUVBiasBGR]/[kYToRgb] operands - confirm
// against their definitions.
674 #ifdef HAS_YUY2TOARGBROW_NEON | 683 #ifdef HAS_YUY2TOARGBROW_NEON |
675 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 684 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
676 uint8* dst_argb, | 685 uint8* dst_argb, |
677 int width) { | 686 int width) { |
// The counter operand is a 64-bit copy of width, although the asm below only
// touches its 32-bit view (%w2).
678 int64 width64 = (int64)(width); | 687 int64 width64 = (int64)(width); |
679 asm volatile ( | 688 asm volatile ( |
680 YUV422TORGB_SETUP_REG | 689 YUV422TORGB_SETUP_REG |
681 "1: \n" | 690 "1: \n" |
682 READYUY2 | 691 READYUY2 |
// The conversion macro is given v22, v21, v20 as its three result registers.
683 YUV422TORGB(v22, v21, v20) | 692 YUV422TORGB(v22, v21, v20) |
// The flags set by this subs are consumed by the b.gt after the store.
684 "subs %w2, %w2, #8 \n" | 693 "subs %w2, %w2, #8 \n" |
// v23 supplies a constant 255 as the fourth byte of every stored pixel.
685 "movi v23.8b, #255 \n" | 694 "movi v23.8b, #255 \n" |
// MEMACCESS is passed the operand number of the pointer used by the store.
686 MEMACCESS(1) | 695 MEMACCESS(1) |
// Interleaving store: the bytes of each pixel come from v20, v21, v22, v23
// in that order.
687 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 696 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
688 "b.gt 1b \n" | 697 "b.gt 1b \n" |
689 : "+r"(src_yuy2), // %0 | 698 : "+r"(src_yuy2), // %0 |
690 "+r"(dst_argb), // %1 | 699 "+r"(dst_argb), // %1 |
691 "+r"(width64) // %2 | 700 "+r"(width64) // %2 |
// Constant tables are passed by address as named input operands. The OLD
// column uses standalone kUVBiasBGR/kYToRgb symbols; the NEW column takes
// the same-named members of kYuvConstantsNEON.
692 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 701 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
693 [kYToRgb]"r"(&kYToRgb) | 702 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
// Clobbers: condition flags, memory, and vector registers v0-v7, v20-v30.
694 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 703 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
695 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 704 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
696 ); | 705 ); |
697 } | 706 } |
698 #endif // HAS_YUY2TOARGBROW_NEON | 707 #endif // HAS_YUY2TOARGBROW_NEON |
699 | 708 |
// UYVYToARGBRow_NEON: converts one row read from src_uyvy into 4-byte pixels
// written to dst_argb.
//   src_uyvy - read/write asm operand %0.
//   dst_argb - operand %1; post-incremented by 32 bytes (8 pixels x 4 bytes)
//              per loop pass.
//   width    - pixel count; copied into a 64-bit local that serves as
//              operand %2 and is decremented by 8 per loop pass.
// The loop is bottom-tested (subs ... b.gt 1b), so the body always executes at
// least once, and every pass stores a full group of 8 pixels regardless of how
// many pixels remain.
// NOTE(review): YUV422TORGB_SETUP_REG, READUYVY and MEMACCESS are macros
// defined outside this view. Presumably READUYVY loads and advances src_uyvy
// and the setup macro consumes the [kUVBiasBGR]/[kYToRgb] operands - confirm
// against their definitions.
700 #ifdef HAS_UYVYTOARGBROW_NEON | 709 #ifdef HAS_UYVYTOARGBROW_NEON |
701 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 710 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
702 uint8* dst_argb, | 711 uint8* dst_argb, |
703 int width) { | 712 int width) { |
// The counter operand is a 64-bit copy of width, although the asm below only
// touches its 32-bit view (%w2).
704 int64 width64 = (int64)(width); | 713 int64 width64 = (int64)(width); |
705 asm volatile ( | 714 asm volatile ( |
706 YUV422TORGB_SETUP_REG | 715 YUV422TORGB_SETUP_REG |
707 "1: \n" | 716 "1: \n" |
708 READUYVY | 717 READUYVY |
// The conversion macro is given v22, v21, v20 as its three result registers.
709 YUV422TORGB(v22, v21, v20) | 718 YUV422TORGB(v22, v21, v20) |
// The flags set by this subs are consumed by the b.gt after the store.
710 "subs %w2, %w2, #8 \n" | 719 "subs %w2, %w2, #8 \n" |
// v23 supplies a constant 255 as the fourth byte of every stored pixel.
711 "movi v23.8b, #255 \n" | 720 "movi v23.8b, #255 \n" |
// MEMACCESS is passed the operand number of the pointer used by the store.
712 MEMACCESS(1) | 721 MEMACCESS(1) |
// Interleaving store: the bytes of each pixel come from v20, v21, v22, v23
// in that order.
713 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" | 722 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" |
714 "b.gt 1b \n" | 723 "b.gt 1b \n" |
715 : "+r"(src_uyvy), // %0 | 724 : "+r"(src_uyvy), // %0 |
716 "+r"(dst_argb), // %1 | 725 "+r"(dst_argb), // %1 |
717 "+r"(width64) // %2 | 726 "+r"(width64) // %2 |
// Constant tables are passed by address as named input operands. The OLD
// column uses standalone kUVBiasBGR/kYToRgb symbols; the NEW column takes
// the same-named members of kYuvConstantsNEON.
718 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 727 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
719 [kYToRgb]"r"(&kYToRgb) | 728 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
// Clobbers: condition flags, memory, and vector registers v0-v7, v20-v30.
720 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 729 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
721 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 730 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
722 ); | 731 ); |
723 } | 732 } |
724 #endif // HAS_UYVYTOARGBROW_NEON | 733 #endif // HAS_UYVYTOARGBROW_NEON |
725 | 734 |
726 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. | 735 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. |
727 #ifdef HAS_SPLITUVROW_NEON | 736 #ifdef HAS_SPLITUVROW_NEON |
728 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | 737 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, |
729 int width) { | 738 int width) { |
(...skipping 2348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3078 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 3087 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
3079 ); | 3088 ); |
3080 } | 3089 } |
3081 #endif // HAS_SOBELYROW_NEON | 3090 #endif // HAS_SOBELYROW_NEON |
3082 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 3091 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
3083 | 3092 |
3084 #ifdef __cplusplus | 3093 #ifdef __cplusplus |
3085 } // extern "C" | 3094 } // extern "C" |
3086 } // namespace libyuv | 3095 } // namespace libyuv |
3087 #endif | 3096 #endif |
OLD | NEW |