OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBlitRow_opts_arm_neon.h" | 8 #include "SkBlitRow_opts_arm_neon.h" |
9 | 9 |
10 #include "SkBlitMask.h" | 10 #include "SkBlitMask.h" |
(...skipping 372 matching lines...)
383 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, | 383 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, |
384 const SkPMColor* SK_RESTRICT src, int count, | 384 const SkPMColor* SK_RESTRICT src, int count, |
385 U8CPU alpha, int /*x*/, int /*y*/) { | 385 U8CPU alpha, int /*x*/, int /*y*/) { |
386 SkASSERT(255 == alpha); | 386 SkASSERT(255 == alpha); |
387 | 387 |
388 if (count >= 16) { | 388 if (count >= 16) { |
389 asm ( | 389 asm ( |
390 "movi v4.8h, #0x80 \t\n" | 390 "movi v4.8h, #0x80 \t\n" |
391 | 391 |
392 "1: \t\n" | 392 "1: \t\n" |
393 "sub %[count], %[count], #16 \t\n" | 393 "sub %w[count], %w[count], #16 \t\n" |
394 "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n" | 394 "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n" |
395 "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n" | 395 "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n" |
396 "prfm pldl1keep, [%[src],#512] \t\n" | 396 "prfm pldl1keep, [%[src],#512] \t\n" |
397 "prfm pldl1keep, [%[dst],#256] \t\n" | 397 "prfm pldl1keep, [%[dst],#256] \t\n" |
398 "ushr v20.8h, v17.8h, #5 \t\n" | 398 "ushr v20.8h, v17.8h, #5 \t\n" |
399 "ushr v31.8h, v16.8h, #5 \t\n" | 399 "ushr v31.8h, v16.8h, #5 \t\n" |
400 "xtn v6.8b, v31.8h \t\n" | 400 "xtn v6.8b, v31.8h \t\n" |
401 "xtn2 v6.16b, v20.8h \t\n" | 401 "xtn2 v6.16b, v20.8h \t\n" |
402 "ushr v20.8h, v17.8h, #11 \t\n" | 402 "ushr v20.8h, v17.8h, #11 \t\n" |
403 "shl v19.16b, v6.16b, #2 \t\n" | 403 "shl v19.16b, v6.16b, #2 \t\n" |
404 "ushr v31.8h, v16.8h, #11 \t\n" | 404 "ushr v31.8h, v16.8h, #11 \t\n" |
405 "xtn v22.8b, v31.8h \t\n" | 405 "xtn v22.8b, v31.8h \t\n" |
406 "xtn2 v22.16b, v20.8h \t\n" | 406 "xtn2 v22.16b, v20.8h \t\n" |
407 "shl v18.16b, v22.16b, #3 \t\n" | 407 "shl v18.16b, v22.16b, #3 \t\n" |
408 "mvn v3.16b, v3.16b \t\n" | 408 "mvn v3.16b, v3.16b \t\n" |
409 "xtn v16.8b, v16.8h \t\n" | 409 "xtn v16.8b, v16.8h \t\n" |
410 "mov v7.16b, v4.16b \t\n" | 410 "mov v7.16b, v4.16b \t\n" |
411 "xtn2 v16.16b, v17.8h \t\n" | 411 "xtn2 v16.16b, v17.8h \t\n" |
412 "umlal v7.8h, v3.8b, v19.8b \t\n" | 412 "umlal v7.8h, v3.8b, v19.8b \t\n" |
413 "shl v16.16b, v16.16b, #3 \t\n" | 413 "shl v16.16b, v16.16b, #3 \t\n" |
414 "mov v22.16b, v4.16b \t\n" | 414 "mov v22.16b, v4.16b \t\n" |
415 "ushr v24.8h, v7.8h, #6 \t\n" | 415 "ushr v24.8h, v7.8h, #6 \t\n" |
416 "umlal v22.8h, v3.8b, v18.8b \t\n" | 416 "umlal v22.8h, v3.8b, v18.8b \t\n" |
417 "ushr v20.8h, v22.8h, #5 \t\n" | 417 "ushr v20.8h, v22.8h, #5 \t\n" |
418 "addhn v20.8b, v22.8h, v20.8h \t\n" | 418 "addhn v20.8b, v22.8h, v20.8h \t\n" |
419 "cmp %[count], #16 \t\n" | 419 "cmp %w[count], #16 \t\n" |
420 "mov v6.16b, v4.16b \t\n" | 420 "mov v6.16b, v4.16b \t\n" |
421 "mov v5.16b, v4.16b \t\n" | 421 "mov v5.16b, v4.16b \t\n" |
422 "umlal v6.8h, v3.8b, v16.8b \t\n" | 422 "umlal v6.8h, v3.8b, v16.8b \t\n" |
423 "umlal2 v5.8h, v3.16b, v19.16b \t\n" | 423 "umlal2 v5.8h, v3.16b, v19.16b \t\n" |
424 "mov v17.16b, v4.16b \t\n" | 424 "mov v17.16b, v4.16b \t\n" |
425 "ushr v19.8h, v6.8h, #5 \t\n" | 425 "ushr v19.8h, v6.8h, #5 \t\n" |
426 "umlal2 v17.8h, v3.16b, v18.16b \t\n" | 426 "umlal2 v17.8h, v3.16b, v18.16b \t\n" |
427 "addhn v7.8b, v7.8h, v24.8h \t\n" | 427 "addhn v7.8b, v7.8h, v24.8h \t\n" |
428 "ushr v18.8h, v5.8h, #6 \t\n" | 428 "ushr v18.8h, v5.8h, #6 \t\n" |
429 "ushr v21.8h, v17.8h, #5 \t\n" | 429 "ushr v21.8h, v17.8h, #5 \t\n" |
(...skipping 1145 matching lines...)
1575 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, | 1575 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, |
1576 #else | 1576 #else |
1577 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, | 1577 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, |
1578 #endif | 1578 #endif |
1579 #ifdef SK_CPU_ARM32 | 1579 #ifdef SK_CPU_ARM32 |
1580 S32A_Blend_BlitRow32_neon // S32A_Blend | 1580 S32A_Blend_BlitRow32_neon // S32A_Blend |
1581 #else | 1581 #else |
1582 nullptr | 1582 nullptr |
1583 #endif | 1583 #endif |
1584 }; | 1584 }; |
OLD | NEW |