Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 18459008: ARM Skia NEON patches - 13 - S32A_Opaque (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm.h" 8 #include "SkBlitRow_opts_arm.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after
419 419
420 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 420 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
421 alpha_mask = vld1_u8(alpha_mask_setup); 421 alpha_mask = vld1_u8(alpha_mask_setup);
422 422
423 /* do the NEON unrolled code */ 423 /* do the NEON unrolled code */
424 #define UNROLL 4 424 #define UNROLL 4
425 while (count >= UNROLL) { 425 while (count >= UNROLL) {
426 uint8x8_t src_raw, dst_raw, dst_final; 426 uint8x8_t src_raw, dst_raw, dst_final;
427 uint8x8_t src_raw_2, dst_raw_2, dst_final_2; 427 uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
428 428
429 __builtin_prefetch(src+32);
430 __builtin_prefetch(dst+32);
djsollen 2013/07/11 15:24:47 can you put the comment here that this *may* be sl
kevin.petit.not.used.account 2013/07/11 15:42:14 Done.
431
429 /* get the source */ 432 /* get the source */
430 src_raw = vreinterpret_u8_u32(vld1_u32(src)); 433 src_raw = vreinterpret_u8_u32(vld1_u32(src));
431 #if UNROLL > 2 434 #if UNROLL > 2
432 src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); 435 src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));
433 #endif 436 #endif
434 437
435 /* get and hold the dst too */ 438 /* get and hold the dst too */
436 dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); 439 dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
437 #if UNROLL > 2 440 #if UNROLL > 2
438 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); 441 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));
439 #endif 442 #endif
440 443
441 /* 1st and 2nd bits of the unrolling */ 444 /* 1st and 2nd bits of the unrolling */
442 { 445 {
443 uint8x8_t dst_cooked; 446 uint8x8_t dst_cooked;
444 uint16x8_t dst_wide; 447 uint16x8_t dst_wide;
445 uint8x8_t alpha_narrow; 448 uint8x8_t alpha_narrow;
446 uint16x8_t alpha_wide; 449 uint16x8_t alpha_wide;
447 450
448 /* get the alphas spread out properly */ 451 /* get the alphas spread out properly */
449 alpha_narrow = vtbl1_u8(src_raw, alpha_mask); 452 alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
450 #if 1
451 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
452 /* we collapsed (255-a)+1 ... */
453 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 453 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
454 #else
455 alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
456 alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
457 #endif
458 454
459 /* spread the dest */ 455 /* spread the dest */
460 dst_wide = vmovl_u8(dst_raw); 456 dst_wide = vmovl_u8(dst_raw);
461 457
462 /* alpha mul the dest */ 458 /* alpha mul the dest */
463 dst_wide = vmulq_u16 (dst_wide, alpha_wide); 459 dst_wide = vmulq_u16 (dst_wide, alpha_wide);
464 dst_cooked = vshrn_n_u16(dst_wide, 8); 460 dst_cooked = vshrn_n_u16(dst_wide, 8);
465 461
466 /* sum -- ignoring any byte lane overflows */ 462 /* sum -- ignoring any byte lane overflows */
467 dst_final = vadd_u8(src_raw, dst_cooked); 463 dst_final = vadd_u8(src_raw, dst_cooked);
468 } 464 }
469 465
470 #if UNROLL > 2 466 #if UNROLL > 2
471 /* the 3rd and 4th bits of our unrolling */ 467 /* the 3rd and 4th bits of our unrolling */
472 { 468 {
473 uint8x8_t dst_cooked; 469 uint8x8_t dst_cooked;
474 uint16x8_t dst_wide; 470 uint16x8_t dst_wide;
475 uint8x8_t alpha_narrow; 471 uint8x8_t alpha_narrow;
476 uint16x8_t alpha_wide; 472 uint16x8_t alpha_wide;
477 473
478 alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); 474 alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
479 #if 1
480 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
481 /* we collapsed (255-a)+1 ... */
482 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 475 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
483 #else
484 alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
485 alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
486 #endif
487 476
488 /* spread the dest */ 477 /* spread the dest */
489 dst_wide = vmovl_u8(dst_raw_2); 478 dst_wide = vmovl_u8(dst_raw_2);
490 479
491 /* alpha mul the dest */ 480 /* alpha mul the dest */
492 dst_wide = vmulq_u16 (dst_wide, alpha_wide); 481 dst_wide = vmulq_u16 (dst_wide, alpha_wide);
493 dst_cooked = vshrn_n_u16(dst_wide, 8); 482 dst_cooked = vshrn_n_u16(dst_wide, 8);
494 483
495 /* sum -- ignoring any byte lane overflows */ 484 /* sum -- ignoring any byte lane overflows */
496 dst_final_2 = vadd_u8(src_raw_2, dst_cooked); 485 dst_final_2 = vadd_u8(src_raw_2, dst_cooked);
(...skipping 792 matching lines...) Expand 10 before | Expand all | Expand 10 after
1289 * case where we do not inspect the src alpha. 1278 * case where we do not inspect the src alpha.
1290 */ 1279 */
1291 #if SK_A32_SHIFT == 24 1280 #if SK_A32_SHIFT == 24
1292 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor 1281 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor
1293 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1282 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1294 #else 1283 #else
1295 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1284 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1296 #endif 1285 #endif
1297 S32A_Blend_BlitRow32_arm // S32A_Blend 1286 S32A_Blend_BlitRow32_arm // S32A_Blend
1298 }; 1287 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698