Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 181523002: ARM Skia NEON patches - 22 - S32_D565_Blend (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Rebase Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm_neon.h" 8 #include "SkBlitRow_opts_arm_neon.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 // Leftovers 45 // Leftovers
46 while (count > 0) { 46 while (count > 0) {
47 SkPMColor c = *src++; 47 SkPMColor c = *src++;
48 SkPMColorAssert(c); 48 SkPMColorAssert(c);
49 *dst = SkPixel32ToPixel16_ToU16(c); 49 *dst = SkPixel32ToPixel16_ToU16(c);
50 dst++; 50 dst++;
51 count--; 51 count--;
52 }; 52 };
53 } 53 }
54 54
55 void S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst,
56 const SkPMColor* SK_RESTRICT src, int count,
57 U8CPU alpha, int /*x*/, int /*y*/) {
58 SkASSERT(255 > alpha);
59
60 uint16x8_t vmask_blue, vscale;
61
62 // prepare constants
63 vscale = vdupq_n_u16(SkAlpha255To256(alpha));
64 vmask_blue = vmovq_n_u16(0x1F);
65
66 while (count >= 8) {
67 uint16x8_t vdst, vdst_r, vdst_g, vdst_b;
68 uint16x8_t vres_r, vres_g, vres_b;
69 uint8x8_t vsrc_r, vsrc_g, vsrc_b;
70
71 // Load src
72 {
73 register uint8x8_t d0 asm("d0");
74 register uint8x8_t d1 asm("d1");
75 register uint8x8_t d2 asm("d2");
76 register uint8x8_t d3 asm("d3");
77
78 asm (
79 "vld4.8 {d0-d3},[%[src]]!"
80 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
81 :
82 );
83 vsrc_g = d1;
84 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)
85 vsrc_r = d2; vsrc_b = d0;
86 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)
87 vsrc_r = d0; vsrc_b = d2;
88 #endif
89 }
90
91 // Load and unpack dst
92 vdst = vld1q_u16(dst);
93 vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes
94 vdst_b = vandq_u16(vdst, vmask_blue); // extract blue
95 vdst_r = vshrq_n_u16(vdst, 6+5); // extract red
96 vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green
97
98 // Shift src to 565
99 vsrc_r = vshr_n_u8(vsrc_r, 3); // shift red to 565 range
100 vsrc_g = vshr_n_u8(vsrc_g, 2); // shift green to 565 range
101 vsrc_b = vshr_n_u8(vsrc_b, 3); // shift blue to 565 range
102
103 // Scale src - dst
104 vres_r = vmovl_u8(vsrc_r) - vdst_r;
105 vres_g = vmovl_u8(vsrc_g) - vdst_g;
106 vres_b = vmovl_u8(vsrc_b) - vdst_b;
107
108 vres_r = vshrq_n_u16(vres_r * vscale, 8);
109 vres_g = vshrq_n_u16(vres_g * vscale, 8);
110 vres_b = vshrq_n_u16(vres_b * vscale, 8);
111
112 vres_r += vdst_r;
113 vres_g += vdst_g;
114 vres_b += vdst_b;
115
116 // Combine
117 vres_b = vsliq_n_u16(vres_b, vres_g, 5); // insert green into blue
118 vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue
119
120 // Store
121 vst1q_u16(dst, vres_b);
122 dst += 8;
123 count -= 8;
124 }
125 if (count > 0) {
126 int scale = SkAlpha255To256(alpha);
127 do {
128 SkPMColor c = *src++;
129 SkPMColorAssert(c);
130 uint16_t d = *dst;
131 *dst++ = SkPackRGB16(
132 SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale),
133 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale),
134 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale));
135 } while (--count != 0);
136 }
137 }
138
55 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 139 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
56 const SkPMColor* SK_RESTRICT src, int count, 140 const SkPMColor* SK_RESTRICT src, int count,
57 U8CPU alpha, int /*x*/, int /*y*/) { 141 U8CPU alpha, int /*x*/, int /*y*/) {
58 SkASSERT(255 == alpha); 142 SkASSERT(255 == alpha);
59 143
60 if (count >= 8) { 144 if (count >= 8) {
61 uint16_t* SK_RESTRICT keep_dst = 0; 145 uint16_t* SK_RESTRICT keep_dst = 0;
62 146
63 asm volatile ( 147 asm volatile (
64 "ands ip, %[count], #7 \n\t" 148 "ands ip, %[count], #7 \n\t"
(...skipping 1313 matching lines...) Expand 10 before | Expand all | Expand 10 after
1378 dst += 1; 1462 dst += 1;
1379 count--; 1463 count--;
1380 } 1464 }
1381 } 1465 }
1382 } 1466 }
1383 1467
1384 /////////////////////////////////////////////////////////////////////////////// 1468 ///////////////////////////////////////////////////////////////////////////////
1385 1469
1386 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { 1470 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
1387 // no dither 1471 // no dither
1388 // NOTE: For the S32_D565_Blend function below, we don't have a special
1389 // version that assumes that each source pixel is opaque. But our
1390 // S32A is still faster than the default, so use it.
1391 S32_D565_Opaque_neon, 1472 S32_D565_Opaque_neon,
1392 S32A_D565_Blend_neon, // really S32_D565_Blend 1473 S32_D565_Blend_neon,
1393 S32A_D565_Opaque_neon, 1474 S32A_D565_Opaque_neon,
1394 S32A_D565_Blend_neon, 1475 S32A_D565_Blend_neon,
1395 1476
1396 // dither 1477 // dither
1397 S32_D565_Opaque_Dither_neon, 1478 S32_D565_Opaque_Dither_neon,
1398 S32_D565_Blend_Dither_neon, 1479 S32_D565_Blend_Dither_neon,
1399 S32A_D565_Opaque_Dither_neon, 1480 S32A_D565_Opaque_Dither_neon,
1400 NULL, // S32A_D565_Blend_Dither 1481 NULL, // S32A_D565_Blend_Dither
1401 }; 1482 };
1402 1483
(...skipping 10 matching lines...) Expand all
1413 * case where we do not inspect the src alpha. 1494 * case where we do not inspect the src alpha.
1414 */ 1495 */
1415 #if SK_A32_SHIFT == 24 1496 #if SK_A32_SHIFT == 24
1416 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor 1497 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor
1417 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1498 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1418 #else 1499 #else
1419 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1500 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1420 #endif 1501 #endif
1421 S32A_Blend_BlitRow32_neon // S32A_Blend 1502 S32A_Blend_BlitRow32_neon // S32A_Blend
1422 }; 1503 };
OLDNEW
« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698