Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(978)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 22269003: ARM Skia NEON patches - 23 - S32_D565_Opaque_Dither cleanup/bugfix/speed (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm.h" 8 #include "SkBlitRow_opts_arm.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 1015 matching lines...) Expand 10 before | Expand all | Expand 10 after
1026 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1026 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1027 } 1027 }
1028 dst += 1; 1028 dst += 1;
1029 DITHER_INC_X(x); 1029 DITHER_INC_X(x);
1030 } while (--count != 0); 1030 } while (--count != 0);
1031 } 1031 }
1032 } 1032 }
1033 1033
1034 /////////////////////////////////////////////////////////////////////////////// 1034 ///////////////////////////////////////////////////////////////////////////////
1035 1035
1036 /* 2009/10/27: RBE says "a work in progress"; debugging says ok;
1037 * speedup untested, but ARM version is 26 insns/iteration and
1038 * this NEON version is 21 insns/iteration-of-8 (2.62insns/element)
1039 * which is 10x the native version; that's pure instruction counts,
1040 * not accounting for any instruction or memory latencies.
1041 */
1042
1043 #undef DEBUG_S32_OPAQUE_DITHER 1036 #undef DEBUG_S32_OPAQUE_DITHER
1044 1037
1045 void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1038 void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
1046 const SkPMColor* SK_RESTRICT src, 1039 const SkPMColor* SK_RESTRICT src,
1047 int count, U8CPU alpha, int x, int y) { 1040 int count, U8CPU alpha, int x, int y) {
1048 SkASSERT(255 == alpha); 1041 SkASSERT(255 == alpha);
1049 1042
1050 #define UNROLL 8 1043 #define UNROLL 8
1051 if (count >= UNROLL) { 1044 if (count >= UNROLL) {
1052 uint8x8_t d; 1045 uint8x8_t d;
1053 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1046 const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];
1054 d = vld1_u8(dstart); 1047 d = vld1_u8(dstart);
1055 1048
1056 while (count >= UNROLL) { 1049 while (count >= UNROLL) {
1057 uint8x8_t sr, sg, sb; 1050 uint8x8_t sr, sg, sb;
1058 uint16x8_t dr, dg, db; 1051 uint16x8_t dr, dg, db;
1059 uint16x8_t dst8; 1052 uint16x8_t dst8;
1060 1053
1061 /* source is in ABGR ordering (R == lsb) */ 1054 /* source is in ABGR ordering (R == lsb) */
1062 { 1055 {
1063 register uint8x8_t d0 asm("d0"); 1056 register uint8x8_t d0 asm("d0");
1064 register uint8x8_t d1 asm("d1"); 1057 register uint8x8_t d1 asm("d1");
1065 register uint8x8_t d2 asm("d2"); 1058 register uint8x8_t d2 asm("d2");
1066 register uint8x8_t d3 asm("d3"); 1059 register uint8x8_t d3 asm("d3");
1067 1060
1068 asm ("vld4.8 {d0-d3},[%4] /* r=%P0 g=%P1 b=%P2 a=%P3 */" 1061 asm (
1069 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3) 1062 "vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */"
1070 : "r" (src) 1063 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
1071 ); 1064 :
1072 sr = d0; sg = d1; sb = d2; 1065 );
1066 sg = d1;
1067 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)
1068 sr = d2; sb = d0;
1069 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)
1070 sr = d0; sb = d2;
1071 #endif
1073 } 1072 }
1074 /* XXX: if we want to prefetch, hide it in the above asm() 1073 /* XXX: if we want to prefetch, hide it in the above asm()
1075 * using the gcc __builtin_prefetch(), the prefetch will 1074 * using the gcc __builtin_prefetch(), the prefetch will
1076 * fall to the bottom of the loop -- it won't stick up 1075 * fall to the bottom of the loop -- it won't stick up
1077 * at the top of the loop, just after the vld4. 1076 * at the top of the loop, just after the vld4.
1078 */ 1077 */
1079 1078
1080 /* sr = sr - (sr>>5) + d */ 1079 /* sr = sr - (sr>>5) + d */
1081 sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1080 sr = vsub_u8(sr, vshr_n_u8(sr, 5));
1082 dr = vaddl_u8(sr, d); 1081 dr = vaddl_u8(sr, d);
1083 1082
1084 /* sb = sb - (sb>>5) + d */ 1083 /* sb = sb - (sb>>5) + d */
1085 sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1084 sb = vsub_u8(sb, vshr_n_u8(sb, 5));
1086 db = vaddl_u8(sb, d); 1085 db = vaddl_u8(sb, d);
1087 1086
1088 /* sg = sg - (sg>>6) + (d>>1); similar logic for overflows */ 1087 /* sg = sg - (sg>>6) + (d>>1); similar logic for overflows */
1089 sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1088 sg = vsub_u8(sg, vshr_n_u8(sg, 6));
1090 dg = vaddl_u8(sg, vshr_n_u8(d,1)); 1089 dg = vaddl_u8(sg, vshr_n_u8(d, 1));
1091 /* XXX: check that the "d>>1" here is hoisted */ 1090 /* XXX: check that the "d>>1" here is hoisted */
1092 1091
1093 /* pack high bits of each into 565 format (rgb, b is lsb) */ 1092 /* pack high bits of each into 565 format (rgb, b is lsb) */
1094 dst8 = vshrq_n_u16(db, 3); 1093 dst8 = vshrq_n_u16(db, 3);
1095 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1094 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5);
1096 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr,3), 11); 1095 dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11);
1097 1096
1098 /* store it */ 1097 /* store it */
1099 vst1q_u16(dst, dst8); 1098 vst1q_u16(dst, dst8);
1100 1099
1101 #if defined(DEBUG_S32_OPAQUE_DITHER) 1100 #if defined(DEBUG_S32_OPAQUE_DITHER)
1102 /* always good to know if we generated good results */ 1101 /* always good to know if we generated good results */
1103 { 1102 {
1104 int i, myx = x, myy = y; 1103 int i, myx = x, myy = y;
1105 DITHER_565_SCAN(myy); 1104 DITHER_565_SCAN(myy);
1106 for (i=0;i<UNROLL;i++) { 1105 for (i=0;i<UNROLL;i++) {
1107 SkPMColor c = src[i]; 1106 SkPMColor c = src[i-8];
mtklein 2013/09/12 20:07:36 Can you tack on something like // The '!' in th
kevin.petit.not.used.account 2013/09/13 12:08:27 Done.
1108 unsigned dither = DITHER_VALUE(myx); 1107 unsigned dither = DITHER_VALUE(myx);
1109 uint16_t val = SkDitherRGB32To565(c, dither); 1108 uint16_t val = SkDitherRGB32To565(c, dither);
1110 if (val != dst[i]) { 1109 if (val != dst[i]) {
1111 SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x \n", 1110 SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x \n",
1112 c, dither, val, dst[i], dstart[i]); 1111 c, dither, val, dst[i], dstart[i]);
1113 } 1112 }
1114 DITHER_INC_X(myx); 1113 DITHER_INC_X(myx);
1115 } 1114 }
1116 } 1115 }
1117 #endif 1116 #endif
1118 1117
1119 dst += UNROLL; 1118 dst += UNROLL;
1120 src += UNROLL;
mtklein 2013/09/12 20:07:36 Maybe even duplicate the same note here about why
kevin.petit.not.used.account 2013/09/13 12:08:27 Done.
1121 count -= UNROLL; 1119 count -= UNROLL;
1122 x += UNROLL; /* probably superfluous */ 1120 x += UNROLL; /* probably superfluous */
1123 } 1121 }
1124 } 1122 }
1125 #undef UNROLL 1123 #undef UNROLL
1126 1124
1127 /* residuals */ 1125 /* residuals */
1128 if (count > 0) { 1126 if (count > 0) {
1129 DITHER_565_SCAN(y); 1127 DITHER_565_SCAN(y);
1130 do { 1128 do {
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
1268 * case where we do not inspect the src alpha. 1266 * case where we do not inspect the src alpha.
1269 */ 1267 */
1270 #if SK_A32_SHIFT == 24 1268 #if SK_A32_SHIFT == 24
1271 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor 1269 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor
1272 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1270 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1273 #else 1271 #else
1274 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1272 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1275 #endif 1273 #endif
1276 S32A_Blend_BlitRow32_arm // S32A_Blend 1274 S32A_Blend_BlitRow32_arm // S32A_Blend
1277 }; 1275 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698