Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(606)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 1746423003: remove untested blit optimization (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBlitRow_opts_SSE2.h" 10 #include "SkBlitRow_opts_SSE2.h"
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
71 const SkPMColor* SK_RESTRICT src, 71 const SkPMColor* SK_RESTRICT src,
72 int count, U8CPU alpha) { 72 int count, U8CPU alpha) {
73 sk_msan_assert_initialized(src, src+count); 73 sk_msan_assert_initialized(src, src+count);
74 74
75 SkASSERT(alpha == 255); 75 SkASSERT(alpha == 255);
76 if (count <= 0) { 76 if (count <= 0) {
77 return; 77 return;
78 } 78 }
79 79
80 #ifdef SK_USE_ACCURATE_BLENDING
81 if (count >= 4) {
82 SkASSERT(((size_t)dst & 0x03) == 0);
83 while (((size_t)dst & 0x0F) != 0) {
84 *dst = SkPMSrcOver(*src, *dst);
85 src++;
86 dst++;
87 count--;
88 }
89
90 const __m128i *s = reinterpret_cast<const __m128i*>(src);
91 __m128i *d = reinterpret_cast<__m128i*>(dst);
92 __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
93 __m128i c_128 = _mm_set1_epi16(128); // 8 copies of 128 (16-bit)
94 __m128i c_255 = _mm_set1_epi16(255); // 8 copies of 255 (16-bit)
95 while (count >= 4) {
96 // Load 4 pixels
97 __m128i src_pixel = _mm_loadu_si128(s);
98 __m128i dst_pixel = _mm_load_si128(d);
99
100 __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel);
101 __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8);
102 // Shift alphas down to lower 8 bits of each quad.
103 __m128i alpha = _mm_srli_epi32(src_pixel, 24);
104
105 // Copy alpha to upper 3rd byte of each quad
106 alpha = _mm_or_si128(alpha, _mm_slli_epi32(alpha, 16));
107
108 // Subtract alphas from 255, to get 0..255
109 alpha = _mm_sub_epi16(c_255, alpha);
110
111 // Multiply red and blue by the inverse src alpha (255 - src alpha).
112 dst_rb = _mm_mullo_epi16(dst_rb, alpha);
113 // Multiply alpha and green by the inverse src alpha (255 - src alpha).
114 dst_ag = _mm_mullo_epi16(dst_ag, alpha);
115
116 // dst_rb_low = (dst_rb >> 8)
117 __m128i dst_rb_low = _mm_srli_epi16(dst_rb, 8);
118 __m128i dst_ag_low = _mm_srli_epi16(dst_ag, 8);
119
120 // dst_rb = (dst_rb + dst_rb_low + 128) >> 8
121 dst_rb = _mm_add_epi16(dst_rb, dst_rb_low);
122 dst_rb = _mm_add_epi16(dst_rb, c_128);
123 dst_rb = _mm_srli_epi16(dst_rb, 8);
124
125 // dst_ag = (dst_ag + dst_ag_low + 128) & ag_mask
126 dst_ag = _mm_add_epi16(dst_ag, dst_ag_low);
127 dst_ag = _mm_add_epi16(dst_ag, c_128);
128 dst_ag = _mm_andnot_si128(rb_mask, dst_ag);
129
130 // Combine back into RGBA.
131 dst_pixel = _mm_or_si128(dst_rb, dst_ag);
132
133 // Add result
134 __m128i result = _mm_add_epi8(src_pixel, dst_pixel);
135 _mm_store_si128(d, result);
136 s++;
137 d++;
138 count -= 4;
139 }
140 src = reinterpret_cast<const SkPMColor*>(s);
141 dst = reinterpret_cast<SkPMColor*>(d);
142 }
143
144 while (count > 0) {
145 *dst = SkPMSrcOver(*src, *dst);
146 src++;
147 dst++;
148 count--;
149 }
150 #else
151 int count16 = count / 16; 80 int count16 = count / 16;
152 __m128i* dst4 = (__m128i*)dst; 81 __m128i* dst4 = (__m128i*)dst;
153 const __m128i* src4 = (const __m128i*)src; 82 const __m128i* src4 = (const __m128i*)src;
154 83
155 for (int i = 0; i < count16 * 4; i += 4) { 84 for (int i = 0; i < count16 * 4; i += 4) {
156 // Load 16 source pixels. 85 // Load 16 source pixels.
157 __m128i s0 = _mm_loadu_si128(src4+i+0), 86 __m128i s0 = _mm_loadu_si128(src4+i+0),
158 s1 = _mm_loadu_si128(src4+i+1), 87 s1 = _mm_loadu_si128(src4+i+1),
159 s2 = _mm_loadu_si128(src4+i+2), 88 s2 = _mm_loadu_si128(src4+i+2),
160 s3 = _mm_loadu_si128(src4+i+3); 89 s3 = _mm_loadu_si128(src4+i+3);
(...skipping 23 matching lines...) Expand all
184 } 113 }
185 114
186 // Wrap up the last <= 15 pixels. 115 // Wrap up the last <= 15 pixels.
187 SkASSERT(count - (count16*16) <= 15); 116 SkASSERT(count - (count16*16) <= 15);
188 for (int i = count16*16; i < count; i++) { 117 for (int i = count16*16; i < count; i++) {
189 // This check is not really necessary, but it prevents pointless autovectorization. 118 // This check is not really necessary, but it prevents pointless autovectorization.
190 if (src[i] & 0xFF000000) { 119 if (src[i] & 0xFF000000) {
191 dst[i] = SkPMSrcOver(src[i], dst[i]); 120 dst[i] = SkPMSrcOver(src[i], dst[i]);
192 } 121 }
193 } 122 }
194 #endif
195 } 123 }
196 124
197 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 125 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
198 const SkPMColor* SK_RESTRICT src, 126 const SkPMColor* SK_RESTRICT src,
199 int count, U8CPU alpha) { 127 int count, U8CPU alpha) {
200 SkASSERT(alpha <= 255); 128 SkASSERT(alpha <= 255);
201 if (count <= 0) { 129 if (count <= 0) {
202 return; 130 return;
203 } 131 }
204 132
(...skipping 908 matching lines...) Expand 10 before | Expand all | Expand 10 after
1113 uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1041 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1114 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1042 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1115 // now src and dst expanded are in g:11 r:10 x:1 b:10 1043 // now src and dst expanded are in g:11 r:10 x:1 b:10
1116 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1044 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1117 } 1045 }
1118 dst += 1; 1046 dst += 1;
1119 DITHER_INC_X(x); 1047 DITHER_INC_X(x);
1120 } while (--count != 0); 1048 } while (--count != 0);
1121 } 1049 }
1122 } 1050 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698