Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 1820313002: Port S32A_opaque blit row to SkOpts. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fmt Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBlitRow_opts_SSE2.h" 10 #include "SkBlitRow_opts_SSE2.h"
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 } 60 }
61 61
62 while (count > 0) { 62 while (count > 0) {
63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); 63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale);
64 src++; 64 src++;
65 dst++; 65 dst++;
66 count--; 66 count--;
67 } 67 }
68 } 68 }
69 69
70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
71 const SkPMColor* SK_RESTRICT src,
72 int count, U8CPU alpha) {
73 sk_msan_assert_initialized(src, src+count);
74
75 SkASSERT(alpha == 255);
76 if (count <= 0) {
77 return;
78 }
79
80 int count16 = count / 16;
81 __m128i* dst4 = (__m128i*)dst;
82 const __m128i* src4 = (const __m128i*)src;
83
84 for (int i = 0; i < count16 * 4; i += 4) {
85 // Load 16 source pixels.
86 __m128i s0 = _mm_loadu_si128(src4+i+0),
87 s1 = _mm_loadu_si128(src4+i+1),
88 s2 = _mm_loadu_si128(src4+i+2),
89 s3 = _mm_loadu_si128(src4+i+3);
90
91 const __m128i alphaMask = _mm_set1_epi32(0xFF << SK_A32_SHIFT);
92 const __m128i ORed = _mm_or_si128(s3, _mm_or_si128(s2, _mm_or_si128(s1, s0)));
93 __m128i cmp = _mm_cmpeq_epi8(_mm_and_si128(ORed, alphaMask), _mm_setzero_si128());
94 if (0xffff == _mm_movemask_epi8(cmp)) {
95 // All 16 source pixels are fully transparent. There's nothing to do!
96 continue;
97 }
98 const __m128i ANDed = _mm_and_si128(s3, _mm_and_si128(s2, _mm_and_si128(s1, s0)));
99 cmp = _mm_cmpeq_epi8(_mm_and_si128(ANDed, alphaMask), alphaMask);
100 if (0xffff == _mm_movemask_epi8(cmp)) {
101 // All 16 source pixels are fully opaque. There's no need to read dst or blend it.
102 _mm_storeu_si128(dst4+i+0, s0);
103 _mm_storeu_si128(dst4+i+1, s1);
104 _mm_storeu_si128(dst4+i+2, s2);
105 _mm_storeu_si128(dst4+i+3, s3);
106 continue;
107 }
108 // The general slow case: do the blend for all 16 pixels.
109 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0)));
110 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1)));
111 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2)));
112 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3)));
113 }
114
115 // Wrap up the last <= 15 pixels.
116 SkASSERT(count - (count16*16) <= 15);
117 for (int i = count16*16; i < count; i++) {
118 // This check is not really necessarily, but it prevents pointless autovectorization.
119 if (src[i] & 0xFF000000) {
120 dst[i] = SkPMSrcOver(src[i], dst[i]);
121 }
122 }
123 }
124
125 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 70 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
126 const SkPMColor* SK_RESTRICT src, 71 const SkPMColor* SK_RESTRICT src,
127 int count, U8CPU alpha) { 72 int count, U8CPU alpha) {
128 SkASSERT(alpha <= 255); 73 SkASSERT(alpha <= 255);
129 if (count <= 0) { 74 if (count <= 0) {
130 return; 75 return;
131 } 76 }
132 77
133 if (count >= 4) { 78 if (count >= 4) {
134 while (((size_t)dst & 0x0F) != 0) { 79 while (((size_t)dst & 0x0F) != 0) {
(...skipping 906 matching lines...) Expand 10 before | Expand all | Expand 10 after
1041 uint32_t dst_expanded = SkExpand_rgb_16(*dst); 986 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1042 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 987 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1043 // now src and dst expanded are in g:11 r:10 x:1 b:10 988 // now src and dst expanded are in g:11 r:10 x:1 b:10
1044 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 989 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1045 } 990 }
1046 dst += 1; 991 dst += 1;
1047 DITHER_INC_X(x); 992 DITHER_INC_X(x);
1048 } while (--count != 0); 993 } while (--count != 0);
1049 } 994 }
1050 } 995 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698