Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 138163013: SSE2 implementation of S32A_D565_Opaque (Closed) Base URL: https://skia.googlesource.com/skia.git@SkSrcOver32To16SSE
Patch Set: fix nits Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/SkColor_opts_SSE2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 8
9 #include "SkBlitRow_opts_SSE2.h" 9 #include "SkBlitRow_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSE2.h" 10 #include "SkBitmapProcState_opts_SSE2.h"
11 #include "SkColorPriv.h" 11 #include "SkColorPriv.h"
12 #include "SkColor_opts_SSE2.h"
12 #include "SkUtils.h" 13 #include "SkUtils.h"
13 14
14 #include <emmintrin.h> 15 #include <emmintrin.h>
15 16
16 /* SSE2 version of S32_Blend_BlitRow32() 17 /* SSE2 version of S32_Blend_BlitRow32()
17 * portable version is in core/SkBlitRow_D32.cpp 18 * portable version is in core/SkBlitRow_D32.cpp
18 */ 19 */
19 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 20 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
20 const SkPMColor* SK_RESTRICT src, 21 const SkPMColor* SK_RESTRICT src,
21 int count, U8CPU alpha) { 22 int count, U8CPU alpha) {
(...skipping 822 matching lines...) Expand 10 before | Expand all | Expand 10 after
844 dst = reinterpret_cast<SkPMColor*>(d); 845 dst = reinterpret_cast<SkPMColor*>(d);
845 } 846 }
846 847
847 while (width > 0) { 848 while (width > 0) {
848 *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst); 849 *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
849 mask++; 850 mask++;
850 dst++; 851 dst++;
851 width--; 852 width--;
852 } 853 }
853 } 854 }
855
856 /* SSE2 version of S32A_D565_Opaque()
857 * portable version is in core/SkBlitRow_D16.cpp
858 */
859 void S32A_D565_Opaque_SSE2(uint16_t* SK_RESTRICT dst,
860 const SkPMColor* SK_RESTRICT src,
861 int count, U8CPU alpha, int /*x*/, int /*y*/) {
862 SkASSERT(255 == alpha);
863
864 if (count <= 0) {
865 return;
866 }
867
868 if (count >= 8) {
869 // Make dst 16 bytes alignment
870 while (((size_t)dst & 0x0F) != 0) {
871 SkPMColor c = *src++;
872 if (c) {
873 *dst = SkSrcOver32To16(c, *dst);
874 }
875 dst += 1;
876 count--;
877 }
878
879 const __m128i* s = reinterpret_cast<const __m128i*>(src);
880 __m128i* d = reinterpret_cast<__m128i*>(dst);
881 __m128i var255 = _mm_set1_epi16(255);
882 __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK);
883 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
884 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
885
886 while (count >= 8) {
887 // Load 8 pixels of src.
888 __m128i src_pixel1 = _mm_loadu_si128(s++);
889 __m128i src_pixel2 = _mm_loadu_si128(s++);
890
891 // Check whether src pixels are equal to 0 and get the highest bit
892 // of each byte of result, if src pixels are all zero, src_cmp1 and
893 // src_cmp2 will be 0xFFFF.
894 int src_cmp1 = _mm_movemask_epi8(_mm_cmpeq_epi16(src_pixel1,
895 _mm_setzero_si128()));
896 int src_cmp2 = _mm_movemask_epi8(_mm_cmpeq_epi16(src_pixel2,
897 _mm_setzero_si128()));
898 if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) {
899 d++;
900 count -= 8;
901 continue;
902 }
903
904 // Load 8 pixels of dst.
905 __m128i dst_pixel = _mm_load_si128(d);
906
907 // Extract A from src.
908 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT));
909 sa1 = _mm_srli_epi32(sa1, 24);
910 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT));
911 sa2 = _mm_srli_epi32(sa2, 24);
912 __m128i sa = _mm_packs_epi32(sa1, sa2);
913
914 // Extract R from src.
915 __m128i sr1 = _mm_slli_epi32(src_pixel1,(24 - SK_R32_SHIFT));
916 sr1 = _mm_srli_epi32(sr1, 24);
917 __m128i sr2 = _mm_slli_epi32(src_pixel2,(24 - SK_R32_SHIFT));
918 sr2 = _mm_srli_epi32(sr2, 24);
919 __m128i sr = _mm_packs_epi32(sr1, sr2);
920
921 // Extract G from src.
922 __m128i sg1 = _mm_slli_epi32(src_pixel1,(24 - SK_G32_SHIFT));
923 sg1 = _mm_srli_epi32(sg1, 24);
924 __m128i sg2 = _mm_slli_epi32(src_pixel2,(24 - SK_G32_SHIFT));
925 sg2 = _mm_srli_epi32(sg2, 24);
926 __m128i sg = _mm_packs_epi32(sg1, sg2);
927
928 // Extract B from src.
929 __m128i sb1 = _mm_slli_epi32(src_pixel1,(24 - SK_B32_SHIFT));
930 sb1 = _mm_srli_epi32(sb1, 24);
931 __m128i sb2 = _mm_slli_epi32(src_pixel2,(24 - SK_B32_SHIFT));
932 sb2 = _mm_srli_epi32(sb2, 24);
933 __m128i sb = _mm_packs_epi32(sb1, sb2);
934
935 // Extract R G B from dst.
936 __m128i dr = _mm_srli_epi16(dst_pixel,SK_R16_SHIFT);
937 dr = _mm_and_si128(dr, r16_mask);
938 __m128i dg = _mm_srli_epi16(dst_pixel,SK_G16_SHIFT);
939 dg = _mm_and_si128(dg, g16_mask);
940 __m128i db = _mm_srli_epi16(dst_pixel,SK_B16_SHIFT);
941 db = _mm_and_si128(db, b16_mask);
942
943 __m128i isa = _mm_sub_epi16(var255, sa); // 255 -sa
944
945 // Calculate R G B of result.
946 // Original algorithm is in SkSrcOver32To16().
947 dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE(dr, isa, SK_R16_BITS));
948 dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS);
949 dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE(dg, isa, SK_G16_BITS));
950 dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS);
951 db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE(db, isa, SK_B16_BITS));
952 db = _mm_srli_epi16(db, 8 - SK_B16_BITS);
953
954 // Pack R G B into 16-bit color.
955 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db);
956
957 // Store 8 16-bit colors in dst.
958 _mm_store_si128(d++, d_pixel);
959 count -= 8;
960 }
961
962 src = reinterpret_cast<const SkPMColor*>(s);
963 dst = reinterpret_cast<uint16_t*>(d);
964 }
965
966 if (count > 0) {
967 do {
968 SkPMColor c = *src++;
969 SkPMColorAssert(c);
970 if (c) {
971 *dst = SkSrcOver32To16(c, *dst);
972 }
973 dst += 1;
974 } while (--count != 0);
975 }
976 }
OLDNEW
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/SkColor_opts_SSE2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698