OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 | 8 |
9 #include "SkBlitRow_opts_SSE2.h" | 9 #include "SkBlitRow_opts_SSE2.h" |
10 #include "SkBitmapProcState_opts_SSE2.h" | 10 #include "SkBitmapProcState_opts_SSE2.h" |
11 #include "SkColorPriv.h" | 11 #include "SkColorPriv.h" |
12 #include "SkColor_opts_SSE2.h" | 12 #include "SkColor_opts_SSE2.h" |
| 13 #include "SkDither.h" |
13 #include "SkUtils.h" | 14 #include "SkUtils.h" |
14 | 15 |
15 #include <emmintrin.h> | 16 #include <emmintrin.h> |
16 | 17 |
17 /* SSE2 version of S32_Blend_BlitRow32() | 18 /* SSE2 version of S32_Blend_BlitRow32() |
18 * portable version is in core/SkBlitRow_D32.cpp | 19 * portable version is in core/SkBlitRow_D32.cpp |
19 */ | 20 */ |
20 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 21 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
21 const SkPMColor* SK_RESTRICT src, | 22 const SkPMColor* SK_RESTRICT src, |
22 int count, U8CPU alpha) { | 23 int count, U8CPU alpha) { |
(...skipping 1021 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1044 do { | 1045 do { |
1045 SkPMColor c = *src++; | 1046 SkPMColor c = *src++; |
1046 SkPMColorAssert(c); | 1047 SkPMColorAssert(c); |
1047 if (c) { | 1048 if (c) { |
1048 *dst = SkSrcOver32To16(c, *dst); | 1049 *dst = SkSrcOver32To16(c, *dst); |
1049 } | 1050 } |
1050 dst += 1; | 1051 dst += 1; |
1051 } while (--count != 0); | 1052 } while (--count != 0); |
1052 } | 1053 } |
1053 } | 1054 } |
| 1055 |
| 1056 void S32_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst, |
| 1057 const SkPMColor* SK_RESTRICT src, |
| 1058 int count, U8CPU alpha, int x, int y) { |
| 1059 SkASSERT(255 == alpha); |
| 1060 |
| 1061 if (count <= 0) { |
| 1062 return; |
| 1063 } |
| 1064 |
| 1065 if (count >= 8) { |
| 1066 while (((size_t)dst & 0x0F) != 0) { |
| 1067 DITHER_565_SCAN(y); |
| 1068 SkPMColor c = *src++; |
| 1069 SkPMColorAssert(c); |
| 1070 |
| 1071 unsigned dither = DITHER_VALUE(x); |
| 1072 *dst++ = SkDitherRGB32To565(c, dither); |
| 1073 DITHER_INC_X(x); |
| 1074 count--; |
| 1075 } |
| 1076 |
| 1077 unsigned short dither_value[8]; |
| 1078 __m128i dither; |
| 1079 #ifdef ENABLE_DITHER_MATRIX_4X4 |
| 1080 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3]; |
| 1081 dither_value[0] = dither_value[4] = dither_scan[(x) & 3]; |
| 1082 dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3]; |
| 1083 dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3]; |
| 1084 dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3]; |
| 1085 #else |
| 1086 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; |
| 1087 dither_value[0] = dither_value[4] = (dither_scan |
| 1088 >> (((x) & 3) << 2)) & 0xF; |
| 1089 dither_value[1] = dither_value[5] = (dither_scan |
| 1090 >> (((x + 1) & 3) << 2)) & 0xF; |
| 1091 dither_value[2] = dither_value[6] = (dither_scan |
| 1092 >> (((x + 2) & 3) << 2)) & 0xF; |
| 1093 dither_value[3] = dither_value[7] = (dither_scan |
| 1094 >> (((x + 3) & 3) << 2)) & 0xF; |
| 1095 #endif |
| 1096 dither = _mm_loadu_si128((__m128i*) dither_value); |
| 1097 |
| 1098 const __m128i* s = reinterpret_cast<const __m128i*>(src); |
| 1099 __m128i* d = reinterpret_cast<__m128i*>(dst); |
| 1100 |
| 1101 while (count >= 8) { |
| 1102 // Load 8 pixels of src. |
| 1103 __m128i src_pixel1 = _mm_loadu_si128(s++); |
| 1104 __m128i src_pixel2 = _mm_loadu_si128(s++); |
| 1105 |
| 1106 // Extract R from src. |
| 1107 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT)); |
| 1108 sr1 = _mm_srli_epi32(sr1, 24); |
| 1109 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT)); |
| 1110 sr2 = _mm_srli_epi32(sr2, 24); |
| 1111 __m128i sr = _mm_packs_epi32(sr1, sr2); |
| 1112 |
| 1113 // SkDITHER_R32To565(sr, dither) |
| 1114 __m128i sr_offset = _mm_srli_epi16(sr, 5); |
| 1115 sr = _mm_add_epi16(sr, dither); |
| 1116 sr = _mm_sub_epi16(sr, sr_offset); |
| 1117 sr = _mm_srli_epi16(sr, SK_R32_BITS - SK_R16_BITS); |
| 1118 |
| 1119 // Extract G from src. |
| 1120 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT)); |
| 1121 sg1 = _mm_srli_epi32(sg1, 24); |
| 1122 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT)); |
| 1123 sg2 = _mm_srli_epi32(sg2, 24); |
| 1124 __m128i sg = _mm_packs_epi32(sg1, sg2); |
| 1125 |
| 1126 // SkDITHER_R32To565(sg, dither) |
| 1127 __m128i sg_offset = _mm_srli_epi16(sg, 6); |
| 1128 sg = _mm_add_epi16(sg, _mm_srli_epi16(dither, 1)); |
| 1129 sg = _mm_sub_epi16(sg, sg_offset); |
| 1130 sg = _mm_srli_epi16(sg, SK_G32_BITS - SK_G16_BITS); |
| 1131 |
| 1132 // Extract B from src. |
| 1133 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT)); |
| 1134 sb1 = _mm_srli_epi32(sb1, 24); |
| 1135 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT)); |
| 1136 sb2 = _mm_srli_epi32(sb2, 24); |
| 1137 __m128i sb = _mm_packs_epi32(sb1, sb2); |
| 1138 |
| 1139 // SkDITHER_R32To565(sb, dither) |
| 1140 __m128i sb_offset = _mm_srli_epi16(sb, 5); |
| 1141 sb = _mm_add_epi16(sb, dither); |
| 1142 sb = _mm_sub_epi16(sb, sb_offset); |
| 1143 sb = _mm_srli_epi16(sb, SK_B32_BITS - SK_B16_BITS); |
| 1144 |
| 1145 // Pack and store 16-bit dst pixel. |
| 1146 __m128i d_pixel = SkPackRGB16_SSE(sr, sg, sb); |
| 1147 _mm_store_si128(d++, d_pixel); |
| 1148 |
| 1149 count -= 8; |
| 1150 x += 8; |
| 1151 } |
| 1152 |
| 1153 src = reinterpret_cast<const SkPMColor*>(s); |
| 1154 dst = reinterpret_cast<uint16_t*>(d); |
| 1155 } |
| 1156 |
| 1157 if (count > 0) { |
| 1158 DITHER_565_SCAN(y); |
| 1159 do { |
| 1160 SkPMColor c = *src++; |
| 1161 SkPMColorAssert(c); |
| 1162 |
| 1163 unsigned dither = DITHER_VALUE(x); |
| 1164 *dst++ = SkDitherRGB32To565(c, dither); |
| 1165 DITHER_INC_X(x); |
| 1166 } while (--count != 0); |
| 1167 } |
| 1168 } |
OLD | NEW |