| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkPngFilters.h" | 8 #include "SkPngFilters.h" |
| 9 #include "SkTypes.h" | |
| 10 | 9 |
| 11 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). | 10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). |
| 12 // They're positioned like this: | 11 // They're positioned like this: |
| 13 // prev: c b | 12 // prev: c b |
| 14 // row: a d | 13 // row: a d |
| 15 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever | 14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever |
| 16 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) | 15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) |
| 17 | 16 |
| 18 #if defined(__SSE2__) | 17 #if defined(__SSE2__) |
| 19 | 18 |
| 20 template <int bpp> | 19 template <int bpp> |
| 21 static __m128i load(const void* p) { | 20 static __m128i load(const void* p) { |
| 22 static_assert(bpp <= 4, ""); | 21 static_assert(bpp <= 4, ""); |
| 23 | 22 |
| 24 uint32_t packed; | 23 uint32_t packed; |
| 25 memcpy(&packed, p, bpp); | 24 memcpy(&packed, p, bpp); |
| 26 return _mm_cvtsi32_si128(packed); | 25 return _mm_cvtsi32_si128(packed); |
| 27 } | 26 } |
| 28 | 27 |
| 29 template <int bpp> | 28 template <int bpp> |
| 30 static void store(void* p, __m128i v) { | 29 static void store(void* p, __m128i v) { |
| 31 static_assert(bpp <= 4, ""); | 30 static_assert(bpp <= 4, ""); |
| 32 | 31 |
| 33 uint32_t packed = _mm_cvtsi128_si32(v); | 32 uint32_t packed = _mm_cvtsi128_si32(v); |
| 34 memcpy(p, &packed, bpp); | 33 memcpy(p, &packed, bpp); |
| 35 } | 34 } |
| 36 | 35 |
| 37 template <int bpp> | 36 template <int bpp> |
| 38 static void sk_sub_sse2(png_row_infop row_info, png_bytep row, png_const_byt
ep) { | 37 static void sk_sub_sse2(png_row_infop row_info, uint8_t* row, const uint8_t*
) { |
| 39 // The Sub filter predicts each pixel as the previous pixel, a. | 38 // The Sub filter predicts each pixel as the previous pixel, a. |
| 40 // There is no pixel to the left of the first pixel. It's encoded directly. | 39 // There is no pixel to the left of the first pixel. It's encoded directly. |
| 41 // That works with our main loop if we just say that left pixel was zero. | 40 // That works with our main loop if we just say that left pixel was zero. |
| 42 __m128i a, d = _mm_setzero_si128(); | 41 __m128i a, d = _mm_setzero_si128(); |
| 43 | 42 |
| 44 int rb = row_info->rowbytes; | 43 int rb = row_info->rowbytes; |
| 45 while (rb > 0) { | 44 while (rb > 0) { |
| 46 a = d; d = load<bpp>(row); | 45 a = d; d = load<bpp>(row); |
| 47 d = _mm_add_epi8(d, a); | 46 d = _mm_add_epi8(d, a); |
| 48 store<bpp>(row, d); | 47 store<bpp>(row, d); |
| 49 | 48 |
| 50 row += bpp; | 49 row += bpp; |
| 51 rb -= bpp; | 50 rb -= bpp; |
| 52 } | 51 } |
| 53 } | 52 } |
| 54 | 53 |
| 55 template <int bpp> | 54 template <int bpp> |
| 56 void sk_avg_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev
) { | 55 void sk_avg_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 57 // The Avg filter predicts each pixel as the (truncated) average of a and b. | 56 // The Avg filter predicts each pixel as the (truncated) average of a and b. |
| 58 // There's no pixel to the left of the first pixel. Luckily, it's | 57 // There's no pixel to the left of the first pixel. Luckily, it's |
| 59 // predicted to be half of the pixel above it. So again, this works | 58 // predicted to be half of the pixel above it. So again, this works |
| 60 // perfectly with our loop if we make sure a starts at zero. | 59 // perfectly with our loop if we make sure a starts at zero. |
| 61 const __m128i zero = _mm_setzero_si128(); | 60 const __m128i zero = _mm_setzero_si128(); |
| 62 __m128i b; | 61 __m128i b; |
| 63 __m128i a, d = zero; | 62 __m128i a, d = zero; |
| 64 | 63 |
| 65 int rb = row_info->rowbytes; | 64 int rb = row_info->rowbytes; |
| 66 while (rb > 0) { | 65 while (rb > 0) { |
| (...skipping 20 matching lines...) Expand all Loading... |
| 87 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); | 86 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); |
| 88 } | 87 } |
| 89 | 88 |
| 90 // Bytewise c ? t : e. | 89 // Bytewise c ? t : e. |
| 91 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { | 90 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { |
| 92 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); | 91 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); |
| 93 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); | 92 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); |
| 94 } | 93 } |
| 95 | 94 |
| 96 template <int bpp> | 95 template <int bpp> |
| 97 void sk_paeth_sse2(png_row_infop row_info, png_bytep row, png_const_bytep pr
ev) { | 96 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev
) { |
| 98 // Paeth tries to predict pixel d using the pixel to the left of it, a, | 97 // Paeth tries to predict pixel d using the pixel to the left of it, a, |
| 99 // and two pixels from the previous row, b and c: | 98 // and two pixels from the previous row, b and c: |
| 100 // prev: c b | 99 // prev: c b |
| 101 // row: a d | 100 // row: a d |
| 102 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. | 101 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. |
| 103 | 102 |
| 104 // The first pixel has no left context, and so uses an Up filter, p = b. | 103 // The first pixel has no left context, and so uses an Up filter, p = b. |
| 105 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. | 104 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. |
| 106 // Here we zero b and d, which become c and a respectively at the start of the loop. | 105 // Here we zero b and d, which become c and a respectively at the start of the loop. |
| 107 __m128i c, b = _mm_setzero_si128(), | 106 __m128i c, b = _mm_setzero_si128(), |
| (...skipping 29 matching lines...) Expand all Loading... |
| 137 // We've reconstructed d! Leave it for next round to become a, and write it out. | 136 // We've reconstructed d! Leave it for next round to become a, and write it out. |
| 138 d = _mm_add_epi8(d, nearest); | 137 d = _mm_add_epi8(d, nearest); |
| 139 store<bpp>(row, d); | 138 store<bpp>(row, d); |
| 140 | 139 |
| 141 prev += bpp; | 140 prev += bpp; |
| 142 row += bpp; | 141 row += bpp; |
| 143 rb -= bpp; | 142 rb -= bpp; |
| 144 } | 143 } |
| 145 } | 144 } |
| 146 | 145 |
| 147 void sk_sub3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep pre
v) { | 146 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 148 sk_sub_sse2<3>(row_info, row, prev); | 147 sk_sub_sse2<3>(row_info, row, prev); |
| 149 } | 148 } |
| 150 void sk_sub4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep pre
v) { | 149 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 151 sk_sub_sse2<4>(row_info, row, prev); | 150 sk_sub_sse2<4>(row_info, row, prev); |
| 152 } | 151 } |
| 153 | 152 |
| 154 void sk_avg3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep pre
v) { | 153 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 155 sk_avg_sse2<3>(row_info, row, prev); | 154 sk_avg_sse2<3>(row_info, row, prev); |
| 156 } | 155 } |
| 157 void sk_avg4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep pre
v) { | 156 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 158 sk_avg_sse2<4>(row_info, row, prev); | 157 sk_avg_sse2<4>(row_info, row, prev); |
| 159 } | 158 } |
| 160 | 159 |
| 161 void sk_paeth3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep p
rev) { | 160 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { |
| 162 sk_paeth_sse2<3>(row_info, row, prev); | 161 sk_paeth_sse2<3>(row_info, row, prev); |
| 163 } | 162 } |
| 164 void sk_paeth4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep p
rev) { | 163 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { |
| 165 sk_paeth_sse2<4>(row_info, row, prev); | 164 sk_paeth_sse2<4>(row_info, row, prev); |
| 166 } | 165 } |
| 167 | 166 |
| 168 #endif | 167 #endif |
| OLD | NEW |