| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkPngFilters.h" | 8 #include "SkPngFilters.h" |
| 9 | 9 |
| 10 // Functions in this file look at at most 3 pixels (a,b,c) to predict the fourth (d
). | 10 // Functions in this file look at at most 3 pixels (a,b,c) to predict the fourth (d
). |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 | 73 |
| 74 d = _mm_add_epi8(d, avg); | 74 d = _mm_add_epi8(d, avg); |
| 75 store<bpp>(row, d); | 75 store<bpp>(row, d); |
| 76 | 76 |
| 77 prev += bpp; | 77 prev += bpp; |
| 78 row += bpp; | 78 row += bpp; |
| 79 rb -= bpp; | 79 rb -= bpp; |
| 80 } | 80 } |
| 81 } | 81 } |
| 82 | 82 |
| 83 // Returns bytewise |x-y|. | 83 // Returns |x| for 16-bit lanes. |
| 84 static __m128i absdiff_u8(__m128i x, __m128i y) { | 84 static __m128i abs_i16(__m128i x) { |
| 85 // One of these two saturated subtractions will be the answer, the other
zero. | 85 #if defined(__SSSE3__) |
| 86 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); | 86 return _mm_abs_epi16(x); |
| 87 #else |
| 88 // Read this all as, return x<0 ? -x : x. |
| 89 // To negate two's complement, you flip all the bits then add 1. |
| 90 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128()); |
| 91 x = _mm_xor_si128(x, is_negative); // Flip negative
lanes. |
| 92 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15)); // +1 to negativ
e lanes, else +0. |
| 93 return x; |
| 94 #endif |
| 87 } | 95 } |
| 88 | 96 |
| 89 // Bytewise c ? t : e. | 97 // Bytewise c ? t : e. |
| 90 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { | 98 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { |
| 91 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); | 99 #if 0 && defined(__SSE4_1__) // Make sure we have a bot testing this before
enabling. |
| 100 return _mm_blendv_epi8(e,t,c); |
| 101 #else |
| 92 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); | 102 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); |
| 103 #endif |
| 93 } | 104 } |
| 94 | 105 |
| 95 template <int bpp> | 106 template <int bpp> |
| 96 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev
) { | 107 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev
) { |
| 97 // Paeth tries to predict pixel d using the pixel to the left of it, a, | 108 // Paeth tries to predict pixel d using the pixel to the left of it, a, |
| 98 // and two pixels from the previous row, b and c: | 109 // and two pixels from the previous row, b and c: |
| 99 // prev: c b | 110 // prev: c b |
| 100 // row: a d | 111 // row: a d |
| 101 // The Paeth function predicts d to be whichever of a, b, or c is neares
t to p=a+b-c. | 112 // The Paeth function predicts d to be whichever of a, b, or c is neares
t to p=a+b-c. |
| 102 | 113 |
| 103 // The first pixel has no left context, and so uses an Up filter, p = b. | 114 // The first pixel has no left context, and so uses an Up filter, p = b. |
| 104 // This works naturally with our main loop's p = a+b-c if we force a and
c to zero. | 115 // This works naturally with our main loop's p = a+b-c if we force a and
c to zero. |
| 105 // Here we zero b and d, which become c and a respectively at the start
of the loop. | 116 // Here we zero b and d, which become c and a respectively at the start
of the loop. |
| 106 __m128i c, b = _mm_setzero_si128(), | 117 const __m128i zero = _mm_setzero_si128(); |
| 107 a, d = _mm_setzero_si128(); | 118 __m128i c, b = zero, |
| 119 a, d = zero; |
| 108 | 120 |
| 109 int rb = row_info->rowbytes; | 121 int rb = row_info->rowbytes; |
| 110 while (rb > 0) { | 122 while (rb > 0) { |
| 111 c = b; b = load<bpp>(prev); | 123 // It's easiest to do this math (particularly, deal with pc) with 16
-bit intermediates. |
| 112 a = d; d = load<bpp>(row ); | 124 c = b; b = _mm_unpacklo_epi8(load<bpp>(prev), zero); |
| 125 a = d; d = _mm_unpacklo_epi8(load<bpp>(row ), zero); |
| 113 | 126 |
| 114 // We can't express p in 8 bits, but luckily we can use this faux p
instead. | 127 __m128i pa = _mm_sub_epi16(b,c), // (p-a) == (a+b-c - a) == (b-c) |
| 115 // (I have no deep insight here... I just proved this with brute for
ce.) | 128 pb = _mm_sub_epi16(a,c), // (p-b) == (a+b-c - b) == (a-c) |
| 116 __m128i min = _mm_min_epu8(a,b), | 129 pc = _mm_add_epi16(pa,pb); // (p-c) == (a+b-c - c) == (a+b-c
-c) == (b-c)+(a-c) |
| 117 max = _mm_max_epu8(a,b), | |
| 118 faux_p = _mm_adds_epu8(min, _mm_subs_epu8(max, c)); | |
| 119 | 130 |
| 120 // We could use faux_p for calculating all three of pa, pb, and pc, | 131 pa = abs_i16(pa); // |p-a| |
| 121 // but it's a little quicker to calculate the correct pa and pb dire
ctly, | 132 pb = abs_i16(pb); // |p-b| |
| 122 // and the predictor remains the same. (Again, brute force.) | 133 pc = abs_i16(pc); // |p-c| |
| 123 __m128i pa = absdiff_u8(b,c), // |a+b-c - a| == |b-c| | |
| 124 pb = absdiff_u8(a,c), // |a+b-c - b| == |a-c| | |
| 125 faux_pc = absdiff_u8(faux_p, c); | |
| 126 | 134 |
| 127 // From here, things are straightforward. Find the smallest distanc
e to p... | 135 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); |
| 128 __m128i smallest = _mm_min_epu8(_mm_min_epu8(pa, pb), faux_pc); | |
| 129 | 136 |
| 130 // ... then the predictor is the input corresponding to that smalles
t distance, | 137 // Paeth breaks ties favoring a over b over c. |
| 131 // breaking ties in favor of a over b over c. | 138 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, |
| 132 __m128i nearest = if_then_else(_mm_cmpeq_epi8(smallest, pa), a, | 139 if_then_else(_mm_cmpeq_epi16(smallest, pb), b, |
| 133 if_then_else(_mm_cmpeq_epi8(smallest, pb), b, | 140 c)); |
| 134 c)); | |
| 135 | 141 |
| 136 // We've reconstructed d! Leave it for next round to become a, and
write it out. | 142 d = _mm_add_epi8(d, nearest); // Note `_epi8`: we need addition to
wrap modulo 256. |
| 137 d = _mm_add_epi8(d, nearest); | 143 store<bpp>(row, _mm_packus_epi16(d,d)); |
| 138 store<bpp>(row, d); | |
| 139 | 144 |
| 140 prev += bpp; | 145 prev += bpp; |
| 141 row += bpp; | 146 row += bpp; |
| 142 rb -= bpp; | 147 rb -= bpp; |
| 143 } | 148 } |
| 144 } | 149 } |
| 145 | 150 |
| 146 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ | 151 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 147 sk_sub_sse2<3>(row_info, row, prev); | 152 sk_sub_sse2<3>(row_info, row, prev); |
| 148 } | 153 } |
| 149 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ | 154 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 150 sk_sub_sse2<4>(row_info, row, prev); | 155 sk_sub_sse2<4>(row_info, row, prev); |
| 151 } | 156 } |
| 152 | 157 |
| 153 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ | 158 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 154 sk_avg_sse2<3>(row_info, row, prev); | 159 sk_avg_sse2<3>(row_info, row, prev); |
| 155 } | 160 } |
| 156 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ | 161 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev)
{ |
| 157 sk_avg_sse2<4>(row_info, row, prev); | 162 sk_avg_sse2<4>(row_info, row, prev); |
| 158 } | 163 } |
| 159 | 164 |
| 160 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { | 165 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { |
| 161 sk_paeth_sse2<3>(row_info, row, prev); | 166 sk_paeth_sse2<3>(row_info, row, prev); |
| 162 } | 167 } |
| 163 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { | 168 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* pre
v) { |
| 164 sk_paeth_sse2<4>(row_info, row, prev); | 169 sk_paeth_sse2<4>(row_info, row, prev); |
| 165 } | 170 } |
| 166 | 171 |
| 167 #endif | 172 #endif |
| OLD | NEW |