Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(526)

Side by Side Diff: core/src/fxcodec/fx_libopenjpeg/libopenjpeg20/mct.c

Issue 960183004: Upgrade openjpeg to revision 2997. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * The copyright in this software is being made available under the 2-clauses 2 * The copyright in this software is being made available under the 2-clauses
3 * BSD License, included below. This software may be subject to other third 3 * BSD License, included below. This software may be subject to other third
4 * party and contributor rights, including patent rights, and no such rights 4 * party and contributor rights, including patent rights, and no such rights
5 * are granted under this license. 5 * are granted under this license.
6 * 6 *
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8 * Copyright (c) 2002-2014, Professor Benoit Macq 8 * Copyright (c) 2002-2014, Professor Benoit Macq
9 * Copyright (c) 2001-2003, David Janssens 9 * Copyright (c) 2001-2003, David Janssens
10 * Copyright (c) 2002-2003, Yannick Verschueren 10 * Copyright (c) 2002-2003, Yannick Verschueren
(...skipping 22 matching lines...) Expand all
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE. 37 * POSSIBILITY OF SUCH DAMAGE.
38 */ 38 */
39 39
40 #ifdef __SSE__ 40 #ifdef __SSE__
41 #include <xmmintrin.h> 41 #include <xmmintrin.h>
42 #endif 42 #endif
43 #ifdef __SSE2__
44 #include <emmintrin.h>
45 #endif
46 #ifdef __SSE4_1__
47 #include <smmintrin.h>
48 #endif
43 49
44 #include "opj_includes.h" 50 #include "opj_includes.h"
45 51
46 /* <summary> */ 52 /* <summary> */
47 /* This table contains the norms of the basis function of the reversible MCT. */ 53 /* This table contains the norms of the basis function of the reversible MCT. */
48 /* </summary> */ 54 /* </summary> */
49 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; 55 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
50 56
51 /* <summary> */ 57 /* <summary> */
52 /* This table contains the norms of the basis function of the irreversible MCT. */ 58 /* This table contains the norms of the basis function of the irreversible MCT. */
53 /* </summary> */ 59 /* </summary> */
54 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; 60 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
55 61
56 const OPJ_FLOAT64 * opj_mct_get_mct_norms () 62 const OPJ_FLOAT64 * opj_mct_get_mct_norms ()
57 { 63 {
58 return opj_mct_norms; 64 return opj_mct_norms;
59 } 65 }
60 66
61 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real () 67 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
62 { 68 {
63 return opj_mct_norms_real; 69 return opj_mct_norms_real;
64 } 70 }
65 71
66 /* <summary> */ 72 /* <summary> */
67 /* Foward reversible MCT. */ 73 /* Foward reversible MCT. */
68 /* </summary> */ 74 /* </summary> */
75 #ifdef __SSE2__
69 void opj_mct_encode( 76 void opj_mct_encode(
70 OPJ_INT32* restrict c0, 77 OPJ_INT32* restrict c0,
71 OPJ_INT32* restrict c1, 78 OPJ_INT32* restrict c1,
72 OPJ_INT32* restrict c2, 79 OPJ_INT32* restrict c2,
73 OPJ_UINT32 n) 80 OPJ_UINT32 n)
74 { 81 {
75 » OPJ_UINT32 i; 82 » OPJ_SIZE_T i;
76 » for(i = 0; i < n; ++i) { 83 » const OPJ_SIZE_T len = n;
84 »
85 » for(i = 0; i < (len & ~3U); i += 4) {
86 » » __m128i y, u, v;
87 » » __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
88 » » __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
89 » » __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
90 » » y = _mm_add_epi32(g, g);
91 » » y = _mm_add_epi32(y, b);
92 » » y = _mm_add_epi32(y, r);
93 » » y = _mm_srai_epi32(y, 2);
94 » » u = _mm_sub_epi32(b, g);
95 » » v = _mm_sub_epi32(r, g);
96 » » _mm_store_si128((__m128i *)&(c0[i]), y);
97 » » _mm_store_si128((__m128i *)&(c1[i]), u);
98 » » _mm_store_si128((__m128i *)&(c2[i]), v);
99 » }
100 »
101 » for(; i < len; ++i) {
77 OPJ_INT32 r = c0[i]; 102 OPJ_INT32 r = c0[i];
78 OPJ_INT32 g = c1[i]; 103 OPJ_INT32 g = c1[i];
79 OPJ_INT32 b = c2[i]; 104 OPJ_INT32 b = c2[i];
80 OPJ_INT32 y = (r + (g * 2) + b) >> 2; 105 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
81 OPJ_INT32 u = b - g; 106 OPJ_INT32 u = b - g;
82 OPJ_INT32 v = r - g; 107 OPJ_INT32 v = r - g;
83 c0[i] = y; 108 c0[i] = y;
84 c1[i] = u; 109 c1[i] = u;
85 c2[i] = v; 110 c2[i] = v;
86 } 111 }
87 } 112 }
113 #else
114 void opj_mct_encode(
115 OPJ_INT32* restrict c0,
116 OPJ_INT32* restrict c1,
117 OPJ_INT32* restrict c2,
118 OPJ_UINT32 n)
119 {
120 OPJ_SIZE_T i;
121 const OPJ_SIZE_T len = n;
122
123 for(i = 0; i < len; ++i) {
124 OPJ_INT32 r = c0[i];
125 OPJ_INT32 g = c1[i];
126 OPJ_INT32 b = c2[i];
127 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
128 OPJ_INT32 u = b - g;
129 OPJ_INT32 v = r - g;
130 c0[i] = y;
131 c1[i] = u;
132 c2[i] = v;
133 }
134 }
135 #endif
88 136
89 /* <summary> */ 137 /* <summary> */
90 /* Inverse reversible MCT. */ 138 /* Inverse reversible MCT. */
91 /* </summary> */ 139 /* </summary> */
140 #ifdef __SSE2__
141 void opj_mct_decode(
142 OPJ_INT32* restrict c0,
143 OPJ_INT32* restrict c1,
144 OPJ_INT32* restrict c2,
145 OPJ_UINT32 n)
146 {
147 OPJ_SIZE_T i;
148 const OPJ_SIZE_T len = n;
149
150 for(i = 0; i < (len & ~3U); i += 4) {
151 __m128i r, g, b;
152 __m128i y = _mm_load_si128((const __m128i *)&(c0[i]));
153 __m128i u = _mm_load_si128((const __m128i *)&(c1[i]));
154 __m128i v = _mm_load_si128((const __m128i *)&(c2[i]));
155 g = y;
156 g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
157 r = _mm_add_epi32(v, g);
158 b = _mm_add_epi32(u, g);
159 _mm_store_si128((__m128i *)&(c0[i]), r);
160 _mm_store_si128((__m128i *)&(c1[i]), g);
161 _mm_store_si128((__m128i *)&(c2[i]), b);
162 }
163 for (; i < len; ++i) {
164 OPJ_INT32 y = c0[i];
165 OPJ_INT32 u = c1[i];
166 OPJ_INT32 v = c2[i];
167 OPJ_INT32 g = y - ((u + v) >> 2);
168 OPJ_INT32 r = v + g;
169 OPJ_INT32 b = u + g;
170 c0[i] = r;
171 c1[i] = g;
172 c2[i] = b;
173 }
174 }
175 #else
92 void opj_mct_decode( 176 void opj_mct_decode(
93 OPJ_INT32* restrict c0, 177 OPJ_INT32* restrict c0,
94 OPJ_INT32* restrict c1, 178 OPJ_INT32* restrict c1,
95 OPJ_INT32* restrict c2, 179 OPJ_INT32* restrict c2,
96 OPJ_UINT32 n) 180 OPJ_UINT32 n)
97 { 181 {
98 OPJ_UINT32 i; 182 OPJ_UINT32 i;
99 for (i = 0; i < n; ++i) { 183 for (i = 0; i < n; ++i) {
100 OPJ_INT32 y = c0[i]; 184 OPJ_INT32 y = c0[i];
101 OPJ_INT32 u = c1[i]; 185 OPJ_INT32 u = c1[i];
102 OPJ_INT32 v = c2[i]; 186 OPJ_INT32 v = c2[i];
103 OPJ_INT32 g = y - ((u + v) >> 2); 187 OPJ_INT32 g = y - ((u + v) >> 2);
104 OPJ_INT32 r = v + g; 188 OPJ_INT32 r = v + g;
105 OPJ_INT32 b = u + g; 189 OPJ_INT32 b = u + g;
106 c0[i] = r; 190 c0[i] = r;
107 c1[i] = g; 191 c1[i] = g;
108 c2[i] = b; 192 c2[i] = b;
109 } 193 }
110 } 194 }
195 #endif
111 196
112 /* <summary> */ 197 /* <summary> */
113 /* Get norm of basis function of reversible MCT. */ 198 /* Get norm of basis function of reversible MCT. */
114 /* </summary> */ 199 /* </summary> */
115 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { 200 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
116 return opj_mct_norms[compno]; 201 return opj_mct_norms[compno];
117 } 202 }
118 203
119 /* <summary> */ 204 /* <summary> */
120 /* Foward irreversible MCT. */ 205 /* Foward irreversible MCT. */
121 /* </summary> */ 206 /* </summary> */
207 #ifdef __SSE4_1__
208 void opj_mct_encode_real(
209 OPJ_INT32* restrict c0,
210 OPJ_INT32* restrict c1,
211 OPJ_INT32* restrict c2,
212 OPJ_UINT32 n)
213 {
214 OPJ_SIZE_T i;
215 const OPJ_SIZE_T len = n;
216
217 const __m128i ry = _mm_set1_epi32(2449);
218 const __m128i gy = _mm_set1_epi32(4809);
219 const __m128i by = _mm_set1_epi32(934);
220 const __m128i ru = _mm_set1_epi32(1382);
221 const __m128i gu = _mm_set1_epi32(2714);
222 /* const __m128i bu = _mm_set1_epi32(4096); */
223 /* const __m128i rv = _mm_set1_epi32(4096); */
224 const __m128i gv = _mm_set1_epi32(3430);
225 const __m128i bv = _mm_set1_epi32(666);
226 const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0));
227
228 for(i = 0; i < (len & ~3U); i += 4) {
229 __m128i lo, hi;
230 __m128i y, u, v;
231 __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
232 __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
233 __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
234
235 lo = r;
236 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
237 lo = _mm_mul_epi32(lo, ry);
238 hi = _mm_mul_epi32(hi, ry);
239 lo = _mm_add_epi64(lo, mulround);
240 hi = _mm_add_epi64(hi, mulround);
241 lo = _mm_srli_epi64(lo, 13);
242 hi = _mm_slli_epi64(hi, 32-13);
243 y = _mm_blend_epi16(lo, hi, 0xCC);
244
245 lo = g;
246 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
247 lo = _mm_mul_epi32(lo, gy);
248 hi = _mm_mul_epi32(hi, gy);
249 lo = _mm_add_epi64(lo, mulround);
250 hi = _mm_add_epi64(hi, mulround);
251 lo = _mm_srli_epi64(lo, 13);
252 hi = _mm_slli_epi64(hi, 32-13);
253 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
254
255 lo = b;
256 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
257 lo = _mm_mul_epi32(lo, by);
258 hi = _mm_mul_epi32(hi, by);
259 lo = _mm_add_epi64(lo, mulround);
260 hi = _mm_add_epi64(hi, mulround);
261 lo = _mm_srli_epi64(lo, 13);
262 hi = _mm_slli_epi64(hi, 32-13);
263 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
264 _mm_store_si128((__m128i *)&(c0[i]), y);
265
266 /*lo = b;
267 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
268 lo = _mm_mul_epi32(lo, mulround);
269 hi = _mm_mul_epi32(hi, mulround);*/
270 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
271 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
272 lo = _mm_slli_epi64(lo, 12);
273 hi = _mm_slli_epi64(hi, 12);
274 lo = _mm_add_epi64(lo, mulround);
275 hi = _mm_add_epi64(hi, mulround);
276 lo = _mm_srli_epi64(lo, 13);
277 hi = _mm_slli_epi64(hi, 32-13);
278 u = _mm_blend_epi16(lo, hi, 0xCC);
279
280 lo = r;
281 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
282 lo = _mm_mul_epi32(lo, ru);
283 hi = _mm_mul_epi32(hi, ru);
284 lo = _mm_add_epi64(lo, mulround);
285 hi = _mm_add_epi64(hi, mulround);
286 lo = _mm_srli_epi64(lo, 13);
287 hi = _mm_slli_epi64(hi, 32-13);
288 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
289
290 lo = g;
291 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
292 lo = _mm_mul_epi32(lo, gu);
293 hi = _mm_mul_epi32(hi, gu);
294 lo = _mm_add_epi64(lo, mulround);
295 hi = _mm_add_epi64(hi, mulround);
296 lo = _mm_srli_epi64(lo, 13);
297 hi = _mm_slli_epi64(hi, 32-13);
298 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
299 _mm_store_si128((__m128i *)&(c1[i]), u);
300
301 /*lo = r;
302 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
303 lo = _mm_mul_epi32(lo, mulround);
304 hi = _mm_mul_epi32(hi, mulround);*/
305 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
306 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
307 lo = _mm_slli_epi64(lo, 12);
308 hi = _mm_slli_epi64(hi, 12);
309 lo = _mm_add_epi64(lo, mulround);
310 hi = _mm_add_epi64(hi, mulround);
311 lo = _mm_srli_epi64(lo, 13);
312 hi = _mm_slli_epi64(hi, 32-13);
313 v = _mm_blend_epi16(lo, hi, 0xCC);
314
315 lo = g;
316 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
317 lo = _mm_mul_epi32(lo, gv);
318 hi = _mm_mul_epi32(hi, gv);
319 lo = _mm_add_epi64(lo, mulround);
320 hi = _mm_add_epi64(hi, mulround);
321 lo = _mm_srli_epi64(lo, 13);
322 hi = _mm_slli_epi64(hi, 32-13);
323 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
324
325 lo = b;
326 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
327 lo = _mm_mul_epi32(lo, bv);
328 hi = _mm_mul_epi32(hi, bv);
329 lo = _mm_add_epi64(lo, mulround);
330 hi = _mm_add_epi64(hi, mulround);
331 lo = _mm_srli_epi64(lo, 13);
332 hi = _mm_slli_epi64(hi, 32-13);
333 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
334 _mm_store_si128((__m128i *)&(c2[i]), v);
335 }
336 for(; i < len; ++i) {
337 OPJ_INT32 r = c0[i];
338 OPJ_INT32 g = c1[i];
339 OPJ_INT32 b = c2[i];
340 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
341 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
342 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
343 c0[i] = y;
344 c1[i] = u;
345 c2[i] = v;
346 }
347 }
348 #else
122 void opj_mct_encode_real( 349 void opj_mct_encode_real(
123 OPJ_INT32* restrict c0, 350 OPJ_INT32* restrict c0,
124 OPJ_INT32* restrict c1, 351 OPJ_INT32* restrict c1,
125 OPJ_INT32* restrict c2, 352 OPJ_INT32* restrict c2,
126 OPJ_UINT32 n) 353 OPJ_UINT32 n)
127 { 354 {
128 OPJ_UINT32 i; 355 OPJ_UINT32 i;
129 for(i = 0; i < n; ++i) { 356 for(i = 0; i < n; ++i) {
130 OPJ_INT32 r = c0[i]; 357 OPJ_INT32 r = c0[i];
131 OPJ_INT32 g = c1[i]; 358 OPJ_INT32 g = c1[i];
132 OPJ_INT32 b = c2[i]; 359 OPJ_INT32 b = c2[i];
133 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934); 360 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
134 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096); 361 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
135 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666); 362 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
136 c0[i] = y; 363 c0[i] = y;
137 c1[i] = u; 364 c1[i] = u;
138 c2[i] = v; 365 c2[i] = v;
139 } 366 }
140 } 367 }
368 #endif
141 369
142 /* <summary> */ 370 /* <summary> */
143 /* Inverse irreversible MCT. */ 371 /* Inverse irreversible MCT. */
144 /* </summary> */ 372 /* </summary> */
145 void opj_mct_decode_real( 373 void opj_mct_decode_real(
146 OPJ_FLOAT32* restrict c0, 374 OPJ_FLOAT32* restrict c0,
147 OPJ_FLOAT32* restrict c1, 375 OPJ_FLOAT32* restrict c1,
148 OPJ_FLOAT32* restrict c2, 376 OPJ_FLOAT32* restrict c2,
149 OPJ_UINT32 n) 377 OPJ_UINT32 n)
150 { 378 {
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 lIndex = i; 544 lIndex = i;
317 545
318 for (j=0;j<pNbComps;++j) { 546 for (j=0;j<pNbComps;++j) {
319 lCurrentValue = lMatrix[lIndex]; 547 lCurrentValue = lMatrix[lIndex];
320 lIndex += pNbComps; 548 lIndex += pNbComps;
321 lNorms[i] += lCurrentValue * lCurrentValue; 549 lNorms[i] += lCurrentValue * lCurrentValue;
322 } 550 }
323 lNorms[i] = sqrt(lNorms[i]); 551 lNorms[i] = sqrt(lNorms[i]);
324 } 552 }
325 } 553 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698