OLD | NEW |
1 /* | 1 /* |
2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
5 * are granted under this license. | 5 * are granted under this license. |
6 * | 6 * |
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
(...skipping 19 matching lines...) |
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 * POSSIBILITY OF SUCH DAMAGE. | 37 * POSSIBILITY OF SUCH DAMAGE. |
38 */ | 38 */ |
39 | 39 |
40 #ifdef __SSE__ | 40 #if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386) |
| 41 #define USE_SSE |
41 #include <xmmintrin.h> | 42 #include <xmmintrin.h> |
42 #endif | 43 #endif |
43 #ifdef __SSE2__ | 44 #if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386) |
| 45 #define USE_SSE2 |
44 #include <emmintrin.h> | 46 #include <emmintrin.h> |
45 #endif | 47 #endif |
46 #ifdef __SSE4_1__ | 48 #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386) |
| 49 #define USE_SSE4 |
47 #include <smmintrin.h> | 50 #include <smmintrin.h> |
48 #endif | 51 #endif |
49 | 52 |
50 #include "opj_includes.h" | 53 #include "opj_includes.h" |
51 | 54 |
52 /* <summary> */ | 55 /* <summary> */ |
53 /* This table contains the norms of the basis function of the reversible MCT. */ | 56 /* This table contains the norms of the basis function of the reversible MCT. */ |
54 /* </summary> */ | 57 /* </summary> */ |
55 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; | 58 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; |
56 | 59 |
57 /* <summary> */ | 60 /* <summary> */ |
58 /* This table contains the norms of the basis function of the irreversible MCT. */ | 61 /* This table contains the norms of the basis function of the irreversible MCT. */ |
59 /* </summary> */ | 62 /* </summary> */ |
60 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; | 63 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; |
61 | 64 |
62 const OPJ_FLOAT64 * opj_mct_get_mct_norms () | 65 const OPJ_FLOAT64 * opj_mct_get_mct_norms () |
63 { | 66 { |
64 return opj_mct_norms; | 67 return opj_mct_norms; |
65 } | 68 } |
66 | 69 |
67 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real () | 70 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real () |
68 { | 71 { |
69 return opj_mct_norms_real; | 72 return opj_mct_norms_real; |
70 } | 73 } |
71 | 74 |
72 /* <summary> */ | 75 /* <summary> */ |
73 /* Forward reversible MCT. */ | 76 /* Forward reversible MCT. */ |
74 /* </summary> */ | 77 /* </summary> */ |
75 #ifdef __SSE2__ | 78 #ifdef USE_SSE2 |
76 void opj_mct_encode( | 79 void opj_mct_encode( |
77 OPJ_INT32* restrict c0, | 80 OPJ_INT32* restrict c0, |
78 OPJ_INT32* restrict c1, | 81 OPJ_INT32* restrict c1, |
79 OPJ_INT32* restrict c2, | 82 OPJ_INT32* restrict c2, |
80 OPJ_UINT32 n) | 83 OPJ_UINT32 n) |
81 { | 84 { |
82 OPJ_SIZE_T i; | 85 OPJ_SIZE_T i; |
83 const OPJ_SIZE_T len = n; | 86 const OPJ_SIZE_T len = n; |
84 | 87 |
85 for(i = 0; i < (len & ~3U); i += 4) { | 88 for(i = 0; i < (len & ~3U); i += 4) { |
(...skipping 44 matching lines...) |
130 c0[i] = y; | 133 c0[i] = y; |
131 c1[i] = u; | 134 c1[i] = u; |
132 c2[i] = v; | 135 c2[i] = v; |
133 } | 136 } |
134 } | 137 } |
135 #endif | 138 #endif |
136 | 139 |
137 /* <summary> */ | 140 /* <summary> */ |
138 /* Inverse reversible MCT. */ | 141 /* Inverse reversible MCT. */ |
139 /* </summary> */ | 142 /* </summary> */ |
140 #ifdef __SSE2__ | 143 #ifdef USE_SSE2 |
141 void opj_mct_decode( | 144 void opj_mct_decode( |
142 OPJ_INT32* restrict c0, | 145 OPJ_INT32* restrict c0, |
143 OPJ_INT32* restrict c1, | 146 OPJ_INT32* restrict c1, |
144 OPJ_INT32* restrict c2, | 147 OPJ_INT32* restrict c2, |
145 OPJ_UINT32 n) | 148 OPJ_UINT32 n) |
146 { | 149 { |
147 OPJ_SIZE_T i; | 150 OPJ_SIZE_T i; |
148 const OPJ_SIZE_T len = n; | 151 const OPJ_SIZE_T len = n; |
149 | 152 |
150 for(i = 0; i < (len & ~3U); i += 4) { | 153 for(i = 0; i < (len & ~3U); i += 4) { |
(...skipping 46 matching lines...) |
197 /* <summary> */ | 200 /* <summary> */ |
198 /* Get norm of basis function of reversible MCT. */ | 201 /* Get norm of basis function of reversible MCT. */ |
199 /* </summary> */ | 202 /* </summary> */ |
200 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { | 203 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { |
201 return opj_mct_norms[compno]; | 204 return opj_mct_norms[compno]; |
202 } | 205 } |
203 | 206 |
204 /* <summary> */ | 207 /* <summary> */ |
205 /* Forward irreversible MCT. */ | 208 /* Forward irreversible MCT. */ |
206 /* </summary> */ | 209 /* </summary> */ |
207 #ifdef __SSE4_1__ | 210 #ifdef USE_SSE4 |
208 void opj_mct_encode_real( | 211 void opj_mct_encode_real( |
209 OPJ_INT32* restrict c0, | 212 OPJ_INT32* restrict c0, |
210 OPJ_INT32* restrict c1, | 213 OPJ_INT32* restrict c1, |
211 OPJ_INT32* restrict c2, | 214 OPJ_INT32* restrict c2, |
212 OPJ_UINT32 n) | 215 OPJ_UINT32 n) |
213 { | 216 { |
214 OPJ_SIZE_T i; | 217 OPJ_SIZE_T i; |
215 const OPJ_SIZE_T len = n; | 218 const OPJ_SIZE_T len = n; |
216 | 219 |
217 const __m128i ry = _mm_set1_epi32(2449); | 220 const __m128i ry = _mm_set1_epi32(2449); |
(...skipping 152 matching lines...) |
370 /* <summary> */ | 373 /* <summary> */ |
371 /* Inverse irreversible MCT. */ | 374 /* Inverse irreversible MCT. */ |
372 /* </summary> */ | 375 /* </summary> */ |
373 void opj_mct_decode_real( | 376 void opj_mct_decode_real( |
374 OPJ_FLOAT32* restrict c0, | 377 OPJ_FLOAT32* restrict c0, |
375 OPJ_FLOAT32* restrict c1, | 378 OPJ_FLOAT32* restrict c1, |
376 OPJ_FLOAT32* restrict c2, | 379 OPJ_FLOAT32* restrict c2, |
377 OPJ_UINT32 n) | 380 OPJ_UINT32 n) |
378 { | 381 { |
379 OPJ_UINT32 i; | 382 OPJ_UINT32 i; |
380 #ifdef __SSE__ | 383 #ifdef USE_SSE |
381 __m128 vrv, vgu, vgv, vbu; | 384 __m128 vrv, vgu, vgv, vbu; |
382 vrv = _mm_set1_ps(1.402f); | 385 vrv = _mm_set1_ps(1.402f); |
383 vgu = _mm_set1_ps(0.34413f); | 386 vgu = _mm_set1_ps(0.34413f); |
384 vgv = _mm_set1_ps(0.71414f); | 387 vgv = _mm_set1_ps(0.71414f); |
385 vbu = _mm_set1_ps(1.772f); | 388 vbu = _mm_set1_ps(1.772f); |
386 for (i = 0; i < (n >> 3); ++i) { | 389 for (i = 0; i < (n >> 3); ++i) { |
387 __m128 vy, vu, vv; | 390 __m128 vy, vu, vv; |
388 __m128 vr, vg, vb; | 391 __m128 vr, vg, vb; |
389 | 392 |
390 vy = _mm_load_ps(c0); | 393 vy = _mm_load_ps(c0); |
(...skipping 153 matching lines...) |
544 lIndex = i; | 547 lIndex = i; |
545 | 548 |
546 for (j=0;j<pNbComps;++j) { | 549 for (j=0;j<pNbComps;++j) { |
547 lCurrentValue = lMatrix[lIndex]; | 550 lCurrentValue = lMatrix[lIndex]; |
548 lIndex += pNbComps; | 551 lIndex += pNbComps; |
549 lNorms[i] += lCurrentValue * lCurrentValue; | 552 lNorms[i] += lCurrentValue * lCurrentValue; |
550 } | 553 } |
551 lNorms[i] = sqrt(lNorms[i]); | 554 lNorms[i] = sqrt(lNorms[i]); |
552 } | 555 } |
553 } | 556 } |
OLD | NEW |