OLD | NEW |
1 /* | 1 /* |
2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
5 * are granted under this license. | 5 * are granted under this license. |
6 * | 6 * |
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 /* Inverse irreversible MCT. */ | 143 /* Inverse irreversible MCT. */ |
144 /* </summary> */ | 144 /* </summary> */ |
145 void opj_mct_decode_real( | 145 void opj_mct_decode_real( |
146 OPJ_FLOAT32* restrict c0, | 146 OPJ_FLOAT32* restrict c0, |
147 OPJ_FLOAT32* restrict c1, | 147 OPJ_FLOAT32* restrict c1, |
148 OPJ_FLOAT32* restrict c2, | 148 OPJ_FLOAT32* restrict c2, |
149 OPJ_UINT32 n) | 149 OPJ_UINT32 n) |
150 { | 150 { |
151 OPJ_UINT32 i; | 151 OPJ_UINT32 i; |
152 #ifdef __SSE__ | 152 #ifdef __SSE__ |
153 __m128 vrv, vgu, vgv, vbu; | 153 » __m128 vrv, vgu, vgv, vbu; |
154 vrv = _mm_set1_ps(1.402f); | 154 » vrv = _mm_set1_ps(1.402f); |
155 vgu = _mm_set1_ps(0.34413f); | 155 » vgu = _mm_set1_ps(0.34413f); |
156 vgv = _mm_set1_ps(0.71414f); | 156 » vgv = _mm_set1_ps(0.71414f); |
157 vbu = _mm_set1_ps(1.772f); | 157 » vbu = _mm_set1_ps(1.772f); |
158 for (i = 0; i < (n >> 3); ++i) { | 158 » for (i = 0; i < (n >> 3); ++i) { |
159 __m128 vy, vu, vv; | 159 » » __m128 vy, vu, vv; |
160 __m128 vr, vg, vb; | 160 » » __m128 vr, vg, vb; |
161 | 161 |
162 vy = _mm_load_ps(c0); | 162 » » vy = _mm_load_ps(c0); |
163 vu = _mm_load_ps(c1); | 163 » » vu = _mm_load_ps(c1); |
164 vv = _mm_load_ps(c2); | 164 » » vv = _mm_load_ps(c2); |
165 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); | 165 » » vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); |
166 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)
); | 166 » » vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(
vv, vgv)); |
167 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); | 167 » » vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); |
168 _mm_store_ps(c0, vr); | 168 » » _mm_store_ps(c0, vr); |
169 _mm_store_ps(c1, vg); | 169 » » _mm_store_ps(c1, vg); |
170 _mm_store_ps(c2, vb); | 170 » » _mm_store_ps(c2, vb); |
171 c0 += 4; | 171 » » c0 += 4; |
172 c1 += 4; | 172 » » c1 += 4; |
173 c2 += 4; | 173 » » c2 += 4; |
174 | 174 |
175 vy = _mm_load_ps(c0); | 175 » » vy = _mm_load_ps(c0); |
176 vu = _mm_load_ps(c1); | 176 » » vu = _mm_load_ps(c1); |
177 vv = _mm_load_ps(c2); | 177 » » vv = _mm_load_ps(c2); |
178 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); | 178 » » vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); |
179 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)
); | 179 » » vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(
vv, vgv)); |
180 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); | 180 » » vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); |
181 _mm_store_ps(c0, vr); | 181 » » _mm_store_ps(c0, vr); |
182 _mm_store_ps(c1, vg); | 182 » » _mm_store_ps(c1, vg); |
183 _mm_store_ps(c2, vb); | 183 » » _mm_store_ps(c2, vb); |
184 c0 += 4; | 184 » » c0 += 4; |
185 c1 += 4; | 185 » » c1 += 4; |
186 c2 += 4; | 186 » » c2 += 4; |
187 } | 187 » } |
188 n &= 7; | 188 » n &= 7; |
189 | |
190 #endif | 189 #endif |
191 for(i = 0; i < n; ++i) { | 190 for(i = 0; i < n; ++i) { |
192 OPJ_FLOAT32 y = c0[i]; | 191 OPJ_FLOAT32 y = c0[i]; |
193 OPJ_FLOAT32 u = c1[i]; | 192 OPJ_FLOAT32 u = c1[i]; |
194 OPJ_FLOAT32 v = c2[i]; | 193 OPJ_FLOAT32 v = c2[i]; |
195 OPJ_FLOAT32 r = y + (v * 1.402f); | 194 OPJ_FLOAT32 r = y + (v * 1.402f); |
196 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); | 195 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); |
197 OPJ_FLOAT32 b = y + (u * 1.772f); | 196 OPJ_FLOAT32 b = y + (u * 1.772f); |
198 c0[i] = r; | 197 c0[i] = r; |
199 c1[i] = g; | 198 c1[i] = g; |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 lIndex = i; | 316 lIndex = i; |
318 | 317 |
319 for (j=0;j<pNbComps;++j) { | 318 for (j=0;j<pNbComps;++j) { |
320 lCurrentValue = lMatrix[lIndex]; | 319 lCurrentValue = lMatrix[lIndex]; |
321 lIndex += pNbComps; | 320 lIndex += pNbComps; |
322 lNorms[i] += lCurrentValue * lCurrentValue; | 321 lNorms[i] += lCurrentValue * lCurrentValue; |
323 } | 322 } |
324 lNorms[i] = sqrt(lNorms[i]); | 323 lNorms[i] = sqrt(lNorms[i]); |
325 } | 324 } |
326 } | 325 } |
OLD | NEW |