Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Side by Side Diff: core/src/fxcodec/fx_libopenjpeg/libopenjpeg20/mct.c

Issue 418563002: Remove unnecessary aligned memory check when __SSE__ is used (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * The copyright in this software is being made available under the 2-clauses 2 * The copyright in this software is being made available under the 2-clauses
3 * BSD License, included below. This software may be subject to other third 3 * BSD License, included below. This software may be subject to other third
4 * party and contributor rights, including patent rights, and no such rights 4 * party and contributor rights, including patent rights, and no such rights
5 * are granted under this license. 5 * are granted under this license.
6 * 6 *
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8 * Copyright (c) 2002-2014, Professor Benoit Macq 8 * Copyright (c) 2002-2014, Professor Benoit Macq
9 * Copyright (c) 2001-2003, David Janssens 9 * Copyright (c) 2001-2003, David Janssens
10 * Copyright (c) 2002-2003, Yannick Verschueren 10 * Copyright (c) 2002-2003, Yannick Verschueren
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 /* Inverse irreversible MCT. */ 143 /* Inverse irreversible MCT. */
144 /* </summary> */ 144 /* </summary> */
145 void opj_mct_decode_real( 145 void opj_mct_decode_real(
146 OPJ_FLOAT32* restrict c0, 146 OPJ_FLOAT32* restrict c0,
147 OPJ_FLOAT32* restrict c1, 147 OPJ_FLOAT32* restrict c1,
148 OPJ_FLOAT32* restrict c2, 148 OPJ_FLOAT32* restrict c2,
149 OPJ_UINT32 n) 149 OPJ_UINT32 n)
150 { 150 {
151 OPJ_UINT32 i; 151 OPJ_UINT32 i;
152 #ifdef __SSE__ 152 #ifdef __SSE__
153 » // Mantis BUGID: 0056291. The address must be 16-byte aligned. 153 __m128 vrv, vgu, vgv, vbu;
154 » // TestFile: fuzz-signal_sigsegv_6e9e7f_5076_5265.pdf 154 vrv = _mm_set1_ps(1.402f);
155 » if ((OPJ_UINT32)c0 % 16 == 0 && (OPJ_UINT32)c1 % 16 == 0 && (OPJ_UINT32) c2 % 16 == 0){ 155 vgu = _mm_set1_ps(0.34413f);
156 » » __m128 vrv, vgu, vgv, vbu; 156 vgv = _mm_set1_ps(0.71414f);
157 » » vrv = _mm_set1_ps(1.402f); 157 vbu = _mm_set1_ps(1.772f);
158 » » vgu = _mm_set1_ps(0.34413f); 158 for (i = 0; i < (n >> 3); ++i) {
159 » » vgv = _mm_set1_ps(0.71414f); 159 __m128 vy, vu, vv;
160 » » vbu = _mm_set1_ps(1.772f); 160 __m128 vr, vg, vb;
161 » » for (i = 0; i < (n >> 3); ++i) {
162 » » » __m128 vy, vu, vv;
163 » » » __m128 vr, vg, vb;
164 161
165 » » » vy = _mm_load_ps(c0); 162 vy = _mm_load_ps(c0);
166 » » » vu = _mm_load_ps(c1); 163 vu = _mm_load_ps(c1);
167 » » » vv = _mm_load_ps(c2); 164 vv = _mm_load_ps(c2);
168 » » » vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 165 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
169 » » » vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 166 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
170 » » » vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 167 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
171 » » » _mm_store_ps(c0, vr); 168 _mm_store_ps(c0, vr);
172 » » » _mm_store_ps(c1, vg); 169 _mm_store_ps(c1, vg);
173 » » » _mm_store_ps(c2, vb); 170 _mm_store_ps(c2, vb);
174 » » » c0 += 4; 171 c0 += 4;
175 » » » c1 += 4; 172 c1 += 4;
176 » » » c2 += 4; 173 c2 += 4;
177 174
178 » » » vy = _mm_load_ps(c0); 175 vy = _mm_load_ps(c0);
179 » » » vu = _mm_load_ps(c1); 176 vu = _mm_load_ps(c1);
180 » » » vv = _mm_load_ps(c2); 177 vv = _mm_load_ps(c2);
181 » » » vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 178 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
182 » » » vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 179 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
183 » » » vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 180 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
184 » » » _mm_store_ps(c0, vr); 181 _mm_store_ps(c0, vr);
185 » » » _mm_store_ps(c1, vg); 182 _mm_store_ps(c1, vg);
186 » » » _mm_store_ps(c2, vb); 183 _mm_store_ps(c2, vb);
187 » » » c0 += 4; 184 c0 += 4;
188 » » » c1 += 4; 185 c1 += 4;
189 » » » c2 += 4; 186 c2 += 4;
190 » » } 187 }
191 » » n &= 7; 188 n &= 7;
192 » } else { 189
193 » » for (i = 0; i < n; ++i) {
194 » » » OPJ_FLOAT32 y = c0[i];
195 » » » OPJ_FLOAT32 u = c1[i];
196 » » » OPJ_FLOAT32 v = c2[i];
197 » » » OPJ_FLOAT32 r = y + (v * 1.402f);
198 » » » OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
199 » » » OPJ_FLOAT32 b = y + (u * 1.772f);
200 » » » c0[i] = r;
201 » » » c1[i] = g;
202 » » » c2[i] = b;
203 » » }
204 » }
205 #endif 190 #endif
206 for(i = 0; i < n; ++i) { 191 for(i = 0; i < n; ++i) {
207 OPJ_FLOAT32 y = c0[i]; 192 OPJ_FLOAT32 y = c0[i];
208 OPJ_FLOAT32 u = c1[i]; 193 OPJ_FLOAT32 u = c1[i];
209 OPJ_FLOAT32 v = c2[i]; 194 OPJ_FLOAT32 v = c2[i];
210 OPJ_FLOAT32 r = y + (v * 1.402f); 195 OPJ_FLOAT32 r = y + (v * 1.402f);
211 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); 196 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
212 OPJ_FLOAT32 b = y + (u * 1.772f); 197 OPJ_FLOAT32 b = y + (u * 1.772f);
213 c0[i] = r; 198 c0[i] = r;
214 c1[i] = g; 199 c1[i] = g;
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
332 lIndex = i; 317 lIndex = i;
333 318
334 for (j=0;j<pNbComps;++j) { 319 for (j=0;j<pNbComps;++j) {
335 lCurrentValue = lMatrix[lIndex]; 320 lCurrentValue = lMatrix[lIndex];
336 lIndex += pNbComps; 321 lIndex += pNbComps;
337 lNorms[i] += lCurrentValue * lCurrentValue; 322 lNorms[i] += lCurrentValue * lCurrentValue;
338 } 323 }
339 lNorms[i] = sqrt(lNorms[i]); 324 lNorms[i] = sqrt(lNorms[i]);
340 } 325 }
341 } 326 }
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698