Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(438)

Side by Side Diff: core/src/fxcodec/fx_libopenjpeg/libopenjpeg20/dwt.c

Issue 318593002: Use non-SSE functions when data is not 16 byte aligned (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Patch Set 2 Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * The copyright in this software is being made available under the 2-clauses 2 * The copyright in this software is being made available under the 2-clauses
3 * BSD License, included below. This software may be subject to other third 3 * BSD License, included below. This software may be subject to other third
4 * party and contributor rights, including patent rights, and no such rights 4 * party and contributor rights, including patent rights, and no such rights
5 * are granted under this license. 5 * are granted under this license.
6 * 6 *
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8 * Copyright (c) 2002-2014, Professor Benoit Macq 8 * Copyright (c) 2002-2014, Professor Benoit Macq
9 * Copyright (c) 2001-2003, David Janssens 9 * Copyright (c) 2001-2003, David Janssens
10 * Copyright (c) 2002-2003, Yannick Verschueren 10 * Copyright (c) 2002-2003, Yannick Verschueren
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 133
134 /* <summary> */ 134 /* <summary> */
135 /* Inverse 9-7 wavelet transform in 1-D. */ 135 /* Inverse 9-7 wavelet transform in 1-D. */
136 /* </summary> */ 136 /* </summary> */
137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt);
138 138
139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size);
140 140
141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read);
142 142
143 #ifdef __SSE__ 143 //#ifdef __SSE__
144 #if 0
jabdelmalek 2014/06/06 17:25:31 here and below where you comment this out, please
144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); 145 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c);
145 146
146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); 147 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c);
147 148
148 #else 149 #else
149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); 150 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c);
150 151
151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); 152 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c);
152 153
153 #endif 154 #endif
(...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after
664 } 665 }
665 666
666 a += v->sn * x; 667 a += v->sn * x;
667 bi = v->wavelet + 1 - v->cas; 668 bi = v->wavelet + 1 - v->cas;
668 669
669 for(i = 0; i < v->dn; ++i){ 670 for(i = 0; i < v->dn; ++i){
670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); 671 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
671 } 672 }
672 } 673 }
673 674
674 #ifdef __SSE__ 675 //#ifdef __SSE__
675 676 #if 0
676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ 677 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){
677 __m128* restrict vw = (__m128*) w; 678 __m128* restrict vw = (__m128*) w;
678 OPJ_INT32 i; 679 OPJ_INT32 i;
679 /* 4x unrolled loop */ 680 /* 4x unrolled loop */
680 for(i = 0; i < count >> 2; ++i){ 681 for(i = 0; i < count >> 2; ++i){
681 *vw = _mm_mul_ps(*vw, c); 682 *vw = _mm_mul_ps(*vw, c);
682 vw += 2; 683 vw += 2;
683 *vw = _mm_mul_ps(*vw, c); 684 *vw = _mm_mul_ps(*vw, c);
684 vw += 2; 685 vw += 2;
685 *vw = _mm_mul_ps(*vw, c); 686 *vw = _mm_mul_ps(*vw, c);
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 } 802 }
802 a = 0; 803 a = 0;
803 b = 1; 804 b = 1;
804 }else{ 805 }else{
805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { 806 if(!((dwt->sn > 0) || (dwt->dn > 1))) {
806 return; 807 return;
807 } 808 }
808 a = 1; 809 a = 1;
809 b = 0; 810 b = 0;
810 } 811 }
811 #ifdef __SSE__ 812 #if 0
813 //#ifdef __SSE__
812 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); 814 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
813 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); 815 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318));
814 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); 816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
815 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); 817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); 818 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); 819 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
818 #else 820 #else
819 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); 821 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
820 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); 822 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
821 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); 823 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
916 918
917 for(k = 0; k < rh; ++k){ 919 for(k = 0; k < rh; ++k){
918 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); 920 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
919 } 921 }
920 } 922 }
921 } 923 }
922 924
923 opj_aligned_free(h.wavelet); 925 opj_aligned_free(h.wavelet);
924 return OPJ_TRUE; 926 return OPJ_TRUE;
925 } 927 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698