Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Side by Side Diff: core/src/fxcodec/fx_libopenjpeg/libopenjpeg20/dwt.c

Issue 318593002: Use non-SSE functions when data is not 16-byte aligned (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * The copyright in this software is being made available under the 2-clauses 2 * The copyright in this software is being made available under the 2-clauses
3 * BSD License, included below. This software may be subject to other third 3 * BSD License, included below. This software may be subject to other third
4 * party and contributor rights, including patent rights, and no such rights 4 * party and contributor rights, including patent rights, and no such rights
5 * are granted under this license. 5 * are granted under this license.
6 * 6 *
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8 * Copyright (c) 2002-2014, Professor Benoit Macq 8 * Copyright (c) 2002-2014, Professor Benoit Macq
9 * Copyright (c) 2001-2003, David Janssens 9 * Copyright (c) 2001-2003, David Janssens
10 * Copyright (c) 2002-2003, Yannick Verschueren 10 * Copyright (c) 2002-2003, Yannick Verschueren
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 133
134 /* <summary> */ 134 /* <summary> */
135 /* Inverse 9-7 wavelet transform in 1-D. */ 135 /* Inverse 9-7 wavelet transform in 1-D. */
136 /* </summary> */ 136 /* </summary> */
137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt);
138 138
139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric t a, OPJ_INT32 x, OPJ_INT32 size); 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric t a, OPJ_INT32 x, OPJ_INT32 size);
140 140
141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read);
142 142
143 #ifdef __SSE__ 143 //#ifdef __SSE__
144 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager
145 #if 0
144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12 8 c); 146 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12 8 c);
145 147
146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP J_INT32 m, __m128 c); 148 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP J_INT32 m, __m128 c);
147 149
148 #else 150 #else
149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT 32 c); 151 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT 32 c);
150 152
151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN T32 m, OPJ_FLOAT32 c); 153 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN T32 m, OPJ_FLOAT32 c);
152 154
153 #endif 155 #endif
(...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after
664 } 666 }
665 667
666 a += v->sn * x; 668 a += v->sn * x;
667 bi = v->wavelet + 1 - v->cas; 669 bi = v->wavelet + 1 - v->cas;
668 670
669 for(i = 0; i < v->dn; ++i){ 671 for(i = 0; i < v->dn; ++i){
670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA T32)); 672 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA T32));
671 } 673 }
672 } 674 }
673 675
674 #ifdef __SSE__ 676 //#ifdef __SSE__
675 677 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager
678 #if 0
676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ 679 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){
677 __m128* restrict vw = (__m128*) w; 680 __m128* restrict vw = (__m128*) w;
678 OPJ_INT32 i; 681 OPJ_INT32 i;
679 /* 4x unrolled loop */ 682 /* 4x unrolled loop */
680 for(i = 0; i < count >> 2; ++i){ 683 for(i = 0; i < count >> 2; ++i){
681 *vw = _mm_mul_ps(*vw, c); 684 *vw = _mm_mul_ps(*vw, c);
682 vw += 2; 685 vw += 2;
683 *vw = _mm_mul_ps(*vw, c); 686 *vw = _mm_mul_ps(*vw, c);
684 vw += 2; 687 vw += 2;
685 *vw = _mm_mul_ps(*vw, c); 688 *vw = _mm_mul_ps(*vw, c);
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 } 804 }
802 a = 0; 805 a = 0;
803 b = 1; 806 b = 1;
804 }else{ 807 }else{
805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { 808 if(!((dwt->sn > 0) || (dwt->dn > 1))) {
806 return; 809 return;
807 } 810 }
808 a = 1; 811 a = 1;
809 b = 0; 812 b = 0;
810 } 813 }
811 #ifdef __SSE__ 814
815 //#ifdef __SSE__
816 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager
817 #if 0
812 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); 818 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
813 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133 18)); 819 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133 18));
814 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); 820 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
815 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); 821 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); 822 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); 823 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
818 #else 824 #else
819 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); 825 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
820 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); 826 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
821 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in t_min(dwt->sn, dwt->dn-a), opj_dwt_delta); 827 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in t_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
916 922
917 for(k = 0; k < rh; ++k){ 923 for(k = 0; k < rh; ++k){
918 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size of(OPJ_FLOAT32)); 924 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size of(OPJ_FLOAT32));
919 } 925 }
920 } 926 }
921 } 927 }
922 928
923 opj_aligned_free(h.wavelet); 929 opj_aligned_free(h.wavelet);
924 return OPJ_TRUE; 930 return OPJ_TRUE;
925 } 931 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698