OLD | NEW |
1 /* | 1 /* |
2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
5 * are granted under this license. | 5 * are granted under this license. |
6 * | 6 * |
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 | 133 |
134 /* <summary> */ | 134 /* <summary> */ |
135 /* Inverse 9-7 wavelet transform in 1-D. */ | 135 /* Inverse 9-7 wavelet transform in 1-D. */ |
136 /* </summary> */ | 136 /* </summary> */ |
137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); | 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); |
138 | 138 |
139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric
t a, OPJ_INT32 x, OPJ_INT32 size); | 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric
t a, OPJ_INT32 x, OPJ_INT32 size); |
140 | 140 |
141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri
ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); | 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri
ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); |
142 | 142 |
143 #ifdef __SSE__ | 143 //#ifdef __SSE__ |
| 144 // The __SSE__ code path is disabled because of http://crbug.com/373619. Re-enable it
once the memory manager provides an aligned malloc. |
| 145 #if 0 |
144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12
8 c); | 146 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12
8 c); |
145 | 147 |
146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP
J_INT32 m, __m128 c); | 148 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP
J_INT32 m, __m128 c); |
147 | 149 |
148 #else | 150 #else |
149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT
32 c); | 151 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT
32 c); |
150 | 152 |
151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN
T32 m, OPJ_FLOAT32 c); | 153 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN
T32 m, OPJ_FLOAT32 c); |
152 | 154 |
153 #endif | 155 #endif |
(...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
664 } | 666 } |
665 | 667 |
666 a += v->sn * x; | 668 a += v->sn * x; |
667 bi = v->wavelet + 1 - v->cas; | 669 bi = v->wavelet + 1 - v->cas; |
668 | 670 |
669 for(i = 0; i < v->dn; ++i){ | 671 for(i = 0; i < v->dn; ++i){ |
670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA
T32)); | 672 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA
T32)); |
671 } | 673 } |
672 } | 674 } |
673 | 675 |
674 #ifdef __SSE__ | 676 //#ifdef __SSE__ |
675 | 677 // The __SSE__ code path is disabled because of http://crbug.com/373619. Re-enable it
once the memory manager provides an aligned malloc. |
| 678 #if 0 |
676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ | 679 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ |
677 __m128* restrict vw = (__m128*) w; | 680 __m128* restrict vw = (__m128*) w; |
678 OPJ_INT32 i; | 681 OPJ_INT32 i; |
679 /* 4x unrolled loop */ | 682 /* 4x unrolled loop */ |
680 for(i = 0; i < count >> 2; ++i){ | 683 for(i = 0; i < count >> 2; ++i){ |
681 *vw = _mm_mul_ps(*vw, c); | 684 *vw = _mm_mul_ps(*vw, c); |
682 vw += 2; | 685 vw += 2; |
683 *vw = _mm_mul_ps(*vw, c); | 686 *vw = _mm_mul_ps(*vw, c); |
684 vw += 2; | 687 vw += 2; |
685 *vw = _mm_mul_ps(*vw, c); | 688 *vw = _mm_mul_ps(*vw, c); |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
801 } | 804 } |
802 a = 0; | 805 a = 0; |
803 b = 1; | 806 b = 1; |
804 }else{ | 807 }else{ |
805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { | 808 if(!((dwt->sn > 0) || (dwt->dn > 1))) { |
806 return; | 809 return; |
807 } | 810 } |
808 a = 1; | 811 a = 1; |
809 b = 0; | 812 b = 0; |
810 } | 813 } |
811 #ifdef __SSE__ | 814 |
| 815 //#ifdef __SSE__ |
| 816 // The __SSE__ code path is disabled because of http://crbug.com/373619. Re-enable it
once the memory manager provides an aligned malloc. |
| 817 #if 0 |
812 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); | 818 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); |
813 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133
18)); | 819 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133
18)); |
814 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); | 820 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); |
815 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); | 821 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); |
816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); | 822 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); |
817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); | 823 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); |
818 #else | 824 #else |
819 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); | 825 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); |
820 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); | 826 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); |
821 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in
t_min(dwt->sn, dwt->dn-a), opj_dwt_delta); | 827 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in
t_min(dwt->sn, dwt->dn-a), opj_dwt_delta); |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
916 | 922 |
917 for(k = 0; k < rh; ++k){ | 923 for(k = 0; k < rh; ++k){ |
918 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size
of(OPJ_FLOAT32)); | 924 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size
of(OPJ_FLOAT32)); |
919 } | 925 } |
920 } | 926 } |
921 } | 927 } |
922 | 928 |
923 opj_aligned_free(h.wavelet); | 929 opj_aligned_free(h.wavelet); |
924 return OPJ_TRUE; | 930 return OPJ_TRUE; |
925 } | 931 } |
OLD | NEW |