OLD | NEW |
1 /* | 1 /* |
2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
5 * are granted under this license. | 5 * are granted under this license. |
6 * | 6 * |
7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 | 133 |
134 /* <summary> */ | 134 /* <summary> */ |
135 /* Inverse 9-7 wavelet transform in 1-D. */ | 135 /* Inverse 9-7 wavelet transform in 1-D. */ |
136 /* </summary> */ | 136 /* </summary> */ |
137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); | 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); |
138 | 138 |
139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric
t a, OPJ_INT32 x, OPJ_INT32 size); | 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric
t a, OPJ_INT32 x, OPJ_INT32 size); |
140 | 140 |
141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri
ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); | 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri
ct a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); |
142 | 142 |
143 //#ifdef __SSE__ | 143 #ifdef __SSE__ |
144 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after
adding aligned malloc in memory manager | |
145 #if 0 | |
146 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12
8 c); | 144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12
8 c); |
147 | 145 |
148 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP
J_INT32 m, __m128 c); | 146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OP
J_INT32 m, __m128 c); |
149 | 147 |
150 #else | 148 #else |
151 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT
32 c); | 149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT
32 c); |
152 | 150 |
153 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN
T32 m, OPJ_FLOAT32 c); | 151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN
T32 m, OPJ_FLOAT32 c); |
154 | 152 |
155 #endif | 153 #endif |
(...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
666 } | 664 } |
667 | 665 |
668 a += v->sn * x; | 666 a += v->sn * x; |
669 bi = v->wavelet + 1 - v->cas; | 667 bi = v->wavelet + 1 - v->cas; |
670 | 668 |
671 for(i = 0; i < v->dn; ++i){ | 669 for(i = 0; i < v->dn; ++i){ |
672 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA
T32)); | 670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOA
T32)); |
673 } | 671 } |
674 } | 672 } |
675 | 673 |
676 //#ifdef __SSE__ | 674 #ifdef __SSE__ |
677 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after
adding aligned malloc in memory manager | 675 |
678 #if 0 | |
679 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ | 676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ |
680 __m128* restrict vw = (__m128*) w; | 677 __m128* restrict vw = (__m128*) w; |
681 OPJ_INT32 i; | 678 OPJ_INT32 i; |
682 /* 4x unrolled loop */ | 679 /* 4x unrolled loop */ |
683 for(i = 0; i < count >> 2; ++i){ | 680 for(i = 0; i < count >> 2; ++i){ |
684 *vw = _mm_mul_ps(*vw, c); | 681 *vw = _mm_mul_ps(*vw, c); |
685 vw += 2; | 682 vw += 2; |
686 *vw = _mm_mul_ps(*vw, c); | 683 *vw = _mm_mul_ps(*vw, c); |
687 vw += 2; | 684 vw += 2; |
688 *vw = _mm_mul_ps(*vw, c); | 685 *vw = _mm_mul_ps(*vw, c); |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
805 a = 0; | 802 a = 0; |
806 b = 1; | 803 b = 1; |
807 }else{ | 804 }else{ |
808 if(!((dwt->sn > 0) || (dwt->dn > 1))) { | 805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { |
809 return; | 806 return; |
810 } | 807 } |
811 a = 1; | 808 a = 1; |
812 b = 0; | 809 b = 0; |
813 } | 810 } |
814 | 811 |
815 //#ifdef __SSE__ | 812 #ifdef __SSE__ |
816 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after
adding aligned malloc in memory manager | |
817 #if 0 | |
818 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); | 813 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); |
819 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133
18)); | 814 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c133
18)); |
820 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); | 815 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); |
821 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); | 816 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); |
822 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); | 817 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, op
j_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); |
823 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); | 818 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, op
j_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); |
824 #else | 819 #else |
825 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); | 820 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); |
826 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); | 821 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); |
827 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in
t_min(dwt->sn, dwt->dn-a), opj_dwt_delta); | 822 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_in
t_min(dwt->sn, dwt->dn-a), opj_dwt_delta); |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
922 | 917 |
923 for(k = 0; k < rh; ++k){ | 918 for(k = 0; k < rh; ++k){ |
924 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size
of(OPJ_FLOAT32)); | 919 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * size
of(OPJ_FLOAT32)); |
925 } | 920 } |
926 } | 921 } |
927 } | 922 } |
928 | 923 |
929 opj_aligned_free(h.wavelet); | 924 opj_aligned_free(h.wavelet); |
930 return OPJ_TRUE; | 925 return OPJ_TRUE; |
931 } | 926 } |
OLD | NEW |