| OLD | NEW |
| 1 /* | 1 /* |
| 2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
| 3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
| 4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
| 5 * are granted under this license. | 5 * are granted under this license. |
| 6 * | 6 * |
| 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
| 8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
| 9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
| 10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 133 | 133 |
| 134 /* <summary> */ | 134 /* <summary> */ |
| 135 /* Inverse 9-7 wavelet transform in 1-D. */ | 135 /* Inverse 9-7 wavelet transform in 1-D. */ |
| 136 /* </summary> */ | 136 /* </summary> */ |
| 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); | 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); |
| 138 | 138 |
| 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); | 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); |
| 140 | 140 |
| 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); | 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); |
| 142 | 142 |
| 143 //#ifdef __SSE__ | 143 #ifdef __SSE__ |
| 144 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager | |
| 145 #if 0 | |
| 146 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); | 144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); |
| 147 | 145 |
| 148 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); | 146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); |
| 149 | 147 |
| 150 #else | 148 #else |
| 151 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); | 149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); |
| 152 | 150 |
| 153 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); | 151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); |
| 154 | 152 |
| 155 #endif | 153 #endif |
| (...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 666 } | 664 } |
| 667 | 665 |
| 668 a += v->sn * x; | 666 a += v->sn * x; |
| 669 bi = v->wavelet + 1 - v->cas; | 667 bi = v->wavelet + 1 - v->cas; |
| 670 | 668 |
| 671 for(i = 0; i < v->dn; ++i){ | 669 for(i = 0; i < v->dn; ++i){ |
| 672 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); | 670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); |
| 673 } | 671 } |
| 674 } | 672 } |
| 675 | 673 |
| 676 //#ifdef __SSE__ | 674 #ifdef __SSE__ |
| 677 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager | 675 |
| 678 #if 0 | |
| 679 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ | 676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ |
| 680 __m128* restrict vw = (__m128*) w; | 677 __m128* restrict vw = (__m128*) w; |
| 681 OPJ_INT32 i; | 678 OPJ_INT32 i; |
| 682 /* 4x unrolled loop */ | 679 /* 4x unrolled loop */ |
| 683 for(i = 0; i < count >> 2; ++i){ | 680 for(i = 0; i < count >> 2; ++i){ |
| 684 *vw = _mm_mul_ps(*vw, c); | 681 *vw = _mm_mul_ps(*vw, c); |
| 685 vw += 2; | 682 vw += 2; |
| 686 *vw = _mm_mul_ps(*vw, c); | 683 *vw = _mm_mul_ps(*vw, c); |
| 687 vw += 2; | 684 vw += 2; |
| 688 *vw = _mm_mul_ps(*vw, c); | 685 *vw = _mm_mul_ps(*vw, c); |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 805 a = 0; | 802 a = 0; |
| 806 b = 1; | 803 b = 1; |
| 807 }else{ | 804 }else{ |
| 808 if(!((dwt->sn > 0) || (dwt->dn > 1))) { | 805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { |
| 809 return; | 806 return; |
| 810 } | 807 } |
| 811 a = 1; | 808 a = 1; |
| 812 b = 0; | 809 b = 0; |
| 813 } | 810 } |
| 814 | 811 |
| 815 //#ifdef __SSE__ | 812 #ifdef __SSE__ |
| 816 // Disable __SSE__ due to bug http://crbug.com/373619. Should enable this after adding aligned malloc in memory manager | |
| 817 #if 0 | |
| 818 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); | 813 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); |
| 819 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); | 814 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); |
| 820 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); | 815 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); |
| 821 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); | 816 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); |
| 822 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); | 817 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); |
| 823 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); | 818 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); |
| 824 #else | 819 #else |
| 825 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); | 820 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); |
| 826 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); | 821 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); |
| 827 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); | 822 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 922 | 917 |
| 923 for(k = 0; k < rh; ++k){ | 918 for(k = 0; k < rh; ++k){ |
| 924 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); | 919 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); |
| 925 } | 920 } |
| 926 } | 921 } |
| 927 } | 922 } |
| 928 | 923 |
| 929 opj_aligned_free(h.wavelet); | 924 opj_aligned_free(h.wavelet); |
| 930 return OPJ_TRUE; | 925 return OPJ_TRUE; |
| 931 } | 926 } |
| OLD | NEW |