Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * The copyright in this software is being made available under the 2-clauses | 2 * The copyright in this software is being made available under the 2-clauses |
| 3 * BSD License, included below. This software may be subject to other third | 3 * BSD License, included below. This software may be subject to other third |
| 4 * party and contributor rights, including patent rights, and no such rights | 4 * party and contributor rights, including patent rights, and no such rights |
| 5 * are granted under this license. | 5 * are granted under this license. |
| 6 * | 6 * |
| 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium | 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
| 8 * Copyright (c) 2002-2014, Professor Benoit Macq | 8 * Copyright (c) 2002-2014, Professor Benoit Macq |
| 9 * Copyright (c) 2001-2003, David Janssens | 9 * Copyright (c) 2001-2003, David Janssens |
| 10 * Copyright (c) 2002-2003, Yannick Verschueren | 10 * Copyright (c) 2002-2003, Yannick Verschueren |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 133 | 133 |
| 134 /* <summary> */ | 134 /* <summary> */ |
| 135 /* Inverse 9-7 wavelet transform in 1-D. */ | 135 /* Inverse 9-7 wavelet transform in 1-D. */ |
| 136 /* </summary> */ | 136 /* </summary> */ |
| 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); | 137 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); |
| 138 | 138 |
| 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); | 139 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); |
| 140 | 140 |
| 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); | 141 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); |
| 142 | 142 |
| 143 #ifdef __SSE__ | 143 //#ifdef __SSE__ |
| | 144 #if 0 |
|
jabdelmalek
2014/06/06 17:25:31
here and below where you comment this out, please
| |
| 144 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); | 145 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); |
| 145 | 146 |
| 146 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); | 147 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); |
| 147 | 148 |
| 148 #else | 149 #else |
| 149 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); | 150 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); |
| 150 | 151 |
| 151 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); | 152 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); |
| 152 | 153 |
| 153 #endif | 154 #endif |
| (...skipping 510 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 664 } | 665 } |
| 665 | 666 |
| 666 a += v->sn * x; | 667 a += v->sn * x; |
| 667 bi = v->wavelet + 1 - v->cas; | 668 bi = v->wavelet + 1 - v->cas; |
| 668 | 669 |
| 669 for(i = 0; i < v->dn; ++i){ | 670 for(i = 0; i < v->dn; ++i){ |
| 670 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); | 671 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); |
| 671 } | 672 } |
| 672 } | 673 } |
| 673 | 674 |
| 674 #ifdef __SSE__ | 675 //#ifdef __SSE__ |
| 675 | 676 #if 0 |
| 676 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ | 677 void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ |
| 677 __m128* restrict vw = (__m128*) w; | 678 __m128* restrict vw = (__m128*) w; |
| 678 OPJ_INT32 i; | 679 OPJ_INT32 i; |
| 679 /* 4x unrolled loop */ | 680 /* 4x unrolled loop */ |
| 680 for(i = 0; i < count >> 2; ++i){ | 681 for(i = 0; i < count >> 2; ++i){ |
| 681 *vw = _mm_mul_ps(*vw, c); | 682 *vw = _mm_mul_ps(*vw, c); |
| 682 vw += 2; | 683 vw += 2; |
| 683 *vw = _mm_mul_ps(*vw, c); | 684 *vw = _mm_mul_ps(*vw, c); |
| 684 vw += 2; | 685 vw += 2; |
| 685 *vw = _mm_mul_ps(*vw, c); | 686 *vw = _mm_mul_ps(*vw, c); |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 801 } | 802 } |
| 802 a = 0; | 803 a = 0; |
| 803 b = 1; | 804 b = 1; |
| 804 }else{ | 805 }else{ |
| 805 if(!((dwt->sn > 0) || (dwt->dn > 1))) { | 806 if(!((dwt->sn > 0) || (dwt->dn > 1))) { |
| 806 return; | 807 return; |
| 807 } | 808 } |
| 808 a = 1; | 809 a = 1; |
| 809 b = 0; | 810 b = 0; |
| 810 } | 811 } |
| 811 #ifdef __SSE__ | 812 #if 0 |
| | 813 //#ifdef __SSE__ |
| 812 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); | 814 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); |
| 813 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); | 815 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); |
| 814 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); | 816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); |
| 815 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); | 817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); |
| 816 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); | 818 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); |
| 817 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); | 819 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); |
| 818 #else | 820 #else |
| 819 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); | 821 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); |
| 820 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); | 822 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); |
| 821 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); | 823 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 916 | 918 |
| 917 for(k = 0; k < rh; ++k){ | 919 for(k = 0; k < rh; ++k){ |
| 918 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); | 920 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); |
| 919 } | 921 } |
| 920 } | 922 } |
| 921 } | 923 } |
| 922 | 924 |
| 923 opj_aligned_free(h.wavelet); | 925 opj_aligned_free(h.wavelet); |
| 924 return OPJ_TRUE; | 926 return OPJ_TRUE; |
| 925 } | 927 } |
| OLD | NEW |