OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * AltiVec optimizations for libjpeg-turbo |
| 3 * |
| 4 * Copyright (C) 2015, D. R. Commander. |
| 5 * All rights reserved. |
| 6 * This software is provided 'as-is', without any express or implied |
| 7 * warranty. In no event will the authors be held liable for any damages |
| 8 * arising from the use of this software. |
| 9 * |
| 10 * Permission is granted to anyone to use this software for any purpose, |
| 11 * including commercial applications, and to alter it and redistribute it |
| 12 * freely, subject to the following restrictions: |
| 13 * |
| 14 * 1. The origin of this software must not be misrepresented; you must not |
| 15 * claim that you wrote the original software. If you use this software |
| 16 * in a product, an acknowledgment in the product documentation would be |
| 17 * appreciated but is not required. |
| 18 * 2. Altered source versions must be plainly marked as such, and must not be |
| 19 * misrepresented as being the original software. |
| 20 * 3. This notice may not be removed or altered from any source distribution. |
| 21 */ |
| 22 |
| 23 /* CHROMA DOWNSAMPLING */ |
| 24 |
| 25 #include "jsimd_altivec.h" |
| 26 #include "jcsample.h" |
| 27 |
| 28 |
| 29 void |
| 30 jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, |
| 31 JDIMENSION v_samp_factor, |
| 32 JDIMENSION width_blocks, |
| 33 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 34 { |
| 35 int outrow, outcol; |
| 36 JDIMENSION output_cols = width_blocks * DCTSIZE; |
| 37 JSAMPROW inptr, outptr; |
| 38 |
| 39 __vector unsigned char this0, next0, out; |
| 40 __vector unsigned short this0e, this0o, next0e, next0o, outl, outh; |
| 41 |
| 42 /* Constants */ |
| 43 __vector unsigned short pw_bias = { __4X2(0, 1) }, |
| 44 pw_one = { __8X(1) }; |
| 45 __vector unsigned char even_odd_index = |
| 46 {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15}, |
| 47 pb_zero = { __16X(0) }; |
| 48 |
| 49 expand_right_edge(input_data, max_v_samp_factor, image_width, |
| 50 output_cols * 2); |
| 51 |
| 52 for (outrow = 0; outrow < v_samp_factor; outrow++) { |
| 53 outptr = output_data[outrow]; |
| 54 inptr = input_data[outrow]; |
| 55 |
| 56 for (outcol = output_cols; outcol > 0; |
| 57 outcol -= 16, inptr += 32, outptr += 16) { |
| 58 |
| 59 this0 = vec_ld(0, inptr); |
| 60 this0 = vec_perm(this0, this0, even_odd_index); |
| 61 this0e = (__vector unsigned short)VEC_UNPACKHU(this0); |
| 62 this0o = (__vector unsigned short)VEC_UNPACKLU(this0); |
| 63 outl = vec_add(this0e, this0o); |
| 64 outl = vec_add(outl, pw_bias); |
| 65 outl = vec_sr(outl, pw_one); |
| 66 |
| 67 if (outcol > 8) { |
| 68 next0 = vec_ld(16, inptr); |
| 69 next0 = vec_perm(next0, next0, even_odd_index); |
| 70 next0e = (__vector unsigned short)VEC_UNPACKHU(next0); |
| 71 next0o = (__vector unsigned short)VEC_UNPACKLU(next0); |
| 72 outh = vec_add(next0e, next0o); |
| 73 outh = vec_add(outh, pw_bias); |
| 74 outh = vec_sr(outh, pw_one); |
| 75 } else |
| 76 outh = vec_splat_u16(0); |
| 77 |
| 78 out = vec_pack(outl, outh); |
| 79 vec_st(out, 0, outptr); |
| 80 } |
| 81 } |
| 82 } |
| 83 |
| 84 |
| 85 void |
| 86 jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, |
| 87 JDIMENSION v_samp_factor, |
| 88 JDIMENSION width_blocks, |
| 89 JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 90 { |
| 91 int inrow, outrow, outcol; |
| 92 JDIMENSION output_cols = width_blocks * DCTSIZE; |
| 93 JSAMPROW inptr0, inptr1, outptr; |
| 94 |
| 95 __vector unsigned char this0, next0, this1, next1, out; |
| 96 __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o, |
| 97 next1e, next1o, out0l, out0h, out1l, out1h, outl, outh; |
| 98 |
| 99 /* Constants */ |
| 100 __vector unsigned short pw_bias = { __4X2(1, 2) }, |
| 101 pw_two = { __8X(2) }; |
| 102 __vector unsigned char even_odd_index = |
| 103 { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, |
| 104 pb_zero = { __16X(0) }; |
| 105 |
| 106 expand_right_edge(input_data, max_v_samp_factor, image_width, |
| 107 output_cols * 2); |
| 108 |
| 109 for (inrow = 0, outrow = 0; outrow < v_samp_factor; |
| 110 inrow += 2, outrow++) { |
| 111 |
| 112 inptr0 = input_data[inrow]; |
| 113 inptr1 = input_data[inrow + 1]; |
| 114 outptr = output_data[outrow]; |
| 115 |
| 116 for (outcol = output_cols; outcol > 0; |
| 117 outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) { |
| 118 |
| 119 this0 = vec_ld(0, inptr0); |
| 120 this0 = vec_perm(this0, this0, even_odd_index); |
| 121 this0e = (__vector unsigned short)VEC_UNPACKHU(this0); |
| 122 this0o = (__vector unsigned short)VEC_UNPACKLU(this0); |
| 123 out0l = vec_add(this0e, this0o); |
| 124 |
| 125 this1 = vec_ld(0, inptr1); |
| 126 this1 = vec_perm(this1, this1, even_odd_index); |
| 127 this1e = (__vector unsigned short)VEC_UNPACKHU(this1); |
| 128 this1o = (__vector unsigned short)VEC_UNPACKLU(this1); |
| 129 out1l = vec_add(this1e, this1o); |
| 130 |
| 131 outl = vec_add(out0l, out1l); |
| 132 outl = vec_add(outl, pw_bias); |
| 133 outl = vec_sr(outl, pw_two); |
| 134 |
| 135 if (outcol > 8) { |
| 136 next0 = vec_ld(16, inptr0); |
| 137 next0 = vec_perm(next0, next0, even_odd_index); |
| 138 next0e = (__vector unsigned short)VEC_UNPACKHU(next0); |
| 139 next0o = (__vector unsigned short)VEC_UNPACKLU(next0); |
| 140 out0h = vec_add(next0e, next0o); |
| 141 |
| 142 next1 = vec_ld(16, inptr1); |
| 143 next1 = vec_perm(next1, next1, even_odd_index); |
| 144 next1e = (__vector unsigned short)VEC_UNPACKHU(next1); |
| 145 next1o = (__vector unsigned short)VEC_UNPACKLU(next1); |
| 146 out1h = vec_add(next1e, next1o); |
| 147 |
| 148 outh = vec_add(out0h, out1h); |
| 149 outh = vec_add(outh, pw_bias); |
| 150 outh = vec_sr(outh, pw_two); |
| 151 } else |
| 152 outh = vec_splat_u16(0); |
| 153 |
| 154 out = vec_pack(outl, outh); |
| 155 vec_st(out, 0, outptr); |
| 156 } |
| 157 } |
| 158 } |
OLD | NEW |