OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
108 in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); | 108 in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); |
109 } | 109 } |
110 | 110 |
111 #define RECON_AND_STORE(dest, in_x) \ | 111 #define RECON_AND_STORE(dest, in_x) \ |
112 { \ | 112 { \ |
113 __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ | 113 __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ |
114 d0 = _mm_unpacklo_epi8(d0, zero); \ | 114 d0 = _mm_unpacklo_epi8(d0, zero); \ |
115 d0 = _mm_add_epi16(in_x, d0); \ | 115 d0 = _mm_add_epi16(in_x, d0); \ |
116 d0 = _mm_packus_epi16(d0, d0); \ | 116 d0 = _mm_packus_epi16(d0, d0); \ |
117 _mm_storel_epi64((__m128i *)(dest), d0); \ | 117 _mm_storel_epi64((__m128i *)(dest), d0); \ |
118 dest += stride; \ | |
119 } | 118 } |
120 | 119 |
121 static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { | 120 static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { |
122 const __m128i final_rounding = _mm_set1_epi16(1<<5); | 121 const __m128i final_rounding = _mm_set1_epi16(1<<5); |
123 const __m128i zero = _mm_setzero_si128(); | 122 const __m128i zero = _mm_setzero_si128(); |
124 // Final rounding and shift | 123 // Final rounding and shift |
125 in[0] = _mm_adds_epi16(in[0], final_rounding); | 124 in[0] = _mm_adds_epi16(in[0], final_rounding); |
126 in[1] = _mm_adds_epi16(in[1], final_rounding); | 125 in[1] = _mm_adds_epi16(in[1], final_rounding); |
127 in[2] = _mm_adds_epi16(in[2], final_rounding); | 126 in[2] = _mm_adds_epi16(in[2], final_rounding); |
128 in[3] = _mm_adds_epi16(in[3], final_rounding); | 127 in[3] = _mm_adds_epi16(in[3], final_rounding); |
(...skipping 20 matching lines...) Expand all Loading... |
149 in[7] = _mm_srai_epi16(in[7], 6); | 148 in[7] = _mm_srai_epi16(in[7], 6); |
150 in[8] = _mm_srai_epi16(in[8], 6); | 149 in[8] = _mm_srai_epi16(in[8], 6); |
151 in[9] = _mm_srai_epi16(in[9], 6); | 150 in[9] = _mm_srai_epi16(in[9], 6); |
152 in[10] = _mm_srai_epi16(in[10], 6); | 151 in[10] = _mm_srai_epi16(in[10], 6); |
153 in[11] = _mm_srai_epi16(in[11], 6); | 152 in[11] = _mm_srai_epi16(in[11], 6); |
154 in[12] = _mm_srai_epi16(in[12], 6); | 153 in[12] = _mm_srai_epi16(in[12], 6); |
155 in[13] = _mm_srai_epi16(in[13], 6); | 154 in[13] = _mm_srai_epi16(in[13], 6); |
156 in[14] = _mm_srai_epi16(in[14], 6); | 155 in[14] = _mm_srai_epi16(in[14], 6); |
157 in[15] = _mm_srai_epi16(in[15], 6); | 156 in[15] = _mm_srai_epi16(in[15], 6); |
158 | 157 |
159 RECON_AND_STORE(dest, in[0]); | 158 RECON_AND_STORE(dest + 0 * stride, in[0]); |
160 RECON_AND_STORE(dest, in[1]); | 159 RECON_AND_STORE(dest + 1 * stride, in[1]); |
161 RECON_AND_STORE(dest, in[2]); | 160 RECON_AND_STORE(dest + 2 * stride, in[2]); |
162 RECON_AND_STORE(dest, in[3]); | 161 RECON_AND_STORE(dest + 3 * stride, in[3]); |
163 RECON_AND_STORE(dest, in[4]); | 162 RECON_AND_STORE(dest + 4 * stride, in[4]); |
164 RECON_AND_STORE(dest, in[5]); | 163 RECON_AND_STORE(dest + 5 * stride, in[5]); |
165 RECON_AND_STORE(dest, in[6]); | 164 RECON_AND_STORE(dest + 6 * stride, in[6]); |
166 RECON_AND_STORE(dest, in[7]); | 165 RECON_AND_STORE(dest + 7 * stride, in[7]); |
167 RECON_AND_STORE(dest, in[8]); | 166 RECON_AND_STORE(dest + 8 * stride, in[8]); |
168 RECON_AND_STORE(dest, in[9]); | 167 RECON_AND_STORE(dest + 9 * stride, in[9]); |
169 RECON_AND_STORE(dest, in[10]); | 168 RECON_AND_STORE(dest + 10 * stride, in[10]); |
170 RECON_AND_STORE(dest, in[11]); | 169 RECON_AND_STORE(dest + 11 * stride, in[11]); |
171 RECON_AND_STORE(dest, in[12]); | 170 RECON_AND_STORE(dest + 12 * stride, in[12]); |
172 RECON_AND_STORE(dest, in[13]); | 171 RECON_AND_STORE(dest + 13 * stride, in[13]); |
173 RECON_AND_STORE(dest, in[14]); | 172 RECON_AND_STORE(dest + 14 * stride, in[14]); |
174 RECON_AND_STORE(dest, in[15]); | 173 RECON_AND_STORE(dest + 15 * stride, in[15]); |
175 } | 174 } |
OLD | NEW |