OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 // Modify each level's adjustment according to motion_magnitude. | 143 // Modify each level's adjustment according to motion_magnitude. |
144 const __m128i l3 = _mm_set1_epi8( | 144 const __m128i l3 = _mm_set1_epi8( |
145 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); | 145 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); |
146 // Difference between level 3 and level 2 is 2. | 146 // Difference between level 3 and level 2 is 2. |
147 const __m128i l32 = _mm_set1_epi8(2); | 147 const __m128i l32 = _mm_set1_epi8(2); |
148 // Difference between level 2 and level 1 is 1. | 148 // Difference between level 2 and level 1 is 1. |
149 const __m128i l21 = _mm_set1_epi8(1); | 149 const __m128i l21 = _mm_set1_epi8(1); |
150 const uint8_t shift = (width == 4) ? 2 : 1; | 150 const uint8_t shift = (width == 4) ? 2 : 1; |
151 | 151 |
152 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { | 152 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { |
153 vpx_memcpy(sig_buffer[r], sig, width); | 153 memcpy(sig_buffer[r], sig, width); |
154 vpx_memcpy(sig_buffer[r] + width, sig + sig_stride, width); | 154 memcpy(sig_buffer[r] + width, sig + sig_stride, width); |
155 vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, width); | 155 memcpy(mc_running_buffer[r], mc_running_avg_y, width); |
156 vpx_memcpy(mc_running_buffer[r] + width, | 156 memcpy(mc_running_buffer[r] + width, |
157 mc_running_avg_y + mc_avg_y_stride, width); | 157 mc_running_avg_y + mc_avg_y_stride, width); |
158 vpx_memcpy(running_buffer[r], running_avg_y, width); | 158 memcpy(running_buffer[r], running_avg_y, width); |
159 vpx_memcpy(running_buffer[r] + width, | 159 memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); |
160 running_avg_y + avg_y_stride, width); | |
161 if (width == 4) { | 160 if (width == 4) { |
162 vpx_memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); | 161 memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); |
163 vpx_memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); | 162 memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); |
164 vpx_memcpy(mc_running_buffer[r] + width * 2, | 163 memcpy(mc_running_buffer[r] + width * 2, |
165 mc_running_avg_y + mc_avg_y_stride * 2, width); | 164 mc_running_avg_y + mc_avg_y_stride * 2, width); |
166 vpx_memcpy(mc_running_buffer[r] + width * 3, | 165 memcpy(mc_running_buffer[r] + width * 3, |
167 mc_running_avg_y + mc_avg_y_stride * 3, width); | 166 mc_running_avg_y + mc_avg_y_stride * 3, width); |
168 vpx_memcpy(running_buffer[r] + width * 2, | 167 memcpy(running_buffer[r] + width * 2, |
169 running_avg_y + avg_y_stride * 2, width); | 168 running_avg_y + avg_y_stride * 2, width); |
170 vpx_memcpy(running_buffer[r] + width * 3, | 169 memcpy(running_buffer[r] + width * 3, |
171 running_avg_y + avg_y_stride * 3, width); | 170 running_avg_y + avg_y_stride * 3, width); |
172 } | 171 } |
173 acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], | 172 acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], |
174 mc_running_buffer[r], | 173 mc_running_buffer[r], |
175 running_buffer[r], | 174 running_buffer[r], |
176 &k_0, &k_4, &k_8, &k_16, | 175 &k_0, &k_4, &k_8, &k_16, |
177 &l3, &l32, &l21, acc_diff); | 176 &l3, &l32, &l21, acc_diff); |
178 vpx_memcpy(running_avg_y, running_buffer[r], width); | 177 memcpy(running_avg_y, running_buffer[r], width); |
179 vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); | 178 memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); |
180 if (width == 4) { | 179 if (width == 4) { |
181 vpx_memcpy(running_avg_y + avg_y_stride * 2, | 180 memcpy(running_avg_y + avg_y_stride * 2, |
182 running_buffer[r] + width * 2, width); | 181 running_buffer[r] + width * 2, width); |
183 vpx_memcpy(running_avg_y + avg_y_stride * 3, | 182 memcpy(running_avg_y + avg_y_stride * 3, |
184 running_buffer[r] + width * 3, width); | 183 running_buffer[r] + width * 3, width); |
185 } | 184 } |
186 // Update pointers for next iteration. | 185 // Update pointers for next iteration. |
187 sig += (sig_stride << shift); | 186 sig += (sig_stride << shift); |
188 mc_running_avg_y += (mc_avg_y_stride << shift); | 187 mc_running_avg_y += (mc_avg_y_stride << shift); |
189 running_avg_y += (avg_y_stride << shift); | 188 running_avg_y += (avg_y_stride << shift); |
190 } | 189 } |
191 | 190 |
192 { | 191 { |
193 sum_diff = sum_diff_16x1(acc_diff); | 192 sum_diff = sum_diff_16x1(acc_diff); |
194 sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); | 193 sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); |
(...skipping 11 matching lines...) Expand all Loading... |
206 const int delta = ((abs(sum_diff) - sum_diff_thresh) >> | 205 const int delta = ((abs(sum_diff) - sum_diff_thresh) >> |
207 num_pels_log2_lookup[bs]) + 1; | 206 num_pels_log2_lookup[bs]) + 1; |
208 // Only apply the adjustment for max delta up to 3. | 207 // Only apply the adjustment for max delta up to 3. |
209 if (delta < 4) { | 208 if (delta < 4) { |
210 const __m128i k_delta = _mm_set1_epi8(delta); | 209 const __m128i k_delta = _mm_set1_epi8(delta); |
211 running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); | 210 running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); |
212 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { | 211 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { |
213 acc_diff = vp9_denoiser_adj_16x1_sse2( | 212 acc_diff = vp9_denoiser_adj_16x1_sse2( |
214 sig_buffer[r], mc_running_buffer[r], running_buffer[r], | 213 sig_buffer[r], mc_running_buffer[r], running_buffer[r], |
215 k_0, k_delta, acc_diff); | 214 k_0, k_delta, acc_diff); |
216 vpx_memcpy(running_avg_y, running_buffer[r], width); | 215 memcpy(running_avg_y, running_buffer[r], width); |
217 vpx_memcpy(running_avg_y + avg_y_stride, | 216 memcpy(running_avg_y + avg_y_stride, |
218 running_buffer[r] + width, width); | 217 running_buffer[r] + width, width); |
219 if (width == 4) { | 218 if (width == 4) { |
220 vpx_memcpy(running_avg_y + avg_y_stride * 2, | 219 memcpy(running_avg_y + avg_y_stride * 2, |
221 running_buffer[r] + width * 2, width); | 220 running_buffer[r] + width * 2, width); |
222 vpx_memcpy(running_avg_y + avg_y_stride * 3, | 221 memcpy(running_avg_y + avg_y_stride * 3, |
223 running_buffer[r] + width * 3, width); | 222 running_buffer[r] + width * 3, width); |
224 } | 223 } |
225 // Update pointers for next iteration. | 224 // Update pointers for next iteration. |
226 running_avg_y += (avg_y_stride << shift); | 225 running_avg_y += (avg_y_stride << shift); |
227 } | 226 } |
228 sum_diff = sum_diff_16x1(acc_diff); | 227 sum_diff = sum_diff_16x1(acc_diff); |
229 if (abs(sum_diff) > sum_diff_thresh) { | 228 if (abs(sum_diff) > sum_diff_thresh) { |
230 return COPY_BLOCK; | 229 return COPY_BLOCK; |
231 } | 230 } |
232 } else { | 231 } else { |
233 return COPY_BLOCK; | 232 return COPY_BLOCK; |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
367 bs == BLOCK_64X32 || bs == BLOCK_64X64) { | 366 bs == BLOCK_64X32 || bs == BLOCK_64X64) { |
368 return vp9_denoiser_NxM_sse2_big(sig, sig_stride, | 367 return vp9_denoiser_NxM_sse2_big(sig, sig_stride, |
369 mc_avg, mc_avg_stride, | 368 mc_avg, mc_avg_stride, |
370 avg, avg_stride, | 369 avg, avg_stride, |
371 increase_denoising, | 370 increase_denoising, |
372 bs, motion_magnitude); | 371 bs, motion_magnitude); |
373 } else { | 372 } else { |
374 return COPY_BLOCK; | 373 return COPY_BLOCK; |
375 } | 374 } |
376 } | 375 } |
OLD | NEW |