Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(497)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 // Modify each level's adjustment according to motion_magnitude. 143 // Modify each level's adjustment according to motion_magnitude.
144 const __m128i l3 = _mm_set1_epi8( 144 const __m128i l3 = _mm_set1_epi8(
145 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); 145 (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
146 // Difference between level 3 and level 2 is 2. 146 // Difference between level 3 and level 2 is 2.
147 const __m128i l32 = _mm_set1_epi8(2); 147 const __m128i l32 = _mm_set1_epi8(2);
148 // Difference between level 2 and level 1 is 1. 148 // Difference between level 2 and level 1 is 1.
149 const __m128i l21 = _mm_set1_epi8(1); 149 const __m128i l21 = _mm_set1_epi8(1);
150 const uint8_t shift = (width == 4) ? 2 : 1; 150 const uint8_t shift = (width == 4) ? 2 : 1;
151 151
152 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { 152 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
153 vpx_memcpy(sig_buffer[r], sig, width); 153 memcpy(sig_buffer[r], sig, width);
154 vpx_memcpy(sig_buffer[r] + width, sig + sig_stride, width); 154 memcpy(sig_buffer[r] + width, sig + sig_stride, width);
155 vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, width); 155 memcpy(mc_running_buffer[r], mc_running_avg_y, width);
156 vpx_memcpy(mc_running_buffer[r] + width, 156 memcpy(mc_running_buffer[r] + width,
157 mc_running_avg_y + mc_avg_y_stride, width); 157 mc_running_avg_y + mc_avg_y_stride, width);
158 vpx_memcpy(running_buffer[r], running_avg_y, width); 158 memcpy(running_buffer[r], running_avg_y, width);
159 vpx_memcpy(running_buffer[r] + width, 159 memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
160 running_avg_y + avg_y_stride, width);
161 if (width == 4) { 160 if (width == 4) {
162 vpx_memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width); 161 memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width);
163 vpx_memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width); 162 memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width);
164 vpx_memcpy(mc_running_buffer[r] + width * 2, 163 memcpy(mc_running_buffer[r] + width * 2,
165 mc_running_avg_y + mc_avg_y_stride * 2, width); 164 mc_running_avg_y + mc_avg_y_stride * 2, width);
166 vpx_memcpy(mc_running_buffer[r] + width * 3, 165 memcpy(mc_running_buffer[r] + width * 3,
167 mc_running_avg_y + mc_avg_y_stride * 3, width); 166 mc_running_avg_y + mc_avg_y_stride * 3, width);
168 vpx_memcpy(running_buffer[r] + width * 2, 167 memcpy(running_buffer[r] + width * 2,
169 running_avg_y + avg_y_stride * 2, width); 168 running_avg_y + avg_y_stride * 2, width);
170 vpx_memcpy(running_buffer[r] + width * 3, 169 memcpy(running_buffer[r] + width * 3,
171 running_avg_y + avg_y_stride * 3, width); 170 running_avg_y + avg_y_stride * 3, width);
172 } 171 }
173 acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], 172 acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
174 mc_running_buffer[r], 173 mc_running_buffer[r],
175 running_buffer[r], 174 running_buffer[r],
176 &k_0, &k_4, &k_8, &k_16, 175 &k_0, &k_4, &k_8, &k_16,
177 &l3, &l32, &l21, acc_diff); 176 &l3, &l32, &l21, acc_diff);
178 vpx_memcpy(running_avg_y, running_buffer[r], width); 177 memcpy(running_avg_y, running_buffer[r], width);
179 vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); 178 memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
180 if (width == 4) { 179 if (width == 4) {
181 vpx_memcpy(running_avg_y + avg_y_stride * 2, 180 memcpy(running_avg_y + avg_y_stride * 2,
182 running_buffer[r] + width * 2, width); 181 running_buffer[r] + width * 2, width);
183 vpx_memcpy(running_avg_y + avg_y_stride * 3, 182 memcpy(running_avg_y + avg_y_stride * 3,
184 running_buffer[r] + width * 3, width); 183 running_buffer[r] + width * 3, width);
185 } 184 }
186 // Update pointers for next iteration. 185 // Update pointers for next iteration.
187 sig += (sig_stride << shift); 186 sig += (sig_stride << shift);
188 mc_running_avg_y += (mc_avg_y_stride << shift); 187 mc_running_avg_y += (mc_avg_y_stride << shift);
189 running_avg_y += (avg_y_stride << shift); 188 running_avg_y += (avg_y_stride << shift);
190 } 189 }
191 190
192 { 191 {
193 sum_diff = sum_diff_16x1(acc_diff); 192 sum_diff = sum_diff_16x1(acc_diff);
194 sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); 193 sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
(...skipping 11 matching lines...) Expand all
206 const int delta = ((abs(sum_diff) - sum_diff_thresh) >> 205 const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
207 num_pels_log2_lookup[bs]) + 1; 206 num_pels_log2_lookup[bs]) + 1;
208 // Only apply the adjustment for max delta up to 3. 207 // Only apply the adjustment for max delta up to 3.
209 if (delta < 4) { 208 if (delta < 4) {
210 const __m128i k_delta = _mm_set1_epi8(delta); 209 const __m128i k_delta = _mm_set1_epi8(delta);
211 running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]); 210 running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
212 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) { 211 for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
213 acc_diff = vp9_denoiser_adj_16x1_sse2( 212 acc_diff = vp9_denoiser_adj_16x1_sse2(
214 sig_buffer[r], mc_running_buffer[r], running_buffer[r], 213 sig_buffer[r], mc_running_buffer[r], running_buffer[r],
215 k_0, k_delta, acc_diff); 214 k_0, k_delta, acc_diff);
216 vpx_memcpy(running_avg_y, running_buffer[r], width); 215 memcpy(running_avg_y, running_buffer[r], width);
217 vpx_memcpy(running_avg_y + avg_y_stride, 216 memcpy(running_avg_y + avg_y_stride,
218 running_buffer[r] + width, width); 217 running_buffer[r] + width, width);
219 if (width == 4) { 218 if (width == 4) {
220 vpx_memcpy(running_avg_y + avg_y_stride * 2, 219 memcpy(running_avg_y + avg_y_stride * 2,
221 running_buffer[r] + width * 2, width); 220 running_buffer[r] + width * 2, width);
222 vpx_memcpy(running_avg_y + avg_y_stride * 3, 221 memcpy(running_avg_y + avg_y_stride * 3,
223 running_buffer[r] + width * 3, width); 222 running_buffer[r] + width * 3, width);
224 } 223 }
225 // Update pointers for next iteration. 224 // Update pointers for next iteration.
226 running_avg_y += (avg_y_stride << shift); 225 running_avg_y += (avg_y_stride << shift);
227 } 226 }
228 sum_diff = sum_diff_16x1(acc_diff); 227 sum_diff = sum_diff_16x1(acc_diff);
229 if (abs(sum_diff) > sum_diff_thresh) { 228 if (abs(sum_diff) > sum_diff_thresh) {
230 return COPY_BLOCK; 229 return COPY_BLOCK;
231 } 230 }
232 } else { 231 } else {
233 return COPY_BLOCK; 232 return COPY_BLOCK;
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
367 bs == BLOCK_64X32 || bs == BLOCK_64X64) { 366 bs == BLOCK_64X32 || bs == BLOCK_64X64) {
368 return vp9_denoiser_NxM_sse2_big(sig, sig_stride, 367 return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
369 mc_avg, mc_avg_stride, 368 mc_avg, mc_avg_stride,
370 avg, avg_stride, 369 avg, avg_stride,
371 increase_denoising, 370 increase_denoising,
372 bs, motion_magnitude); 371 bs, motion_magnitude);
373 } else { 372 } else {
374 return COPY_BLOCK; 373 return COPY_BLOCK;
375 } 374 }
376 } 375 }
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698