Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 592203002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/common/vp9_convolve.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 (void)filter_y; (void)filter_y_stride; 275 (void)filter_y; (void)filter_y_stride;
276 276
277 for (y = 0; y < h; ++y) { 277 for (y = 0; y < h; ++y) {
278 for (x = 0; x < w; ++x) 278 for (x = 0; x < w; ++x)
279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); 279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
280 280
281 src += src_stride; 281 src += src_stride;
282 dst += dst_stride; 282 dst += dst_stride;
283 } 283 }
284 } 284 }
285
286 #if CONFIG_VP9_HIGHBITDEPTH
// Horizontal pass of the high-bitdepth sub-pixel filter.
//
// src8 and dst8 are turned into uint16_t pointers with CONVERT_TO_SHORTPTR;
// both strides are then applied to those uint16_t pointers.
// For each of the h rows, w outputs are written. Output x uses the kernel
// x_filters[x_q4 & SUBPEL_MASK] applied at source offset
// x_q4 >> SUBPEL_BITS, where x_q4 starts at x0_q4 on every row and advances
// by x_step_q4 per output sample.
// Each SUBPEL_TAPS-term sum is rounded with
// ROUND_POWER_OF_TWO(sum, FILTER_BITS) and passed through
// clip_pixel_high(..., bd) before being stored.
287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
288 uint8_t *dst8, ptrdiff_t dst_stride,
289 const InterpKernel *x_filters,
290 int x0_q4, int x_step_q4,
291 int w, int h, int bd) {
292 int x, y;
293 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Step back so the SUBPEL_TAPS-wide window begins SUBPEL_TAPS / 2 - 1
// samples before the nominal source position.
295 src -= SUBPEL_TAPS / 2 - 1;
296 for (y = 0; y < h; ++y) {
// The running position restarts at x0_q4 for every row.
297 int x_q4 = x0_q4;
298 for (x = 0; x < w; ++x) {
// High bits of x_q4 select the first source sample, low bits the kernel.
299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
301 int k, sum = 0;
302 for (k = 0; k < SUBPEL_TAPS; ++k)
303 sum += src_x[k] * x_filter[k];
304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
305 x_q4 += x_step_q4;
306 }
307 src += src_stride;
308 dst += dst_stride;
309 }
310 }
311
// Horizontal high-bitdepth sub-pixel filter that averages into dst.
//
// Computes the same rounded, clipped SUBPEL_TAPS-term sum per output sample
// as high_convolve_horiz, but instead of overwriting dst[x] it stores
// ROUND_POWER_OF_TWO(dst[x] + filtered, 1), i.e. the rounded mean of the
// value already in dst and the newly filtered value.
// src8 and dst8 are accessed as uint16_t via CONVERT_TO_SHORTPTR, and the
// strides are applied to those uint16_t pointers.
312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
313 uint8_t *dst8, ptrdiff_t dst_stride,
314 const InterpKernel *x_filters,
315 int x0_q4, int x_step_q4,
316 int w, int h, int bd) {
317 int x, y;
318 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Step back so the SUBPEL_TAPS-wide window begins SUBPEL_TAPS / 2 - 1
// samples before the nominal source position.
320 src -= SUBPEL_TAPS / 2 - 1;
321 for (y = 0; y < h; ++y) {
// The running position restarts at x0_q4 for every row.
322 int x_q4 = x0_q4;
323 for (x = 0; x < w; ++x) {
// High bits of x_q4 select the first source sample, low bits the kernel.
324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
326 int k, sum = 0;
327 for (k = 0; k < SUBPEL_TAPS; ++k)
328 sum += src_x[k] * x_filter[k];
// Blend the filtered sample with the existing destination sample.
329 dst[x] = ROUND_POWER_OF_TWO(dst[x] +
330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
331 x_q4 += x_step_q4;
332 }
333 src += src_stride;
334 dst += dst_stride;
335 }
336 }
337
// Vertical pass of the high-bitdepth sub-pixel filter.
//
// src8 and dst8 are accessed as uint16_t via CONVERT_TO_SHORTPTR, and the
// strides are applied to those uint16_t pointers.
// The block is walked one column at a time (outer loop over x, inner loop
// over y). Output row y of a column uses the kernel
// y_filters[y_q4 & SUBPEL_MASK] applied to SUBPEL_TAPS samples spaced
// src_stride apart, starting at row y_q4 >> SUBPEL_BITS; y_q4 starts at
// y0_q4 for every column and advances by y_step_q4 per output row.
// Each sum is rounded with ROUND_POWER_OF_TWO(sum, FILTER_BITS) and passed
// through clip_pixel_high(..., bd) before being stored.
338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
339 uint8_t *dst8, ptrdiff_t dst_stride,
340 const InterpKernel *y_filters,
341 int y0_q4, int y_step_q4, int w, int h,
342 int bd) {
343 int x, y;
344 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Step up so the SUBPEL_TAPS-tall window begins SUBPEL_TAPS / 2 - 1 rows
// above the nominal source position.
346 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
347 for (x = 0; x < w; ++x) {
// The running position restarts at y0_q4 for every column.
348 int y_q4 = y0_q4;
349 for (y = 0; y < h; ++y) {
// High bits of y_q4 select the first source row, low bits the kernel.
350 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
352 int k, sum = 0;
353 for (k = 0; k < SUBPEL_TAPS; ++k)
354 sum += src_y[k * src_stride] * y_filter[k];
355 dst[y * dst_stride] = clip_pixel_high(
356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
357 y_q4 += y_step_q4;
358 }
// Advance both pointers to the next column.
359 ++src;
360 ++dst;
361 }
362 }
363
// Vertical high-bitdepth sub-pixel filter that averages into dst.
//
// Computes the same rounded, clipped SUBPEL_TAPS-term column sum per output
// sample as high_convolve_vert, but stores
// ROUND_POWER_OF_TWO(dst + filtered, 1), i.e. the rounded mean of the value
// already in dst and the newly filtered value.
// src8 and dst8 are accessed as uint16_t via CONVERT_TO_SHORTPTR, and the
// strides are applied to those uint16_t pointers.
364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
365 uint8_t *dst8, ptrdiff_t dst_stride,
366 const InterpKernel *y_filters,
367 int y0_q4, int y_step_q4, int w, int h,
368 int bd) {
369 int x, y;
370 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Step up so the SUBPEL_TAPS-tall window begins SUBPEL_TAPS / 2 - 1 rows
// above the nominal source position.
372 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
373 for (x = 0; x < w; ++x) {
// The running position restarts at y0_q4 for every column.
374 int y_q4 = y0_q4;
375 for (y = 0; y < h; ++y) {
// High bits of y_q4 select the first source row, low bits the kernel.
376 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
378 int k, sum = 0;
379 for (k = 0; k < SUBPEL_TAPS; ++k)
380 sum += src_y[k * src_stride] * y_filter[k];
// Blend the filtered sample with the existing destination sample.
381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
383 y_q4 += y_step_q4;
384 }
// Advance both pointers to the next column.
385 ++src;
386 ++dst;
387 }
388 }
389
// Two-dimensional high-bitdepth sub-pixel filter: a horizontal pass into a
// fixed-size local buffer followed by a vertical pass from that buffer into
// dst. The asserts below state the parameter limits the fixed buffer size
// relies on (w, h <= 64 and both step sizes <= 32).
390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride,
391 uint8_t *dst, ptrdiff_t dst_stride,
392 const InterpKernel *const x_filters,
393 int x0_q4, int x_step_q4,
394 const InterpKernel *const y_filters,
395 int y0_q4, int y_step_q4,
396 int w, int h, int bd) {
397 // Note: Fixed size intermediate buffer, temp, places limits on parameters.
398 // 2d filtering proceeds in 2 steps:
399 // (1) Interpolate horizontally into an intermediate buffer, temp.
400 // (2) Interpolate temp vertically to derive the sub-pixel result.
401 // Deriving the maximum number of rows in the temp buffer (135):
402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
403 // --Largest block size is 64x64 pixels.
404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
405 // original frame (in 1/16th pixel units).
406 // --Must round-up because block may be located at sub-pixel position.
407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
408 // --(((64 - 1) * 32 + 15) >> 4) + 8 = 134 with the shift used below, so
// the 135 rows reserved in temp are sufficient.
409 uint16_t temp[64 * 135];
410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
411
412 assert(w <= 64);
413 assert(h <= 64);
414 assert(y_step_q4 <= 32);
415 assert(x_step_q4 <= 32);
416
// Never filter fewer intermediate rows than there are output rows.
417 if (intermediate_height < h)
418 intermediate_height = h;
419
// Horizontal pass: start SUBPEL_TAPS / 2 - 1 rows above src and write
// intermediate_height rows into temp, using a row stride of 64.
420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
421 src_stride, CONVERT_TO_BYTEPTR(temp), 64,
422 x_filters, x0_q4, x_step_q4, w,
423 intermediate_height, bd);
// Vertical pass: the source pointer is offset by SUBPEL_TAPS / 2 - 1 rows of
// temp; high_convolve_vert steps back by the same number of rows itself.
// NOTE(review): presumably this makes its tap window start at the first row
// of temp - confirm against CONVERT_TO_BYTEPTR / CONVERT_TO_SHORTPTR.
424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
426 w, h, bd);
427 }
428
429
// C implementation of the horizontal-only high-bitdepth convolution.
//
// The kernel table and the starting position within it are obtained from
// filter_x through get_filter_base() and get_filter_offset(); the filtering
// itself is done by high_convolve_horiz(). filter_y and y_step_q4 are
// accepted but not used.
430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
431 uint8_t *dst, ptrdiff_t dst_stride,
432 const int16_t *filter_x, int x_step_q4,
433 const int16_t *filter_y, int y_step_q4,
434 int w, int h, int bd) {
435 const InterpKernel *const filters_x = get_filter_base(filter_x);
436 const int x0_q4 = get_filter_offset(filter_x, filters_x);
// Casts to void mark the vertical parameters as intentionally unused.
437 (void)filter_y;
438 (void)y_step_q4;
439
440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
441 x0_q4, x_step_q4, w, h, bd);
442 }
443
// C implementation of the horizontal-only high-bitdepth convolution that
// averages its result with the existing contents of dst.
//
// The kernel table and the starting position within it are obtained from
// filter_x through get_filter_base() and get_filter_offset(); the filtering
// and averaging are done by high_convolve_avg_horiz(). filter_y and
// y_step_q4 are accepted but not used.
444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
445 uint8_t *dst, ptrdiff_t dst_stride,
446 const int16_t *filter_x, int x_step_q4,
447 const int16_t *filter_y, int y_step_q4,
448 int w, int h, int bd) {
449 const InterpKernel *const filters_x = get_filter_base(filter_x);
450 const int x0_q4 = get_filter_offset(filter_x, filters_x);
// Casts to void mark the vertical parameters as intentionally unused.
451 (void)filter_y;
452 (void)y_step_q4;
453
454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
455 x0_q4, x_step_q4, w, h, bd);
456 }
457
// C implementation of the vertical-only high-bitdepth convolution.
//
// The kernel table and the starting position within it are obtained from
// filter_y through get_filter_base() and get_filter_offset(); the filtering
// itself is done by high_convolve_vert(). filter_x and x_step_q4 are
// accepted but not used.
458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
459 uint8_t *dst, ptrdiff_t dst_stride,
460 const int16_t *filter_x, int x_step_q4,
461 const int16_t *filter_y, int y_step_q4,
462 int w, int h, int bd) {
463 const InterpKernel *const filters_y = get_filter_base(filter_y);
464 const int y0_q4 = get_filter_offset(filter_y, filters_y);
// Casts to void mark the horizontal parameters as intentionally unused.
465 (void)filter_x;
466 (void)x_step_q4;
467
468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
469 y0_q4, y_step_q4, w, h, bd);
470 }
471
// C implementation of the vertical-only high-bitdepth convolution that
// averages its result with the existing contents of dst.
//
// The kernel table and the starting position within it are obtained from
// filter_y through get_filter_base() and get_filter_offset(); the filtering
// and averaging are done by high_convolve_avg_vert(). filter_x and
// x_step_q4 are accepted but not used.
472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
473 uint8_t *dst, ptrdiff_t dst_stride,
474 const int16_t *filter_x, int x_step_q4,
475 const int16_t *filter_y, int y_step_q4,
476 int w, int h, int bd) {
477 const InterpKernel *const filters_y = get_filter_base(filter_y);
478 const int y0_q4 = get_filter_offset(filter_y, filters_y);
// Casts to void mark the horizontal parameters as intentionally unused.
479 (void)filter_x;
480 (void)x_step_q4;
481
482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
483 y0_q4, y_step_q4, w, h, bd);
484 }
485
// C implementation of the two-dimensional high-bitdepth convolution.
//
// For each direction, the kernel table and the starting position within it
// are obtained from filter_x / filter_y through get_filter_base() and
// get_filter_offset(). The filtering is then delegated to high_convolve(),
// which asserts w <= 64, h <= 64 and both step sizes <= 32.
486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
487 uint8_t *dst, ptrdiff_t dst_stride,
488 const int16_t *filter_x, int x_step_q4,
489 const int16_t *filter_y, int y_step_q4,
490 int w, int h, int bd) {
491 const InterpKernel *const filters_x = get_filter_base(filter_x);
492 const int x0_q4 = get_filter_offset(filter_x, filters_x);
493
494 const InterpKernel *const filters_y = get_filter_base(filter_y);
495 const int y0_q4 = get_filter_offset(filter_y, filters_y);
496
497 high_convolve(src, src_stride, dst, dst_stride,
498 filters_x, x0_q4, x_step_q4,
499 filters_y, y0_q4, y_step_q4, w, h, bd);
500 }
501
// C implementation of the two-dimensional high-bitdepth convolution that
// averages its result with the existing contents of dst.
//
// Works in two steps: vp9_high_convolve8_c() filters src into a local
// 64 * 64 uint16_t buffer (row stride 64), then vp9_high_convolve_avg_c()
// averages that buffer into dst. The asserts require w <= 64 and h <= 64 so
// the block fits in the buffer.
502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
503 uint8_t *dst, ptrdiff_t dst_stride,
504 const int16_t *filter_x, int x_step_q4,
505 const int16_t *filter_y, int y_step_q4,
506 int w, int h, int bd) {
507 // Fixed size intermediate buffer places limits on parameters.
508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64);
509 assert(w <= 64);
510 assert(h <= 64);
511
512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
// The filter arguments are passed as NULL / 0; vp9_high_convolve_avg_c does
// not use them.
514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
515 NULL, 0, NULL, 0, w, h, bd);
516 }
517
// Copies a w x h block of high-bitdepth samples from src8 to dst8 without
// filtering.
//
// src8 and dst8 are accessed as uint16_t via CONVERT_TO_SHORTPTR, and the
// strides are applied to those uint16_t pointers. The filter arguments and
// bd are accepted but not used.
518 void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
519 uint8_t *dst8, ptrdiff_t dst_stride,
520 const int16_t *filter_x, int filter_x_stride,
521 const int16_t *filter_y, int filter_y_stride,
522 int w, int h, int bd) {
523 int r;
524 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
525 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Casts to void mark the parameters below as intentionally unused.
526 (void)filter_x;
527 (void)filter_y;
528 (void)filter_x_stride;
529 (void)filter_y_stride;
530 (void)bd;
531
// One vpx_memcpy of w uint16_t samples per row, for h rows.
532 for (r = h; r > 0; --r) {
533 vpx_memcpy(dst, src, w * sizeof(uint16_t));
534 src += src_stride;
535 dst += dst_stride;
536 }
537 }
538
// Averages a w x h block of high-bitdepth samples from src8 into dst8
// without filtering.
//
// Every destination sample becomes ROUND_POWER_OF_TWO(dst[x] + src[x], 1),
// i.e. the rounded mean of the old destination value and the source value.
// src8 and dst8 are accessed as uint16_t via CONVERT_TO_SHORTPTR, and the
// strides are applied to those uint16_t pointers. The filter arguments and
// bd are accepted but not used.
539 void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
540 uint8_t *dst8, ptrdiff_t dst_stride,
541 const int16_t *filter_x, int filter_x_stride,
542 const int16_t *filter_y, int filter_y_stride,
543 int w, int h, int bd) {
544 int x, y;
545 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
546 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
// Casts to void mark the parameters below as intentionally unused.
547 (void)filter_x;
548 (void)filter_y;
549 (void)filter_x_stride;
550 (void)filter_y_stride;
551 (void)bd;
552
553 for (y = 0; y < h; ++y) {
554 for (x = 0; x < w; ++x) {
555 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
556 }
557 src += src_stride;
558 dst += dst_stride;
559 }
560 }
561 #endif
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_convolve.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698