source/libvpx/vp9/common/vp9_convolve.c - Issue 592203002: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 592203002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
275 (void)filter_y; (void)filter_y_stride;	275 (void)filter_y; (void)filter_y_stride;

276	276

277 for (y = 0; y < h; ++y) {	277 for (y = 0; y < h; ++y) {

278 for (x = 0; x < w; ++x)	278 for (x = 0; x < w; ++x)

279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);	279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

280	280

281 src += src_stride;	281 src += src_stride;

282 dst += dst_stride;	282 dst += dst_stride;

283 }	283 }

284 }	284 }

	285

	286 #if CONFIG_VP9_HIGHBITDEPTH

	287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,

	288 uint8_t *dst8, ptrdiff_t dst_stride,

	289 const InterpKernel *x_filters,

	290 int x0_q4, int x_step_q4,

	291 int w, int h, int bd) {

	292 int x, y;

	293 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	295 src -= SUBPEL_TAPS / 2 - 1;

	296 for (y = 0; y < h; ++y) {

	297 int x_q4 = x0_q4;

	298 for (x = 0; x < w; ++x) {

	299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

	300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

	301 int k, sum = 0;

	302 for (k = 0; k < SUBPEL_TAPS; ++k)

	303 sum += src_x[k] * x_filter[k];

	304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

	305 x_q4 += x_step_q4;

	306 }

	307 src += src_stride;

	308 dst += dst_stride;

	309 }

	310 }

	311

	312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,

	313 uint8_t *dst8, ptrdiff_t dst_stride,

	314 const InterpKernel *x_filters,

	315 int x0_q4, int x_step_q4,

	316 int w, int h, int bd) {

	317 int x, y;

	318 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	320 src -= SUBPEL_TAPS / 2 - 1;

	321 for (y = 0; y < h; ++y) {

	322 int x_q4 = x0_q4;

	323 for (x = 0; x < w; ++x) {

	324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

	325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

	326 int k, sum = 0;

	327 for (k = 0; k < SUBPEL_TAPS; ++k)

	328 sum += src_x[k] * x_filter[k];

	329 dst[x] = ROUND_POWER_OF_TWO(dst[x] +

	330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);

	331 x_q4 += x_step_q4;

	332 }

	333 src += src_stride;

	334 dst += dst_stride;

	335 }

	336 }

	337

	338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,

	339 uint8_t *dst8, ptrdiff_t dst_stride,

	340 const InterpKernel *y_filters,

	341 int y0_q4, int y_step_q4, int w, int h,

	342 int bd) {

	343 int x, y;

	344 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	346 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

	347 for (x = 0; x < w; ++x) {

	348 int y_q4 = y0_q4;

	349 for (y = 0; y < h; ++y) {

	350 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

	351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

	352 int k, sum = 0;

	353 for (k = 0; k < SUBPEL_TAPS; ++k)

	354 sum += src_y[k * src_stride] * y_filter[k];

	355 dst[y * dst_stride] = clip_pixel_high(

	356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

	357 y_q4 += y_step_q4;

	358 }

	359 ++src;

	360 ++dst;

	361 }

	362 }

	363

	364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,

	365 uint8_t *dst8, ptrdiff_t dst_stride,

	366 const InterpKernel *y_filters,

	367 int y0_q4, int y_step_q4, int w, int h,

	368 int bd) {

	369 int x, y;

	370 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	372 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

	373 for (x = 0; x < w; ++x) {

	374 int y_q4 = y0_q4;

	375 for (y = 0; y < h; ++y) {

	376 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

	377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

	378 int k, sum = 0;

	379 for (k = 0; k < SUBPEL_TAPS; ++k)

	380 sum += src_y[k * src_stride] * y_filter[k];

	381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +

	382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);

	383 y_q4 += y_step_q4;

	384 }

	385 ++src;

	386 ++dst;

	387 }

	388 }

	389

	390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride,

	391 uint8_t *dst, ptrdiff_t dst_stride,

	392 const InterpKernel *const x_filters,

	393 int x0_q4, int x_step_q4,

	394 const InterpKernel *const y_filters,

	395 int y0_q4, int y_step_q4,

	396 int w, int h, int bd) {

	397 // Note: Fixed size intermediate buffer, temp, places limits on parameters.

	398 // 2d filtering proceeds in 2 steps:

	399 // (1) Interpolate horizontally into an intermediate buffer, temp.

	400 // (2) Interpolate temp vertically to derive the sub-pixel result.

	401 // Deriving the maximum number of rows in the temp buffer (135):

	402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

	403 // --Largest block size is 64x64 pixels.

	404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

	405 // original frame (in 1/16th pixel units).

	406 // --Must round-up because block may be located at sub-pixel position.

	407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

	408 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

	409 uint16_t temp[64 * 135];

	410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;

	411

	412 assert(w <= 64);

	413 assert(h <= 64);

	414 assert(y_step_q4 <= 32);

	415 assert(x_step_q4 <= 32);

	416

	417 if (intermediate_height < h)

	418 intermediate_height = h;

	419

	420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),

	421 src_stride, CONVERT_TO_BYTEPTR(temp), 64,

	422 x_filters, x0_q4, x_step_q4, w,

	423 intermediate_height, bd);

	424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),

	425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4,

	426 w, h, bd);

	427 }

	428

	429

	430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

	431 uint8_t *dst, ptrdiff_t dst_stride,

	432 const int16_t *filter_x, int x_step_q4,

	433 const int16_t *filter_y, int y_step_q4,

	434 int w, int h, int bd) {

	435 const InterpKernel *const filters_x = get_filter_base(filter_x);

	436 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	437 (void)filter_y;

	438 (void)y_step_q4;

	439

	440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,

	441 x0_q4, x_step_q4, w, h, bd);

	442 }

	443

	444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

	445 uint8_t *dst, ptrdiff_t dst_stride,

	446 const int16_t *filter_x, int x_step_q4,

	447 const int16_t *filter_y, int y_step_q4,

	448 int w, int h, int bd) {

	449 const InterpKernel *const filters_x = get_filter_base(filter_x);

	450 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	451 (void)filter_y;

	452 (void)y_step_q4;

	453

	454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,

	455 x0_q4, x_step_q4, w, h, bd);

	456 }

	457

	458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

	459 uint8_t *dst, ptrdiff_t dst_stride,

	460 const int16_t *filter_x, int x_step_q4,

	461 const int16_t *filter_y, int y_step_q4,

	462 int w, int h, int bd) {

	463 const InterpKernel *const filters_y = get_filter_base(filter_y);

	464 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	465 (void)filter_x;

	466 (void)x_step_q4;

	467

	468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y,

	469 y0_q4, y_step_q4, w, h, bd);

	470 }

	471

	472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

	473 uint8_t *dst, ptrdiff_t dst_stride,

	474 const int16_t *filter_x, int x_step_q4,

	475 const int16_t *filter_y, int y_step_q4,

	476 int w, int h, int bd) {

	477 const InterpKernel *const filters_y = get_filter_base(filter_y);

	478 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	479 (void)filter_x;

	480 (void)x_step_q4;

	481

	482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,

	483 y0_q4, y_step_q4, w, h, bd);

	484 }

	485

	486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,

	487 uint8_t *dst, ptrdiff_t dst_stride,

	488 const int16_t *filter_x, int x_step_q4,

	489 const int16_t *filter_y, int y_step_q4,

	490 int w, int h, int bd) {

	491 const InterpKernel *const filters_x = get_filter_base(filter_x);

	492 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	493

	494 const InterpKernel *const filters_y = get_filter_base(filter_y);

	495 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	496

	497 high_convolve(src, src_stride, dst, dst_stride,

	498 filters_x, x0_q4, x_step_q4,

	499 filters_y, y0_q4, y_step_q4, w, h, bd);

	500 }

	501

	502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

	503 uint8_t *dst, ptrdiff_t dst_stride,

	504 const int16_t *filter_x, int x_step_q4,

	505 const int16_t *filter_y, int y_step_q4,

	506 int w, int h, int bd) {

	507 // Fixed size intermediate buffer places limits on parameters.

	508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64);

	509 assert(w <= 64);

	510 assert(h <= 64);

	511

	512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,

	513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);

	514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,

	515 NULL, 0, NULL, 0, w, h, bd);

	516 }

	517

	518 void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,

	519 uint8_t *dst8, ptrdiff_t dst_stride,

	520 const int16_t *filter_x, int filter_x_stride,

	521 const int16_t *filter_y, int filter_y_stride,

	522 int w, int h, int bd) {

	523 int r;

	524 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	525 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	526 (void)filter_x;

	527 (void)filter_y;

	528 (void)filter_x_stride;

	529 (void)filter_y_stride;

	530 (void)bd;

	531

	532 for (r = h; r > 0; --r) {

	533 vpx_memcpy(dst, src, w * sizeof(uint16_t));

	534 src += src_stride;

	535 dst += dst_stride;

	536 }

	537 }

	538

	539 void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,

	540 uint8_t *dst8, ptrdiff_t dst_stride,

	541 const int16_t *filter_x, int filter_x_stride,

	542 const int16_t *filter_y, int filter_y_stride,

	543 int w, int h, int bd) {

	544 int x, y;

	545 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

	546 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

	547 (void)filter_x;

	548 (void)filter_y;

	549 (void)filter_x_stride;

	550 (void)filter_y_stride;

	551 (void)bd;

	552

	553 for (y = 0; y < h; ++y) {

	554 for (x = 0; x < w; ++x) {

	555 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

	556 }

	557 src += src_stride;

	558 dst += dst_stride;

	559 }

	560 }

	561 #endif

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_convolve.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »