source/libvpx/vp9/common/vp9_convolve.c - Issue 668403002: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
123 // (2) Interpolate temp vertically to derive the sub-pixel result.	123 // (2) Interpolate temp vertically to derive the sub-pixel result.

124 // Deriving the maximum number of rows in the temp buffer (135):	124 // Deriving the maximum number of rows in the temp buffer (135):

125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).	125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

126 // --Largest block size is 64x64 pixels.	126 // --Largest block size is 64x64 pixels.

127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the	127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

128 // original frame (in 1/16th pixel units).	128 // original frame (in 1/16th pixel units).

129 // --Must round-up because block may be located at sub-pixel position.	129 // --Must round-up because block may be located at sub-pixel position.

130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.	130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.	131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

132 uint8_t temp[135 * 64];	132 uint8_t temp[135 * 64];

133 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;	133 int intermediate_height =

	134 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

134	135

135 assert(w <= 64);	136 assert(w <= 64);

136 assert(h <= 64);	137 assert(h <= 64);

137 assert(y_step_q4 <= 32);	138 assert(y_step_q4 <= 32);

138 assert(x_step_q4 <= 32);	139 assert(x_step_q4 <= 32);

139	140

140 if (intermediate_height < h)

141 intermediate_height = h;

142

143 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,	141 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,

144 x_filters, x0_q4, x_step_q4, w, intermediate_height);	142 x_filters, x0_q4, x_step_q4, w, intermediate_height);

145 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,	143 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,

146 y_filters, y0_q4, y_step_q4, w, h);	144 y_filters, y0_q4, y_step_q4, w, h);

147 }	145 }

148	146

149 static const InterpKernel *get_filter_base(const int16_t *filter) {	147 static const InterpKernel *get_filter_base(const int16_t *filter) {

150 // NOTE: This assumes that the filter table is 256-byte aligned.	148 // NOTE: This assumes that the filter table is 256-byte aligned.

151 // TODO(agrange) Modify to make independent of table alignment.	149 // TODO(agrange) Modify to make independent of table alignment.

152 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));	150 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));

(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
277 for (y = 0; y < h; ++y) {	275 for (y = 0; y < h; ++y) {

278 for (x = 0; x < w; ++x)	276 for (x = 0; x < w; ++x)

279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);	277 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

280	278

281 src += src_stride;	279 src += src_stride;

282 dst += dst_stride;	280 dst += dst_stride;

283 }	281 }

284 }	282 }

285	283

286 #if CONFIG_VP9_HIGHBITDEPTH	284 #if CONFIG_VP9_HIGHBITDEPTH

287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,	285 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,

288 uint8_t *dst8, ptrdiff_t dst_stride,	286 uint8_t *dst8, ptrdiff_t dst_stride,

289 const InterpKernel *x_filters,	287 const InterpKernel *x_filters,

290 int x0_q4, int x_step_q4,	288 int x0_q4, int x_step_q4,

291 int w, int h, int bd) {	289 int w, int h, int bd) {

292 int x, y;	290 int x, y;

293 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	291 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	292 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

295 src -= SUBPEL_TAPS / 2 - 1;	293 src -= SUBPEL_TAPS / 2 - 1;

296 for (y = 0; y < h; ++y) {	294 for (y = 0; y < h; ++y) {

297 int x_q4 = x0_q4;	295 int x_q4 = x0_q4;

298 for (x = 0; x < w; ++x) {	296 for (x = 0; x < w; ++x) {

299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];	297 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];	298 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

301 int k, sum = 0;	299 int k, sum = 0;

302 for (k = 0; k < SUBPEL_TAPS; ++k)	300 for (k = 0; k < SUBPEL_TAPS; ++k)

303 sum += src_x[k] * x_filter[k];	301 sum += src_x[k] * x_filter[k];

304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);	302 dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

305 x_q4 += x_step_q4;	303 x_q4 += x_step_q4;

306 }	304 }

307 src += src_stride;	305 src += src_stride;

308 dst += dst_stride;	306 dst += dst_stride;

309 }	307 }

310 }	308 }

311	309

312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,	310 static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,

313 uint8_t *dst8, ptrdiff_t dst_stride,	311 uint8_t *dst8, ptrdiff_t dst_stride,

314 const InterpKernel *x_filters,	312 const InterpKernel *x_filters,

315 int x0_q4, int x_step_q4,	313 int x0_q4, int x_step_q4,

316 int w, int h, int bd) {	314 int w, int h, int bd) {

317 int x, y;	315 int x, y;

318 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	316 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	317 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

320 src -= SUBPEL_TAPS / 2 - 1;	318 src -= SUBPEL_TAPS / 2 - 1;

321 for (y = 0; y < h; ++y) {	319 for (y = 0; y < h; ++y) {

322 int x_q4 = x0_q4;	320 int x_q4 = x0_q4;

323 for (x = 0; x < w; ++x) {	321 for (x = 0; x < w; ++x) {

324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];	322 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];	323 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

326 int k, sum = 0;	324 int k, sum = 0;

327 for (k = 0; k < SUBPEL_TAPS; ++k)	325 for (k = 0; k < SUBPEL_TAPS; ++k)

328 sum += src_x[k] * x_filter[k];	326 sum += src_x[k] * x_filter[k];

329 dst[x] = ROUND_POWER_OF_TWO(dst[x] +	327 dst[x] = ROUND_POWER_OF_TWO(dst[x] +

330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);	328 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);

331 x_q4 += x_step_q4;	329 x_q4 += x_step_q4;

332 }	330 }

333 src += src_stride;	331 src += src_stride;

334 dst += dst_stride;	332 dst += dst_stride;

335 }	333 }

336 }	334 }

337	335

338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,	336 static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,

339 uint8_t *dst8, ptrdiff_t dst_stride,	337 uint8_t *dst8, ptrdiff_t dst_stride,

340 const InterpKernel *y_filters,	338 const InterpKernel *y_filters,

341 int y0_q4, int y_step_q4, int w, int h,	339 int y0_q4, int y_step_q4, int w, int h,

342 int bd) {	340 int bd) {

343 int x, y;	341 int x, y;

344 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	342 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	343 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

346 src -= src_stride * (SUBPEL_TAPS / 2 - 1);	344 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

347 for (x = 0; x < w; ++x) {	345 for (x = 0; x < w; ++x) {

348 int y_q4 = y0_q4;	346 int y_q4 = y0_q4;

349 for (y = 0; y < h; ++y) {	347 for (y = 0; y < h; ++y) {

350 const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];	348 const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];	349 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

352 int k, sum = 0;	350 int k, sum = 0;

353 for (k = 0; k < SUBPEL_TAPS; ++k)	351 for (k = 0; k < SUBPEL_TAPS; ++k)

354 sum += src_y[k * src_stride] * y_filter[k];	352 sum += src_y[k * src_stride] * y_filter[k];

355 dst[y * dst_stride] = clip_pixel_high(	353 dst[y * dst_stride] = clip_pixel_highbd(

356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);	354 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

357 y_q4 += y_step_q4;	355 y_q4 += y_step_q4;

358 }	356 }

359 ++src;	357 ++src;

360 ++dst;	358 ++dst;

361 }	359 }

362 }	360 }

363	361

364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,	362 static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,

365 uint8_t *dst8, ptrdiff_t dst_stride,	363 uint8_t *dst8, ptrdiff_t dst_stride,

366 const InterpKernel *y_filters,	364 const InterpKernel *y_filters,

367 int y0_q4, int y_step_q4, int w, int h,	365 int y0_q4, int y_step_q4, int w, int h,

368 int bd) {	366 int bd) {

369 int x, y;	367 int x, y;

370 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	368 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	369 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

372 src -= src_stride * (SUBPEL_TAPS / 2 - 1);	370 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

373 for (x = 0; x < w; ++x) {	371 for (x = 0; x < w; ++x) {

374 int y_q4 = y0_q4;	372 int y_q4 = y0_q4;

375 for (y = 0; y < h; ++y) {	373 for (y = 0; y < h; ++y) {

376 const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];	374 const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];	375 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

378 int k, sum = 0;	376 int k, sum = 0;

379 for (k = 0; k < SUBPEL_TAPS; ++k)	377 for (k = 0; k < SUBPEL_TAPS; ++k)

380 sum += src_y[k * src_stride] * y_filter[k];	378 sum += src_y[k * src_stride] * y_filter[k];

381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +	379 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +

382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);	380 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);

383 y_q4 += y_step_q4;	381 y_q4 += y_step_q4;

384 }	382 }

385 ++src;	383 ++src;

386 ++dst;	384 ++dst;

387 }	385 }

388 }	386 }

389	387

390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride,	388 static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,

391 uint8_t *dst, ptrdiff_t dst_stride,	389 uint8_t *dst, ptrdiff_t dst_stride,

392 const InterpKernel *const x_filters,	390 const InterpKernel *const x_filters,

393 int x0_q4, int x_step_q4,	391 int x0_q4, int x_step_q4,

394 const InterpKernel *const y_filters,	392 const InterpKernel *const y_filters,

395 int y0_q4, int y_step_q4,	393 int y0_q4, int y_step_q4,

396 int w, int h, int bd) {	394 int w, int h, int bd) {

397 // Note: Fixed size intermediate buffer, temp, places limits on parameters.	395 // Note: Fixed size intermediate buffer, temp, places limits on parameters.

398 // 2d filtering proceeds in 2 steps:	396 // 2d filtering proceeds in 2 steps:

399 // (1) Interpolate horizontally into an intermediate buffer, temp.	397 // (1) Interpolate horizontally into an intermediate buffer, temp.

400 // (2) Interpolate temp vertically to derive the sub-pixel result.	398 // (2) Interpolate temp vertically to derive the sub-pixel result.

401 // Deriving the maximum number of rows in the temp buffer (135):	399 // Deriving the maximum number of rows in the temp buffer (135):

402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).	400 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

403 // --Largest block size is 64x64 pixels.	401 // --Largest block size is 64x64 pixels.

404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the	402 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

405 // original frame (in 1/16th pixel units).	403 // original frame (in 1/16th pixel units).

406 // --Must round-up because block may be located at sub-pixel position.	404 // --Must round-up because block may be located at sub-pixel position.

407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.	405 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

408 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.	406 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

409 uint16_t temp[64 * 135];	407 uint16_t temp[64 * 135];

410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;	408 int intermediate_height =

	409 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

411	410

412 assert(w <= 64);	411 assert(w <= 64);

413 assert(h <= 64);	412 assert(h <= 64);

414 assert(y_step_q4 <= 32);	413 assert(y_step_q4 <= 32);

415 assert(x_step_q4 <= 32);	414 assert(x_step_q4 <= 32);

416	415

417 if (intermediate_height < h)	416 highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),

418 intermediate_height = h;	417 src_stride, CONVERT_TO_BYTEPTR(temp), 64,

419	418 x_filters, x0_q4, x_step_q4, w,

420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),	419 intermediate_height, bd);

421 src_stride, CONVERT_TO_BYTEPTR(temp), 64,	420 highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),

422 x_filters, x0_q4, x_step_q4, w,	421 64, dst, dst_stride, y_filters, y0_q4, y_step_q4,

423 intermediate_height, bd);	422 w, h, bd);

424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),

425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4,

426 w, h, bd);

427 }	423 }

428	424

429	425

430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	426 void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

431 uint8_t *dst, ptrdiff_t dst_stride,	427 uint8_t *dst, ptrdiff_t dst_stride,

432 const int16_t *filter_x, int x_step_q4,	428 const int16_t *filter_x, int x_step_q4,

433 const int16_t *filter_y, int y_step_q4,	429 const int16_t *filter_y, int y_step_q4,

434 int w, int h, int bd) {	430 int w, int h, int bd) {

435 const InterpKernel *const filters_x = get_filter_base(filter_x);	431 const InterpKernel *const filters_x = get_filter_base(filter_x);

436 const int x0_q4 = get_filter_offset(filter_x, filters_x);	432 const int x0_q4 = get_filter_offset(filter_x, filters_x);

437 (void)filter_y;	433 (void)filter_y;

438 (void)y_step_q4;	434 (void)y_step_q4;

439	435

440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,	436 highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,

441 x0_q4, x_step_q4, w, h, bd);	437 x0_q4, x_step_q4, w, h, bd);

442 }	438 }

443	439

444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	440 void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

445 uint8_t *dst, ptrdiff_t dst_stride,	441 uint8_t *dst, ptrdiff_t dst_stride,

446 const int16_t *filter_x, int x_step_q4,	442 const int16_t *filter_x, int x_step_q4,

447 const int16_t *filter_y, int y_step_q4,	443 const int16_t *filter_y, int y_step_q4,

448 int w, int h, int bd) {	444 int w, int h, int bd) {

449 const InterpKernel *const filters_x = get_filter_base(filter_x);	445 const InterpKernel *const filters_x = get_filter_base(filter_x);

450 const int x0_q4 = get_filter_offset(filter_x, filters_x);	446 const int x0_q4 = get_filter_offset(filter_x, filters_x);

451 (void)filter_y;	447 (void)filter_y;

452 (void)y_step_q4;	448 (void)y_step_q4;

453	449

454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,	450 highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,

455 x0_q4, x_step_q4, w, h, bd);	451 x0_q4, x_step_q4, w, h, bd);

456 }	452 }

457	453

458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,	454 void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

459 uint8_t *dst, ptrdiff_t dst_stride,	455 uint8_t *dst, ptrdiff_t dst_stride,

460 const int16_t *filter_x, int x_step_q4,	456 const int16_t *filter_x, int x_step_q4,

461 const int16_t *filter_y, int y_step_q4,	457 const int16_t *filter_y, int y_step_q4,

462 int w, int h, int bd) {	458 int w, int h, int bd) {

463 const InterpKernel *const filters_y = get_filter_base(filter_y);	459 const InterpKernel *const filters_y = get_filter_base(filter_y);

464 const int y0_q4 = get_filter_offset(filter_y, filters_y);	460 const int y0_q4 = get_filter_offset(filter_y, filters_y);

465 (void)filter_x;	461 (void)filter_x;

466 (void)x_step_q4;	462 (void)x_step_q4;

467	463

468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y,	464 highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,

469 y0_q4, y_step_q4, w, h, bd);	465 y0_q4, y_step_q4, w, h, bd);

470 }	466 }

471	467

472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,	468 void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

473 uint8_t *dst, ptrdiff_t dst_stride,	469 uint8_t *dst, ptrdiff_t dst_stride,

474 const int16_t *filter_x, int x_step_q4,	470 const int16_t *filter_x, int x_step_q4,

475 const int16_t *filter_y, int y_step_q4,	471 const int16_t *filter_y, int y_step_q4,

476 int w, int h, int bd) {	472 int w, int h, int bd) {

477 const InterpKernel *const filters_y = get_filter_base(filter_y);	473 const InterpKernel *const filters_y = get_filter_base(filter_y);

478 const int y0_q4 = get_filter_offset(filter_y, filters_y);	474 const int y0_q4 = get_filter_offset(filter_y, filters_y);

479 (void)filter_x;	475 (void)filter_x;

480 (void)x_step_q4;	476 (void)x_step_q4;

481	477

482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,	478 highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,

483 y0_q4, y_step_q4, w, h, bd);	479 y0_q4, y_step_q4, w, h, bd);

484 }	480 }

485	481

486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,	482 void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,

487 uint8_t *dst, ptrdiff_t dst_stride,	483 uint8_t *dst, ptrdiff_t dst_stride,

488 const int16_t *filter_x, int x_step_q4,	484 const int16_t *filter_x, int x_step_q4,

489 const int16_t *filter_y, int y_step_q4,	485 const int16_t *filter_y, int y_step_q4,

490 int w, int h, int bd) {	486 int w, int h, int bd) {

491 const InterpKernel *const filters_x = get_filter_base(filter_x);	487 const InterpKernel *const filters_x = get_filter_base(filter_x);

492 const int x0_q4 = get_filter_offset(filter_x, filters_x);	488 const int x0_q4 = get_filter_offset(filter_x, filters_x);

493	489

494 const InterpKernel *const filters_y = get_filter_base(filter_y);	490 const InterpKernel *const filters_y = get_filter_base(filter_y);

495 const int y0_q4 = get_filter_offset(filter_y, filters_y);	491 const int y0_q4 = get_filter_offset(filter_y, filters_y);

496	492

497 high_convolve(src, src_stride, dst, dst_stride,	493 highbd_convolve(src, src_stride, dst, dst_stride,

498 filters_x, x0_q4, x_step_q4,	494 filters_x, x0_q4, x_step_q4,

499 filters_y, y0_q4, y_step_q4, w, h, bd);	495 filters_y, y0_q4, y_step_q4, w, h, bd);

500 }	496 }

501	497

502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,	498 void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

503 uint8_t *dst, ptrdiff_t dst_stride,	499 uint8_t *dst, ptrdiff_t dst_stride,

504 const int16_t *filter_x, int x_step_q4,	500 const int16_t *filter_x, int x_step_q4,

505 const int16_t *filter_y, int y_step_q4,	501 const int16_t *filter_y, int y_step_q4,

506 int w, int h, int bd) {	502 int w, int h, int bd) {

507 // Fixed size intermediate buffer places limits on parameters.	503 // Fixed size intermediate buffer places limits on parameters.

508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64);	504 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64);

509 assert(w <= 64);	505 assert(w <= 64);

510 assert(h <= 64);	506 assert(h <= 64);

511	507

512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,	508 vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,

513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);	509 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);

514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,	510 vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,

515 NULL, 0, NULL, 0, w, h, bd);	511 NULL, 0, NULL, 0, w, h, bd);

516 }	512 }

517	513

518 void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,	514 void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,

519 uint8_t *dst8, ptrdiff_t dst_stride,	515 uint8_t *dst8, ptrdiff_t dst_stride,

520 const int16_t *filter_x, int filter_x_stride,	516 const int16_t *filter_x, int filter_x_stride,

521 const int16_t *filter_y, int filter_y_stride,	517 const int16_t *filter_y, int filter_y_stride,

522 int w, int h, int bd) {	518 int w, int h, int bd) {

523 int r;	519 int r;

524 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	520 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

525 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	521 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

526 (void)filter_x;	522 (void)filter_x;

527 (void)filter_y;	523 (void)filter_y;

528 (void)filter_x_stride;	524 (void)filter_x_stride;

529 (void)filter_y_stride;	525 (void)filter_y_stride;

530 (void)bd;	526 (void)bd;

531	527

532 for (r = h; r > 0; --r) {	528 for (r = h; r > 0; --r) {

533 vpx_memcpy(dst, src, w * sizeof(uint16_t));	529 vpx_memcpy(dst, src, w * sizeof(uint16_t));

534 src += src_stride;	530 src += src_stride;

535 dst += dst_stride;	531 dst += dst_stride;

536 }	532 }

537 }	533 }

538	534

539 void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,	535 void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,

540 uint8_t *dst8, ptrdiff_t dst_stride,	536 uint8_t *dst8, ptrdiff_t dst_stride,

541 const int16_t *filter_x, int filter_x_stride,	537 const int16_t *filter_x, int filter_x_stride,

542 const int16_t *filter_y, int filter_y_stride,	538 const int16_t *filter_y, int filter_y_stride,

543 int w, int h, int bd) {	539 int w, int h, int bd) {

544 int x, y;	540 int x, y;

545 uint16_t *src = CONVERT_TO_SHORTPTR(src8);	541 uint16_t *src = CONVERT_TO_SHORTPTR(src8);

546 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);	542 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

547 (void)filter_x;	543 (void)filter_x;

548 (void)filter_y;	544 (void)filter_y;

549 (void)filter_x_stride;	545 (void)filter_x_stride;

550 (void)filter_y_stride;	546 (void)filter_y_stride;

551 (void)bd;	547 (void)bd;

552	548

553 for (y = 0; y < h; ++y) {	549 for (y = 0; y < h; ++y) {

554 for (x = 0; x < w; ++x) {	550 for (x = 0; x < w; ++x) {

555 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);	551 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

556 }	552 }

557 src += src_stride;	553 src += src_stride;

558 dst += dst_stride;	554 dst += dst_stride;

559 }	555 }

560 }	556 }

561 #endif	557 #endif

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_convolve.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »