| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 123 // (2) Interpolate temp vertically to derive the sub-pixel result. | 123 // (2) Interpolate temp vertically to derive the sub-pixel result. |
| 124 // Deriving the maximum number of rows in the temp buffer (135): | 124 // Deriving the maximum number of rows in the temp buffer (135): |
| 125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). | 125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
| 126 // --Largest block size is 64x64 pixels. | 126 // --Largest block size is 64x64 pixels. |
| 127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the | 127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
| 128 // original frame (in 1/16th pixel units). | 128 // original frame (in 1/16th pixel units). |
| 129 // --Must round-up because block may be located at sub-pixel position. | 129 // --Must round-up because block may be located at sub-pixel position. |
| 130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | 130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
| 131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | 131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
| 132 uint8_t temp[135 * 64]; | 132 uint8_t temp[135 * 64]; |
| 133 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 133 int intermediate_height = |
| 134 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; |
| 134 | 135 |
| 135 assert(w <= 64); | 136 assert(w <= 64); |
| 136 assert(h <= 64); | 137 assert(h <= 64); |
| 137 assert(y_step_q4 <= 32); | 138 assert(y_step_q4 <= 32); |
| 138 assert(x_step_q4 <= 32); | 139 assert(x_step_q4 <= 32); |
| 139 | 140 |
| 140 if (intermediate_height < h) | |
| 141 intermediate_height = h; | |
| 142 | |
| 143 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, | 141 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, |
| 144 x_filters, x0_q4, x_step_q4, w, intermediate_height); | 142 x_filters, x0_q4, x_step_q4, w, intermediate_height); |
| 145 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, | 143 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, |
| 146 y_filters, y0_q4, y_step_q4, w, h); | 144 y_filters, y0_q4, y_step_q4, w, h); |
| 147 } | 145 } |
| 148 | 146 |
| 149 static const InterpKernel *get_filter_base(const int16_t *filter) { | 147 static const InterpKernel *get_filter_base(const int16_t *filter) { |
| 150 // NOTE: This assumes that the filter table is 256-byte aligned. | 148 // NOTE: This assumes that the filter table is 256-byte aligned. |
| 151 // TODO(agrange) Modify to make independent of table alignment. | 149 // TODO(agrange) Modify to make independent of table alignment. |
| 152 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); | 150 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 277 for (y = 0; y < h; ++y) { | 275 for (y = 0; y < h; ++y) { |
| 278 for (x = 0; x < w; ++x) | 276 for (x = 0; x < w; ++x) |
| 279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 277 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
| 280 | 278 |
| 281 src += src_stride; | 279 src += src_stride; |
| 282 dst += dst_stride; | 280 dst += dst_stride; |
| 283 } | 281 } |
| 284 } | 282 } |
| 285 | 283 |
| 286 #if CONFIG_VP9_HIGHBITDEPTH | 284 #if CONFIG_VP9_HIGHBITDEPTH |
| 287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, | 285 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
| 288 uint8_t *dst8, ptrdiff_t dst_stride, | 286 uint8_t *dst8, ptrdiff_t dst_stride, |
| 289 const InterpKernel *x_filters, | 287 const InterpKernel *x_filters, |
| 290 int x0_q4, int x_step_q4, | 288 int x0_q4, int x_step_q4, |
| 291 int w, int h, int bd) { | 289 int w, int h, int bd) { |
| 292 int x, y; | 290 int x, y; |
| 293 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 291 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 292 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 295 src -= SUBPEL_TAPS / 2 - 1; | 293 src -= SUBPEL_TAPS / 2 - 1; |
| 296 for (y = 0; y < h; ++y) { | 294 for (y = 0; y < h; ++y) { |
| 297 int x_q4 = x0_q4; | 295 int x_q4 = x0_q4; |
| 298 for (x = 0; x < w; ++x) { | 296 for (x = 0; x < w; ++x) { |
| 299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 297 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 298 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 301 int k, sum = 0; | 299 int k, sum = 0; |
| 302 for (k = 0; k < SUBPEL_TAPS; ++k) | 300 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 303 sum += src_x[k] * x_filter[k]; | 301 sum += src_x[k] * x_filter[k]; |
| 304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); | 302 dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
| 305 x_q4 += x_step_q4; | 303 x_q4 += x_step_q4; |
| 306 } | 304 } |
| 307 src += src_stride; | 305 src += src_stride; |
| 308 dst += dst_stride; | 306 dst += dst_stride; |
| 309 } | 307 } |
| 310 } | 308 } |
| 311 | 309 |
| 312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, | 310 static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
| 313 uint8_t *dst8, ptrdiff_t dst_stride, | 311 uint8_t *dst8, ptrdiff_t dst_stride, |
| 314 const InterpKernel *x_filters, | 312 const InterpKernel *x_filters, |
| 315 int x0_q4, int x_step_q4, | 313 int x0_q4, int x_step_q4, |
| 316 int w, int h, int bd) { | 314 int w, int h, int bd) { |
| 317 int x, y; | 315 int x, y; |
| 318 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 316 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 317 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 320 src -= SUBPEL_TAPS / 2 - 1; | 318 src -= SUBPEL_TAPS / 2 - 1; |
| 321 for (y = 0; y < h; ++y) { | 319 for (y = 0; y < h; ++y) { |
| 322 int x_q4 = x0_q4; | 320 int x_q4 = x0_q4; |
| 323 for (x = 0; x < w; ++x) { | 321 for (x = 0; x < w; ++x) { |
| 324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 322 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 323 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 326 int k, sum = 0; | 324 int k, sum = 0; |
| 327 for (k = 0; k < SUBPEL_TAPS; ++k) | 325 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 328 sum += src_x[k] * x_filter[k]; | 326 sum += src_x[k] * x_filter[k]; |
| 329 dst[x] = ROUND_POWER_OF_TWO(dst[x] + | 327 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
| 330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); | 328 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
| 331 x_q4 += x_step_q4; | 329 x_q4 += x_step_q4; |
| 332 } | 330 } |
| 333 src += src_stride; | 331 src += src_stride; |
| 334 dst += dst_stride; | 332 dst += dst_stride; |
| 335 } | 333 } |
| 336 } | 334 } |
| 337 | 335 |
| 338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, | 336 static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, |
| 339 uint8_t *dst8, ptrdiff_t dst_stride, | 337 uint8_t *dst8, ptrdiff_t dst_stride, |
| 340 const InterpKernel *y_filters, | 338 const InterpKernel *y_filters, |
| 341 int y0_q4, int y_step_q4, int w, int h, | 339 int y0_q4, int y_step_q4, int w, int h, |
| 342 int bd) { | 340 int bd) { |
| 343 int x, y; | 341 int x, y; |
| 344 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 342 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 343 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 346 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 344 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 347 for (x = 0; x < w; ++x) { | 345 for (x = 0; x < w; ++x) { |
| 348 int y_q4 = y0_q4; | 346 int y_q4 = y0_q4; |
| 349 for (y = 0; y < h; ++y) { | 347 for (y = 0; y < h; ++y) { |
| 350 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 348 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 349 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 352 int k, sum = 0; | 350 int k, sum = 0; |
| 353 for (k = 0; k < SUBPEL_TAPS; ++k) | 351 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 354 sum += src_y[k * src_stride] * y_filter[k]; | 352 sum += src_y[k * src_stride] * y_filter[k]; |
| 355 dst[y * dst_stride] = clip_pixel_high( | 353 dst[y * dst_stride] = clip_pixel_highbd( |
| 356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); | 354 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
| 357 y_q4 += y_step_q4; | 355 y_q4 += y_step_q4; |
| 358 } | 356 } |
| 359 ++src; | 357 ++src; |
| 360 ++dst; | 358 ++dst; |
| 361 } | 359 } |
| 362 } | 360 } |
| 363 | 361 |
| 364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, | 362 static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, |
| 365 uint8_t *dst8, ptrdiff_t dst_stride, | 363 uint8_t *dst8, ptrdiff_t dst_stride, |
| 366 const InterpKernel *y_filters, | 364 const InterpKernel *y_filters, |
| 367 int y0_q4, int y_step_q4, int w, int h, | 365 int y0_q4, int y_step_q4, int w, int h, |
| 368 int bd) { | 366 int bd) { |
| 369 int x, y; | 367 int x, y; |
| 370 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 368 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 369 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 372 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 370 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 373 for (x = 0; x < w; ++x) { | 371 for (x = 0; x < w; ++x) { |
| 374 int y_q4 = y0_q4; | 372 int y_q4 = y0_q4; |
| 375 for (y = 0; y < h; ++y) { | 373 for (y = 0; y < h; ++y) { |
| 376 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 374 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 375 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 378 int k, sum = 0; | 376 int k, sum = 0; |
| 379 for (k = 0; k < SUBPEL_TAPS; ++k) | 377 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 380 sum += src_y[k * src_stride] * y_filter[k]; | 378 sum += src_y[k * src_stride] * y_filter[k]; |
| 381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + | 379 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
| 382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); | 380 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
| 383 y_q4 += y_step_q4; | 381 y_q4 += y_step_q4; |
| 384 } | 382 } |
| 385 ++src; | 383 ++src; |
| 386 ++dst; | 384 ++dst; |
| 387 } | 385 } |
| 388 } | 386 } |
| 389 | 387 |
| 390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride, | 388 static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, |
| 391 uint8_t *dst, ptrdiff_t dst_stride, | 389 uint8_t *dst, ptrdiff_t dst_stride, |
| 392 const InterpKernel *const x_filters, | 390 const InterpKernel *const x_filters, |
| 393 int x0_q4, int x_step_q4, | 391 int x0_q4, int x_step_q4, |
| 394 const InterpKernel *const y_filters, | 392 const InterpKernel *const y_filters, |
| 395 int y0_q4, int y_step_q4, | 393 int y0_q4, int y_step_q4, |
| 396 int w, int h, int bd) { | 394 int w, int h, int bd) { |
| 397 // Note: Fixed size intermediate buffer, temp, places limits on parameters. | 395 // Note: Fixed size intermediate buffer, temp, places limits on parameters. |
| 398 // 2d filtering proceeds in 2 steps: | 396 // 2d filtering proceeds in 2 steps: |
| 399 // (1) Interpolate horizontally into an intermediate buffer, temp. | 397 // (1) Interpolate horizontally into an intermediate buffer, temp. |
| 400 // (2) Interpolate temp vertically to derive the sub-pixel result. | 398 // (2) Interpolate temp vertically to derive the sub-pixel result. |
| 401 // Deriving the maximum number of rows in the temp buffer (135): | 399 // Deriving the maximum number of rows in the temp buffer (135): |
| 402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). | 400 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
| 403 // --Largest block size is 64x64 pixels. | 401 // --Largest block size is 64x64 pixels. |
| 404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the | 402 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
| 405 // original frame (in 1/16th pixel units). | 403 // original frame (in 1/16th pixel units). |
| 406 // --Must round-up because block may be located at sub-pixel position. | 404 // --Must round-up because block may be located at sub-pixel position. |
| 407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | 405 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
| 408 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | 406 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
| 409 uint16_t temp[64 * 135]; | 407 uint16_t temp[64 * 135]; |
| 410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 408 int intermediate_height = |
| 409 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; |
| 411 | 410 |
| 412 assert(w <= 64); | 411 assert(w <= 64); |
| 413 assert(h <= 64); | 412 assert(h <= 64); |
| 414 assert(y_step_q4 <= 32); | 413 assert(y_step_q4 <= 32); |
| 415 assert(x_step_q4 <= 32); | 414 assert(x_step_q4 <= 32); |
| 416 | 415 |
| 417 if (intermediate_height < h) | 416 highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), |
| 418 intermediate_height = h; | 417 src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
| 419 | 418 x_filters, x0_q4, x_step_q4, w, |
| 420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), | 419 intermediate_height, bd); |
| 421 src_stride, CONVERT_TO_BYTEPTR(temp), 64, | 420 highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), |
| 422 x_filters, x0_q4, x_step_q4, w, | 421 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, |
| 423 intermediate_height, bd); | 422 w, h, bd); |
| 424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), | |
| 425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, | |
| 426 w, h, bd); | |
| 427 } | 423 } |
| 428 | 424 |
| 429 | 425 |
| 430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 426 void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 431 uint8_t *dst, ptrdiff_t dst_stride, | 427 uint8_t *dst, ptrdiff_t dst_stride, |
| 432 const int16_t *filter_x, int x_step_q4, | 428 const int16_t *filter_x, int x_step_q4, |
| 433 const int16_t *filter_y, int y_step_q4, | 429 const int16_t *filter_y, int y_step_q4, |
| 434 int w, int h, int bd) { | 430 int w, int h, int bd) { |
| 435 const InterpKernel *const filters_x = get_filter_base(filter_x); | 431 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 436 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 432 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 437 (void)filter_y; | 433 (void)filter_y; |
| 438 (void)y_step_q4; | 434 (void)y_step_q4; |
| 439 | 435 |
| 440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, | 436 highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 441 x0_q4, x_step_q4, w, h, bd); | 437 x0_q4, x_step_q4, w, h, bd); |
| 442 } | 438 } |
| 443 | 439 |
| 444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 440 void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 445 uint8_t *dst, ptrdiff_t dst_stride, | 441 uint8_t *dst, ptrdiff_t dst_stride, |
| 446 const int16_t *filter_x, int x_step_q4, | 442 const int16_t *filter_x, int x_step_q4, |
| 447 const int16_t *filter_y, int y_step_q4, | 443 const int16_t *filter_y, int y_step_q4, |
| 448 int w, int h, int bd) { | 444 int w, int h, int bd) { |
| 449 const InterpKernel *const filters_x = get_filter_base(filter_x); | 445 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 450 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 446 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 451 (void)filter_y; | 447 (void)filter_y; |
| 452 (void)y_step_q4; | 448 (void)y_step_q4; |
| 453 | 449 |
| 454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, | 450 highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 455 x0_q4, x_step_q4, w, h, bd); | 451 x0_q4, x_step_q4, w, h, bd); |
| 456 } | 452 } |
| 457 | 453 |
| 458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 454 void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 459 uint8_t *dst, ptrdiff_t dst_stride, | 455 uint8_t *dst, ptrdiff_t dst_stride, |
| 460 const int16_t *filter_x, int x_step_q4, | 456 const int16_t *filter_x, int x_step_q4, |
| 461 const int16_t *filter_y, int y_step_q4, | 457 const int16_t *filter_y, int y_step_q4, |
| 462 int w, int h, int bd) { | 458 int w, int h, int bd) { |
| 463 const InterpKernel *const filters_y = get_filter_base(filter_y); | 459 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 464 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 460 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 465 (void)filter_x; | 461 (void)filter_x; |
| 466 (void)x_step_q4; | 462 (void)x_step_q4; |
| 467 | 463 |
| 468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y, | 464 highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, |
| 469 y0_q4, y_step_q4, w, h, bd); | 465 y0_q4, y_step_q4, w, h, bd); |
| 470 } | 466 } |
| 471 | 467 |
| 472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 468 void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 473 uint8_t *dst, ptrdiff_t dst_stride, | 469 uint8_t *dst, ptrdiff_t dst_stride, |
| 474 const int16_t *filter_x, int x_step_q4, | 470 const int16_t *filter_x, int x_step_q4, |
| 475 const int16_t *filter_y, int y_step_q4, | 471 const int16_t *filter_y, int y_step_q4, |
| 476 int w, int h, int bd) { | 472 int w, int h, int bd) { |
| 477 const InterpKernel *const filters_y = get_filter_base(filter_y); | 473 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 478 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 474 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 479 (void)filter_x; | 475 (void)filter_x; |
| 480 (void)x_step_q4; | 476 (void)x_step_q4; |
| 481 | 477 |
| 482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, | 478 highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, |
| 483 y0_q4, y_step_q4, w, h, bd); | 479 y0_q4, y_step_q4, w, h, bd); |
| 484 } | 480 } |
| 485 | 481 |
| 486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, | 482 void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
| 487 uint8_t *dst, ptrdiff_t dst_stride, | 483 uint8_t *dst, ptrdiff_t dst_stride, |
| 488 const int16_t *filter_x, int x_step_q4, | 484 const int16_t *filter_x, int x_step_q4, |
| 489 const int16_t *filter_y, int y_step_q4, | 485 const int16_t *filter_y, int y_step_q4, |
| 490 int w, int h, int bd) { | 486 int w, int h, int bd) { |
| 491 const InterpKernel *const filters_x = get_filter_base(filter_x); | 487 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 492 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 488 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 493 | 489 |
| 494 const InterpKernel *const filters_y = get_filter_base(filter_y); | 490 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 495 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 491 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 496 | 492 |
| 497 high_convolve(src, src_stride, dst, dst_stride, | 493 highbd_convolve(src, src_stride, dst, dst_stride, |
| 498 filters_x, x0_q4, x_step_q4, | 494 filters_x, x0_q4, x_step_q4, |
| 499 filters_y, y0_q4, y_step_q4, w, h, bd); | 495 filters_y, y0_q4, y_step_q4, w, h, bd); |
| 500 } | 496 } |
| 501 | 497 |
| 502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 498 void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
| 503 uint8_t *dst, ptrdiff_t dst_stride, | 499 uint8_t *dst, ptrdiff_t dst_stride, |
| 504 const int16_t *filter_x, int x_step_q4, | 500 const int16_t *filter_x, int x_step_q4, |
| 505 const int16_t *filter_y, int y_step_q4, | 501 const int16_t *filter_y, int y_step_q4, |
| 506 int w, int h, int bd) { | 502 int w, int h, int bd) { |
| 507 // Fixed size intermediate buffer places limits on parameters. | 503 // Fixed size intermediate buffer places limits on parameters. |
| 508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); | 504 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); |
| 509 assert(w <= 64); | 505 assert(w <= 64); |
| 510 assert(h <= 64); | 506 assert(h <= 64); |
| 511 | 507 |
| 512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, | 508 vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
| 513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); | 509 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); |
| 514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, | 510 vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, |
| 515 NULL, 0, NULL, 0, w, h, bd); | 511 NULL, 0, NULL, 0, w, h, bd); |
| 516 } | 512 } |
| 517 | 513 |
| 518 void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, | 514 void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, |
| 519 uint8_t *dst8, ptrdiff_t dst_stride, | 515 uint8_t *dst8, ptrdiff_t dst_stride, |
| 520 const int16_t *filter_x, int filter_x_stride, | 516 const int16_t *filter_x, int filter_x_stride, |
| 521 const int16_t *filter_y, int filter_y_stride, | 517 const int16_t *filter_y, int filter_y_stride, |
| 522 int w, int h, int bd) { | 518 int w, int h, int bd) { |
| 523 int r; | 519 int r; |
| 524 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 520 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 525 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 521 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 526 (void)filter_x; | 522 (void)filter_x; |
| 527 (void)filter_y; | 523 (void)filter_y; |
| 528 (void)filter_x_stride; | 524 (void)filter_x_stride; |
| 529 (void)filter_y_stride; | 525 (void)filter_y_stride; |
| 530 (void)bd; | 526 (void)bd; |
| 531 | 527 |
| 532 for (r = h; r > 0; --r) { | 528 for (r = h; r > 0; --r) { |
| 533 vpx_memcpy(dst, src, w * sizeof(uint16_t)); | 529 vpx_memcpy(dst, src, w * sizeof(uint16_t)); |
| 534 src += src_stride; | 530 src += src_stride; |
| 535 dst += dst_stride; | 531 dst += dst_stride; |
| 536 } | 532 } |
| 537 } | 533 } |
| 538 | 534 |
| 539 void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, | 535 void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, |
| 540 uint8_t *dst8, ptrdiff_t dst_stride, | 536 uint8_t *dst8, ptrdiff_t dst_stride, |
| 541 const int16_t *filter_x, int filter_x_stride, | 537 const int16_t *filter_x, int filter_x_stride, |
| 542 const int16_t *filter_y, int filter_y_stride, | 538 const int16_t *filter_y, int filter_y_stride, |
| 543 int w, int h, int bd) { | 539 int w, int h, int bd) { |
| 544 int x, y; | 540 int x, y; |
| 545 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 541 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 546 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 542 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 547 (void)filter_x; | 543 (void)filter_x; |
| 548 (void)filter_y; | 544 (void)filter_y; |
| 549 (void)filter_x_stride; | 545 (void)filter_x_stride; |
| 550 (void)filter_y_stride; | 546 (void)filter_y_stride; |
| 551 (void)bd; | 547 (void)bd; |
| 552 | 548 |
| 553 for (y = 0; y < h; ++y) { | 549 for (y = 0; y < h; ++y) { |
| 554 for (x = 0; x < w; ++x) { | 550 for (x = 0; x < w; ++x) { |
| 555 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 551 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
| 556 } | 552 } |
| 557 src += src_stride; | 553 src += src_stride; |
| 558 dst += dst_stride; | 554 dst += dst_stride; |
| 559 } | 555 } |
| 560 } | 556 } |
| 561 #endif | 557 #endif |
| OLD | NEW |