OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
123 // (2) Interpolate temp vertically to derive the sub-pixel result. | 123 // (2) Interpolate temp vertically to derive the sub-pixel result. |
124 // Deriving the maximum number of rows in the temp buffer (135): | 124 // Deriving the maximum number of rows in the temp buffer (135): |
125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). | 125 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
126 // --Largest block size is 64x64 pixels. | 126 // --Largest block size is 64x64 pixels. |
127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the | 127 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
128 // original frame (in 1/16th pixel units). | 128 // original frame (in 1/16th pixel units). |
129 // --Must round-up because block may be located at sub-pixel position. | 129 // --Must round-up because block may be located at sub-pixel position. |
130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | 130 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | 131 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
132 uint8_t temp[135 * 64]; | 132 uint8_t temp[135 * 64]; |
133 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 133 int intermediate_height = |
| 134 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; |
134 | 135 |
135 assert(w <= 64); | 136 assert(w <= 64); |
136 assert(h <= 64); | 137 assert(h <= 64); |
137 assert(y_step_q4 <= 32); | 138 assert(y_step_q4 <= 32); |
138 assert(x_step_q4 <= 32); | 139 assert(x_step_q4 <= 32); |
139 | 140 |
140 if (intermediate_height < h) | |
141 intermediate_height = h; | |
142 | |
143 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, | 141 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, |
144 x_filters, x0_q4, x_step_q4, w, intermediate_height); | 142 x_filters, x0_q4, x_step_q4, w, intermediate_height); |
145 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, | 143 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, |
146 y_filters, y0_q4, y_step_q4, w, h); | 144 y_filters, y0_q4, y_step_q4, w, h); |
147 } | 145 } |
148 | 146 |
149 static const InterpKernel *get_filter_base(const int16_t *filter) { | 147 static const InterpKernel *get_filter_base(const int16_t *filter) { |
150 // NOTE: This assumes that the filter table is 256-byte aligned. | 148 // NOTE: This assumes that the filter table is 256-byte aligned. |
151 // TODO(agrange) Modify to make independent of table alignment. | 149 // TODO(agrange) Modify to make independent of table alignment. |
152 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); | 150 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
277 for (y = 0; y < h; ++y) { | 275 for (y = 0; y < h; ++y) { |
278 for (x = 0; x < w; ++x) | 276 for (x = 0; x < w; ++x) |
279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 277 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
280 | 278 |
281 src += src_stride; | 279 src += src_stride; |
282 dst += dst_stride; | 280 dst += dst_stride; |
283 } | 281 } |
284 } | 282 } |
285 | 283 |
286 #if CONFIG_VP9_HIGHBITDEPTH | 284 #if CONFIG_VP9_HIGHBITDEPTH |
287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, | 285 static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
288 uint8_t *dst8, ptrdiff_t dst_stride, | 286 uint8_t *dst8, ptrdiff_t dst_stride, |
289 const InterpKernel *x_filters, | 287 const InterpKernel *x_filters, |
290 int x0_q4, int x_step_q4, | 288 int x0_q4, int x_step_q4, |
291 int w, int h, int bd) { | 289 int w, int h, int bd) { |
292 int x, y; | 290 int x, y; |
293 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 291 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 292 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
295 src -= SUBPEL_TAPS / 2 - 1; | 293 src -= SUBPEL_TAPS / 2 - 1; |
296 for (y = 0; y < h; ++y) { | 294 for (y = 0; y < h; ++y) { |
297 int x_q4 = x0_q4; | 295 int x_q4 = x0_q4; |
298 for (x = 0; x < w; ++x) { | 296 for (x = 0; x < w; ++x) { |
299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 297 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 298 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
301 int k, sum = 0; | 299 int k, sum = 0; |
302 for (k = 0; k < SUBPEL_TAPS; ++k) | 300 for (k = 0; k < SUBPEL_TAPS; ++k) |
303 sum += src_x[k] * x_filter[k]; | 301 sum += src_x[k] * x_filter[k]; |
304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); | 302 dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
305 x_q4 += x_step_q4; | 303 x_q4 += x_step_q4; |
306 } | 304 } |
307 src += src_stride; | 305 src += src_stride; |
308 dst += dst_stride; | 306 dst += dst_stride; |
309 } | 307 } |
310 } | 308 } |
311 | 309 |
312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, | 310 static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
313 uint8_t *dst8, ptrdiff_t dst_stride, | 311 uint8_t *dst8, ptrdiff_t dst_stride, |
314 const InterpKernel *x_filters, | 312 const InterpKernel *x_filters, |
315 int x0_q4, int x_step_q4, | 313 int x0_q4, int x_step_q4, |
316 int w, int h, int bd) { | 314 int w, int h, int bd) { |
317 int x, y; | 315 int x, y; |
318 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 316 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 317 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
320 src -= SUBPEL_TAPS / 2 - 1; | 318 src -= SUBPEL_TAPS / 2 - 1; |
321 for (y = 0; y < h; ++y) { | 319 for (y = 0; y < h; ++y) { |
322 int x_q4 = x0_q4; | 320 int x_q4 = x0_q4; |
323 for (x = 0; x < w; ++x) { | 321 for (x = 0; x < w; ++x) { |
324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 322 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 323 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
326 int k, sum = 0; | 324 int k, sum = 0; |
327 for (k = 0; k < SUBPEL_TAPS; ++k) | 325 for (k = 0; k < SUBPEL_TAPS; ++k) |
328 sum += src_x[k] * x_filter[k]; | 326 sum += src_x[k] * x_filter[k]; |
329 dst[x] = ROUND_POWER_OF_TWO(dst[x] + | 327 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); | 328 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
331 x_q4 += x_step_q4; | 329 x_q4 += x_step_q4; |
332 } | 330 } |
333 src += src_stride; | 331 src += src_stride; |
334 dst += dst_stride; | 332 dst += dst_stride; |
335 } | 333 } |
336 } | 334 } |
337 | 335 |
338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, | 336 static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, |
339 uint8_t *dst8, ptrdiff_t dst_stride, | 337 uint8_t *dst8, ptrdiff_t dst_stride, |
340 const InterpKernel *y_filters, | 338 const InterpKernel *y_filters, |
341 int y0_q4, int y_step_q4, int w, int h, | 339 int y0_q4, int y_step_q4, int w, int h, |
342 int bd) { | 340 int bd) { |
343 int x, y; | 341 int x, y; |
344 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 342 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 343 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
346 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 344 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
347 for (x = 0; x < w; ++x) { | 345 for (x = 0; x < w; ++x) { |
348 int y_q4 = y0_q4; | 346 int y_q4 = y0_q4; |
349 for (y = 0; y < h; ++y) { | 347 for (y = 0; y < h; ++y) { |
350 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 348 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 349 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
352 int k, sum = 0; | 350 int k, sum = 0; |
353 for (k = 0; k < SUBPEL_TAPS; ++k) | 351 for (k = 0; k < SUBPEL_TAPS; ++k) |
354 sum += src_y[k * src_stride] * y_filter[k]; | 352 sum += src_y[k * src_stride] * y_filter[k]; |
355 dst[y * dst_stride] = clip_pixel_high( | 353 dst[y * dst_stride] = clip_pixel_highbd( |
356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); | 354 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
357 y_q4 += y_step_q4; | 355 y_q4 += y_step_q4; |
358 } | 356 } |
359 ++src; | 357 ++src; |
360 ++dst; | 358 ++dst; |
361 } | 359 } |
362 } | 360 } |
363 | 361 |
364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, | 362 static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, |
365 uint8_t *dst8, ptrdiff_t dst_stride, | 363 uint8_t *dst8, ptrdiff_t dst_stride, |
366 const InterpKernel *y_filters, | 364 const InterpKernel *y_filters, |
367 int y0_q4, int y_step_q4, int w, int h, | 365 int y0_q4, int y_step_q4, int w, int h, |
368 int bd) { | 366 int bd) { |
369 int x, y; | 367 int x, y; |
370 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 368 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 369 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
372 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 370 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
373 for (x = 0; x < w; ++x) { | 371 for (x = 0; x < w; ++x) { |
374 int y_q4 = y0_q4; | 372 int y_q4 = y0_q4; |
375 for (y = 0; y < h; ++y) { | 373 for (y = 0; y < h; ++y) { |
376 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 374 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 375 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
378 int k, sum = 0; | 376 int k, sum = 0; |
379 for (k = 0; k < SUBPEL_TAPS; ++k) | 377 for (k = 0; k < SUBPEL_TAPS; ++k) |
380 sum += src_y[k * src_stride] * y_filter[k]; | 378 sum += src_y[k * src_stride] * y_filter[k]; |
381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + | 379 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); | 380 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
383 y_q4 += y_step_q4; | 381 y_q4 += y_step_q4; |
384 } | 382 } |
385 ++src; | 383 ++src; |
386 ++dst; | 384 ++dst; |
387 } | 385 } |
388 } | 386 } |
389 | 387 |
390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride, | 388 static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, |
391 uint8_t *dst, ptrdiff_t dst_stride, | 389 uint8_t *dst, ptrdiff_t dst_stride, |
392 const InterpKernel *const x_filters, | 390 const InterpKernel *const x_filters, |
393 int x0_q4, int x_step_q4, | 391 int x0_q4, int x_step_q4, |
394 const InterpKernel *const y_filters, | 392 const InterpKernel *const y_filters, |
395 int y0_q4, int y_step_q4, | 393 int y0_q4, int y_step_q4, |
396 int w, int h, int bd) { | 394 int w, int h, int bd) { |
397 // Note: Fixed size intermediate buffer, temp, places limits on parameters. | 395 // Note: Fixed size intermediate buffer, temp, places limits on parameters. |
398 // 2d filtering proceeds in 2 steps: | 396 // 2d filtering proceeds in 2 steps: |
399 // (1) Interpolate horizontally into an intermediate buffer, temp. | 397 // (1) Interpolate horizontally into an intermediate buffer, temp. |
400 // (2) Interpolate temp vertically to derive the sub-pixel result. | 398 // (2) Interpolate temp vertically to derive the sub-pixel result. |
401 // Deriving the maximum number of rows in the temp buffer (135): | 399 // Deriving the maximum number of rows in the temp buffer (135): |
402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). | 400 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
403 // --Largest block size is 64x64 pixels. | 401 // --Largest block size is 64x64 pixels. |
404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the | 402 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
405 // original frame (in 1/16th pixel units). | 403 // original frame (in 1/16th pixel units). |
406 // --Must round-up because block may be located at sub-pixel position. | 404 // --Must round-up because block may be located at sub-pixel position. |
407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. | 405 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
408 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. | 406 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
409 uint16_t temp[64 * 135]; | 407 uint16_t temp[64 * 135]; |
410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 408 int intermediate_height = |
| 409 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; |
411 | 410 |
412 assert(w <= 64); | 411 assert(w <= 64); |
413 assert(h <= 64); | 412 assert(h <= 64); |
414 assert(y_step_q4 <= 32); | 413 assert(y_step_q4 <= 32); |
415 assert(x_step_q4 <= 32); | 414 assert(x_step_q4 <= 32); |
416 | 415 |
417 if (intermediate_height < h) | 416 highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), |
418 intermediate_height = h; | 417 src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
419 | 418 x_filters, x0_q4, x_step_q4, w, |
420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), | 419 intermediate_height, bd); |
421 src_stride, CONVERT_TO_BYTEPTR(temp), 64, | 420 highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), |
422 x_filters, x0_q4, x_step_q4, w, | 421 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, |
423 intermediate_height, bd); | 422 w, h, bd); |
424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), | |
425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, | |
426 w, h, bd); | |
427 } | 423 } |
428 | 424 |
429 | 425 |
430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 426 void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
431 uint8_t *dst, ptrdiff_t dst_stride, | 427 uint8_t *dst, ptrdiff_t dst_stride, |
432 const int16_t *filter_x, int x_step_q4, | 428 const int16_t *filter_x, int x_step_q4, |
433 const int16_t *filter_y, int y_step_q4, | 429 const int16_t *filter_y, int y_step_q4, |
434 int w, int h, int bd) { | 430 int w, int h, int bd) { |
435 const InterpKernel *const filters_x = get_filter_base(filter_x); | 431 const InterpKernel *const filters_x = get_filter_base(filter_x); |
436 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 432 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
437 (void)filter_y; | 433 (void)filter_y; |
438 (void)y_step_q4; | 434 (void)y_step_q4; |
439 | 435 |
440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, | 436 highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, |
441 x0_q4, x_step_q4, w, h, bd); | 437 x0_q4, x_step_q4, w, h, bd); |
442 } | 438 } |
443 | 439 |
444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 440 void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
445 uint8_t *dst, ptrdiff_t dst_stride, | 441 uint8_t *dst, ptrdiff_t dst_stride, |
446 const int16_t *filter_x, int x_step_q4, | 442 const int16_t *filter_x, int x_step_q4, |
447 const int16_t *filter_y, int y_step_q4, | 443 const int16_t *filter_y, int y_step_q4, |
448 int w, int h, int bd) { | 444 int w, int h, int bd) { |
449 const InterpKernel *const filters_x = get_filter_base(filter_x); | 445 const InterpKernel *const filters_x = get_filter_base(filter_x); |
450 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 446 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
451 (void)filter_y; | 447 (void)filter_y; |
452 (void)y_step_q4; | 448 (void)y_step_q4; |
453 | 449 |
454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, | 450 highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, |
455 x0_q4, x_step_q4, w, h, bd); | 451 x0_q4, x_step_q4, w, h, bd); |
456 } | 452 } |
457 | 453 |
458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 454 void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
459 uint8_t *dst, ptrdiff_t dst_stride, | 455 uint8_t *dst, ptrdiff_t dst_stride, |
460 const int16_t *filter_x, int x_step_q4, | 456 const int16_t *filter_x, int x_step_q4, |
461 const int16_t *filter_y, int y_step_q4, | 457 const int16_t *filter_y, int y_step_q4, |
462 int w, int h, int bd) { | 458 int w, int h, int bd) { |
463 const InterpKernel *const filters_y = get_filter_base(filter_y); | 459 const InterpKernel *const filters_y = get_filter_base(filter_y); |
464 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 460 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
465 (void)filter_x; | 461 (void)filter_x; |
466 (void)x_step_q4; | 462 (void)x_step_q4; |
467 | 463 |
468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y, | 464 highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, |
469 y0_q4, y_step_q4, w, h, bd); | 465 y0_q4, y_step_q4, w, h, bd); |
470 } | 466 } |
471 | 467 |
472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 468 void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
473 uint8_t *dst, ptrdiff_t dst_stride, | 469 uint8_t *dst, ptrdiff_t dst_stride, |
474 const int16_t *filter_x, int x_step_q4, | 470 const int16_t *filter_x, int x_step_q4, |
475 const int16_t *filter_y, int y_step_q4, | 471 const int16_t *filter_y, int y_step_q4, |
476 int w, int h, int bd) { | 472 int w, int h, int bd) { |
477 const InterpKernel *const filters_y = get_filter_base(filter_y); | 473 const InterpKernel *const filters_y = get_filter_base(filter_y); |
478 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 474 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
479 (void)filter_x; | 475 (void)filter_x; |
480 (void)x_step_q4; | 476 (void)x_step_q4; |
481 | 477 |
482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, | 478 highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, |
483 y0_q4, y_step_q4, w, h, bd); | 479 y0_q4, y_step_q4, w, h, bd); |
484 } | 480 } |
485 | 481 |
486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, | 482 void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
487 uint8_t *dst, ptrdiff_t dst_stride, | 483 uint8_t *dst, ptrdiff_t dst_stride, |
488 const int16_t *filter_x, int x_step_q4, | 484 const int16_t *filter_x, int x_step_q4, |
489 const int16_t *filter_y, int y_step_q4, | 485 const int16_t *filter_y, int y_step_q4, |
490 int w, int h, int bd) { | 486 int w, int h, int bd) { |
491 const InterpKernel *const filters_x = get_filter_base(filter_x); | 487 const InterpKernel *const filters_x = get_filter_base(filter_x); |
492 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 488 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
493 | 489 |
494 const InterpKernel *const filters_y = get_filter_base(filter_y); | 490 const InterpKernel *const filters_y = get_filter_base(filter_y); |
495 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 491 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
496 | 492 |
497 high_convolve(src, src_stride, dst, dst_stride, | 493 highbd_convolve(src, src_stride, dst, dst_stride, |
498 filters_x, x0_q4, x_step_q4, | 494 filters_x, x0_q4, x_step_q4, |
499 filters_y, y0_q4, y_step_q4, w, h, bd); | 495 filters_y, y0_q4, y_step_q4, w, h, bd); |
500 } | 496 } |
501 | 497 |
502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 498 void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
503 uint8_t *dst, ptrdiff_t dst_stride, | 499 uint8_t *dst, ptrdiff_t dst_stride, |
504 const int16_t *filter_x, int x_step_q4, | 500 const int16_t *filter_x, int x_step_q4, |
505 const int16_t *filter_y, int y_step_q4, | 501 const int16_t *filter_y, int y_step_q4, |
506 int w, int h, int bd) { | 502 int w, int h, int bd) { |
507 // Fixed size intermediate buffer places limits on parameters. | 503 // Fixed size intermediate buffer places limits on parameters. |
508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); | 504 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); |
509 assert(w <= 64); | 505 assert(w <= 64); |
510 assert(h <= 64); | 506 assert(h <= 64); |
511 | 507 |
512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, | 508 vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); | 509 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); |
514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, | 510 vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, |
515 NULL, 0, NULL, 0, w, h, bd); | 511 NULL, 0, NULL, 0, w, h, bd); |
516 } | 512 } |
517 | 513 |
518 void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, | 514 void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, |
519 uint8_t *dst8, ptrdiff_t dst_stride, | 515 uint8_t *dst8, ptrdiff_t dst_stride, |
520 const int16_t *filter_x, int filter_x_stride, | 516 const int16_t *filter_x, int filter_x_stride, |
521 const int16_t *filter_y, int filter_y_stride, | 517 const int16_t *filter_y, int filter_y_stride, |
522 int w, int h, int bd) { | 518 int w, int h, int bd) { |
523 int r; | 519 int r; |
524 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 520 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
525 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 521 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
526 (void)filter_x; | 522 (void)filter_x; |
527 (void)filter_y; | 523 (void)filter_y; |
528 (void)filter_x_stride; | 524 (void)filter_x_stride; |
529 (void)filter_y_stride; | 525 (void)filter_y_stride; |
530 (void)bd; | 526 (void)bd; |
531 | 527 |
532 for (r = h; r > 0; --r) { | 528 for (r = h; r > 0; --r) { |
533 vpx_memcpy(dst, src, w * sizeof(uint16_t)); | 529 vpx_memcpy(dst, src, w * sizeof(uint16_t)); |
534 src += src_stride; | 530 src += src_stride; |
535 dst += dst_stride; | 531 dst += dst_stride; |
536 } | 532 } |
537 } | 533 } |
538 | 534 |
539 void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, | 535 void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, |
540 uint8_t *dst8, ptrdiff_t dst_stride, | 536 uint8_t *dst8, ptrdiff_t dst_stride, |
541 const int16_t *filter_x, int filter_x_stride, | 537 const int16_t *filter_x, int filter_x_stride, |
542 const int16_t *filter_y, int filter_y_stride, | 538 const int16_t *filter_y, int filter_y_stride, |
543 int w, int h, int bd) { | 539 int w, int h, int bd) { |
544 int x, y; | 540 int x, y; |
545 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | 541 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
546 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); | 542 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
547 (void)filter_x; | 543 (void)filter_x; |
548 (void)filter_y; | 544 (void)filter_y; |
549 (void)filter_x_stride; | 545 (void)filter_x_stride; |
550 (void)filter_y_stride; | 546 (void)filter_y_stride; |
551 (void)bd; | 547 (void)bd; |
552 | 548 |
553 for (y = 0; y < h; ++y) { | 549 for (y = 0; y < h; ++y) { |
554 for (x = 0; x < w; ++x) { | 550 for (x = 0; x < w; ++x) { |
555 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 551 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
556 } | 552 } |
557 src += src_stride; | 553 src += src_stride; |
558 dst += dst_stride; | 554 dst += dst_stride; |
559 } | 555 } |
560 } | 556 } |
561 #endif | 557 #endif |
OLD | NEW |