Index: source/libvpx/vp9/common/vp9_postproc.c |
=================================================================== |
--- source/libvpx/vp9/common/vp9_postproc.c (revision 292608) |
+++ source/libvpx/vp9/common/vp9_postproc.c (working copy) |
@@ -19,6 +19,9 @@ |
#include "vpx_scale/vpx_scale.h" |
#include "vpx_scale/yv12config.h" |
+#if CONFIG_VP9_HIGHBITDEPTH |
+#include "vp9/common/vp9_common.h" |
+#endif |
#include "vp9/common/vp9_onyxc_int.h" |
#include "vp9/common/vp9_postproc.h" |
#include "vp9/common/vp9_systemdependent.h" |
@@ -152,6 +155,84 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, |
+ uint16_t *dst_ptr, |
+ int src_pixels_per_line, /* src stride, in samples */ |
+ int dst_pixels_per_line, /* dst stride, in samples */ |
+ int rows, |
+ int cols, |
+ int flimit) { /* largest abs(tap - pixel) that is still filtered */ |
+ uint16_t const *p_src; |
+ uint16_t *p_dst; |
+ int row; |
+ int col; |
+ int i; |
+ int v; |
+ int pitch = src_pixels_per_line; |
+ uint16_t d[8]; /* across-pass results waiting to be written back */ |
+ /* Per row: 5-tap vertical pass src -> dst, then horizontal pass on dst. */ |
+ for (row = 0; row < rows; row++) { |
+ /* Down pass for one row: taps come from rows row-2 .. row+2 of src. */ |
+ p_src = src_ptr; |
+ p_dst = dst_ptr; |
+ /* Taps index 2 samples outside rows x cols, so src must carry a border. */ |
+ for (col = 0; col < cols; col++) { |
+ int kernel = 4; |
+ v = p_src[col]; |
+ /* Keep the pixel unchanged if any tap differs from it by > flimit. */ |
+ for (i = -2; i <= 2; i++) { |
+ if (abs(v - p_src[col + i * pitch]) > flimit) |
+ goto down_skip_convolve; |
+ |
+ kernel += kernel5[2 + i] * p_src[col + i * pitch]; |
+ } |
+ /* NOTE(review): 4 and >> 3 look like rounding for taps summing to 8. */ |
+ v = (kernel >> 3); |
+ |
+ down_skip_convolve: |
+ p_dst[col] = v; |
+ } |
+ |
+ /* Across pass: the same test and filter, horizontally, in place on dst. */ |
+ p_src = dst_ptr; |
+ p_dst = dst_ptr; |
+ /* Preload d[] from the row; this reads 8 samples whatever cols is. */ |
+ for (i = 0; i < 8; i++) |
+ d[i] = p_src[i]; |
+ |
+ for (col = 0; col < cols; col++) { |
+ int kernel = 4; |
+ v = p_src[col]; |
+ /* Default to the unfiltered value; replaced below if every tap passes. */ |
+ d[col & 7] = v; |
+ |
+ for (i = -2; i <= 2; i++) { |
+ if (abs(v - p_src[col + i]) > flimit) |
+ goto across_skip_convolve; |
+ |
+ kernel += kernel5[2 + i] * p_src[col + i]; |
+ } |
+ |
+ d[col & 7] = (kernel >> 3); |
+ /* Write back two columns late so later taps still read unfiltered data. */ |
+ across_skip_convolve: |
+ if (col >= 2) |
+ p_dst[col - 2] = d[(col - 2) & 7]; |
+ } |
+ |
+ /* flush the two results still held in d[] (col == cols here) */ |
+ p_dst[col - 2] = d[(col - 2) & 7]; |
+ p_dst[col - 1] = d[(col - 1) & 7]; |
+ |
+ |
+ /* next row */ |
+ src_ptr += pitch; |
+ dst_ptr += dst_pixels_per_line; |
+ } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
static int q2mbl(int x) { |
if (x < 20) x = 20; |
@@ -162,11 +243,47 @@ |
void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch, |
int rows, int cols, int flimit) { |
int r, c, i; |
- |
uint8_t *s = src; |
uint8_t d[16]; |
+ for (r = 0; r < rows; r++) { /* smooth each row of src in place */ |
+ int sumsq = 0; |
+ int sum = 0; |
+ for (i = -8; i <= 6; i++) { /* prime sums with s[-8..6]; clear d[0..14] */ |
+ sumsq += s[i] * s[i]; |
+ sum += s[i]; |
+ d[i + 8] = 0; |
+ } |
+ /* Slide a 15-sample window; cols + 8 steps flush the 8-sample delay. */ |
+ for (c = 0; c < cols + 8; c++) { |
+ int x = s[c + 7] - s[c - 8]; |
+ int y = s[c + 7] + s[c - 8]; |
+ /* x*y == s[c+7]^2 - s[c-8]^2: add the entering, drop the leaving sample. */ |
+ sum += x; |
+ sumsq += x * y; |
+ /* Keep the sample unless 15*sumsq - sum^2 (scaled variance) < flimit. */ |
+ d[c & 15] = s[c]; |
+ |
+ if (sumsq * 15 - sum * sum < flimit) { |
+ d[c & 15] = (8 + sum + s[c]) >> 4; /* window sum + centre, rounded /16 */ |
+ } |
+ /* NOTE(review): c == 7 reads d[15], which the priming loop never clears. */ |
+ s[c - 8] = d[(c - 8) & 15]; /* stored 8 late: window reads stay unfiltered */ |
+ } |
+ s += pitch; |
+ } |
+} |
+ |
+#if CONFIG_VP9_HIGHBITDEPTH |
+void vp9_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, |
+ int rows, int cols, int flimit) { |
+ int r, c, i; |
+ |
+ uint16_t *s = src; |
+ uint16_t d[16]; |
+ |
+ |
for (r = 0; r < rows; r++) { |
int sumsq = 0; |
int sum = 0; |
@@ -196,6 +313,7 @@ |
s += pitch; |
} |
} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, |
int rows, int cols, int flimit) { |
@@ -229,6 +347,40 @@ |
} |
} |
+#if CONFIG_VP9_HIGHBITDEPTH |
+void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, |
+ int rows, int cols, int flimit) { |
+ int r, c, i; |
+ const int16_t *rv3 = &vp9_rv[63 & rand()]; // NOLINT |
+ /* Smooth each column of dst in place where the 15-row variance is low. */ |
+ for (c = 0; c < cols; c++) { |
+ uint16_t *s = &dst[c]; |
+ int sumsq = 0; |
+ int sum = 0; |
+ uint16_t d[16] = {0}; /* first 8 write-backs read d[8..15] before any store */ |
+ const int16_t *rv2 = rv3 + ((c * 17) & 127); |
+ /* Prime the running sums with rows -8..6 of this column. */ |
+ for (i = -8; i <= 6; i++) { |
+ sumsq += s[i * pitch] * s[i * pitch]; |
+ sum += s[i * pitch]; |
+ } |
+ /* Slide the window down; rows + 8 steps flush the 8-row write delay. */ |
+ for (r = 0; r < rows + 8; r++) { |
+ sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; |
+ sum += s[7 * pitch] - s[-8 * pitch]; |
+ d[r & 15] = s[0]; |
+ /* 15*sumsq and sum^2 exceed INT_MAX for 12-bit samples: use 64 bits. */ |
+ if ((int64_t)sumsq * 15 - (int64_t)sum * sum < flimit) { |
+ d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; |
+ } |
+ /* Results land 8 rows late so window reads still see unfiltered rows. */ |
+ s[-8 * pitch] = d[(r - 8) & 15]; |
+ s += pitch; |
+ } |
+ } |
+} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, |
YV12_BUFFER_CONFIG *post, |
int q, |
@@ -239,6 +391,51 @@ |
(void) low_var_thresh; |
(void) flag; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (source->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer), |
+ CONVERT_TO_SHORTPTR(post->y_buffer), |
+ source->y_stride, post->y_stride, |
+ source->y_height, source->y_width, |
+ ppl); |
+ |
+ vp9_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), |
+ post->y_stride, post->y_height, |
+ post->y_width, q2mbl(q)); |
+ |
+ vp9_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), |
+ post->y_stride, post->y_height, |
+ post->y_width, q2mbl(q)); |
+ |
+ vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer), |
+ CONVERT_TO_SHORTPTR(post->u_buffer), |
+ source->uv_stride, post->uv_stride, |
+ source->uv_height, source->uv_width, |
+ ppl); |
+ vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer), |
+ CONVERT_TO_SHORTPTR(post->v_buffer), |
+ source->uv_stride, post->uv_stride, |
+ source->uv_height, source->uv_width, |
+ ppl); |
+ } else { |
+ vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer, |
+ source->y_stride, post->y_stride, |
+ source->y_height, source->y_width, ppl); |
+ |
+ vp9_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, |
+ post->y_width, q2mbl(q)); |
+ |
+ vp9_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, |
+ post->y_width, q2mbl(q)); |
+ |
+ vp9_post_proc_down_and_across(source->u_buffer, post->u_buffer, |
+ source->uv_stride, post->uv_stride, |
+ source->uv_height, source->uv_width, ppl); |
+ vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer, |
+ source->uv_stride, post->uv_stride, |
+ source->uv_height, source->uv_width, ppl); |
+ } |
+#else |
vp9_post_proc_down_and_across(source->y_buffer, post->y_buffer, |
source->y_stride, post->y_stride, |
source->y_height, source->y_width, ppl); |
@@ -255,6 +452,7 @@ |
vp9_post_proc_down_and_across(source->v_buffer, post->v_buffer, |
source->uv_stride, post->uv_stride, |
source->uv_height, source->uv_width, ppl); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
} |
void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, |
@@ -271,10 +469,26 @@ |
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer}; |
const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride}; |
- for (i = 0; i < MAX_MB_PLANE; ++i) |
+ for (i = 0; i < MAX_MB_PLANE; ++i) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == |
+ (dst->flags & YV12_FLAG_HIGHBITDEPTH)); |
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ vp9_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]), |
+ CONVERT_TO_SHORTPTR(dsts[i]), |
+ src_strides[i], dst_strides[i], |
+ src_heights[i], src_widths[i], ppl); |
+ } else { |
+ vp9_post_proc_down_and_across(srcs[i], dsts[i], |
+ src_strides[i], dst_strides[i], |
+ src_heights[i], src_widths[i], ppl); |
+ } |
+#else |
vp9_post_proc_down_and_across(srcs[i], dsts[i], |
src_strides[i], dst_strides[i], |
src_heights[i], src_widths[i], ppl); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ } |
} |
void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, |
@@ -293,15 +507,32 @@ |
for (i = 0; i < MAX_MB_PLANE; ++i) { |
const int src_stride = src_strides[i]; |
- const uint8_t *const src = srcs[i] + 2 * src_stride + 2; |
const int src_width = src_widths[i] - 4; |
const int src_height = src_heights[i] - 4; |
+ const int dst_stride = dst_strides[i]; |
- const int dst_stride = dst_strides[i]; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ assert((src->flags & YV12_FLAG_HIGHBITDEPTH) == |
+ (dst->flags & YV12_FLAG_HIGHBITDEPTH)); |
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH) { |
+ const uint16_t *const src = CONVERT_TO_SHORTPTR(srcs[i] + 2 * src_stride |
+ + 2); |
+ uint16_t *const dst = CONVERT_TO_SHORTPTR(dsts[i] + 2 * dst_stride + 2); |
+ vp9_highbd_post_proc_down_and_across(src, dst, src_stride, dst_stride, |
+ src_height, src_width, ppl); |
+ } else { |
+ const uint8_t *const src = srcs[i] + 2 * src_stride + 2; |
+ uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; |
+ |
+ vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, |
+ src_height, src_width, ppl); |
+ } |
+#else |
+ const uint8_t *const src = srcs[i] + 2 * src_stride + 2; |
uint8_t *const dst = dsts[i] + 2 * dst_stride + 2; |
- |
vp9_post_proc_down_and_across(src, dst, src_stride, dst_stride, |
src_height, src_width, ppl); |
+#endif |
} |
} |
@@ -405,6 +636,9 @@ |
#if CONFIG_VP9_POSTPROC || CONFIG_INTERNAL_STATS |
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, |
cm->subsampling_x, cm->subsampling_y, |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ cm->use_highbitdepth, |
+#endif |
VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0) |
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, |
"Failed to allocate post-processing buffer"); |