Index: source/libvpx/vp9/common/vp9_reconintra.c
diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c
index 1668b99ce22e09b17b8af3f4d806a426da9df4b8..825d03d69b2190b9e5e02beaa9e849526896ce09 100644
--- a/source/libvpx/vp9/common/vp9_reconintra.c
+++ b/source/libvpx/vp9/common/vp9_reconintra.c
@@ -30,6 +30,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
   ADST_ADST,  // TM
 };
 
+enum {
+  NEED_LEFT = 1 << 1,
+  NEED_ABOVE = 1 << 2,
+  NEED_ABOVERIGHT = 1 << 3,
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {
+  NEED_ABOVE | NEED_LEFT,  // DC
+  NEED_ABOVE,              // V
+  NEED_LEFT,               // H
+  NEED_ABOVERIGHT,         // D45
+  NEED_LEFT | NEED_ABOVE,  // D135
+  NEED_LEFT | NEED_ABOVE,  // D117
+  NEED_LEFT | NEED_ABOVE,  // D153
+  NEED_LEFT,               // D207
+  NEED_ABOVERIGHT,         // D63
+  NEED_LEFT | NEED_ABOVE,  // TM
+};
+
 // This serves as a wrapper function, so that all the prediction functions
 // can be unified and accessed as a pointer array. Note that the boundary
 // above and left are not necessarily used all the time.
@@ -226,7 +245,7 @@ static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride,
   (void) left;
   (void) bd;
   for (r = 0; r < bs; r++) {
-    vpx_memcpy(dst, above, bs * sizeof(uint16_t));
+    memcpy(dst, above, bs * sizeof(uint16_t));
     dst += stride;
   }
 }
@@ -469,7 +488,7 @@ static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   (void) left;
 
   for (r = 0; r < bs; r++) {
-    vpx_memcpy(dst, above, bs);
+    memcpy(dst, above, bs);
     dst += stride;
   }
 }
@@ -481,7 +500,7 @@ static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   (void) above;
 
   for (r = 0; r < bs; r++) {
-    vpx_memset(dst, left[r], bs);
+    memset(dst, left[r], bs);
     dst += stride;
   }
 }
@@ -507,7 +526,7 @@ static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   (void) left;
 
   for (r = 0; r < bs; r++) {
-    vpx_memset(dst, 128, bs);
+    memset(dst, 128, bs);
     dst += stride;
   }
 }
@@ -524,7 +543,7 @@ static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   expected_dc = (sum + (bs >> 1)) / bs;
 
   for (r = 0; r < bs; r++) {
-    vpx_memset(dst, expected_dc, bs);
+    memset(dst, expected_dc, bs);
     dst += stride;
   }
 }
@@ -540,7 +559,7 @@ static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   expected_dc = (sum + (bs >> 1)) / bs;
 
   for (r = 0; r < bs; r++) {
-    vpx_memset(dst, expected_dc, bs);
+    memset(dst, expected_dc, bs);
     dst += stride;
   }
 }
@@ -559,7 +578,7 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
   expected_dc = (sum + (count >> 1)) / count;
 
   for (r = 0; r < bs; r++) {
-    vpx_memset(dst, expected_dc, bs);
+    memset(dst, expected_dc, bs);
     dst += stride;
   }
 }
@@ -638,8 +657,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
   int i;
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16);
+  DECLARE_ALIGNED(16, uint16_t, left_col[32]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[128 + 16]);
   uint16_t *above_row = above_data + 16;
   const uint16_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
@@ -699,24 +718,24 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
       /* slower path if the block needs border extension */
       if (x0 + 2 * bs <= frame_width) {
         if (right_available && bs == 4) {
-          vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t));
+          memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t));
         } else {
-          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+          memcpy(above_row, above_ref, bs * sizeof(uint16_t));
           vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         }
       } else if (x0 + bs <= frame_width) {
         const int r = frame_width - x0;
         if (right_available && bs == 4) {
-          vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
+          memcpy(above_row, above_ref, r * sizeof(uint16_t));
           vpx_memset16(above_row + r, above_row[r - 1],
                        x0 + 2 * bs - frame_width);
         } else {
-          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+          memcpy(above_row, above_ref, bs * sizeof(uint16_t));
           vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         }
       } else if (x0 <= frame_width) {
         const int r = frame_width - x0;
-        vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
+        memcpy(above_row, above_ref, r * sizeof(uint16_t));
         vpx_memset16(above_row + r, above_row[r - 1],
                      x0 + 2 * bs - frame_width);
       }
@@ -727,9 +746,9 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
       if (bs == 4 && right_available && left_available) {
         const_above_row = above_ref;
       } else {
-        vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+        memcpy(above_row, above_ref, bs * sizeof(uint16_t));
         if (bs == 4 && right_available)
-          vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t));
+          memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t));
         else
           vpx_memset16(above_row + bs, above_row[bs - 1], bs);
         // TODO(Peter): this value should probably change for high bitdepth
@@ -761,8 +780,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                    int right_available, int x, int y,
                                    int plane) {
   int i;
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
+  DECLARE_ALIGNED(16, uint8_t, left_col[32]);
+  DECLARE_ALIGNED(16, uint8_t, above_data[128 + 16]);
   uint8_t *above_row = above_data + 16;
   const uint8_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
@@ -790,75 +809,103 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
 
-  vpx_memset(left_col, 129, 64);
-
-  // left
-  if (left_available) {
-    if (xd->mb_to_bottom_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (y0 + bs <= frame_height) {
-        for (i = 0; i < bs; ++i)
-          left_col[i] = ref[i * ref_stride - 1];
+  // NEED_LEFT
+  if (extend_modes[mode] & NEED_LEFT) {
+    if (left_available) {
+      if (xd->mb_to_bottom_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (y0 + bs <= frame_height) {
+          for (i = 0; i < bs; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+        } else {
+          const int extend_bottom = frame_height - y0;
+          for (i = 0; i < extend_bottom; ++i)
+            left_col[i] = ref[i * ref_stride - 1];
+          for (; i < bs; ++i)
+            left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+        }
       } else {
-        const int extend_bottom = frame_height - y0;
-        for (i = 0; i < extend_bottom; ++i)
+        /* faster path if the block does not need extension */
+        for (i = 0; i < bs; ++i)
           left_col[i] = ref[i * ref_stride - 1];
-        for (; i < bs; ++i)
-          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
       }
     } else {
-      /* faster path if the block does not need extension */
-      for (i = 0; i < bs; ++i)
-        left_col[i] = ref[i * ref_stride - 1];
+      memset(left_col, 129, bs);
     }
   }
 
-  // TODO(hkuang) do not extend 2*bs pixels for all modes.
-  // above
-  if (up_available) {
-    const uint8_t *above_ref = ref - ref_stride;
-    if (xd->mb_to_right_edge < 0) {
-      /* slower path if the block needs border extension */
-      if (x0 + 2 * bs <= frame_width) {
-        if (right_available && bs == 4) {
-          vpx_memcpy(above_row, above_ref, 2 * bs);
-        } else {
-          vpx_memcpy(above_row, above_ref, bs);
-          vpx_memset(above_row + bs, above_row[bs - 1], bs);
+  // NEED_ABOVE
+  if (extend_modes[mode] & NEED_ABOVE) {
+    if (up_available) {
+      const uint8_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + bs <= frame_width) {
+          memcpy(above_row, above_ref, bs);
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r);
+          memset(above_row + r, above_row[r - 1], x0 + bs - frame_width);
         }
-      } else if (x0 + bs <= frame_width) {
-        const int r = frame_width - x0;
-        if (right_available && bs == 4) {
-          vpx_memcpy(above_row, above_ref, r);
-          vpx_memset(above_row + r, above_row[r - 1],
-                     x0 + 2 * bs - frame_width);
+      } else {
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
         } else {
-          vpx_memcpy(above_row, above_ref, bs);
-          vpx_memset(above_row + bs, above_row[bs - 1], bs);
+          memcpy(above_row, above_ref, bs);
         }
-      } else if (x0 <= frame_width) {
-        const int r = frame_width - x0;
-        vpx_memcpy(above_row, above_ref, r);
-        vpx_memset(above_row + r, above_row[r - 1],
-                   x0 + 2 * bs - frame_width);
       }
       above_row[-1] = left_available ? above_ref[-1] : 129;
     } else {
-      /* faster path if the block does not need extension */
-      if (bs == 4 && right_available && left_available) {
-        const_above_row = above_ref;
+      memset(above_row, 127, bs);
+      above_row[-1] = 127;
+    }
+  }
+
+  // NEED_ABOVERIGHT
+  if (extend_modes[mode] & NEED_ABOVERIGHT) {
+    if (up_available) {
+      const uint8_t *above_ref = ref - ref_stride;
+      if (xd->mb_to_right_edge < 0) {
+        /* slower path if the block needs border extension */
+        if (x0 + 2 * bs <= frame_width) {
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, 2 * bs);
+          } else {
+            memcpy(above_row, above_ref, bs);
+            memset(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 + bs <= frame_width) {
+          const int r = frame_width - x0;
+          if (right_available && bs == 4) {
+            memcpy(above_row, above_ref, r);
+            memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
+          } else {
+            memcpy(above_row, above_ref, bs);
+            memset(above_row + bs, above_row[bs - 1], bs);
+          }
+        } else if (x0 <= frame_width) {
+          const int r = frame_width - x0;
+          memcpy(above_row, above_ref, r);
+          memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width);
+        }
       } else {
-        vpx_memcpy(above_row, above_ref, bs);
-        if (bs == 4 && right_available)
-          vpx_memcpy(above_row + bs, above_ref + bs, bs);
-        else
-          vpx_memset(above_row + bs, above_row[bs - 1], bs);
-        above_row[-1] = left_available ? above_ref[-1] : 129;
+        /* faster path if the block does not need extension */
+        if (bs == 4 && right_available && left_available) {
+          const_above_row = above_ref;
+        } else {
+          memcpy(above_row, above_ref, bs);
+          if (bs == 4 && right_available)
+            memcpy(above_row + bs, above_ref + bs, bs);
+          else
+            memset(above_row + bs, above_row[bs - 1], bs);
+        }
       }
+      above_row[-1] = left_available ? above_ref[-1] : 129;
+    } else {
+      memset(above_row, 127, bs * 2);
+      above_row[-1] = 127;
+    }
-    }
-  } else {
-    vpx_memset(above_row, 127, bs * 2);
-    above_row[-1] = 127;
   }
 
   // predict
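
Notes on the change (illustrative sketches, not part of the patch):

1) Mode-dependent border building. The new extend_modes table lets each
intra prediction mode declare which borders it actually reads, so
build_intra_predictors can prepare only those, instead of unconditionally
extending the above row by 2 * bs pixels for every mode (the behavior the
removed TODO(hkuang) comment called out). A minimal self-contained C
sketch of the same gating pattern; the mode-name enum here is an
illustrative stand-in that mirrors vp9's PREDICTION_MODE ordering:

#include <stdint.h>
#include <stdio.h>

/* Border-requirement flags, as in the patch. */
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
};

/* Illustrative stand-in for vp9's PREDICTION_MODE (INTRA_MODES == 10). */
enum { DC, V, H, D45, D135, D117, D153, D207, D63, TM, INTRA_MODES };

static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,  /* DC */
  NEED_ABOVE,              /* V */
  NEED_LEFT,               /* H */
  NEED_ABOVERIGHT,         /* D45 */
  NEED_LEFT | NEED_ABOVE,  /* D135 */
  NEED_LEFT | NEED_ABOVE,  /* D117 */
  NEED_LEFT | NEED_ABOVE,  /* D153 */
  NEED_LEFT,               /* D207 */
  NEED_ABOVERIGHT,         /* D63 */
  NEED_LEFT | NEED_ABOVE,  /* TM */
};

int main(void) {
  int mode;
  for (mode = 0; mode < INTRA_MODES; ++mode) {
    /* Do only the border work this mode actually needs. */
    printf("mode %d needs:", mode);
    if (extend_modes[mode] & NEED_LEFT) printf(" left");
    if (extend_modes[mode] & NEED_ABOVE) printf(" above");
    if (extend_modes[mode] & NEED_ABOVERIGHT) printf(" above+right");
    printf("\n");
  }
  return 0;
}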
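
2) Alignment macros. DECLARE_ALIGNED_ARRAY was libvpx's old wrapper that
over-allocated a backing buffer and rounded the pointer up at run time;
DECLARE_ALIGNED simply asks the compiler for an aligned object. The
left_col buffer also shrinks from 64 to 32 entries, which is safe because
vp9's largest transform block is 32x32, so at most bs <= 32 left pixels
are written (the old code cleared all 64 with vpx_memset(left_col, 129, 64);
the new code clears only bs). A sketch of what the new declaration expands
to, assuming the GCC/Clang branch of vpx_ports/mem.h (MSVC uses
__declspec(align(n)) instead):

#include <stdint.h>

/* GCC/Clang-style definition (sketch; see vpx_ports/mem.h). */
#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))

/* The patched declaration */
DECLARE_ALIGNED(16, uint8_t, left_col[32]);
/* expands to:
 *   uint8_t left_col[32] __attribute__((aligned(16)));
 * an object the compiler aligns itself, with no runtime pointer fix-up.
 * above_data keeps 16 extra leading bytes so that
 * above_row = above_data + 16 stays 16-byte aligned and above_row[-1]
 * (the top-left pixel) is a valid slot. */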
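
3) Edge replication. The border-extension arithmetic is unchanged in
spirit: copy the r = frame_width - x0 pixels that exist, then replicate
the last real pixel across the rest. A toy standalone rendering of the
NEED_ABOVE edge case; the frame width, block position, and pixel values
are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  /* An 8-wide above row that runs past a 6-pixel-wide frame. */
  const uint8_t frame_row[6] = { 10, 20, 30, 40, 50, 60 };
  uint8_t above_row[8];
  const int bs = 8, x0 = 0, frame_width = 6;
  int i;

  if (x0 + bs <= frame_width) {
    memcpy(above_row, frame_row + x0, bs);  /* row fully inside the frame */
  } else if (x0 <= frame_width) {
    const int r = frame_width - x0;  /* only r = 6 real pixels exist */
    memcpy(above_row, frame_row + x0, r);
    /* replicate the last real pixel over the missing tail */
    memset(above_row + r, above_row[r - 1], x0 + bs - frame_width);
  }

  for (i = 0; i < bs; ++i)
    printf("%d ", above_row[i]);
  printf("\n");  /* prints: 10 20 30 40 50 60 60 60 */
  return 0;
}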