Index: source/libvpx/vp9/common/vp9_reconintra.c |
diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c |
index 1668b99ce22e09b17b8af3f4d806a426da9df4b8..825d03d69b2190b9e5e02beaa9e849526896ce09 100644 |
--- a/source/libvpx/vp9/common/vp9_reconintra.c |
+++ b/source/libvpx/vp9/common/vp9_reconintra.c |
@@ -30,6 +30,25 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { |
ADST_ADST, // TM |
}; |
+enum { /* Bit flags: which edge buffers build_intra_predictors() prepares. */ |
+ NEED_LEFT = 1 << 1, /* fill left_col (bs pixels) */ |
+ NEED_ABOVE = 1 << 2, /* fill above_row (bs pixels) */ |
+ NEED_ABOVERIGHT = 1 << 3, /* above row widened to 2 * bs pixels */ |
+}; |
+ |
+static const uint8_t extend_modes[INTRA_MODES] = { /* NEED_* mask, indexed by intra mode */ |
+ NEED_ABOVE | NEED_LEFT, // DC |
+ NEED_ABOVE, // V |
+ NEED_LEFT, // H |
+ NEED_ABOVERIGHT, // D45 (the ABOVERIGHT path also fills the first bs above pixels) |
+ NEED_LEFT | NEED_ABOVE, // D135 |
+ NEED_LEFT | NEED_ABOVE, // D117 |
+ NEED_LEFT | NEED_ABOVE, // D153 |
+ NEED_LEFT, // D207 |
+ NEED_ABOVERIGHT, // D63 (the ABOVERIGHT path also fills the first bs above pixels) |
+ NEED_LEFT | NEED_ABOVE, // TM |
+}; |
+ |
// This serves as a wrapper function, so that all the prediction functions |
// can be unified and accessed as a pointer array. Note that the boundary |
// above and left are not necessarily used all the time. |
@@ -226,7 +245,7 @@ static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, |
(void) left; |
(void) bd; |
for (r = 0; r < bs; r++) { |
- vpx_memcpy(dst, above, bs * sizeof(uint16_t)); |
+ memcpy(dst, above, bs * sizeof(uint16_t)); |
dst += stride; |
} |
} |
@@ -469,7 +488,7 @@ static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
(void) left; |
for (r = 0; r < bs; r++) { |
- vpx_memcpy(dst, above, bs); |
+ memcpy(dst, above, bs); |
dst += stride; |
} |
} |
@@ -481,7 +500,7 @@ static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
(void) above; |
for (r = 0; r < bs; r++) { |
- vpx_memset(dst, left[r], bs); |
+ memset(dst, left[r], bs); |
dst += stride; |
} |
} |
@@ -507,7 +526,7 @@ static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
(void) left; |
for (r = 0; r < bs; r++) { |
- vpx_memset(dst, 128, bs); |
+ memset(dst, 128, bs); |
dst += stride; |
} |
} |
@@ -524,7 +543,7 @@ static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
expected_dc = (sum + (bs >> 1)) / bs; |
for (r = 0; r < bs; r++) { |
- vpx_memset(dst, expected_dc, bs); |
+ memset(dst, expected_dc, bs); |
dst += stride; |
} |
} |
@@ -540,7 +559,7 @@ static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
expected_dc = (sum + (bs >> 1)) / bs; |
for (r = 0; r < bs; r++) { |
- vpx_memset(dst, expected_dc, bs); |
+ memset(dst, expected_dc, bs); |
dst += stride; |
} |
} |
@@ -559,7 +578,7 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, |
expected_dc = (sum + (count >> 1)) / count; |
for (r = 0; r < bs; r++) { |
- vpx_memset(dst, expected_dc, bs); |
+ memset(dst, expected_dc, bs); |
dst += stride; |
} |
} |
@@ -638,8 +657,8 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, |
int i; |
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); |
- DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64); |
- DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16); |
+ DECLARE_ALIGNED(16, uint16_t, left_col[32]); |
+ DECLARE_ALIGNED(16, uint16_t, above_data[128 + 16]); |
uint16_t *above_row = above_data + 16; |
const uint16_t *const_above_row = above_row; |
const int bs = 4 << tx_size; |
@@ -699,24 +718,24 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, |
/* slower path if the block needs border extension */ |
if (x0 + 2 * bs <= frame_width) { |
if (right_available && bs == 4) { |
- vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t)); |
} else { |
- vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
vpx_memset16(above_row + bs, above_row[bs - 1], bs); |
} |
} else if (x0 + bs <= frame_width) { |
const int r = frame_width - x0; |
if (right_available && bs == 4) { |
- vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, r * sizeof(uint16_t)); |
vpx_memset16(above_row + r, above_row[r - 1], |
x0 + 2 * bs - frame_width); |
} else { |
- vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
vpx_memset16(above_row + bs, above_row[bs - 1], bs); |
} |
} else if (x0 <= frame_width) { |
const int r = frame_width - x0; |
- vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, r * sizeof(uint16_t)); |
vpx_memset16(above_row + r, above_row[r - 1], |
x0 + 2 * bs - frame_width); |
} |
@@ -727,9 +746,9 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, |
if (bs == 4 && right_available && left_available) { |
const_above_row = above_ref; |
} else { |
- vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
+ memcpy(above_row, above_ref, bs * sizeof(uint16_t)); |
if (bs == 4 && right_available) |
- vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t)); |
+ memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t)); |
else |
vpx_memset16(above_row + bs, above_row[bs - 1], bs); |
// TODO(Peter): this value should probably change for high bitdepth |
@@ -761,8 +780,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, |
int right_available, int x, int y, |
int plane) { |
int i; |
- DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64); |
- DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16); |
+ DECLARE_ALIGNED(16, uint8_t, left_col[32]); |
+ DECLARE_ALIGNED(16, uint8_t, above_data[128 + 16]); |
uint8_t *above_row = above_data + 16; |
const uint8_t *const_above_row = above_row; |
const int bs = 4 << tx_size; |
@@ -790,75 +809,103 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, |
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; |
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; |
- vpx_memset(left_col, 129, 64); |
- |
- // left |
- if (left_available) { |
- if (xd->mb_to_bottom_edge < 0) { |
- /* slower path if the block needs border extension */ |
- if (y0 + bs <= frame_height) { |
- for (i = 0; i < bs; ++i) |
- left_col[i] = ref[i * ref_stride - 1]; |
+ // NEED_LEFT |
+ if (extend_modes[mode] & NEED_LEFT) { |
+ if (left_available) { |
+ if (xd->mb_to_bottom_edge < 0) { |
+ /* slower path if the block needs border extension */ |
+ if (y0 + bs <= frame_height) { |
+ for (i = 0; i < bs; ++i) |
+ left_col[i] = ref[i * ref_stride - 1]; |
+ } else { |
+ const int extend_bottom = frame_height - y0; |
+ for (i = 0; i < extend_bottom; ++i) |
+ left_col[i] = ref[i * ref_stride - 1]; |
+ for (; i < bs; ++i) |
+ left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; |
+ } |
} else { |
- const int extend_bottom = frame_height - y0; |
- for (i = 0; i < extend_bottom; ++i) |
+ /* faster path if the block does not need extension */ |
+ for (i = 0; i < bs; ++i) |
left_col[i] = ref[i * ref_stride - 1]; |
- for (; i < bs; ++i) |
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; |
} |
} else { |
- /* faster path if the block does not need extension */ |
- for (i = 0; i < bs; ++i) |
- left_col[i] = ref[i * ref_stride - 1]; |
+ memset(left_col, 129, bs); |
} |
} |
- // TODO(hkuang) do not extend 2*bs pixels for all modes. |
- // above |
- if (up_available) { |
- const uint8_t *above_ref = ref - ref_stride; |
- if (xd->mb_to_right_edge < 0) { |
- /* slower path if the block needs border extension */ |
- if (x0 + 2 * bs <= frame_width) { |
- if (right_available && bs == 4) { |
- vpx_memcpy(above_row, above_ref, 2 * bs); |
- } else { |
- vpx_memcpy(above_row, above_ref, bs); |
- vpx_memset(above_row + bs, above_row[bs - 1], bs); |
+ // NEED_ABOVE |
+ if (extend_modes[mode] & NEED_ABOVE) { |
+ if (up_available) { |
+ const uint8_t *above_ref = ref - ref_stride; |
+ if (xd->mb_to_right_edge < 0) { |
+ /* slower path if the block needs border extension */ |
+ if (x0 + bs <= frame_width) { |
+ memcpy(above_row, above_ref, bs); |
+ } else if (x0 <= frame_width) { |
+ const int r = frame_width - x0; |
+ memcpy(above_row, above_ref, r); |
+ memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); |
} |
- } else if (x0 + bs <= frame_width) { |
- const int r = frame_width - x0; |
- if (right_available && bs == 4) { |
- vpx_memcpy(above_row, above_ref, r); |
- vpx_memset(above_row + r, above_row[r - 1], |
- x0 + 2 * bs - frame_width); |
+ } else { |
+ /* faster path if the block does not need extension */ |
+ if (bs == 4 && right_available && left_available) { |
+ const_above_row = above_ref; |
} else { |
- vpx_memcpy(above_row, above_ref, bs); |
- vpx_memset(above_row + bs, above_row[bs - 1], bs); |
+ memcpy(above_row, above_ref, bs); |
} |
- } else if (x0 <= frame_width) { |
- const int r = frame_width - x0; |
- vpx_memcpy(above_row, above_ref, r); |
- vpx_memset(above_row + r, above_row[r - 1], |
- x0 + 2 * bs - frame_width); |
} |
above_row[-1] = left_available ? above_ref[-1] : 129; |
} else { |
- /* faster path if the block does not need extension */ |
- if (bs == 4 && right_available && left_available) { |
- const_above_row = above_ref; |
+ memset(above_row, 127, bs); |
+ above_row[-1] = 127; |
+ } |
+ } |
+ |
+ // NEED_ABOVERIGHT |
+ if (extend_modes[mode] & NEED_ABOVERIGHT) { |
+ if (up_available) { |
+ const uint8_t *above_ref = ref - ref_stride; |
+ if (xd->mb_to_right_edge < 0) { |
+ /* slower path if the block needs border extension */ |
+ if (x0 + 2 * bs <= frame_width) { |
+ if (right_available && bs == 4) { |
+ memcpy(above_row, above_ref, 2 * bs); |
+ } else { |
+ memcpy(above_row, above_ref, bs); |
+ memset(above_row + bs, above_row[bs - 1], bs); |
+ } |
+ } else if (x0 + bs <= frame_width) { |
+ const int r = frame_width - x0; |
+ if (right_available && bs == 4) { |
+ memcpy(above_row, above_ref, r); |
+ memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); |
+ } else { |
+ memcpy(above_row, above_ref, bs); |
+ memset(above_row + bs, above_row[bs - 1], bs); |
+ } |
+ } else if (x0 <= frame_width) { |
+ const int r = frame_width - x0; |
+ memcpy(above_row, above_ref, r); |
+ memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); |
+ } |
} else { |
- vpx_memcpy(above_row, above_ref, bs); |
- if (bs == 4 && right_available) |
- vpx_memcpy(above_row + bs, above_ref + bs, bs); |
- else |
- vpx_memset(above_row + bs, above_row[bs - 1], bs); |
- above_row[-1] = left_available ? above_ref[-1] : 129; |
+ /* faster path if the block does not need extension */ |
+ if (bs == 4 && right_available && left_available) { |
+ const_above_row = above_ref; |
+ } else { |
+ memcpy(above_row, above_ref, bs); |
+ if (bs == 4 && right_available) |
+ memcpy(above_row + bs, above_ref + bs, bs); |
+ else |
+ memset(above_row + bs, above_row[bs - 1], bs); |
+ } |
} |
+ above_row[-1] = left_available ? above_ref[-1] : 129; |
+ } else { |
+ memset(above_row, 127, bs * 2); |
+ above_row[-1] = 127; |
} |
- } else { |
- vpx_memset(above_row, 127, bs * 2); |
- above_row[-1] = 127; |
} |
// predict |