Index: third_party/libwebp/dec/frame.c |
diff --git a/third_party/libwebp/dec/frame.c b/third_party/libwebp/dec/frame.c |
index 2359acc5b0d4c297aa4c7abc7b5df6a3b798c6e5..b882133eabbc002b6f1e2aa3b80858ba054aed33 100644 |
--- a/third_party/libwebp/dec/frame.c |
+++ b/third_party/libwebp/dec/frame.c |
@@ -15,10 +15,180 @@ |
#include "./vp8i.h" |
#include "../utils/utils.h" |
-#define ALIGN_MASK (32 - 1) |
+//------------------------------------------------------------------------------ |
+// Main reconstruction function. |
+ |
+static const int kScan[16] = { |
+ 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
+ 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
+ 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
+ 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS |
+}; |
+ |
+static int CheckMode(int mb_x, int mb_y, int mode) { |
+ if (mode == B_DC_PRED) { |
+ if (mb_x == 0) { |
+ return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; |
+ } else { |
+ return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; |
+ } |
+ } |
+ return mode; |
+} |
+ |
+static void Copy32b(uint8_t* const dst, const uint8_t* const src) { |
+ memcpy(dst, src, 4); |
+} |
+ |
+static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, |
+ uint8_t* const dst) { |
+ switch (bits >> 30) { |
+ case 3: |
+ VP8Transform(src, dst, 0); |
+ break; |
+ case 2: |
+ VP8TransformAC3(src, dst); |
+ break; |
+ case 1: |
+ VP8TransformDC(src, dst); |
+ break; |
+ default: |
+ break; |
+ } |
+} |
+ |
+static void DoUVTransform(uint32_t bits, const int16_t* const src, |
+ uint8_t* const dst) { |
+ if (bits & 0xff) { // any non-zero coeff at all? |
+ if (bits & 0xaa) { // any non-zero AC coefficient? |
+ VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V |
+ } else { |
+ VP8TransformDCUV(src, dst); |
+ } |
+ } |
+} |
static void ReconstructRow(const VP8Decoder* const dec, |
- const VP8ThreadContext* ctx); // TODO(skal): remove |
+ const VP8ThreadContext* ctx) { |
+ int j; |
+ int mb_x; |
+ const int mb_y = ctx->mb_y_; |
+ const int cache_id = ctx->id_; |
+ uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; |
+ uint8_t* const u_dst = dec->yuv_b_ + U_OFF; |
+ uint8_t* const v_dst = dec->yuv_b_ + V_OFF; |
+ |
+ // Initialize left-most block. |
+ for (j = 0; j < 16; ++j) { |
+ y_dst[j * BPS - 1] = 129; |
+ } |
+ for (j = 0; j < 8; ++j) { |
+ u_dst[j * BPS - 1] = 129; |
+ v_dst[j * BPS - 1] = 129; |
+ } |
+ |
+ // Init top-left sample on left column too. |
+ if (mb_y > 0) { |
+ y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; |
+ } else { |
+ // we only need to do this init once at block (0,0). |
+ // Afterward, it remains valid for the whole topmost row. |
+ memset(y_dst - BPS - 1, 127, 16 + 4 + 1); |
+ memset(u_dst - BPS - 1, 127, 8 + 1); |
+ memset(v_dst - BPS - 1, 127, 8 + 1); |
+ } |
+ |
+ // Reconstruct one row. |
+ for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { |
+ const VP8MBData* const block = ctx->mb_data_ + mb_x; |
+ |
+ // Rotate in the left samples from previously decoded block. We move four |
+ // pixels at a time for alignment reason, and because of in-loop filter. |
+ if (mb_x > 0) { |
+ for (j = -1; j < 16; ++j) { |
+ Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); |
+ } |
+ for (j = -1; j < 8; ++j) { |
+ Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); |
+ Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); |
+ } |
+ } |
+ { |
+ // bring top samples into the cache |
+ VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; |
+ const int16_t* const coeffs = block->coeffs_; |
+ uint32_t bits = block->non_zero_y_; |
+ int n; |
+ |
+ if (mb_y > 0) { |
+ memcpy(y_dst - BPS, top_yuv[0].y, 16); |
+ memcpy(u_dst - BPS, top_yuv[0].u, 8); |
+ memcpy(v_dst - BPS, top_yuv[0].v, 8); |
+ } |
+ |
+ // predict and add residuals |
+ if (block->is_i4x4_) { // 4x4 |
+ uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); |
+ |
+ if (mb_y > 0) { |
+ if (mb_x >= dec->mb_w_ - 1) { // on rightmost border |
+ memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); |
+ } else { |
+ memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); |
+ } |
+ } |
+ // replicate the top-right pixels below |
+ top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; |
+ |
+ // predict and add residuals for all 4x4 blocks in turn. |
+ for (n = 0; n < 16; ++n, bits <<= 2) { |
+ uint8_t* const dst = y_dst + kScan[n]; |
+ VP8PredLuma4[block->imodes_[n]](dst); |
+ DoTransform(bits, coeffs + n * 16, dst); |
+ } |
+ } else { // 16x16 |
+ const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]); |
+ VP8PredLuma16[pred_func](y_dst); |
+ if (bits != 0) { |
+ for (n = 0; n < 16; ++n, bits <<= 2) { |
+ DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); |
+ } |
+ } |
+ } |
+ { |
+ // Chroma |
+ const uint32_t bits_uv = block->non_zero_uv_; |
+ const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); |
+ VP8PredChroma8[pred_func](u_dst); |
+ VP8PredChroma8[pred_func](v_dst); |
+ DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); |
+ DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); |
+ } |
+ |
+ // stash away top samples for next block |
+ if (mb_y < dec->mb_h_ - 1) { |
+ memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); |
+ memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); |
+ memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); |
+ } |
+ } |
+ // Transfer reconstructed samples from yuv_b_ cache to final destination. |
+ { |
+ const int y_offset = cache_id * 16 * dec->cache_y_stride_; |
+ const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; |
+ uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; |
+ uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; |
+ uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; |
+ for (j = 0; j < 16; ++j) { |
+ memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); |
+ } |
+ for (j = 0; j < 8; ++j) { |
+ memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); |
+ memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); |
+ } |
+ } |
+ } |
+} |
//------------------------------------------------------------------------------ |
// Filtering |
@@ -112,7 +282,6 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) { |
VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; |
int level = base_level; |
if (hdr->use_lf_delta_) { |
- // TODO(skal): only CURRENT is handled for now. |
level += hdr->ref_lf_delta_[0]; |
if (i4x4) { |
level += hdr->mode_lf_delta_[0]; |
@@ -177,7 +346,6 @@ void VP8InitDithering(const WebPDecoderOptions* const options, |
dec->dither_ = 1; |
} |
} |
-#if WEBP_DECODER_ABI_VERSION > 0x0204 |
// potentially allow alpha dithering |
dec->alpha_dithering_ = options->alpha_dithering_strength; |
if (dec->alpha_dithering_ > 100) { |
@@ -185,7 +353,6 @@ void VP8InitDithering(const WebPDecoderOptions* const options, |
} else if (dec->alpha_dithering_ < 0) { |
dec->alpha_dithering_ = 0; |
} |
-#endif |
} |
} |
@@ -554,7 +721,7 @@ static int AllocateMemory(VP8Decoder* const dec) { |
const uint64_t needed = (uint64_t)intra_pred_mode_size |
+ top_size + mb_info_size + f_info_size |
+ yuv_size + mb_data_size |
- + cache_size + alpha_size + ALIGN_MASK; |
+ + cache_size + alpha_size + WEBP_ALIGN_CST; |
uint8_t* mem; |
if (needed != (size_t)needed) return 0; // check for overflow |
@@ -591,8 +758,8 @@ static int AllocateMemory(VP8Decoder* const dec) { |
dec->thread_ctx_.f_info_ += mb_w; |
} |
- mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); |
- assert((yuv_size & ALIGN_MASK) == 0); |
+ mem = (uint8_t*)WEBP_ALIGN(mem); |
+ assert((yuv_size & WEBP_ALIGN_CST) == 0); |
dec->yuv_b_ = (uint8_t*)mem; |
mem += yuv_size; |
@@ -644,7 +811,7 @@ static void InitIo(VP8Decoder* const dec, VP8Io* io) { |
io->a = NULL; |
} |
-int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { |
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) { |
if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_. |
if (!AllocateMemory(dec)) return 0; |
InitIo(dec, io); |
@@ -653,176 +820,3 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { |
} |
//------------------------------------------------------------------------------ |
-// Main reconstruction function. |
- |
-static const int kScan[16] = { |
- 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
- 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
- 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
- 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS |
-}; |
- |
-static int CheckMode(int mb_x, int mb_y, int mode) { |
- if (mode == B_DC_PRED) { |
- if (mb_x == 0) { |
- return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; |
- } else { |
- return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; |
- } |
- } |
- return mode; |
-} |
- |
-static void Copy32b(uint8_t* dst, uint8_t* src) { |
- memcpy(dst, src, 4); |
-} |
- |
-static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, |
- uint8_t* const dst) { |
- switch (bits >> 30) { |
- case 3: |
- VP8Transform(src, dst, 0); |
- break; |
- case 2: |
- VP8TransformAC3(src, dst); |
- break; |
- case 1: |
- VP8TransformDC(src, dst); |
- break; |
- default: |
- break; |
- } |
-} |
- |
-static void DoUVTransform(uint32_t bits, const int16_t* const src, |
- uint8_t* const dst) { |
- if (bits & 0xff) { // any non-zero coeff at all? |
- if (bits & 0xaa) { // any non-zero AC coefficient? |
- VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V |
- } else { |
- VP8TransformDCUV(src, dst); |
- } |
- } |
-} |
- |
-static void ReconstructRow(const VP8Decoder* const dec, |
- const VP8ThreadContext* ctx) { |
- int j; |
- int mb_x; |
- const int mb_y = ctx->mb_y_; |
- const int cache_id = ctx->id_; |
- uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; |
- uint8_t* const u_dst = dec->yuv_b_ + U_OFF; |
- uint8_t* const v_dst = dec->yuv_b_ + V_OFF; |
- for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { |
- const VP8MBData* const block = ctx->mb_data_ + mb_x; |
- |
- // Rotate in the left samples from previously decoded block. We move four |
- // pixels at a time for alignment reason, and because of in-loop filter. |
- if (mb_x > 0) { |
- for (j = -1; j < 16; ++j) { |
- Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); |
- } |
- for (j = -1; j < 8; ++j) { |
- Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); |
- Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); |
- } |
- } else { |
- for (j = 0; j < 16; ++j) { |
- y_dst[j * BPS - 1] = 129; |
- } |
- for (j = 0; j < 8; ++j) { |
- u_dst[j * BPS - 1] = 129; |
- v_dst[j * BPS - 1] = 129; |
- } |
- // Init top-left sample on left column too |
- if (mb_y > 0) { |
- y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; |
- } |
- } |
- { |
- // bring top samples into the cache |
- VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x; |
- const int16_t* const coeffs = block->coeffs_; |
- uint32_t bits = block->non_zero_y_; |
- int n; |
- |
- if (mb_y > 0) { |
- memcpy(y_dst - BPS, top_yuv[0].y, 16); |
- memcpy(u_dst - BPS, top_yuv[0].u, 8); |
- memcpy(v_dst - BPS, top_yuv[0].v, 8); |
- } else if (mb_x == 0) { |
- // we only need to do this init once at block (0,0). |
- // Afterward, it remains valid for the whole topmost row. |
- memset(y_dst - BPS - 1, 127, 16 + 4 + 1); |
- memset(u_dst - BPS - 1, 127, 8 + 1); |
- memset(v_dst - BPS - 1, 127, 8 + 1); |
- } |
- |
- // predict and add residuals |
- if (block->is_i4x4_) { // 4x4 |
- uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); |
- |
- if (mb_y > 0) { |
- if (mb_x >= dec->mb_w_ - 1) { // on rightmost border |
- memset(top_right, top_yuv[0].y[15], sizeof(*top_right)); |
- } else { |
- memcpy(top_right, top_yuv[1].y, sizeof(*top_right)); |
- } |
- } |
- // replicate the top-right pixels below |
- top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; |
- |
- // predict and add residuals for all 4x4 blocks in turn. |
- for (n = 0; n < 16; ++n, bits <<= 2) { |
- uint8_t* const dst = y_dst + kScan[n]; |
- VP8PredLuma4[block->imodes_[n]](dst); |
- DoTransform(bits, coeffs + n * 16, dst); |
- } |
- } else { // 16x16 |
- const int pred_func = CheckMode(mb_x, mb_y, |
- block->imodes_[0]); |
- VP8PredLuma16[pred_func](y_dst); |
- if (bits != 0) { |
- for (n = 0; n < 16; ++n, bits <<= 2) { |
- DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]); |
- } |
- } |
- } |
- { |
- // Chroma |
- const uint32_t bits_uv = block->non_zero_uv_; |
- const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_); |
- VP8PredChroma8[pred_func](u_dst); |
- VP8PredChroma8[pred_func](v_dst); |
- DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst); |
- DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst); |
- } |
- |
- // stash away top samples for next block |
- if (mb_y < dec->mb_h_ - 1) { |
- memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16); |
- memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8); |
- memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8); |
- } |
- } |
- // Transfer reconstructed samples from yuv_b_ cache to final destination. |
- { |
- const int y_offset = cache_id * 16 * dec->cache_y_stride_; |
- const int uv_offset = cache_id * 8 * dec->cache_uv_stride_; |
- uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset; |
- uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset; |
- uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset; |
- for (j = 0; j < 16; ++j) { |
- memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); |
- } |
- for (j = 0; j < 8; ++j) { |
- memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); |
- memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); |
- } |
- } |
- } |
-} |
- |
-//------------------------------------------------------------------------------ |
- |