third_party/libwebp/dec/frame.c - Issue 1178013008: Use the upstream version of libwebp, v0.4.3.

Side by Side Diff: third_party/libwebp/dec/frame.c

Issue 1178013008: Use the upstream version of libwebp, v0.4.3. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Fixes for SkWebpImageDecoder and SkWebpCodec. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2010 Google Inc. All Rights Reserved.

	2 //

	3 // Use of this source code is governed by a BSD-style license

	4 // that can be found in the COPYING file in the root of the source

	5 // tree. An additional intellectual property rights grant can be found

	6 // in the file PATENTS. All contributing project authors may

	7 // be found in the AUTHORS file in the root of the source tree.

	8 // -----------------------------------------------------------------------------

	9 //

	10 // Frame-reconstruction function. Memory allocation.

	11 //

	12 // Author: Skal (pascal.massimino@gmail.com)

	13

	14 #include <stdlib.h>

	15 #include "./vp8i.h"

	16 #include "../utils/utils.h"

	17

	// Low-bit mask for a 32-byte boundary; used when rounding a pointer up.
	#define ALIGN_MASK 31  /* == 32 - 1 */

	19

	20 static void ReconstructRow(const VP8Decoder* const dec,

	21 const VP8ThreadContext* ctx); // TODO(skal): remove

	22

	23 //------------------------------------------------------------------------------

	24 // Filtering

	25

	// Extra rows that must be cached across a macroblock boundary, indexed by
	// the decoder's filter type.
	static const uint8_t kFilterExtraRows[3] = {
	  0,  // no filtering: nothing to keep
	  2,  // simple filter: up to 2 luma samples are read and 1 is written
	  8   // complex filter: up to 4 luma samples are read and 3 are written;
	      // same for U/V, hence 8 samples in total (2x upsampling)
	};

	32

	33 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {

	34 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	35 const int cache_id = ctx->id_;

	36 const int y_bps = dec->cache_y_stride_;

	37 const VP8FInfo* const f_info = ctx->f_info_ + mb_x;

	38 uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;

	39 const int ilevel = f_info->f_ilevel_;

	40 const int limit = f_info->f_limit_;

	41 if (limit == 0) {

	42 return;

	43 }

	44 assert(limit >= 3);

	45 if (dec->filter_type_ == 1) { // simple

	46 if (mb_x > 0) {

	47 VP8SimpleHFilter16(y_dst, y_bps, limit + 4);

	48 }

	49 if (f_info->f_inner_) {

	50 VP8SimpleHFilter16i(y_dst, y_bps, limit);

	51 }

	52 if (mb_y > 0) {

	53 VP8SimpleVFilter16(y_dst, y_bps, limit + 4);

	54 }

	55 if (f_info->f_inner_) {

	56 VP8SimpleVFilter16i(y_dst, y_bps, limit);

	57 }

	58 } else { // complex

	59 const int uv_bps = dec->cache_uv_stride_;

	60 uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;

	61 uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;

	62 const int hev_thresh = f_info->hev_thresh_;

	63 if (mb_x > 0) {

	64 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);

	65 VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);

	66 }

	67 if (f_info->f_inner_) {

	68 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);

	69 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);

	70 }

	71 if (mb_y > 0) {

	72 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);

	73 VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);

	74 }

	75 if (f_info->f_inner_) {

	76 VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);

	77 VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);

	78 }

	79 }

	80 }

	81

	82 // Filter the decoded macroblock row (if needed)

	83 static void FilterRow(const VP8Decoder* const dec) {

	84 int mb_x;

	85 const int mb_y = dec->thread_ctx_.mb_y_;

	86 assert(dec->thread_ctx_.filter_row_);

	87 for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {

	88 DoFilter(dec, mb_x, mb_y);

	89 }

	90 }

	91

	92 //------------------------------------------------------------------------------

	93 // Precompute the filtering strength for each segment and each i4x4/i16x16 mode.

	94

	95 static void PrecomputeFilterStrengths(VP8Decoder* const dec) {

	96 if (dec->filter_type_ > 0) {

	97 int s;

	98 const VP8FilterHeader* const hdr = &dec->filter_hdr_;

	99 for (s = 0; s < NUM_MB_SEGMENTS; ++s) {

	100 int i4x4;

	101 // First, compute the initial level

	102 int base_level;

	103 if (dec->segment_hdr_.use_segment_) {

	104 base_level = dec->segment_hdr_.filter_strength_[s];

	105 if (!dec->segment_hdr_.absolute_delta_) {

	106 base_level += hdr->level_;

	107 }

	108 } else {

	109 base_level = hdr->level_;

	110 }

	111 for (i4x4 = 0; i4x4 <= 1; ++i4x4) {

	112 VP8FInfo* const info = &dec->fstrengths_[s][i4x4];

	113 int level = base_level;

	114 if (hdr->use_lf_delta_) {

	115 // TODO(skal): only CURRENT is handled for now.

	116 level += hdr->ref_lf_delta_[0];

	117 if (i4x4) {

	118 level += hdr->mode_lf_delta_[0];

	119 }

	120 }

	121 level = (level < 0) ? 0 : (level > 63) ? 63 : level;

	122 if (level > 0) {

	123 int ilevel = level;

	124 if (hdr->sharpness_ > 0) {

	125 if (hdr->sharpness_ > 4) {

	126 ilevel >>= 2;

	127 } else {

	128 ilevel >>= 1;

	129 }

	130 if (ilevel > 9 - hdr->sharpness_) {

	131 ilevel = 9 - hdr->sharpness_;

	132 }

	133 }

	134 if (ilevel < 1) ilevel = 1;

	135 info->f_ilevel_ = ilevel;

	136 info->f_limit_ = 2 * level + ilevel;

	137 info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;

	138 } else {

	139 info->f_limit_ = 0; // no filtering

	140 }

	141 info->f_inner_ = i4x4;

	142 }

	143 }

	144 }

	145 }

	146

	147 //------------------------------------------------------------------------------

	148 // Dithering

	149

	// Dithering amplitude per UV quantizer index (roughly, dqm->uv_mat_[1]).
	#define DITHER_AMP_TAB_SIZE 12
	static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
	  8, 7, 6, 4,
	  4, 2, 2, 2,
	  1, 1, 1, 1
	};

	155

	156 void VP8InitDithering(const WebPDecoderOptions* const options,

	157 VP8Decoder* const dec) {

	158 assert(dec != NULL);

	159 if (options != NULL) {

	160 const int d = options->dithering_strength;

	161 const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;

	162 const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);

	163 if (f > 0) {

	164 int s;

	165 int all_amp = 0;

	166 for (s = 0; s < NUM_MB_SEGMENTS; ++s) {

	167 VP8QuantMatrix* const dqm = &dec->dqm_[s];

	168 if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {

	169 // TODO(skal): should we specially dither more for uv_quant_ < 0?

	170 const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;

	171 dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;

	172 }

	173 all_amp \|= dqm->dither_;

	174 }

	175 if (all_amp != 0) {

	176 VP8InitRandom(&dec->dithering_rg_, 1.0f);

	177 dec->dither_ = 1;

	178 }

	179 }

	180 #if WEBP_DECODER_ABI_VERSION > 0x0204

	181 // potentially allow alpha dithering

	182 dec->alpha_dithering_ = options->alpha_dithering_strength;

	183 if (dec->alpha_dithering_ > 100) {

	184 dec->alpha_dithering_ = 100;

	185 } else if (dec->alpha_dithering_ < 0) {

	186 dec->alpha_dithering_ = 0;

	187 }

	188 #endif

	189 }

	190 }

	191

	// Smallest amplitude that still produces a visible dithering effect.
	#define MIN_DITHER_AMP 4

	// Random values are drawn on DITHER_AMP_BITS + 1 bits and re-centered
	// around zero by subtracting DITHER_AMP_CENTER.
	#define DITHER_AMP_BITS 8
	#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)

	// The centered value is then scaled down by DITHER_DESCALE bits, with
	// rounding.
	#define DITHER_DESCALE 4
	#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))

	198

	199 static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {

	200 int i, j;

	201 for (j = 0; j < 8; ++j) {

	202 for (i = 0; i < 8; ++i) {

	203 // TODO: could be made faster with SSE2

	204 const int bits =

	205 VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;

	206 // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100

	207 const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;

	208 const int v = (int)dst[i] + delta;

	209 dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;

	210 }

	211 dst += bps;

	212 }

	213 }

	214

	215 static void DitherRow(VP8Decoder* const dec) {

	216 int mb_x;

	217 assert(dec->dither_);

	218 for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {

	219 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	220 const VP8MBData* const data = ctx->mb_data_ + mb_x;

	221 const int cache_id = ctx->id_;

	222 const int uv_bps = dec->cache_uv_stride_;

	223 if (data->dither_ >= MIN_DITHER_AMP) {

	224 uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;

	225 uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;

	226 Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);

	227 Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);

	228 }

	229 }

	230 }

	231

	232 //------------------------------------------------------------------------------

	233 // This function is called after a row of macroblocks is finished decoding.

	234 // It also takes into account the following restrictions:

	235 // * In case of in-loop filtering, we must hold off sending some of the bottom

	236 // pixels as they are yet unfiltered. They will be when the next macroblock

	237 // row is decoded. Meanwhile, we must preserve them by rotating them in the

	238 // cache area. This doesn't hold for the very bottom row of the uncropped

	239 // picture of course.

	240 // * we must clip the remaining pixels against the cropping area. The VP8Io

	241 // struct must have the following fields set correctly before calling put():

	242

	243 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB

	244

	245 // Finalize and transmit a complete row. Return false in case of user-abort.

	246 static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {

	247 int ok = 1;

	248 const VP8ThreadContext* const ctx = &dec->thread_ctx_;

	249 const int cache_id = ctx->id_;

	250 const int extra_y_rows = kFilterExtraRows[dec->filter_type_];

	251 const int ysize = extra_y_rows * dec->cache_y_stride_;

	252 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;

	253 const int y_offset = cache_id * 16 * dec->cache_y_stride_;

	254 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;

	255 uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;

	256 uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;

	257 uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;

	258 const int mb_y = ctx->mb_y_;

	259 const int is_first_row = (mb_y == 0);

	260 const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);

	261

	262 if (dec->mt_method_ == 2) {

	263 ReconstructRow(dec, ctx);

	264 }

	265

	266 if (ctx->filter_row_) {

	267 FilterRow(dec);

	268 }

	269

	270 if (dec->dither_) {

	271 DitherRow(dec);

	272 }

	273

	274 if (io->put != NULL) {

	275 int y_start = MACROBLOCK_VPOS(mb_y);

	276 int y_end = MACROBLOCK_VPOS(mb_y + 1);

	277 if (!is_first_row) {

	278 y_start -= extra_y_rows;

	279 io->y = ydst;

	280 io->u = udst;

	281 io->v = vdst;

	282 } else {

	283 io->y = dec->cache_y_ + y_offset;

	284 io->u = dec->cache_u_ + uv_offset;

	285 io->v = dec->cache_v_ + uv_offset;

	286 }

	287

	288 if (!is_last_row) {

	289 y_end -= extra_y_rows;

	290 }

	291 if (y_end > io->crop_bottom) {

	292 y_end = io->crop_bottom; // make sure we don't overflow on last row.

	293 }

	294 io->a = NULL;

	295 if (dec->alpha_data_ != NULL && y_start < y_end) {

	296 // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a

	297 // good idea.

	298 io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);

	299 if (io->a == NULL) {

	300 return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,

	301 "Could not decode alpha data.");

	302 }

	303 }

	304 if (y_start < io->crop_top) {

	305 const int delta_y = io->crop_top - y_start;

	306 y_start = io->crop_top;

	307 assert(!(delta_y & 1));

	308 io->y += dec->cache_y_stride_ * delta_y;

	309 io->u += dec->cache_uv_stride_ * (delta_y >> 1);

	310 io->v += dec->cache_uv_stride_ * (delta_y >> 1);

	311 if (io->a != NULL) {

	312 io->a += io->width * delta_y;

	313 }

	314 }

	315 if (y_start < y_end) {

	316 io->y += io->crop_left;

	317 io->u += io->crop_left >> 1;

	318 io->v += io->crop_left >> 1;

	319 if (io->a != NULL) {

	320 io->a += io->crop_left;

	321 }

	322 io->mb_y = y_start - io->crop_top;

	323 io->mb_w = io->crop_right - io->crop_left;

	324 io->mb_h = y_end - y_start;

	325 ok = io->put(io);

	326 }

	327 }

	328 // rotate top samples if needed

	329 if (cache_id + 1 == dec->num_caches_) {

	330 if (!is_last_row) {

	331 memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);

	332 memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);

	333 memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);

	334 }

	335 }

	336

	337 return ok;

	338 }

	339

	340 #undef MACROBLOCK_VPOS

	341

	342 //------------------------------------------------------------------------------

	343

	344 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {

	345 int ok = 1;

	346 VP8ThreadContext* const ctx = &dec->thread_ctx_;

	347 const int filter_row =

	348 (dec->filter_type_ > 0) &&

	349 (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);

	350 if (dec->mt_method_ == 0) {

	351 // ctx->id_ and ctx->f_info_ are already set

	352 ctx->mb_y_ = dec->mb_y_;

	353 ctx->filter_row_ = filter_row;

	354 ReconstructRow(dec, ctx);

	355 ok = FinishRow(dec, io);

	356 } else {

	357 WebPWorker* const worker = &dec->worker_;

	358 // Finish previous job before updating context

	359 ok &= WebPGetWorkerInterface()->Sync(worker);

	360 assert(worker->status_ == OK);

	361 if (ok) { // spawn a new deblocking/output job

	362 ctx->io_ = *io;

	363 ctx->id_ = dec->cache_id_;

	364 ctx->mb_y_ = dec->mb_y_;

	365 ctx->filter_row_ = filter_row;

	366 if (dec->mt_method_ == 2) { // swap macroblock data

	367 VP8MBData* const tmp = ctx->mb_data_;

	368 ctx->mb_data_ = dec->mb_data_;

	369 dec->mb_data_ = tmp;

	370 } else {

	371 // perform reconstruction directly in main thread

	372 ReconstructRow(dec, ctx);

	373 }

	374 if (filter_row) { // swap filter info

	375 VP8FInfo* const tmp = ctx->f_info_;

	376 ctx->f_info_ = dec->f_info_;

	377 dec->f_info_ = tmp;

	378 }

	379 // (reconstruct)+filter in parallel

	380 WebPGetWorkerInterface()->Launch(worker);

	381 if (++dec->cache_id_ == dec->num_caches_) {

	382 dec->cache_id_ = 0;

	383 }

	384 }

	385 }

	386 return ok;

	387 }

	388

	389 //------------------------------------------------------------------------------

	390 // Finish setting up the decoding parameter once user's setup() is called.

	391

	392 VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {

	393 // Call setup() first. This may trigger additional decoding features on 'io'.

	394 // Note: Afterward, we must call teardown() no matter what.

	395 if (io->setup != NULL && !io->setup(io)) {

	396 VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");

	397 return dec->status_;

	398 }

	399

	400 // Disable filtering per user request

	401 if (io->bypass_filtering) {

	402 dec->filter_type_ = 0;

	403 }

	404 // TODO(skal): filter type / strength / sharpness forcing

	405

	406 // Define the area where we can skip in-loop filtering, in case of cropping.

	407 //

	408 // 'Simple' filter reads two luma samples outside of the macroblock

	409 // and filters one. It doesn't filter the chroma samples. Hence, we can

	410 // avoid doing the in-loop filtering before crop_top/crop_left position.

	411 // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.

	412 // Means: there's a dependency chain that goes all the way up to the

	413 // top-left corner of the picture (MB #0). We must filter all the previous

	414 // macroblocks.

	415 // TODO(skal): add an 'approximate_decoding' option, that won't produce

	416 // a 1:1 bit-exactness for complex filtering?

	417 {

	418 const int extra_pixels = kFilterExtraRows[dec->filter_type_];

	419 if (dec->filter_type_ == 2) {

	420 // For complex filter, we need to preserve the dependency chain.

	421 dec->tl_mb_x_ = 0;

	422 dec->tl_mb_y_ = 0;

	423 } else {

	424 // For simple filter, we can filter only the cropped region.

	425 // We include 'extra_pixels' on the other side of the boundary, since

	426 // vertical or horizontal filtering of the previous macroblock can

	427 // modify some abutting pixels.

	428 dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;

	429 dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;

	430 if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;

	431 if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;

	432 }

	433 // We need some 'extra' pixels on the right/bottom.

	434 dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;

	435 dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;

	436 if (dec->br_mb_x_ > dec->mb_w_) {

	437 dec->br_mb_x_ = dec->mb_w_;

	438 }

	439 if (dec->br_mb_y_ > dec->mb_h_) {

	440 dec->br_mb_y_ = dec->mb_h_;

	441 }

	442 }

	443 PrecomputeFilterStrengths(dec);

	444 return VP8_STATUS_OK;

	445 }

	446

	447 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {

	448 int ok = 1;

	449 if (dec->mt_method_ > 0) {

	450 ok = WebPGetWorkerInterface()->Sync(&dec->worker_);

	451 }

	452

	453 if (io->teardown != NULL) {

	454 io->teardown(io);

	455 }

	456 return ok;

	457 }

	458

	459 //------------------------------------------------------------------------------

	460 // For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.

	461 //

	462 // Reason is: the deblocking filter cannot deblock the bottom horizontal edges

	463 // immediately, and needs to wait for first few rows of the next macroblock to

	464 // be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending

	465 // on strength).

	466 // With two threads, the vertical positions of the rows being decoded are:

	467 // Decode: [ 0..15][16..31][32..47][48..63][64..79][...

	468 // Deblock: [ 0..11][12..27][28..43][44..59][...

	469 // If we use two threads and two caches of 16 pixels, the sequence would be:

	470 // Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...

	471 // Deblock: [ 0..11][12..27!!][-4..11][12..27][...

	472 // The problem occurs during row [12..15!!] that both the decoding and

	473 // deblocking threads are writing simultaneously.

	474 // With 3 cache lines, one get a safe write pattern:

	475 // Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..

	476 // Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...

	477 // Note that multi-threaded output _without_ deblocking can make use of two

	478 // cache lines of 16 pixels only, since there's no lagging behind. The decoding

	479 // and output process have non-concurrent writing:

	480 // Decode: [ 0..15][16..31][ 0..15][16..31][...

	481 // io->put: [ 0..15][16..31][ 0..15][...

	482

	483 #define MT_CACHE_LINES 3

	484 #define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case

	485

	486 // Initialize multi/single-thread worker

	487 static int InitThreadContext(VP8Decoder* const dec) {

	488 dec->cache_id_ = 0;

	489 if (dec->mt_method_ > 0) {

	490 WebPWorker* const worker = &dec->worker_;

	491 if (!WebPGetWorkerInterface()->Reset(worker)) {

	492 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,

	493 "thread initialization failed.");

	494 }

	495 worker->data1 = dec;

	496 worker->data2 = (void*)&dec->thread_ctx_.io_;

	497 worker->hook = (WebPWorkerHook)FinishRow;

	498 dec->num_caches_ =

	499 (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;

	500 } else {

	501 dec->num_caches_ = ST_CACHE_LINES;

	502 }

	503 return 1;

	504 }

	505

	506 int VP8GetThreadMethod(const WebPDecoderOptions* const options,

	507 const WebPHeaderStructure* const headers,

	508 int width, int height) {

	509 if (options == NULL \|\| options->use_threads == 0) {

	510 return 0;

	511 }

	512 (void)headers;

	513 (void)width;

	514 (void)height;

	515 assert(headers == NULL \|\| !headers->is_lossless);

	516 #if defined(WEBP_USE_THREAD)

	517 if (width < MIN_WIDTH_FOR_THREADS) return 0;

	518 // TODO(skal): tune the heuristic further

	519 #if 0

	520 if (height < 2 * width) return 2;

	521 #endif

	522 return 2;

	523 #else // !WEBP_USE_THREAD

	524 return 0;

	525 #endif

	526 }

	527

	528 #undef MT_CACHE_LINES

	529 #undef ST_CACHE_LINES

	530

	531 //------------------------------------------------------------------------------

	532 // Memory setup

	533

	534 static int AllocateMemory(VP8Decoder* const dec) {

	535 const int num_caches = dec->num_caches_;

	536 const int mb_w = dec->mb_w_;

	537 // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.

	538 const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);

	539 const size_t top_size = sizeof(VP8TopSamples) * mb_w;

	540 const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);

	541 const size_t f_info_size =

	542 (dec->filter_type_ > 0) ?

	543 mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)

	544 : 0;

	545 const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);

	546 const size_t mb_data_size =

	547 (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);

	548 const size_t cache_height = (16 * num_caches

	549 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;

	550 const size_t cache_size = top_size * cache_height;

	551 // alpha_size is the only one that scales as width x height.

	552 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?

	553 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;

	554 const uint64_t needed = (uint64_t)intra_pred_mode_size

	555 + top_size + mb_info_size + f_info_size

	556 + yuv_size + mb_data_size

	557 + cache_size + alpha_size + ALIGN_MASK;

	558 uint8_t* mem;

	559

	560 if (needed != (size_t)needed) return 0; // check for overflow

	561 if (needed > dec->mem_size_) {

	562 WebPSafeFree(dec->mem_);

	563 dec->mem_size_ = 0;

	564 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));

	565 if (dec->mem_ == NULL) {

	566 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,

	567 "no memory during frame initialization.");

	568 }

	569 // down-cast is ok, thanks to WebPSafeAlloc() above.

	570 dec->mem_size_ = (size_t)needed;

	571 }

	572

	573 mem = (uint8_t*)dec->mem_;

	574 dec->intra_t_ = (uint8_t*)mem;

	575 mem += intra_pred_mode_size;

	576

	577 dec->yuv_t_ = (VP8TopSamples*)mem;

	578 mem += top_size;

	579

	580 dec->mb_info_ = ((VP8MB*)mem) + 1;

	581 mem += mb_info_size;

	582

	583 dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;

	584 mem += f_info_size;

	585 dec->thread_ctx_.id_ = 0;

	586 dec->thread_ctx_.f_info_ = dec->f_info_;

	587 if (dec->mt_method_ > 0) {

	588 // secondary cache line. The deblocking process need to make use of the

	589 // filtering strength from previous macroblock row, while the new ones

	590 // are being decoded in parallel. We'll just swap the pointers.

	591 dec->thread_ctx_.f_info_ += mb_w;

	592 }

	593

	594 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);

	595 assert((yuv_size & ALIGN_MASK) == 0);

	596 dec->yuv_b_ = (uint8_t*)mem;

	597 mem += yuv_size;

	598

	599 dec->mb_data_ = (VP8MBData*)mem;

	600 dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;

	601 if (dec->mt_method_ == 2) {

	602 dec->thread_ctx_.mb_data_ += mb_w;

	603 }

	604 mem += mb_data_size;

	605

	606 dec->cache_y_stride_ = 16 * mb_w;

	607 dec->cache_uv_stride_ = 8 * mb_w;

	608 {

	609 const int extra_rows = kFilterExtraRows[dec->filter_type_];

	610 const int extra_y = extra_rows * dec->cache_y_stride_;

	611 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;

	612 dec->cache_y_ = ((uint8_t*)mem) + extra_y;

	613 dec->cache_u_ = dec->cache_y_

	614 + 16 * num_caches * dec->cache_y_stride_ + extra_uv;

	615 dec->cache_v_ = dec->cache_u_

	616 + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;

	617 dec->cache_id_ = 0;

	618 }

	619 mem += cache_size;

	620

	621 // alpha plane

	622 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;

	623 mem += alpha_size;

	624 assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);

	625

	626 // note: left/top-info is initialized once for all.

	627 memset(dec->mb_info_ - 1, 0, mb_info_size);

	628 VP8InitScanline(dec); // initialize left too.

	629

	630 // initialize top

	631 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);

	632

	633 return 1;

	634 }

	635

	636 static void InitIo(VP8Decoder* const dec, VP8Io* io) {

	637 // prepare 'io'

	638 io->mb_y = 0;

	639 io->y = dec->cache_y_;

	640 io->u = dec->cache_u_;

	641 io->v = dec->cache_v_;

	642 io->y_stride = dec->cache_y_stride_;

	643 io->uv_stride = dec->cache_uv_stride_;

	644 io->a = NULL;

	645 }

	646

	647 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {

	648 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.

	649 if (!AllocateMemory(dec)) return 0;

	650 InitIo(dec, io);

	651 VP8DspInit(); // Init critical function pointers and look-up tables.

	652 return 1;

	653 }

	654

	655 //------------------------------------------------------------------------------

	656 // Main reconstruction function.

	657

	658 static const int kScan[16] = {

	659 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,

	660 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,

	661 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,

	662 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS

	663 };

	664

	665 static int CheckMode(int mb_x, int mb_y, int mode) {

	666 if (mode == B_DC_PRED) {

	667 if (mb_x == 0) {

	668 return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;

	669 } else {

	670 return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;

	671 }

	672 }

	673 return mode;

	674 }

	675

	// Copies 4 bytes (32 bits) from 'src' to 'dst'. The two areas must not
	// overlap. memcpy() is used so that no particular alignment is required.
	static void Copy32b(uint8_t* dst, const uint8_t* src) {
	  memcpy(dst, src, 4);
	}

	679

	680 static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,

	681 uint8_t* const dst) {

	682 switch (bits >> 30) {

	683 case 3:

	684 VP8Transform(src, dst, 0);

	685 break;

	686 case 2:

	687 VP8TransformAC3(src, dst);

	688 break;

	689 case 1:

	690 VP8TransformDC(src, dst);

	691 break;

	692 default:

	693 break;

	694 }

	695 }

	696

	// Adds the residual of one chroma plane to 'dst', using the low 8 bits of
	// 'bits': nothing is done when they are all zero, the DC-only transform is
	// used when none of the 0xaa bits is set, the full one otherwise.
	static void DoUVTransform(uint32_t bits, const int16_t* const src,
	                          uint8_t* const dst) {
	  if ((bits & 0xff) == 0) {
	    return;   // no non-zero coeff at all
	  }
	  if ((bits & 0xaa) == 0) {
	    // no non-zero AC coefficient
	    VP8TransformDCUV(src, dst);
	  } else {
	    // note we don't use the AC3 variant for U/V
	    VP8TransformUV(src, dst);
	  }
	}

	707

	708 static void ReconstructRow(const VP8Decoder* const dec,

	709 const VP8ThreadContext* ctx) {

	710 int j;

	711 int mb_x;

	712 const int mb_y = ctx->mb_y_;

	713 const int cache_id = ctx->id_;

	714 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;

	715 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;

	716 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;

	717 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {

	718 const VP8MBData* const block = ctx->mb_data_ + mb_x;

	719

	720 // Rotate in the left samples from previously decoded block. We move four

	721 // pixels at a time for alignment reason, and because of in-loop filter.

	722 if (mb_x > 0) {

	723 for (j = -1; j < 16; ++j) {

	724 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);

	725 }

	726 for (j = -1; j < 8; ++j) {

	727 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);

	728 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);

	729 }

	730 } else {

	731 for (j = 0; j < 16; ++j) {

	732 y_dst[j * BPS - 1] = 129;

	733 }

	734 for (j = 0; j < 8; ++j) {

	735 u_dst[j * BPS - 1] = 129;

	736 v_dst[j * BPS - 1] = 129;

	737 }

	738 // Init top-left sample on left column too

	739 if (mb_y > 0) {

	740 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;

	741 }

	742 }

	743 {

	744 // bring top samples into the cache

	745 VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;

	746 const int16_t* const coeffs = block->coeffs_;

	747 uint32_t bits = block->non_zero_y_;

	748 int n;

	749

	750 if (mb_y > 0) {

	751 memcpy(y_dst - BPS, top_yuv[0].y, 16);

	752 memcpy(u_dst - BPS, top_yuv[0].u, 8);

	753 memcpy(v_dst - BPS, top_yuv[0].v, 8);

	754 } else if (mb_x == 0) {

	755 // we only need to do this init once at block (0,0).

	756 // Afterward, it remains valid for the whole topmost row.

	757 memset(y_dst - BPS - 1, 127, 16 + 4 + 1);

	758 memset(u_dst - BPS - 1, 127, 8 + 1);

	759 memset(v_dst - BPS - 1, 127, 8 + 1);

	760 }

	761

	762 // predict and add residuals

	763 if (block->is_i4x4_) { // 4x4

	764 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

	765

	766 if (mb_y > 0) {

	767 if (mb_x >= dec->mb_w_ - 1) { // on rightmost border

	768 memset(top_right, top_yuv[0].y[15], sizeof(*top_right));

	769 } else {

	770 memcpy(top_right, top_yuv[1].y, sizeof(*top_right));

	771 }

	772 }

	773 // replicate the top-right pixels below

	774 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];

	775

	776 // predict and add residuals for all 4x4 blocks in turn.

	777 for (n = 0; n < 16; ++n, bits <<= 2) {

	778 uint8_t* const dst = y_dst + kScan[n];

	779 VP8PredLuma4[block->imodes_[n]](dst);

	780 DoTransform(bits, coeffs + n * 16, dst);

	781 }

	782 } else { // 16x16

	783 const int pred_func = CheckMode(mb_x, mb_y,

	784 block->imodes_[0]);

	785 VP8PredLuma16[pred_func](y_dst);

	786 if (bits != 0) {

	787 for (n = 0; n < 16; ++n, bits <<= 2) {

	788 DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);

	789 }

	790 }

	791 }

	792 {

	793 // Chroma

	794 const uint32_t bits_uv = block->non_zero_uv_;

	795 const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);

	796 VP8PredChroma8[pred_func](u_dst);

	797 VP8PredChroma8[pred_func](v_dst);

	798 DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);

	799 DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);

	800 }

	801

	802 // stash away top samples for next block

	803 if (mb_y < dec->mb_h_ - 1) {

	804 memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);

	805 memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);

	806 memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);

	807 }

	808 }

	809 // Transfer reconstructed samples from yuv_b_ cache to final destination.

	810 {

	811 const int y_offset = cache_id * 16 * dec->cache_y_stride_;

	812 const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;

	813 uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;

	814 uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;

	815 uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;

	816 for (j = 0; j < 16; ++j) {

	817 memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);

	818 }

	819 for (j = 0; j < 8; ++j) {

	820 memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);

	821 memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);

	822 }

	823 }

	824 }

	825 }

	826

	827 //------------------------------------------------------------------------------

	828

OLD	NEW

« src/codec/SkWebpCodec.cpp ('K') | « third_party/libwebp/dec/decode_vp8.h ('k') | third_party/libwebp/dec/idec.c » ('j') | no next file with comments »