Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(596)

Side by Side Diff: third_party/libwebp/dec/frame.c

Issue 10832153: libwebp: update snapshot to v0.2.0-rc1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2010 Google Inc. 1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // 2 //
3 // This code is licensed under the same terms as WebM: 3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/ 4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // ----------------------------------------------------------------------------- 6 // -----------------------------------------------------------------------------
7 // 7 //
8 // Frame-reconstruction function. Memory allocation. 8 // Frame-reconstruction function. Memory allocation.
9 // 9 //
10 // Author: Skal (pascal.massimino@gmail.com) 10 // Author: Skal (pascal.massimino@gmail.com)
11 11
12 #include <stdlib.h> 12 #include <stdlib.h>
13 #include "./vp8i.h" 13 #include "./vp8i.h"
14 #include "../utils/utils.h"
14 15
15 #if defined(__cplusplus) || defined(c_plusplus) 16 #if defined(__cplusplus) || defined(c_plusplus)
16 extern "C" { 17 extern "C" {
17 #endif 18 #endif
18 19
19 #define ALIGN_MASK (32 - 1) 20 #define ALIGN_MASK (32 - 1)
20 21
21 //------------------------------------------------------------------------------ 22 //------------------------------------------------------------------------------
22 // For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line. 23 // Filtering
23 //
24 // Reason is: the deblocking filter cannot deblock the bottom horizontal edges
25 // immediately, and needs to wait for first few rows of the next macroblock to
26 // be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
27 // on strength).
28 // With two threads, the vertical positions of the rows being decoded are:
29 // Decode: [ 0..15][16..31][32..47][48..63][64..79][...
30 // Deblock: [ 0..11][12..27][28..43][44..59][...
31 // If we use two threads and two caches of 16 pixels, the sequence would be:
32 // Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
33 // Deblock: [ 0..11][12..27!!][-4..11][12..27][...
34 // The problem occurs during row [12..15!!] that both the decoding and
35 // deblocking threads are writing simultaneously.
36 // With 3 cache lines, one gets a safe write pattern:
37 // Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
38 // Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...
39 // Note that multi-threaded output _without_ deblocking can make use of two
40 // cache lines of 16 pixels only, since there's no lagging behind. The decoding
41 // and output process have non-concurrent writing:
42 // Decode: [ 0..15][16..31][ 0..15][16..31][...
43 // io->put: [ 0..15][16..31][ 0..15][...
44
45 #define MT_CACHE_LINES 3
46 #define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
47
48 // Initialize multi/single-thread worker
49 static int InitThreadContext(VP8Decoder* const dec) {
50 dec->cache_id_ = 0;
51 if (dec->use_threads_) {
52 WebPWorker* const worker = &dec->worker_;
53 if (!WebPWorkerReset(worker)) {
54 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
55 "thread initialization failed.");
56 }
57 worker->data1 = dec;
58 worker->data2 = (void*)&dec->thread_ctx_.io_;
59 worker->hook = (WebPWorkerHook)VP8FinishRow;
60 dec->num_caches_ =
61 (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
62 } else {
63 dec->num_caches_ = ST_CACHE_LINES;
64 }
65 return 1;
66 }
67
68 //------------------------------------------------------------------------------
69 // Memory setup
70 24
71 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary 25 // kFilterExtraRows[] = How many extra lines are needed on the MB boundary
72 // for caching, given a filtering level. 26 // for caching, given a filtering level.
73 // Simple filter: up to 2 luma samples are read and 1 is written. 27 // Simple filter: up to 2 luma samples are read and 1 is written.
74 // Complex filter: up to 4 luma samples are read and 3 are written. Same for 28 // Complex filter: up to 4 luma samples are read and 3 are written. Same for
75 // U/V, so it's 8 samples total (because of the 2x upsampling). 29 // U/V, so it's 8 samples total (because of the 2x upsampling).
76 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 }; 30 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
77 31
78 static int AllocateMemory(VP8Decoder* const dec) { 32 static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
79 const int num_caches = dec->num_caches_;
80 const int mb_w = dec->mb_w_;
81 const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
82 const int top_size = (16 + 8 + 8) * mb_w;
83 const int mb_info_size = (mb_w + 1) * sizeof(VP8MB);
84 const int f_info_size =
85 (dec->filter_type_ > 0) ?
86 mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
87 : 0;
88 const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
89 const int coeffs_size = 384 * sizeof(*dec->coeffs_);
90 const int cache_height = (16 * num_caches
91 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
92 const int cache_size = top_size * cache_height;
93 const int alpha_size =
94 dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0;
95 const int needed = intra_pred_mode_size
96 + top_size + mb_info_size + f_info_size
97 + yuv_size + coeffs_size
98 + cache_size + alpha_size + ALIGN_MASK;
99 uint8_t* mem;
100
101 if (needed > dec->mem_size_) {
102 free(dec->mem_);
103 dec->mem_size_ = 0;
104 dec->mem_ = (uint8_t*)malloc(needed);
105 if (dec->mem_ == NULL) {
106 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
107 "no memory during frame initialization.");
108 }
109 dec->mem_size_ = needed;
110 }
111
112 mem = (uint8_t*)dec->mem_;
113 dec->intra_t_ = (uint8_t*)mem;
114 mem += intra_pred_mode_size;
115
116 dec->y_t_ = (uint8_t*)mem;
117 mem += 16 * mb_w;
118 dec->u_t_ = (uint8_t*)mem;
119 mem += 8 * mb_w;
120 dec->v_t_ = (uint8_t*)mem;
121 mem += 8 * mb_w;
122
123 dec->mb_info_ = ((VP8MB*)mem) + 1;
124 mem += mb_info_size;
125
126 dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
127 mem += f_info_size;
128 dec->thread_ctx_.id_ = 0;
129 dec->thread_ctx_.f_info_ = dec->f_info_;
130 if (dec->use_threads_) {
131 // secondary cache line. The deblocking process needs to make use of the
132 // filtering strength from previous macroblock row, while the new ones
133 // are being decoded in parallel. We'll just swap the pointers.
134 dec->thread_ctx_.f_info_ += mb_w;
135 }
136
137 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
138 assert((yuv_size & ALIGN_MASK) == 0);
139 dec->yuv_b_ = (uint8_t*)mem;
140 mem += yuv_size;
141
142 dec->coeffs_ = (int16_t*)mem;
143 mem += coeffs_size;
144
145 dec->cache_y_stride_ = 16 * mb_w;
146 dec->cache_uv_stride_ = 8 * mb_w;
147 {
148 const int extra_rows = kFilterExtraRows[dec->filter_type_];
149 const int extra_y = extra_rows * dec->cache_y_stride_;
150 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
151 dec->cache_y_ = ((uint8_t*)mem) + extra_y;
152 dec->cache_u_ = dec->cache_y_
153 + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
154 dec->cache_v_ = dec->cache_u_
155 + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
156 dec->cache_id_ = 0;
157 }
158 mem += cache_size;
159
160 // alpha plane
161 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
162 mem += alpha_size;
163
164 // note: left-info is initialized once for all.
165 memset(dec->mb_info_ - 1, 0, mb_info_size);
166
167 // initialize top
168 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
169
170 return 1;
171 }
172
173 static void InitIo(VP8Decoder* const dec, VP8Io* io) {
174 // prepare 'io'
175 io->mb_y = 0;
176 io->y = dec->cache_y_;
177 io->u = dec->cache_u_;
178 io->v = dec->cache_v_;
179 io->y_stride = dec->cache_y_stride_;
180 io->uv_stride = dec->cache_uv_stride_;
181 io->fancy_upsampling = 0; // default
182 io->a = NULL;
183 }
184
185 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
186 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
187 if (!AllocateMemory(dec)) return 0;
188 InitIo(dec, io);
189 VP8DspInit(); // Init critical function pointers and look-up tables.
190 return 1;
191 }
192
193 //------------------------------------------------------------------------------
194 // Filtering
195
196 static inline int hev_thresh_from_level(int level, int keyframe) {
197 if (keyframe) { 33 if (keyframe) {
198 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0; 34 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
199 } else { 35 } else {
200 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0; 36 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0;
201 } 37 }
202 } 38 }
203 39
204 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { 40 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
205 const VP8ThreadContext* const ctx = &dec->thread_ctx_; 41 const VP8ThreadContext* const ctx = &dec->thread_ctx_;
206 const int y_bps = dec->cache_y_stride_; 42 const int y_bps = dec->cache_y_stride_;
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 // pixels as they are yet unfiltered. They will be when the next macroblock 155 // pixels as they are yet unfiltered. They will be when the next macroblock
320 // row is decoded. Meanwhile, we must preserve them by rotating them in the 156 // row is decoded. Meanwhile, we must preserve them by rotating them in the
321 // cache area. This doesn't hold for the very bottom row of the uncropped 157 // cache area. This doesn't hold for the very bottom row of the uncropped
322 // picture of course. 158 // picture of course.
323 // * we must clip the remaining pixels against the cropping area. The VP8Io 159 // * we must clip the remaining pixels against the cropping area. The VP8Io
324 // struct must have the following fields set correctly before calling put(): 160 // struct must have the following fields set correctly before calling put():
325 161
326 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB 162 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
327 163
328 // Finalize and transmit a complete row. Return false in case of user-abort. 164 // Finalize and transmit a complete row. Return false in case of user-abort.
329 int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) { 165 static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
330 int ok = 1; 166 int ok = 1;
331 const VP8ThreadContext* const ctx = &dec->thread_ctx_; 167 const VP8ThreadContext* const ctx = &dec->thread_ctx_;
332 const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; 168 const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
333 const int ysize = extra_y_rows * dec->cache_y_stride_; 169 const int ysize = extra_y_rows * dec->cache_y_stride_;
334 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; 170 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
335 const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_; 171 const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
336 const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_; 172 const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
337 uint8_t* const ydst = dec->cache_y_ - ysize + y_offset; 173 uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
338 uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset; 174 uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
339 uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset; 175 uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
(...skipping 18 matching lines...) Expand all
358 io->v = dec->cache_v_ + uv_offset; 194 io->v = dec->cache_v_ + uv_offset;
359 } 195 }
360 196
361 if (!last_row) { 197 if (!last_row) {
362 y_end -= extra_y_rows; 198 y_end -= extra_y_rows;
363 } 199 }
364 if (y_end > io->crop_bottom) { 200 if (y_end > io->crop_bottom) {
365 y_end = io->crop_bottom; // make sure we don't overflow on last row. 201 y_end = io->crop_bottom; // make sure we don't overflow on last row.
366 } 202 }
367 io->a = NULL; 203 io->a = NULL;
368 #ifdef WEBP_EXPERIMENTAL_FEATURES 204 if (dec->alpha_data_ != NULL && y_start < y_end) {
369 if (dec->alpha_data_) { 205 // TODO(skal): several things to correct here:
206 // * testing presence of alpha with dec->alpha_data_ is not a good idea
207 // * we're actually decompressing the full plane only once. It should be
208 // more obvious from signature.
209 // * we could free alpha_data_ right after this call, but we don't own.
370 io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); 210 io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
371 if (io->a == NULL) { 211 if (io->a == NULL) {
372 return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, 212 return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
373 "Could not decode alpha data."); 213 "Could not decode alpha data.");
374 } 214 }
375 } 215 }
376 #endif
377 if (y_start < io->crop_top) { 216 if (y_start < io->crop_top) {
378 const int delta_y = io->crop_top - y_start; 217 const int delta_y = io->crop_top - y_start;
379 y_start = io->crop_top; 218 y_start = io->crop_top;
380 assert(!(delta_y & 1)); 219 assert(!(delta_y & 1));
381 io->y += dec->cache_y_stride_ * delta_y; 220 io->y += dec->cache_y_stride_ * delta_y;
382 io->u += dec->cache_uv_stride_ * (delta_y >> 1); 221 io->u += dec->cache_uv_stride_ * (delta_y >> 1);
383 io->v += dec->cache_uv_stride_ * (delta_y >> 1); 222 io->v += dec->cache_uv_stride_ * (delta_y >> 1);
384 if (io->a) { 223 if (io->a != NULL) {
385 io->a += io->width * delta_y; 224 io->a += io->width * delta_y;
386 } 225 }
387 } 226 }
388 if (y_start < y_end) { 227 if (y_start < y_end) {
389 io->y += io->crop_left; 228 io->y += io->crop_left;
390 io->u += io->crop_left >> 1; 229 io->u += io->crop_left >> 1;
391 io->v += io->crop_left >> 1; 230 io->v += io->crop_left >> 1;
392 if (io->a) { 231 if (io->a != NULL) {
393 io->a += io->crop_left; 232 io->a += io->crop_left;
394 } 233 }
395 io->mb_y = y_start - io->crop_top; 234 io->mb_y = y_start - io->crop_top;
396 io->mb_w = io->crop_right - io->crop_left; 235 io->mb_w = io->crop_right - io->crop_left;
397 io->mb_h = y_end - y_start; 236 io->mb_h = y_end - y_start;
398 ok = io->put(io); 237 ok = io->put(io);
399 } 238 }
400 } 239 }
401 // rotate top samples if needed 240 // rotate top samples if needed
402 if (ctx->id_ + 1 == dec->num_caches_) { 241 if (ctx->id_ + 1 == dec->num_caches_) {
(...skipping 11 matching lines...) Expand all
414 253
415 //------------------------------------------------------------------------------ 254 //------------------------------------------------------------------------------
416 255
417 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { 256 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
418 int ok = 1; 257 int ok = 1;
419 VP8ThreadContext* const ctx = &dec->thread_ctx_; 258 VP8ThreadContext* const ctx = &dec->thread_ctx_;
420 if (!dec->use_threads_) { 259 if (!dec->use_threads_) {
421 // ctx->id_ and ctx->f_info_ are already set 260 // ctx->id_ and ctx->f_info_ are already set
422 ctx->mb_y_ = dec->mb_y_; 261 ctx->mb_y_ = dec->mb_y_;
423 ctx->filter_row_ = dec->filter_row_; 262 ctx->filter_row_ = dec->filter_row_;
424 ok = VP8FinishRow(dec, io); 263 ok = FinishRow(dec, io);
425 } else { 264 } else {
426 WebPWorker* const worker = &dec->worker_; 265 WebPWorker* const worker = &dec->worker_;
427 // Finish previous job *before* updating context 266 // Finish previous job *before* updating context
428 ok &= WebPWorkerSync(worker); 267 ok &= WebPWorkerSync(worker);
429 assert(worker->status_ == OK); 268 assert(worker->status_ == OK);
430 if (ok) { // spawn a new deblocking/output job 269 if (ok) { // spawn a new deblocking/output job
431 ctx->io_ = *io; 270 ctx->io_ = *io;
432 ctx->id_ = dec->cache_id_; 271 ctx->id_ = dec->cache_id_;
433 ctx->mb_y_ = dec->mb_y_; 272 ctx->mb_y_ = dec->mb_y_;
434 ctx->filter_row_ = dec->filter_row_; 273 ctx->filter_row_ = dec->filter_row_;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 // TODO(skal): add an 'approximate_decoding' option, that won't produce 314 // TODO(skal): add an 'approximate_decoding' option, that won't produce
476 // a 1:1 bit-exactness for complex filtering? 315 // a 1:1 bit-exactness for complex filtering?
477 { 316 {
478 const int extra_pixels = kFilterExtraRows[dec->filter_type_]; 317 const int extra_pixels = kFilterExtraRows[dec->filter_type_];
479 if (dec->filter_type_ == 2) { 318 if (dec->filter_type_ == 2) {
480 // For complex filter, we need to preserve the dependency chain. 319 // For complex filter, we need to preserve the dependency chain.
481 dec->tl_mb_x_ = 0; 320 dec->tl_mb_x_ = 0;
482 dec->tl_mb_y_ = 0; 321 dec->tl_mb_y_ = 0;
483 } else { 322 } else {
484 // For simple filter, we can filter only the cropped region. 323 // For simple filter, we can filter only the cropped region.
485 dec->tl_mb_y_ = io->crop_top >> 4; 324 // We include 'extra_pixels' on the other side of the boundary, since
486 dec->tl_mb_x_ = io->crop_left >> 4; 325 // vertical or horizontal filtering of the previous macroblock can
326 // modify some abutting pixels.
327 dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
328 dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
329 if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
330 if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
487 } 331 }
488 // We need some 'extra' pixels on the right/bottom. 332 // We need some 'extra' pixels on the right/bottom.
489 dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4; 333 dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
490 dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4; 334 dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
491 if (dec->br_mb_x_ > dec->mb_w_) { 335 if (dec->br_mb_x_ > dec->mb_w_) {
492 dec->br_mb_x_ = dec->mb_w_; 336 dec->br_mb_x_ = dec->mb_w_;
493 } 337 }
494 if (dec->br_mb_y_ > dec->mb_h_) { 338 if (dec->br_mb_y_ > dec->mb_h_) {
495 dec->br_mb_y_ = dec->mb_h_; 339 dec->br_mb_y_ = dec->mb_h_;
496 } 340 }
497 } 341 }
498 return VP8_STATUS_OK; 342 return VP8_STATUS_OK;
499 } 343 }
500 344
501 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { 345 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
502 int ok = 1; 346 int ok = 1;
503 if (dec->use_threads_) { 347 if (dec->use_threads_) {
504 ok = WebPWorkerSync(&dec->worker_); 348 ok = WebPWorkerSync(&dec->worker_);
505 } 349 }
506 350
507 if (io->teardown) { 351 if (io->teardown) {
508 io->teardown(io); 352 io->teardown(io);
509 } 353 }
510 return ok; 354 return ok;
511 } 355 }
512 356
513 //------------------------------------------------------------------------------ 357 //------------------------------------------------------------------------------
358 // For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
359 //
360 // Reason is: the deblocking filter cannot deblock the bottom horizontal edges
361 // immediately, and needs to wait for first few rows of the next macroblock to
362 // be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
363 // on strength).
364 // With two threads, the vertical positions of the rows being decoded are:
365 // Decode: [ 0..15][16..31][32..47][48..63][64..79][...
366 // Deblock: [ 0..11][12..27][28..43][44..59][...
367 // If we use two threads and two caches of 16 pixels, the sequence would be:
368 // Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
369 // Deblock: [ 0..11][12..27!!][-4..11][12..27][...
370 // The problem occurs during row [12..15!!] that both the decoding and
371 // deblocking threads are writing simultaneously.
372 // With 3 cache lines, one gets a safe write pattern:
373 // Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
374 // Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...
375 // Note that multi-threaded output _without_ deblocking can make use of two
376 // cache lines of 16 pixels only, since there's no lagging behind. The decoding
377 // and output process have non-concurrent writing:
378 // Decode: [ 0..15][16..31][ 0..15][16..31][...
379 // io->put: [ 0..15][16..31][ 0..15][...
380
381 #define MT_CACHE_LINES 3
382 #define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
383
384 // Initialize multi/single-thread worker
385 static int InitThreadContext(VP8Decoder* const dec) {
386 dec->cache_id_ = 0;
387 if (dec->use_threads_) {
388 WebPWorker* const worker = &dec->worker_;
389 if (!WebPWorkerReset(worker)) {
390 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
391 "thread initialization failed.");
392 }
393 worker->data1 = dec;
394 worker->data2 = (void*)&dec->thread_ctx_.io_;
395 worker->hook = (WebPWorkerHook)FinishRow;
396 dec->num_caches_ =
397 (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
398 } else {
399 dec->num_caches_ = ST_CACHE_LINES;
400 }
401 return 1;
402 }
403
404 #undef MT_CACHE_LINES
405 #undef ST_CACHE_LINES
406
407 //------------------------------------------------------------------------------
408 // Memory setup
409
410 static int AllocateMemory(VP8Decoder* const dec) {
411 const int num_caches = dec->num_caches_;
412 const int mb_w = dec->mb_w_;
413 // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
414 const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
415 const size_t top_size = (16 + 8 + 8) * mb_w;
416 const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
417 const size_t f_info_size =
418 (dec->filter_type_ > 0) ?
419 mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
420 : 0;
421 const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
422 const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
423 const size_t cache_height = (16 * num_caches
424 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
425 const size_t cache_size = top_size * cache_height;
426 // alpha_size is the only one that scales as width x height.
427 const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
428 (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
429 const uint64_t needed = (uint64_t)intra_pred_mode_size
430 + top_size + mb_info_size + f_info_size
431 + yuv_size + coeffs_size
432 + cache_size + alpha_size + ALIGN_MASK;
433 uint8_t* mem;
434
435 if (needed != (size_t)needed) return 0; // check for overflow
436 if (needed > dec->mem_size_) {
437 free(dec->mem_);
438 dec->mem_size_ = 0;
439 dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
440 if (dec->mem_ == NULL) {
441 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
442 "no memory during frame initialization.");
443 }
444 // down-cast is ok, thanks to WebPSafeMalloc() above.
445 dec->mem_size_ = (size_t)needed;
446 }
447
448 mem = (uint8_t*)dec->mem_;
449 dec->intra_t_ = (uint8_t*)mem;
450 mem += intra_pred_mode_size;
451
452 dec->y_t_ = (uint8_t*)mem;
453 mem += 16 * mb_w;
454 dec->u_t_ = (uint8_t*)mem;
455 mem += 8 * mb_w;
456 dec->v_t_ = (uint8_t*)mem;
457 mem += 8 * mb_w;
458
459 dec->mb_info_ = ((VP8MB*)mem) + 1;
460 mem += mb_info_size;
461
462 dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
463 mem += f_info_size;
464 dec->thread_ctx_.id_ = 0;
465 dec->thread_ctx_.f_info_ = dec->f_info_;
466 if (dec->use_threads_) {
467 // secondary cache line. The deblocking process needs to make use of the
468 // filtering strength from previous macroblock row, while the new ones
469 // are being decoded in parallel. We'll just swap the pointers.
470 dec->thread_ctx_.f_info_ += mb_w;
471 }
472
473 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
474 assert((yuv_size & ALIGN_MASK) == 0);
475 dec->yuv_b_ = (uint8_t*)mem;
476 mem += yuv_size;
477
478 dec->coeffs_ = (int16_t*)mem;
479 mem += coeffs_size;
480
481 dec->cache_y_stride_ = 16 * mb_w;
482 dec->cache_uv_stride_ = 8 * mb_w;
483 {
484 const int extra_rows = kFilterExtraRows[dec->filter_type_];
485 const int extra_y = extra_rows * dec->cache_y_stride_;
486 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
487 dec->cache_y_ = ((uint8_t*)mem) + extra_y;
488 dec->cache_u_ = dec->cache_y_
489 + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
490 dec->cache_v_ = dec->cache_u_
491 + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
492 dec->cache_id_ = 0;
493 }
494 mem += cache_size;
495
496 // alpha plane
497 dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
498 mem += alpha_size;
499
500 // note: left-info is initialized once for all.
501 memset(dec->mb_info_ - 1, 0, mb_info_size);
502
503 // initialize top
504 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
505
506 return 1;
507 }
508
509 static void InitIo(VP8Decoder* const dec, VP8Io* io) {
510 // prepare 'io'
511 io->mb_y = 0;
512 io->y = dec->cache_y_;
513 io->u = dec->cache_u_;
514 io->v = dec->cache_v_;
515 io->y_stride = dec->cache_y_stride_;
516 io->uv_stride = dec->cache_uv_stride_;
517 io->a = NULL;
518 }
519
520 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
521 if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
522 if (!AllocateMemory(dec)) return 0;
523 InitIo(dec, io);
524 VP8DspInit(); // Init critical function pointers and look-up tables.
525 return 1;
526 }
527
528 //------------------------------------------------------------------------------
514 // Main reconstruction function. 529 // Main reconstruction function.
515 530
516 static const int kScan[16] = { 531 static const int kScan[16] = {
517 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 532 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
518 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 533 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
519 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 534 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
520 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS 535 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
521 }; 536 };
522 537
523 static inline int CheckMode(VP8Decoder* const dec, int mode) { 538 static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {
524 if (mode == B_DC_PRED) { 539 if (mode == B_DC_PRED) {
525 if (dec->mb_x_ == 0) { 540 if (dec->mb_x_ == 0) {
526 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; 541 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
527 } else { 542 } else {
528 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; 543 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
529 } 544 }
530 } 545 }
531 return mode; 546 return mode;
532 } 547 }
533 548
534 static inline void Copy32b(uint8_t* dst, uint8_t* src) { 549 static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
535 *(uint32_t*)dst = *(uint32_t*)src; 550 *(uint32_t*)dst = *(uint32_t*)src;
536 } 551 }
537 552
538 void VP8ReconstructBlock(VP8Decoder* const dec) { 553 void VP8ReconstructBlock(VP8Decoder* const dec) {
539 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; 554 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
540 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; 555 uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
541 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; 556 uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
542 557
543 // Rotate in the left samples from previously decoded block. We move four 558 // Rotate in the left samples from previously decoded block. We move four
544 // pixels at a time for alignment reason, and because of in-loop filter. 559 // pixels at a time for alignment reason, and because of in-loop filter.
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
655 } 670 }
656 } 671 }
657 } 672 }
658 } 673 }
659 674
660 //------------------------------------------------------------------------------ 675 //------------------------------------------------------------------------------
661 676
662 #if defined(__cplusplus) || defined(c_plusplus) 677 #if defined(__cplusplus) || defined(c_plusplus)
663 } // extern "C" 678 } // extern "C"
664 #endif 679 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698