OLD | NEW |
(Empty) | |
| 1 // Copyright 2010 Google Inc. |
| 2 // |
| 3 // This code is licensed under the same terms as WebM: |
| 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
| 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
| 6 // ----------------------------------------------------------------------------- |
| 7 // |
| 8 // Frame-reconstruction function. Memory allocation. |
| 9 // |
| 10 // Author: Skal (pascal.massimino@gmail.com) |
| 11 |
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vp8i.h"
| 14 |
| 15 #if defined(__cplusplus) || defined(c_plusplus) |
| 16 extern "C" { |
| 17 #endif |
| 18 |
| 19 #define ALIGN_MASK (32 - 1) |
| 20 |
| 21 //----------------------------------------------------------------------------- |
| 22 // Memory setup |
| 23 |
| 24 // how many extra luma lines are needed for caching, given a filtering level |
| 25 static const uint8_t kFilterExtraRows[3] = { 0, 4, 8 }; |
| 26 |
| 27 int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { |
| 28 const int mb_w = dec->mb_w_; |
| 29 const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); |
| 30 const int top_size = (16 + 8 + 8) * mb_w; |
| 31 const int info_size = (mb_w + 1) * sizeof(VP8MB); |
| 32 const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); |
| 33 const int coeffs_size = 384 * sizeof(*dec->coeffs_); |
| 34 const int cache_height = (dec->filter_type_ == 0) ? 0 : |
| 35 (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; |
| 36 const int cache_size = top_size * cache_height; |
| 37 const int needed = intra_pred_mode_size |
| 38 + top_size + info_size |
| 39 + yuv_size + coeffs_size |
| 40 + cache_size + ALIGN_MASK; |
| 41 if (needed > dec->mem_size_) { |
| 42 free(dec->mem_); |
| 43 dec->mem_size_ = 0; |
| 44 dec->mem_ = (uint8_t*)malloc(needed); |
| 45 if (dec->mem_ == NULL) { |
| 46 return VP8SetError(dec, 1, "no memory during frame initialization."); |
| 47 } |
| 48 dec->mem_size_ = needed; |
| 49 } |
| 50 |
| 51 uint8_t* mem = (uint8_t*)dec->mem_; |
| 52 dec->intra_t_ = (uint8_t*)mem; |
| 53 mem += intra_pred_mode_size; |
| 54 |
| 55 dec->y_t_ = (uint8_t*)mem; |
| 56 mem += 16 * mb_w; |
| 57 dec->u_t_ = (uint8_t*)mem; |
| 58 mem += 8 * mb_w; |
| 59 dec->v_t_ = (uint8_t*)mem; |
| 60 mem += 8 * mb_w; |
| 61 |
| 62 dec->mb_info_ = ((VP8MB*)mem) + 1; |
| 63 mem += info_size; |
| 64 |
| 65 mem = (uint8_t*)((uint64_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); |
| 66 assert((yuv_size & ALIGN_MASK) == 0); |
| 67 dec->yuv_b_ = (uint8_t*)mem; |
| 68 mem += yuv_size; |
| 69 |
| 70 dec->coeffs_ = (int16_t*)mem; |
| 71 mem += coeffs_size; |
| 72 |
| 73 dec->cache_y_stride_ = 16 * mb_w; |
| 74 dec->cache_uv_stride_ = 8 * mb_w; |
| 75 if (dec->filter_type_ == 0) { |
| 76 dec->cache_y_ = NULL; |
| 77 dec->cache_u_ = NULL; |
| 78 dec->cache_v_ = NULL; |
| 79 } else { |
| 80 const int extra_rows = kFilterExtraRows[dec->filter_type_]; |
| 81 const int extra_y = extra_rows * dec->cache_y_stride_; |
| 82 const int extra_uv =(extra_rows / 2) * dec->cache_uv_stride_; |
| 83 dec->cache_y_ = ((uint8_t*)mem) + extra_y; |
| 84 dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv; |
| 85 dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv; |
| 86 } |
| 87 mem += cache_size; |
| 88 |
| 89 // note: left-info is initialized once for all. |
| 90 memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_)); |
| 91 |
| 92 // initialize top |
| 93 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); |
| 94 |
| 95 // prepare 'io' |
| 96 io->width = dec->pic_hdr_.width_; |
| 97 io->height = dec->pic_hdr_.height_; |
| 98 io->mb_x = 0; |
| 99 io->mb_y = 0; |
| 100 if (dec->filter_type_ == 0) { |
| 101 io->y = dec->yuv_b_ + Y_OFF; |
| 102 io->u = dec->yuv_b_ + U_OFF; |
| 103 io->v = dec->yuv_b_ + V_OFF; |
| 104 io->y_stride = BPS; |
| 105 io->uv_stride = BPS; |
| 106 } else { |
| 107 io->y = dec->cache_y_; |
| 108 io->u = dec->cache_u_; |
| 109 io->v = dec->cache_v_; |
| 110 io->y_stride = dec->cache_y_stride_; |
| 111 io->uv_stride = dec->cache_uv_stride_; |
| 112 io->mb_w = io->width; |
| 113 } |
| 114 |
| 115 // Init critical function pointers and look-up tables. |
| 116 VP8DspInitTables(); |
| 117 VP8DspInit(); |
| 118 |
| 119 return 1; |
| 120 } |
| 121 |
| 122 //----------------------------------------------------------------------------- |
| 123 // Filtering |
| 124 |
| 125 static inline int hev_thresh_from_level(int level, int keyframe) { |
| 126 if (keyframe) { |
| 127 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0; |
| 128 } else { |
| 129 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0; |
| 130 } |
| 131 } |
| 132 |
| 133 static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) { |
| 134 VP8MB* const mb = dec->mb_info_ + mb_x; |
| 135 uint8_t* const y_dst = dec->cache_y_ + mb_x * 16; |
| 136 const int y_bps = dec->cache_y_stride_; |
| 137 const int level = mb->f_level_; |
| 138 const int ilevel = mb->f_ilevel_; |
| 139 const int limit = 2 * level + ilevel; |
| 140 if (dec->filter_type_ == 1) { // simple |
| 141 if (mb_x > 0) { |
| 142 VP8SimpleHFilter16(y_dst, y_bps, limit + 4); |
| 143 } |
| 144 if (mb->f_inner_) { |
| 145 VP8SimpleHFilter16i(y_dst, y_bps, limit); |
| 146 } |
| 147 if (mb_y > 0) { |
| 148 VP8SimpleVFilter16(y_dst, y_bps, limit + 4); |
| 149 } |
| 150 if (mb->f_inner_) { |
| 151 VP8SimpleVFilter16i(y_dst, y_bps, limit); |
| 152 } |
| 153 } else { // complex |
| 154 uint8_t* const u_dst = dec->cache_u_ + mb_x * 8; |
| 155 uint8_t* const v_dst = dec->cache_v_ + mb_x * 8; |
| 156 const int uv_bps = dec->cache_uv_stride_; |
| 157 const int hev_thresh = |
| 158 hev_thresh_from_level(level, dec->frm_hdr_.key_frame_); |
| 159 if (mb_x > 0) { |
| 160 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); |
| 161 VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); |
| 162 } |
| 163 if (mb->f_inner_) { |
| 164 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); |
| 165 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); |
| 166 } |
| 167 if (mb_y > 0) { |
| 168 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); |
| 169 VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); |
| 170 } |
| 171 if (mb->f_inner_) { |
| 172 VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); |
| 173 VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); |
| 174 } |
| 175 } |
| 176 } |
| 177 |
| 178 void VP8StoreBlock(VP8Decoder* const dec) { |
| 179 VP8MB* const info = dec->mb_info_ + dec->mb_x_; |
| 180 int level = dec->filter_levels_[dec->segment_]; |
| 181 if (dec->filter_hdr_.use_lf_delta_) { |
| 182 // TODO(skal): only CURRENT is handled for now. |
| 183 level += dec->filter_hdr_.ref_lf_delta_[0]; |
| 184 if (dec->is_i4x4_) { |
| 185 level += dec->filter_hdr_.mode_lf_delta_[0]; |
| 186 } |
| 187 } |
| 188 level = (level < 0) ? 0 : (level > 63) ? 63 : level; |
| 189 info->f_level_ = level; |
| 190 |
| 191 if (dec->filter_hdr_.sharpness_ > 0) { |
| 192 if (dec->filter_hdr_.sharpness_ > 4) { |
| 193 level >>= 2; |
| 194 } else { |
| 195 level >>= 1; |
| 196 } |
| 197 if (level > 9 - dec->filter_hdr_.sharpness_) { |
| 198 level = 9 - dec->filter_hdr_.sharpness_; |
| 199 } |
| 200 } |
| 201 info->f_ilevel_ = (level < 1) ? 1 : level; |
| 202 info->f_inner_ = (!info->skip_ || dec->is_i4x4_); |
| 203 |
| 204 // Transfer samples to row cache |
| 205 uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16; |
| 206 uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8; |
| 207 uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8; |
| 208 for (int y = 0; y < 16; ++y) { |
| 209 memcpy(ydst + y * dec->cache_y_stride_, |
| 210 dec->yuv_b_ + Y_OFF + y * BPS, 16); |
| 211 } |
| 212 for (int y = 0; y < 8; ++y) { |
| 213 memcpy(udst + y * dec->cache_uv_stride_, |
| 214 dec->yuv_b_ + U_OFF + y * BPS, 8); |
| 215 memcpy(vdst + y * dec->cache_uv_stride_, |
| 216 dec->yuv_b_ + V_OFF + y * BPS, 8); |
| 217 } |
| 218 } |
| 219 |
| 220 void VP8FilterRow(VP8Decoder* const dec, VP8Io* io) { |
| 221 for (int mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { |
| 222 DoFilter(dec, mb_x, dec->mb_y_); |
| 223 } |
| 224 const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; |
| 225 const int ysize = extra_y_rows * dec->cache_y_stride_; |
| 226 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; |
| 227 uint8_t* const ydst = dec->cache_y_ - ysize; |
| 228 uint8_t* const udst = dec->cache_u_ - uvsize; |
| 229 uint8_t* const vdst = dec->cache_v_ - uvsize; |
| 230 if (io->put) { |
| 231 int y_end; |
| 232 if (dec->mb_y_ > 0) { |
| 233 io->mb_y = dec->mb_y_ * 16 - extra_y_rows; |
| 234 io->y = ydst; |
| 235 io->u = udst; |
| 236 io->v = vdst; |
| 237 if (dec->mb_y_ < dec->mb_h_ - 1) { |
| 238 y_end = io->mb_y + 16; |
| 239 } else { |
| 240 y_end = io->height; // last macroblock row. |
| 241 } |
| 242 } else { // first macroblock row. |
| 243 io->mb_y = 0; |
| 244 y_end = 16 - extra_y_rows; |
| 245 io->y = dec->cache_y_; |
| 246 io->u = dec->cache_u_; |
| 247 io->v = dec->cache_v_; |
| 248 } |
| 249 if (y_end > io->height) { |
| 250 y_end = io->height; |
| 251 } |
| 252 io->mb_h = y_end - io->mb_y; |
| 253 io->put(io); |
| 254 } |
| 255 // rotate top samples |
| 256 if (dec->mb_y_ < dec->mb_h_ - 1) { |
| 257 memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize); |
| 258 memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize); |
| 259 memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize); |
| 260 } |
| 261 } |
| 262 |
| 263 |
| 264 //----------------------------------------------------------------------------- |
| 265 // Main reconstruction function. |
| 266 |
| 267 static const int kScan[16] = { |
| 268 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
| 269 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
| 270 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
| 271 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS |
| 272 }; |
| 273 |
| 274 static inline int CheckMode(VP8Decoder* const dec, int mode) { |
| 275 if (mode == B_DC_PRED) { |
| 276 if (dec->mb_x_ == 0) { |
| 277 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; |
| 278 } else { |
| 279 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; |
| 280 } |
| 281 } |
| 282 return mode; |
| 283 } |
| 284 |
| 285 static inline void Copy32b(uint8_t* dst, uint8_t* src) { |
| 286 *(uint32_t*)dst = *(uint32_t*)src; |
| 287 } |
| 288 |
| 289 void VP8ReconstructBlock(VP8Decoder* const dec) { |
| 290 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; |
| 291 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; |
| 292 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; |
| 293 |
| 294 // Rotate in the left samples from previously decoded block. We move four |
| 295 // pixels at a time for alignment reason, and because of in-loop filter. |
| 296 if (dec->mb_x_ > 0) { |
| 297 for (int j = -1; j < 16; ++j) { |
| 298 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); |
| 299 } |
| 300 for (int j = -1; j < 8; ++j) { |
| 301 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); |
| 302 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); |
| 303 } |
| 304 } else { |
| 305 for (int j = 0; j < 16; ++j) { |
| 306 y_dst[j * BPS - 1] = 129; |
| 307 } |
| 308 for (int j = 0; j < 8; ++j) { |
| 309 u_dst[j * BPS - 1] = 129; |
| 310 v_dst[j * BPS - 1] = 129; |
| 311 } |
| 312 // Init top-left sample on left column too |
| 313 if (dec->mb_y_ > 0) { |
| 314 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; |
| 315 } |
| 316 } |
| 317 |
| 318 // bring top samples into the cache |
| 319 uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16; |
| 320 uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8; |
| 321 uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8; |
| 322 if (dec->mb_y_ > 0) { |
| 323 memcpy(y_dst - BPS, top_y, 16); |
| 324 memcpy(u_dst - BPS, top_u, 8); |
| 325 memcpy(v_dst - BPS, top_v, 8); |
| 326 } else if (dec->mb_x_ == 0) { |
| 327 // we only need to do this init once at block (0,0). |
| 328 // Afterward, it remains valid for the whole topmost row. |
| 329 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); |
| 330 memset(u_dst - BPS - 1, 127, 8 + 1); |
| 331 memset(v_dst - BPS - 1, 127, 8 + 1); |
| 332 } |
| 333 |
| 334 // predict and add residuals |
| 335 const int16_t* coeffs = dec->coeffs_; |
| 336 if (dec->is_i4x4_) { // 4x4 |
| 337 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); |
| 338 if (dec->mb_y_ > 0) { |
| 339 if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border |
| 340 top_right[0] = top_y[15] * 0x01010101u; |
| 341 } else { |
| 342 memcpy(top_right, top_y + 16, sizeof(*top_right)); |
| 343 } |
| 344 } |
| 345 // replicate the top-right pixels below |
| 346 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; |
| 347 |
| 348 // predict and add residues for all 4x4 blocks in turn. |
| 349 for (int n = 0; n < 16; n++) { |
| 350 uint8_t* const dst = y_dst + kScan[n]; |
| 351 VP8PredLuma4[dec->imodes_[n]](dst); |
| 352 if (dec->non_zero_ & (1 << n)) { |
| 353 VP8Transform(coeffs + n * 16, dst); |
| 354 } else if (dec->non_zero_ & (1 << n)) { // only DC is present |
| 355 VP8TransformDC(coeffs + n * 16, dst); |
| 356 } |
| 357 } |
| 358 } else { // 16x16 |
| 359 const int pred_func = CheckMode(dec, dec->imodes_[0]); |
| 360 VP8PredLuma16[pred_func](y_dst); |
| 361 if (dec->non_zero_) { |
| 362 for (int n = 0; n < 16; n++) { |
| 363 uint8_t* const dst = y_dst + kScan[n]; |
| 364 if (dec->non_zero_ac_ & (1 << n)) { |
| 365 VP8Transform(coeffs + n * 16, dst); |
| 366 } else if (dec->non_zero_ & (1 << n)) { // only DC is present |
| 367 VP8TransformDC(coeffs + n * 16, dst); |
| 368 } |
| 369 } |
| 370 } |
| 371 } |
| 372 |
| 373 // Chroma |
| 374 const int pred_func = CheckMode(dec, dec->uvmode_); |
| 375 VP8PredChroma8[pred_func](u_dst); |
| 376 VP8PredChroma8[pred_func](v_dst); |
| 377 |
| 378 if (dec->non_zero_ & 0x0f0000) { // chroma-U |
| 379 const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16; |
| 380 if (dec->non_zero_ac_ & 0x0f0000) { |
| 381 VP8TransformUV(u_coeffs, u_dst); |
| 382 } else { |
| 383 VP8TransformDCUV(u_coeffs, u_dst); |
| 384 } |
| 385 } |
| 386 if (dec->non_zero_ & 0xf00000) { // chroma-V |
| 387 const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16; |
| 388 if (dec->non_zero_ac_ & 0xf00000) { |
| 389 VP8TransformUV(v_coeffs, v_dst); |
| 390 } else { |
| 391 VP8TransformDCUV(v_coeffs, v_dst); |
| 392 } |
| 393 } |
| 394 |
| 395 // stash away top samples for next block |
| 396 if (dec->mb_y_ < dec->mb_h_ - 1) { |
| 397 memcpy(top_y, y_dst + 15 * BPS, 16); |
| 398 memcpy(top_u, u_dst + 7 * BPS, 8); |
| 399 memcpy(top_v, v_dst + 7 * BPS, 8); |
| 400 } |
| 401 } |
| 402 |
| 403 //----------------------------------------------------------------------------- |
| 404 |
| 405 #if defined(__cplusplus) || defined(c_plusplus) |
| 406 } // extern "C" |
| 407 #endif |
OLD | NEW |