OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */
| 16 #include <stdio.h> |
| 17 #include <stdlib.h> |
| 18 #include <stdarg.h> |
| 19 #include <time.h> |
| 20 #include <string.h> |
| 21 #include <unistd.h> |
| 22 #include <libgen.h> |
| 23 #define VPX_CODEC_DISABLE_COMPAT 1 |
| 24 #include "vpx/vpx_encoder.h" |
| 25 #include "vpx/vp8cx.h" |
| 26 #define interface (vpx_codec_vp9_cx()) |
| 27 #define fourcc 0x30395056 |
| 28 #define IVF_FILE_HDR_SZ (32) |
| 29 #define IVF_FRAME_HDR_SZ (12) |
| 30 #define NUM_BUFFERS 8 |
| 31 |
/* Run-time settings shared by the whole program; filled in by
 * parse_command_line(). */

/* Path of the raw input video (first positional argument). */
char *input_filename = NULL;
/* Prefix of the per-layer output files (second positional argument). */
char *output_filename = NULL;
/* Number of input frames to encode (-f). */
unsigned int number_frames_to_code = 60 * 60;
/* Number of leading input frames to read and discard (-s). */
unsigned int number_frames_to_skip = 0;
/* Number of spatial layers to produce (-l). */
unsigned int number_spatial_layers = 5;
/* Distance, in input frames, between intra-coded frames (-p). */
unsigned int key_period = 100;
| 38 |
/* Selects which reference/update flag pattern get_flag() hands to the
 * encoder for each layer. */
typedef enum ENCODING_MODE {
  INTER_LAYER_PREDICTION_I = 0,
  INTER_LAYER_PREDICTION_IP = 1,
  USE_GOLDEN_FRAME = 2
} ENCODING_MODE;
| 44 |
/* Stores the low 16 bits of val at mem[0..1], least significant byte first. */
static void mem_put_le16(char *mem, unsigned int val) {
  int byte;

  for (byte = 0; byte < 2; ++byte) {
    mem[byte] = val >> (8 * byte);
  }
}
| 49 |
/* Stores the low 32 bits of val at mem[0..3], least significant byte first. */
static void mem_put_le32(char *mem, unsigned int val) {
  int byte;

  for (byte = 0; byte < 4; ++byte) {
    mem[byte] = val >> (8 * byte);
  }
}
| 56 |
/*
 * Prints the command-line synopsis to stdout and terminates the process
 * with EXIT_FAILURE.
 *
 * program_name: the program path (argv[0]); only its basename is shown.
 *               basename() may modify the string it is given.
 *
 * The synopsis lists every option accepted by parse_command_line(),
 * including -p (key frame period).
 */
static void usage(char *program_name) {
  printf(
      "Usage: %s [-f frames] [-s skip_frames] [-w width] [-h height] \n\t"
      "[-n rate_num] [-d rate_den] [-b bitrate] [-l layers] "
      "[-p key_period] "
      "<input_filename> <output_filename>\n",
      basename(program_name));
  exit(EXIT_FAILURE);
}
| 65 |
/*
 * Prints a printf-style message to stdout, makes sure it ends with a
 * newline, and terminates the process with EXIT_FAILURE.
 *
 * fmt: printf format string; must not be NULL. An empty string is allowed
 *      and produces just a newline.
 * ...: arguments consumed by fmt.
 */
static void die(const char *fmt, ...) {
  va_list ap;
  const size_t len = strlen(fmt);

  va_start(ap, fmt);
  vprintf(fmt, ap);
  va_end(ap); /* every va_start must be paired with va_end */

  /* Check len first so an empty format never reads fmt[-1]. */
  if (len == 0 || fmt[len - 1] != '\n') printf("\n");
  exit(EXIT_FAILURE);
}
| 74 |
| 75 static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { |
| 76 const char *detail = vpx_codec_error_detail(ctx); |
| 77 |
| 78 printf("%s: %s\n", s, vpx_codec_error(ctx)); |
| 79 if (detail) printf(" %s\n", detail); |
| 80 exit(EXIT_FAILURE); |
| 81 } |
| 82 |
| 83 static int read_frame(FILE *f, vpx_image_t *img) { |
| 84 size_t nbytes, to_read; |
| 85 int res = 1; |
| 86 |
| 87 to_read = img->w * img->h * 3 / 2; |
| 88 nbytes = fread(img->planes[0], 1, to_read, f); |
| 89 if (nbytes != to_read) { |
| 90 res = 0; |
| 91 if (nbytes > 0) |
| 92 printf("Warning: Read partial frame. Check your width & height!\n"); |
| 93 } |
| 94 return res; |
| 95 } |
| 96 |
| 97 static int read_dummy_frame(vpx_image_t *img) { |
| 98 size_t to_read; |
| 99 |
| 100 to_read = img->w * img->h * 3 / 2; |
| 101 memset(img->planes[0], 129, to_read); |
| 102 return 1; |
| 103 } |
| 104 |
| 105 static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, |
| 106 int frame_cnt) { |
| 107 char header[32]; |
| 108 |
| 109 if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; |
| 110 header[0] = 'D'; |
| 111 header[1] = 'K'; |
| 112 header[2] = 'I'; |
| 113 header[3] = 'F'; |
| 114 mem_put_le16(header + 4, 0); /* version */ |
| 115 mem_put_le16(header + 6, 32); /* headersize */ |
| 116 mem_put_le32(header + 8, fourcc); /* headersize */ |
| 117 mem_put_le16(header + 12, cfg->g_w); /* width */ |
| 118 mem_put_le16(header + 14, cfg->g_h); /* height */ |
| 119 mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ |
| 120 mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ |
| 121 mem_put_le32(header + 24, frame_cnt); /* length */ |
| 122 mem_put_le32(header + 28, 0); /* unused */ |
| 123 |
| 124 (void)fwrite(header, 1, 32, outfile); |
| 125 } |
| 126 |
| 127 static void write_ivf_frame_header(FILE *outfile, |
| 128 const vpx_codec_cx_pkt_t *pkt) { |
| 129 char header[12]; |
| 130 vpx_codec_pts_t pts; |
| 131 |
| 132 if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; |
| 133 |
| 134 pts = pkt->data.frame.pts; |
| 135 mem_put_le32(header, pkt->data.frame.sz); |
| 136 mem_put_le32(header + 4, pts & 0xFFFFFFFF); |
| 137 mem_put_le32(header + 8, pts >> 32); |
| 138 |
| 139 (void)fwrite(header, 1, 12, outfile); |
| 140 } |
| 141 |
| 142 static void check_parameters() { |
| 143 if (number_spatial_layers > 5) die("Cannot support more than 5 layers"); |
| 144 } |
| 145 |
| 146 static void parse_command_line(int argc, char **argv, |
| 147 vpx_codec_enc_cfg_t *cfg) { |
| 148 unsigned int width = 1920; |
| 149 unsigned int height = 1080; |
| 150 unsigned int timebase_num = 1; |
| 151 unsigned int timebase_den = 60; |
| 152 unsigned int bitrate = 1000; |
| 153 int c; |
| 154 vpx_codec_err_t res; |
| 155 |
| 156 opterr = 0; |
| 157 while ((c = getopt(argc, argv, "f:w:h:n:d:b:s:l:p:")) != -1) switch (c) { |
| 158 case 'f': |
| 159 number_frames_to_code = atoi(optarg); |
| 160 break; |
| 161 case 'w': |
| 162 width = atoi(optarg); |
| 163 break; |
| 164 case 'h': |
| 165 height = atoi(optarg); |
| 166 break; |
| 167 case 'n': |
| 168 timebase_num = atoi(optarg); |
| 169 break; |
| 170 case 'd': |
| 171 timebase_den = atoi(optarg); |
| 172 break; |
| 173 case 'b': |
| 174 bitrate = atoi(optarg); |
| 175 break; |
| 176 case 's': |
| 177 number_frames_to_skip = atoi(optarg); |
| 178 break; |
| 179 case 'l': |
| 180 number_spatial_layers = atoi(optarg); |
| 181 break; |
| 182 case 'p': |
| 183 key_period = atoi(optarg); |
| 184 break; |
| 185 case '?': |
| 186 usage(argv[0]); |
| 187 } |
| 188 |
| 189 // Parse required parameters |
| 190 if (argc - optind != 2) { |
| 191 usage(argv[0]); |
| 192 } |
| 193 |
| 194 input_filename = argv[optind]; |
| 195 output_filename = argv[optind + 1]; |
| 196 |
| 197 if (width < 16 || width % 2 || height < 16 || height % 2) |
| 198 die("Invalid resolution: %d x %d", width, height); |
| 199 |
| 200 /* Populate encoder configuration */ |
| 201 res = vpx_codec_enc_config_default(interface, cfg, 0); |
| 202 if (res) { |
| 203 die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); |
| 204 } |
| 205 printf( |
| 206 "Codec %s\nframes: %d, skip: %d, layers: %d\n" |
| 207 "width %d, height: %d, \n" |
| 208 "num: %d, den: %d, bitrate: %d, \n" |
| 209 "key period: %d \n", |
| 210 vpx_codec_iface_name(interface), number_frames_to_code, |
| 211 number_frames_to_skip, number_spatial_layers, width, height, timebase_num, |
| 212 timebase_den, bitrate, key_period); |
| 213 |
| 214 // Do minimal check at the application level. Encoder parameters will be |
| 215 // checked internally |
| 216 check_parameters(); |
| 217 |
| 218 cfg->rc_target_bitrate = bitrate; |
| 219 cfg->g_w = width; |
| 220 cfg->g_h = height; |
| 221 cfg->g_timebase.num = timebase_num; |
| 222 cfg->g_timebase.den = timebase_den; |
| 223 cfg->ss_number_layers = number_spatial_layers; |
| 224 } |
| 225 |
| 226 static void set_default_configuration(vpx_codec_enc_cfg_t *cfg) { |
| 227 /* Real time parameters */ |
| 228 cfg->rc_dropframe_thresh = 0; |
| 229 cfg->rc_end_usage = VPX_CBR; |
| 230 cfg->rc_resize_allowed = 0; |
| 231 cfg->rc_min_quantizer = 33; |
| 232 cfg->rc_max_quantizer = 33; |
| 233 cfg->rc_undershoot_pct = 100; |
| 234 cfg->rc_overshoot_pct = 15; |
| 235 cfg->rc_buf_initial_sz = 500; |
| 236 cfg->rc_buf_optimal_sz = 600; |
| 237 cfg->rc_buf_sz = 1000; |
| 238 |
| 239 /* Enable error resilient mode */ |
| 240 cfg->g_error_resilient = 1; |
| 241 cfg->g_lag_in_frames = 0; |
| 242 |
| 243 /* Disable automatic keyframe placement */ |
| 244 cfg->kf_mode = VPX_KF_DISABLED; |
| 245 cfg->kf_min_dist = cfg->kf_max_dist = 3000; |
| 246 } |
| 247 |
| 248 static void initialize_codec(vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *cfg) { |
| 249 int max_intra_size_pct; |
| 250 |
| 251 /* Initialize codec */ |
| 252 if (vpx_codec_enc_init(codec, interface, cfg, VPX_CODEC_USE_PSNR)) |
| 253 die_codec(codec, "Failed to initialize encoder"); |
| 254 |
| 255 vpx_codec_control(codec, VP9E_SET_SVC, 1); |
| 256 /* Cap CPU & first I-frame size */ |
| 257 vpx_codec_control(codec, VP8E_SET_CPUUSED, 1); |
| 258 vpx_codec_control(codec, VP8E_SET_STATIC_THRESHOLD, 1); |
| 259 vpx_codec_control(codec, VP8E_SET_NOISE_SENSITIVITY, 1); |
| 260 vpx_codec_control(codec, VP8E_SET_TOKEN_PARTITIONS, 1); |
| 261 |
| 262 max_intra_size_pct = |
| 263 (int)(((double)cfg->rc_buf_optimal_sz * 0.5) * |
| 264 ((double)cfg->g_timebase.den / cfg->g_timebase.num) / 10.0); |
| 265 /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */ |
| 266 |
| 267 vpx_codec_control(codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); |
| 268 } |
| 269 |
/*
 * Maps a running encode counter to the spatial layer it belongs to.
 *
 * Counter 0 is reported as layer 0. Every later counter cycles through the
 * layers in order: 1 -> layer 0, 2 -> layer 1, ...,
 * number_spatial_layers -> the last layer, then back to layer 0.
 */
static int calculate_layer(int frame_cnt, int number_spatial_layers) {
  if (frame_cnt != 0) {
    const int shifted = frame_cnt + number_spatial_layers - 1;
    return shifted % number_spatial_layers;
  }
  return 0;
}
| 276 |
| 277 static void switch_to_layer(int layer, unsigned int initial_width, |
| 278 unsigned int initial_height, |
| 279 vpx_codec_ctx_t *codec) { |
| 280 // Set layer size |
| 281 int scaling_factor_num[MAX_LAYERS] = {2, 1, 4, 2, 1}; |
| 282 int scaling_factor_den[MAX_LAYERS] = {9, 3, 9, 3, 1}; |
| 283 |
| 284 int quantizer[MAX_LAYERS] = {60, 53, 39, 33, 27}; |
| 285 |
| 286 unsigned int current_width; |
| 287 unsigned int current_height; |
| 288 |
| 289 current_width = initial_width * |
| 290 scaling_factor_num[layer + 5 - number_spatial_layers] / |
| 291 scaling_factor_den[layer + 5 - number_spatial_layers]; |
| 292 current_height = initial_height * |
| 293 scaling_factor_num[layer + 5 - number_spatial_layers] / |
| 294 scaling_factor_den[layer + 5 - number_spatial_layers]; |
| 295 |
| 296 current_width += current_width % 2; |
| 297 current_height += current_height % 2; |
| 298 |
| 299 vpx_codec_control(codec, VP9E_SET_WIDTH, ¤t_width); |
| 300 vpx_codec_control(codec, VP9E_SET_HEIGHT, ¤t_height); |
| 301 |
| 302 // Set layer context |
| 303 vpx_codec_control(codec, VP9E_SET_LAYER, &layer); |
| 304 vpx_codec_control(codec, VP9E_SET_MAX_Q, |
| 305 quantizer[layer + 5 - number_spatial_layers]); |
| 306 vpx_codec_control(codec, VP9E_SET_MIN_Q, |
| 307 quantizer[layer + 5 - number_spatial_layers]); |
| 308 } |
| 309 |
| 310 static int get_flag(int is_I_frame_in_layer, int layer, ENCODING_MODE mode) { |
| 311 // First layer |
| 312 switch (mode) { |
| 313 case INTER_LAYER_PREDICTION_I: |
| 314 if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; |
| 315 if (layer == 0) |
| 316 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 317 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; |
| 318 else if (is_I_frame_in_layer) |
| 319 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 320 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; |
| 321 else |
| 322 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 323 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; |
| 324 break; |
| 325 |
| 326 case INTER_LAYER_PREDICTION_IP: |
| 327 if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; |
| 328 if (layer == 0) |
| 329 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 330 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; |
| 331 else if (is_I_frame_in_layer) |
| 332 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 333 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; |
| 334 else |
| 335 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; |
| 336 break; |
| 337 |
| 338 case USE_GOLDEN_FRAME: |
| 339 if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; |
| 340 if (2 * number_spatial_layers - NUM_BUFFERS <= layer) { |
| 341 if (layer == 0) |
| 342 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 343 VP8_EFLAG_NO_REF_ARF; |
| 344 else if (is_I_frame_in_layer) |
| 345 return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | |
| 346 VP8_EFLAG_NO_REF_LAST; |
| 347 else |
| 348 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; |
| 349 } else { |
| 350 if (layer == 0) |
| 351 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 352 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; |
| 353 else if (is_I_frame_in_layer) |
| 354 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 355 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; |
| 356 else |
| 357 return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | |
| 358 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; |
| 359 } |
| 360 break; |
| 361 default: |
| 362 return VPX_EFLAG_FORCE_KF; |
| 363 } |
| 364 } |
| 365 |
| 366 int main(int argc, char **argv) { |
| 367 FILE *infile, *outfile[MAX_LAYERS]; |
| 368 vpx_codec_ctx_t codec; |
| 369 vpx_codec_enc_cfg_t cfg; |
| 370 int frame_cnt = 0; |
| 371 vpx_image_t raw; |
| 372 int frame_avail = 1; |
| 373 int got_data = 0; |
| 374 int i; |
| 375 int frames_in_layer[MAX_LAYERS] = {0}; |
| 376 clock_t before; |
| 377 clock_t after; |
| 378 int pts = 0; /* PTS starts at 0 */ |
| 379 int frame_duration = 1; /* 1 timebase tick per frame */ |
| 380 |
| 381 parse_command_line(argc, argv, &cfg); |
| 382 |
| 383 // Allocate image buffer |
| 384 if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, cfg.g_w, cfg.g_h, 32)) |
| 385 die("Failed to allocate image", cfg.g_w, cfg.g_h); |
| 386 |
| 387 set_default_configuration(&cfg); |
| 388 |
| 389 /* Open input file */ |
| 390 if (!(infile = fopen(input_filename, "rb"))) |
| 391 die("Failed to open %s for reading", argv[1]); |
| 392 |
| 393 /* Open output file */ |
| 394 for (i = 0; i < number_spatial_layers; i++) { |
| 395 char file_name[512]; |
| 396 snprintf(file_name, sizeof(file_name), "%s_%d.ivf", output_filename, i); |
| 397 if (!(outfile[i] = fopen(file_name, "wb"))) |
| 398 die("Failed to open %s for writing", file_name); |
| 399 write_ivf_file_header(outfile[i], &cfg, 0); |
| 400 } |
| 401 |
| 402 initialize_codec(&codec, &cfg); |
| 403 |
| 404 // skip initial frames |
| 405 for (i = 0; i < number_frames_to_skip; i++) { |
| 406 read_frame(infile, &raw); |
| 407 } |
| 408 |
| 409 before = clock(); |
| 410 // Encoding frames |
| 411 while ((frame_avail || got_data) && |
| 412 frame_cnt <= number_frames_to_code * number_spatial_layers) { |
| 413 int flags = 0; |
| 414 vpx_codec_iter_t iter = NULL; |
| 415 const vpx_codec_cx_pkt_t *pkt; |
| 416 |
| 417 int layer = calculate_layer(frame_cnt, number_spatial_layers); |
| 418 int is_I_frame_in_layer = |
| 419 (((frame_cnt - 1) / number_spatial_layers % key_period) == 0); |
| 420 int is_dummy = (frame_cnt == 0); |
| 421 |
| 422 if (is_dummy) { // Dummy frame |
| 423 flags = VPX_EFLAG_FORCE_KF; |
| 424 frame_avail = read_dummy_frame(&raw); |
| 425 |
| 426 } else { // Regular frame |
| 427 // Read a new frame only at the base layer |
| 428 if (layer == 0) frame_avail = read_frame(infile, &raw); |
| 429 switch_to_layer(layer, cfg.g_w, cfg.g_h, &codec); |
| 430 flags = get_flag(is_I_frame_in_layer, layer, INTER_LAYER_PREDICTION_I); |
| 431 } |
| 432 |
| 433 // Actual Encoding |
| 434 if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, |
| 435 VPX_DL_REALTIME)) |
| 436 die_codec(&codec, "Failed to encode frame"); |
| 437 |
| 438 got_data = 0; |
| 439 // Process data / Get PSNR statistics |
| 440 while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) { |
| 441 got_data = 1; |
| 442 switch (pkt->kind) { |
| 443 case VPX_CODEC_CX_FRAME_PKT: |
| 444 for (i = layer; i < number_spatial_layers; i++) { |
| 445 write_ivf_frame_header(outfile[i], pkt); |
| 446 (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, |
| 447 outfile[i]); |
| 448 frames_in_layer[i]++; |
| 449 } |
| 450 break; |
| 451 case VPX_CODEC_PSNR_PKT: |
| 452 if (frame_cnt != 0) |
| 453 printf( |
| 454 "Processed Frame %d, layer %d, PSNR(Total/Y/U/V): " |
| 455 "%2.3f %2.3f %2.3f %2.3f \n", |
| 456 (frame_cnt - 1) / number_spatial_layers + 1, layer, |
| 457 pkt->data.psnr.psnr[0], pkt->data.psnr.psnr[1], |
| 458 pkt->data.psnr.psnr[2], pkt->data.psnr.psnr[3]); |
| 459 break; |
| 460 default: |
| 461 break; |
| 462 } |
| 463 } |
| 464 frame_cnt++; |
| 465 // TODO(ivan): Modify ts later if(!layer) |
| 466 pts += frame_duration; |
| 467 } |
| 468 // end while |
| 469 |
| 470 after = clock(); |
| 471 printf("Processed %d frames in different resolutions in %ld ms.\n", |
| 472 frame_cnt - 1, (int)(after - before) / (CLOCKS_PER_SEC / 1000)); |
| 473 |
| 474 fclose(infile); |
| 475 |
| 476 if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); |
| 477 |
| 478 /* Try to rewrite the output file headers with the actual frame count */ |
| 479 for (i = 0; i < number_spatial_layers; i++) { |
| 480 if (!fseek(outfile[i], 0, SEEK_SET)) { |
| 481 write_ivf_file_header(outfile[i], &cfg, frames_in_layer[i]); |
| 482 } |
| 483 fclose(outfile[i]); |
| 484 } |
| 485 |
| 486 return EXIT_SUCCESS; |
| 487 } |
OLD | NEW |