| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 658 matching lines...) | (...skipping 658 matching lines...) |
| 669 | 669 |
| 670 if (width <= 0 || height <= 0) | 670 if (width <= 0 || height <= 0) |
| 671 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 671 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
| 672 "Referenced frame with invalid size"); | 672 "Referenced frame with invalid size"); |
| 673 | 673 |
| 674 apply_frame_size(cm, width, height); | 674 apply_frame_size(cm, width, height); |
| 675 setup_display_size(cm, rb); | 675 setup_display_size(cm, rb); |
| 676 } | 676 } |
| 677 | 677 |
| 678 static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, | 678 static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, |
| 679 vp9_reader *r) { | 679 int do_loopfilter_inline, vp9_reader *r) { |
| 680 const int num_threads = pbi->oxcf.max_threads; | 680 const int num_threads = pbi->max_threads; |
| 681 VP9_COMMON *const cm = &pbi->common; | 681 VP9_COMMON *const cm = &pbi->common; |
| 682 int mi_row, mi_col; | 682 int mi_row, mi_col; |
| 683 MACROBLOCKD *xd = &pbi->mb; | 683 MACROBLOCKD *xd = &pbi->mb; |
| 684 | 684 |
| 685 if (pbi->do_loopfilter_inline) { | 685 if (do_loopfilter_inline) { |
| 686 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 686 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
| 687 lf_data->frame_buffer = get_frame_new_buffer(cm); | 687 lf_data->frame_buffer = get_frame_new_buffer(cm); |
| 688 lf_data->cm = cm; | 688 lf_data->cm = cm; |
| 689 lf_data->xd = pbi->mb; | 689 lf_data->xd = pbi->mb; |
| 690 lf_data->stop = 0; | 690 lf_data->stop = 0; |
| 691 lf_data->y_only = 0; | 691 lf_data->y_only = 0; |
| 692 vp9_loop_filter_frame_init(cm, cm->lf.filter_level); | 692 vp9_loop_filter_frame_init(cm, cm->lf.filter_level); |
| 693 } | 693 } |
| 694 | 694 |
| 695 for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; | 695 for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; |
| 696 mi_row += MI_BLOCK_SIZE) { | 696 mi_row += MI_BLOCK_SIZE) { |
| 697 // For a SB there are 2 left contexts, each pertaining to a MB row within the SB. | 697 // For a SB there are 2 left contexts, each pertaining to a MB row within the SB. |
| 698 vp9_zero(xd->left_context); | 698 vp9_zero(xd->left_context); |
| 699 vp9_zero(xd->left_seg_context); | 699 vp9_zero(xd->left_seg_context); |
| 700 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; | 700 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; |
| 701 mi_col += MI_BLOCK_SIZE) { | 701 mi_col += MI_BLOCK_SIZE) { |
| 702 decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); | 702 decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); |
| 703 } | 703 } |
| 704 | 704 |
| 705 if (pbi->do_loopfilter_inline) { | 705 if (do_loopfilter_inline) { |
| 706 const int lf_start = mi_row - MI_BLOCK_SIZE; | 706 const int lf_start = mi_row - MI_BLOCK_SIZE; |
| 707 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 707 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
| 708 | 708 |
| 709 // Delay the loop filter by one superblock row: filter the row above the one just decoded. | 709 // Delay the loop filter by one superblock row: filter the row above the one just decoded. |
| 710 if (lf_start < 0) continue; | 710 if (lf_start < 0) continue; |
| 711 | 711 |
| 712 // Last row: decoding has completed, so the loop filter is finished in this thread after the loop. | 712 // Last row: decoding has completed, so the loop filter is finished in this thread after the loop. |
| 713 if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; | 713 if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; |
| 714 | 714 |
| 715 vp9_worker_sync(&pbi->lf_worker); | 715 vp9_worker_sync(&pbi->lf_worker); |
| 716 lf_data->start = lf_start; | 716 lf_data->start = lf_start; |
| 717 lf_data->stop = mi_row; | 717 lf_data->stop = mi_row; |
| 718 if (num_threads > 1) { | 718 if (num_threads > 1) { |
| 719 vp9_worker_launch(&pbi->lf_worker); | 719 vp9_worker_launch(&pbi->lf_worker); |
| 720 } else { | 720 } else { |
| 721 vp9_worker_execute(&pbi->lf_worker); | 721 vp9_worker_execute(&pbi->lf_worker); |
| 722 } | 722 } |
| 723 } | 723 } |
| 724 } | 724 } |
| 725 | 725 |
| 726 if (pbi->do_loopfilter_inline) { | 726 if (do_loopfilter_inline) { |
| 727 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 727 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
| 728 | 728 |
| 729 vp9_worker_sync(&pbi->lf_worker); | 729 vp9_worker_sync(&pbi->lf_worker); |
| 730 lf_data->start = lf_data->stop; | 730 lf_data->start = lf_data->stop; |
| 731 lf_data->stop = cm->mi_rows; | 731 lf_data->stop = cm->mi_rows; |
| 732 vp9_worker_execute(&pbi->lf_worker); | 732 vp9_worker_execute(&pbi->lf_worker); |
| 733 } | 733 } |
| 734 } | 734 } |
| 735 | 735 |
| 736 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { | 736 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { |
| 737 int min_log2_tile_cols, max_log2_tile_cols, max_ones; | 737 int min_log2_tile_cols, max_log2_tile_cols, max_ones; |
| 738 vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); | 738 vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); |
| 739 | 739 |
| 740 // columns | 740 // columns |
| 741 max_ones = max_log2_tile_cols - min_log2_tile_cols; | 741 max_ones = max_log2_tile_cols - min_log2_tile_cols; |
| 742 cm->log2_tile_cols = min_log2_tile_cols; | 742 cm->log2_tile_cols = min_log2_tile_cols; |
| 743 while (max_ones-- && vp9_rb_read_bit(rb)) | 743 while (max_ones-- && vp9_rb_read_bit(rb)) |
| 744 cm->log2_tile_cols++; | 744 cm->log2_tile_cols++; |
| 745 | 745 |
| 746 // rows | 746 // rows |
| 747 cm->log2_tile_rows = vp9_rb_read_bit(rb); | 747 cm->log2_tile_rows = vp9_rb_read_bit(rb); |
| 748 if (cm->log2_tile_rows) | 748 if (cm->log2_tile_rows) |
| 749 cm->log2_tile_rows += vp9_rb_read_bit(rb); | 749 cm->log2_tile_rows += vp9_rb_read_bit(rb); |
| 750 } | 750 } |
| 751 | 751 |
| 752 typedef struct TileBuffer { |
| 753 const uint8_t *data; |
| 754 size_t size; |
| 755 int col; // only used with multi-threaded decoding |
| 756 } TileBuffer; |
| 757 |
| 752 // Reads the next tile returning its size and adjusting '*data' accordingly | 758 // Reads the next tile, storing its start and size in '*buf' and advancing |
| 753 // based on 'is_last'. | 759 // '*data' past it; a 4-byte size prefix is read unless 'is_last' is set. |
| 754 static size_t get_tile(const uint8_t *const data_end, | 760 static void get_tile_buffer(const uint8_t *const data_end, |
| 755 int is_last, | 761 int is_last, |
| 756 struct vpx_internal_error_info *error_info, | 762 struct vpx_internal_error_info *error_info, |
| 757 const uint8_t **data, | 763 const uint8_t **data, |
| 758 vpx_decrypt_cb decrypt_cb, | 764 vpx_decrypt_cb decrypt_cb, void *decrypt_state, |
| 759 void *decrypt_state) { | 765 TileBuffer *buf) { |
| 760 size_t size; | 766 size_t size; |
| 761 | 767 |
| 762 if (!is_last) { | 768 if (!is_last) { |
| 763 if (!read_is_valid(*data, 4, data_end)) | 769 if (!read_is_valid(*data, 4, data_end)) |
| 764 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, | 770 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, |
| 765 "Truncated packet or corrupt tile length"); | 771 "Truncated packet or corrupt tile length"); |
| 766 | 772 |
| 767 if (decrypt_cb) { | 773 if (decrypt_cb) { |
| 768 uint8_t be_data[4]; | 774 uint8_t be_data[4]; |
| 769 decrypt_cb(decrypt_state, *data, be_data, 4); | 775 decrypt_cb(decrypt_state, *data, be_data, 4); |
| 770 size = mem_get_be32(be_data); | 776 size = mem_get_be32(be_data); |
| 771 } else { | 777 } else { |
| 772 size = mem_get_be32(*data); | 778 size = mem_get_be32(*data); |
| 773 } | 779 } |
| 774 *data += 4; | 780 *data += 4; |
| 775 | 781 |
| 776 if (size > (size_t)(data_end - *data)) | 782 if (size > (size_t)(data_end - *data)) |
| 777 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, | 783 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, |
| 778 "Truncated packet or corrupt tile size"); | 784 "Truncated packet or corrupt tile size"); |
| 779 } else { | 785 } else { |
| 780 size = data_end - *data; | 786 size = data_end - *data; |
| 781 } | 787 } |
| 782 return size; | 788 |
| 789 buf->data = *data; |
| 790 buf->size = size; |
| 791 |
| 792 *data += size; |
| 783 } | 793 } |
| 784 | 794 |
| 785 typedef struct TileBuffer { | 795 static void get_tile_buffers(VP9Decoder *pbi, |
| 786 const uint8_t *data; | 796 const uint8_t *data, const uint8_t *data_end, |
| 787 size_t size; | 797 int tile_cols, int tile_rows, |
| 788 int col; // only used with multi-threaded decoding | 798 TileBuffer (*tile_buffers)[1 << 6]) { |
| 789 } TileBuffer; | 799 int r, c; |
| 800 |
| 801 for (r = 0; r < tile_rows; ++r) { |
| 802 for (c = 0; c < tile_cols; ++c) { |
| 803 const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); |
| 804 TileBuffer *const buf = &tile_buffers[r][c]; |
| 805 buf->col = c; |
| 806 get_tile_buffer(data_end, is_last, &pbi->common.error, &data, |
| 807 pbi->decrypt_cb, pbi->decrypt_state, buf); |
| 808 } |
| 809 } |
| 810 } |
| 790 | 811 |
| 791 static const uint8_t *decode_tiles(VP9Decoder *pbi, | 812 static const uint8_t *decode_tiles(VP9Decoder *pbi, |
| 792 const uint8_t *data, | 813 const uint8_t *data, |
| 793 const uint8_t *data_end) { | 814 const uint8_t *data_end, |
| 815 int do_loopfilter_inline) { |
| 794 VP9_COMMON *const cm = &pbi->common; | 816 VP9_COMMON *const cm = &pbi->common; |
| 795 const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); | 817 const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
| 796 const int tile_cols = 1 << cm->log2_tile_cols; | 818 const int tile_cols = 1 << cm->log2_tile_cols; |
| 797 const int tile_rows = 1 << cm->log2_tile_rows; | 819 const int tile_rows = 1 << cm->log2_tile_rows; |
| 798 TileBuffer tile_buffers[4][1 << 6]; | 820 TileBuffer tile_buffers[4][1 << 6]; |
| 799 int tile_row, tile_col; | 821 int tile_row, tile_col; |
| 800 const uint8_t *end = NULL; | 822 const uint8_t *end = NULL; |
| 801 vp9_reader r; | 823 vp9_reader r; |
| 802 | 824 |
| 803 assert(tile_rows <= 4); | 825 assert(tile_rows <= 4); |
| 804 assert(tile_cols <= (1 << 6)); | 826 assert(tile_cols <= (1 << 6)); |
| 805 | 827 |
| 806 // Note: this memset assumes above_context[0], [1] and [2] | 828 // Note: this memset assumes above_context[0], [1] and [2] |
| 807 // are allocated as part of the same buffer. | 829 // are allocated as part of the same buffer. |
| 808 vpx_memset(cm->above_context, 0, | 830 vpx_memset(cm->above_context, 0, |
| 809 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); | 831 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); |
| 810 | 832 |
| 811 vpx_memset(cm->above_seg_context, 0, | 833 vpx_memset(cm->above_seg_context, 0, |
| 812 sizeof(*cm->above_seg_context) * aligned_cols); | 834 sizeof(*cm->above_seg_context) * aligned_cols); |
| 813 | 835 |
| 814 // Load tile data into tile_buffers | 836 get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); |
| 815 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { | |
| 816 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { | |
| 817 const int last_tile = tile_row == tile_rows - 1 && | |
| 818 tile_col == tile_cols - 1; | |
| 819 const size_t size = get_tile(data_end, last_tile, &cm->error, &data, | |
| 820 pbi->decrypt_cb, pbi->decrypt_state); | |
| 821 TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; | |
| 822 buf->data = data; | |
| 823 buf->size = size; | |
| 824 data += size; | |
| 825 } | |
| 826 } | |
| 827 | 837 |
| 828 // Decode tiles using data from tile_buffers | 838 // Decode tiles using data from tile_buffers |
| 829 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { | 839 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { |
| 830 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { | 840 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
| 831 const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1 | 841 const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col; |
| 832 : tile_col; | |
| 833 const int last_tile = tile_row == tile_rows - 1 && | 842 const int last_tile = tile_row == tile_rows - 1 && |
| 834 col == tile_cols - 1; | 843 col == tile_cols - 1; |
| 835 const TileBuffer *const buf = &tile_buffers[tile_row][col]; | 844 const TileBuffer *const buf = &tile_buffers[tile_row][col]; |
| 836 TileInfo tile; | 845 TileInfo tile; |
| 837 | 846 |
| 838 vp9_tile_init(&tile, cm, tile_row, col); | 847 vp9_tile_init(&tile, cm, tile_row, col); |
| 839 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r, | 848 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r, |
| 840 pbi->decrypt_cb, pbi->decrypt_state); | 849 pbi->decrypt_cb, pbi->decrypt_state); |
| 841 decode_tile(pbi, &tile, &r); | 850 decode_tile(pbi, &tile, do_loopfilter_inline, &r); |
| 842 | 851 |
| 843 if (last_tile) | 852 if (last_tile) |
| 844 end = vp9_reader_find_end(&r); | 853 end = vp9_reader_find_end(&r); |
| 845 } | 854 } |
| 846 } | 855 } |
| 847 | 856 |
| 848 return end; | 857 return end; |
| 849 } | 858 } |
| 850 | 859 |
| 851 static int tile_worker_hook(void *arg1, void *arg2) { | 860 static int tile_worker_hook(void *arg1, void *arg2) { |
| (...skipping 28 matching lines...) | (...skipping 28 matching lines...) |
| 880 } | 889 } |
| 881 | 890 |
| 882 static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, | 891 static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, |
| 883 const uint8_t *data, | 892 const uint8_t *data, |
| 884 const uint8_t *data_end) { | 893 const uint8_t *data_end) { |
| 885 VP9_COMMON *const cm = &pbi->common; | 894 VP9_COMMON *const cm = &pbi->common; |
| 886 const uint8_t *bit_reader_end = NULL; | 895 const uint8_t *bit_reader_end = NULL; |
| 887 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); | 896 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
| 888 const int tile_cols = 1 << cm->log2_tile_cols; | 897 const int tile_cols = 1 << cm->log2_tile_cols; |
| 889 const int tile_rows = 1 << cm->log2_tile_rows; | 898 const int tile_rows = 1 << cm->log2_tile_rows; |
| 890 const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); | 899 const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); |
| 891 TileBuffer tile_buffers[1 << 6]; | 900 TileBuffer tile_buffers[1][1 << 6]; |
| 892 int n; | 901 int n; |
| 893 int final_worker = -1; | 902 int final_worker = -1; |
| 894 | 903 |
| 895 assert(tile_cols <= (1 << 6)); | 904 assert(tile_cols <= (1 << 6)); |
| 896 assert(tile_rows == 1); | 905 assert(tile_rows == 1); |
| 897 (void)tile_rows; | 906 (void)tile_rows; |
| 898 | 907 |
| 899 // TODO(jzern): See if we can remove the restriction of passing in max | 908 // TODO(jzern): See if we can remove the restriction of passing in max |
| 900 // threads to the decoder. | 909 // threads to the decoder. |
| 901 if (pbi->num_tile_workers == 0) { | 910 if (pbi->num_tile_workers == 0) { |
| 902 const int num_threads = pbi->oxcf.max_threads & ~1; | 911 const int num_threads = pbi->max_threads & ~1; |
| 903 int i; | 912 int i; |
| 904 // TODO(jzern): Allocate one less worker, as in the current code we only | 913 // TODO(jzern): Allocate one less worker, as in the current code we only |
| 905 // use num_threads - 1 workers. | 914 // use num_threads - 1 workers. |
| 906 CHECK_MEM_ERROR(cm, pbi->tile_workers, | 915 CHECK_MEM_ERROR(cm, pbi->tile_workers, |
| 907 vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); | 916 vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); |
| 908 for (i = 0; i < num_threads; ++i) { | 917 for (i = 0; i < num_threads; ++i) { |
| 909 VP9Worker *const worker = &pbi->tile_workers[i]; | 918 VP9Worker *const worker = &pbi->tile_workers[i]; |
| 910 ++pbi->num_tile_workers; | 919 ++pbi->num_tile_workers; |
| 911 | 920 |
| 912 vp9_worker_init(worker); | 921 vp9_worker_init(worker); |
| (...skipping 13 matching lines...) | (...skipping 13 matching lines...) |
| 926 } | 935 } |
| 927 | 936 |
| 928 // Note: this memset assumes above_context[0], [1] and [2] | 937 // Note: this memset assumes above_context[0], [1] and [2] |
| 929 // are allocated as part of the same buffer. | 938 // are allocated as part of the same buffer. |
| 930 vpx_memset(cm->above_context, 0, | 939 vpx_memset(cm->above_context, 0, |
| 931 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); | 940 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); |
| 932 vpx_memset(cm->above_seg_context, 0, | 941 vpx_memset(cm->above_seg_context, 0, |
| 933 sizeof(*cm->above_seg_context) * aligned_mi_cols); | 942 sizeof(*cm->above_seg_context) * aligned_mi_cols); |
| 934 | 943 |
| 935 // Load tile data into tile_buffers | 944 // Load tile data into tile_buffers |
| 936 for (n = 0; n < tile_cols; ++n) { | 945 get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); |
| 937 const size_t size = | |
| 938 get_tile(data_end, n == tile_cols - 1, &cm->error, &data, | |
| 939 pbi->decrypt_cb, pbi->decrypt_state); | |
| 940 TileBuffer *const buf = &tile_buffers[n]; | |
| 941 buf->data = data; | |
| 942 buf->size = size; | |
| 943 buf->col = n; | |
| 944 data += size; | |
| 945 } | |
| 946 | 946 |
| 947 // Sort the buffers based on size in descending order. | 947 // Sort the buffers based on size in descending order. |
| 948 qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers); | 948 qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]), |
| 949 compare_tile_buffers); |
| 949 | 950 |
| 950 // Rearrange the tile buffers such that per-tile group the largest, and | 951 // Rearrange the tile buffers such that per-tile group the largest, and |
| 951 // presumably the most difficult, tile will be decoded in the main thread. | 952 // presumably the most difficult, tile will be decoded in the main thread. |
| 952 // This should help minimize the number of instances where the main thread is | 953 // This should help minimize the number of instances where the main thread is |
| 953 // waiting for a worker to complete. | 954 // waiting for a worker to complete. |
| 954 { | 955 { |
| 955 int group_start = 0; | 956 int group_start = 0; |
| 956 while (group_start < tile_cols) { | 957 while (group_start < tile_cols) { |
| 957 const TileBuffer largest = tile_buffers[group_start]; | 958 const TileBuffer largest = tile_buffers[0][group_start]; |
| 958 const int group_end = MIN(group_start + num_workers, tile_cols) - 1; | 959 const int group_end = MIN(group_start + num_workers, tile_cols) - 1; |
| 959 memmove(tile_buffers + group_start, tile_buffers + group_start + 1, | 960 memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1, |
| 960 (group_end - group_start) * sizeof(tile_buffers[0])); | 961 (group_end - group_start) * sizeof(tile_buffers[0][0])); |
| 961 tile_buffers[group_end] = largest; | 962 tile_buffers[0][group_end] = largest; |
| 962 group_start = group_end + 1; | 963 group_start = group_end + 1; |
| 963 } | 964 } |
| 964 } | 965 } |
| 965 | 966 |
| 966 n = 0; | 967 n = 0; |
| 967 while (n < tile_cols) { | 968 while (n < tile_cols) { |
| 968 int i; | 969 int i; |
| 969 for (i = 0; i < num_workers && n < tile_cols; ++i) { | 970 for (i = 0; i < num_workers && n < tile_cols; ++i) { |
| 970 VP9Worker *const worker = &pbi->tile_workers[i]; | 971 VP9Worker *const worker = &pbi->tile_workers[i]; |
| 971 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; | 972 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; |
| 972 TileInfo *const tile = (TileInfo*)worker->data2; | 973 TileInfo *const tile = (TileInfo*)worker->data2; |
| 973 TileBuffer *const buf = &tile_buffers[n]; | 974 TileBuffer *const buf = &tile_buffers[0][n]; |
| 974 | 975 |
| 975 tile_data->cm = cm; | 976 tile_data->cm = cm; |
| 976 tile_data->xd = pbi->mb; | 977 tile_data->xd = pbi->mb; |
| 977 tile_data->xd.corrupted = 0; | 978 tile_data->xd.corrupted = 0; |
| 978 vp9_tile_init(tile, tile_data->cm, 0, buf->col); | 979 vp9_tile_init(tile, tile_data->cm, 0, buf->col); |
| 979 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, | 980 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, |
| 980 &tile_data->bit_reader, pbi->decrypt_cb, | 981 &tile_data->bit_reader, pbi->decrypt_cb, |
| 981 pbi->decrypt_state); | 982 pbi->decrypt_state); |
| 982 init_macroblockd(cm, &tile_data->xd); | 983 init_macroblockd(cm, &tile_data->xd); |
| 983 vp9_zero(tile_data->xd.dqcoeff); | 984 vp9_zero(tile_data->xd.dqcoeff); |
| (...skipping 287 matching lines...) | (...skipping 287 matching lines...) |
| 1271 assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); | 1272 assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); |
| 1272 } | 1273 } |
| 1273 #endif // NDEBUG | 1274 #endif // NDEBUG |
| 1274 | 1275 |
| 1275 static struct vp9_read_bit_buffer* init_read_bit_buffer( | 1276 static struct vp9_read_bit_buffer* init_read_bit_buffer( |
| 1276 VP9Decoder *pbi, | 1277 VP9Decoder *pbi, |
| 1277 struct vp9_read_bit_buffer *rb, | 1278 struct vp9_read_bit_buffer *rb, |
| 1278 const uint8_t *data, | 1279 const uint8_t *data, |
| 1279 const uint8_t *data_end, | 1280 const uint8_t *data_end, |
| 1280 uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { | 1281 uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { |
| 1282 vp9_zero(*rb); |
| 1281 rb->bit_offset = 0; | 1283 rb->bit_offset = 0; |
| 1282 rb->error_handler = error_handler; | 1284 rb->error_handler = error_handler; |
| 1283 rb->error_handler_data = &pbi->common; | 1285 rb->error_handler_data = &pbi->common; |
| 1284 if (pbi->decrypt_cb) { | 1286 if (pbi->decrypt_cb) { |
| 1285 const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); | 1287 const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); |
| 1286 pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); | 1288 pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); |
| 1287 rb->bit_buffer = clear_data; | 1289 rb->bit_buffer = clear_data; |
| 1288 rb->bit_buffer_end = clear_data + n; | 1290 rb->bit_buffer_end = clear_data + n; |
| 1289 } else { | 1291 } else { |
| 1290 rb->bit_buffer = data; | 1292 rb->bit_buffer = data; |
| 1291 rb->bit_buffer_end = data_end; | 1293 rb->bit_buffer_end = data_end; |
| 1292 } | 1294 } |
| 1293 return rb; | 1295 return rb; |
| 1294 } | 1296 } |
| 1295 | 1297 |
| 1296 int vp9_decode_frame(VP9Decoder *pbi, | 1298 int vp9_decode_frame(VP9Decoder *pbi, |
| 1297 const uint8_t *data, const uint8_t *data_end, | 1299 const uint8_t *data, const uint8_t *data_end, |
| 1298 const uint8_t **p_data_end) { | 1300 const uint8_t **p_data_end) { |
| 1299 VP9_COMMON *const cm = &pbi->common; | 1301 VP9_COMMON *const cm = &pbi->common; |
| 1300 MACROBLOCKD *const xd = &pbi->mb; | 1302 MACROBLOCKD *const xd = &pbi->mb; |
| 1301 struct vp9_read_bit_buffer rb = { 0 }; | 1303 struct vp9_read_bit_buffer rb; |
| 1302 uint8_t clear_data[MAX_VP9_HEADER_SIZE]; | 1304 uint8_t clear_data[MAX_VP9_HEADER_SIZE]; |
| 1303 const size_t first_partition_size = read_uncompressed_header(pbi, | 1305 const size_t first_partition_size = read_uncompressed_header(pbi, |
| 1304 init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); | 1306 init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); |
| 1305 const int keyframe = cm->frame_type == KEY_FRAME; | 1307 const int keyframe = cm->frame_type == KEY_FRAME; |
| 1306 const int tile_rows = 1 << cm->log2_tile_rows; | 1308 const int tile_rows = 1 << cm->log2_tile_rows; |
| 1307 const int tile_cols = 1 << cm->log2_tile_cols; | 1309 const int tile_cols = 1 << cm->log2_tile_cols; |
| 1308 YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); | 1310 YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); |
| 1311 const int do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 && |
| 1312 cm->lf.filter_level; |
| 1309 xd->cur_buf = new_fb; | 1313 xd->cur_buf = new_fb; |
| 1310 | 1314 |
| 1311 if (!first_partition_size) { | 1315 if (!first_partition_size) { |
| 1312 // showing a frame directly | 1316 // showing a frame directly |
| 1313 *p_data_end = data + 1; | 1317 *p_data_end = data + 1; |
| 1314 return 0; | 1318 return 0; |
| 1315 } | 1319 } |
| 1316 | 1320 |
| 1317 if (!pbi->decoded_key_frame && !keyframe) | 1321 if (!pbi->decoded_key_frame && !keyframe) |
| 1318 return -1; | 1322 return -1; |
| 1319 | 1323 |
| 1320 data += vp9_rb_bytes_read(&rb); | 1324 data += vp9_rb_bytes_read(&rb); |
| 1321 if (!read_is_valid(data, first_partition_size, data_end)) | 1325 if (!read_is_valid(data, first_partition_size, data_end)) |
| 1322 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 1326 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
| 1323 "Truncated packet or corrupt header length"); | 1327 "Truncated packet or corrupt header length"); |
| 1324 | 1328 |
| 1325 pbi->do_loopfilter_inline = | |
| 1326 (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; | |
| 1327 if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { | |
| 1328 CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, | |
| 1329 vpx_memalign(32, sizeof(LFWorkerData))); | |
| 1330 pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; | |
| 1331 if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { | |
| 1332 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, | |
| 1333 "Loop filter thread creation failed"); | |
| 1334 } | |
| 1335 } | |
| 1336 | |
| 1337 init_macroblockd(cm, &pbi->mb); | 1329 init_macroblockd(cm, &pbi->mb); |
| 1338 | 1330 |
| 1339 if (cm->coding_use_prev_mi) | 1331 if (cm->coding_use_prev_mi) |
| 1340 set_prev_mi(cm); | 1332 set_prev_mi(cm); |
| 1341 else | 1333 else |
| 1342 cm->prev_mi = NULL; | 1334 cm->prev_mi = NULL; |
| 1343 | 1335 |
| 1344 setup_plane_dequants(cm, xd, cm->base_qindex); | 1336 setup_plane_dequants(cm, xd, cm->base_qindex); |
| 1345 vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); | 1337 vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); |
| 1346 | 1338 |
| 1347 cm->fc = cm->frame_contexts[cm->frame_context_idx]; | 1339 cm->fc = cm->frame_contexts[cm->frame_context_idx]; |
| 1348 vp9_zero(cm->counts); | 1340 vp9_zero(cm->counts); |
| 1349 vp9_zero(xd->dqcoeff); | 1341 vp9_zero(xd->dqcoeff); |
| 1350 | 1342 |
| 1351 xd->corrupted = 0; | 1343 xd->corrupted = 0; |
| 1352 new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); | 1344 new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); |
| 1353 | 1345 |
| 1354 // TODO(jzern): remove frame_parallel_decoding_mode restriction for | 1346 // TODO(jzern): remove frame_parallel_decoding_mode restriction for |
| 1355 // single-frame tile decoding. | 1347 // single-frame tile decoding. |
| 1356 if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && | 1348 if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && |
| 1357 cm->frame_parallel_decoding_mode) { | 1349 cm->frame_parallel_decoding_mode) { |
| 1358 *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); | 1350 *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); |
| 1351 // If multiple threads are used to decode tiles, then we use those threads |
| 1352 // to do parallel loopfiltering. |
| 1353 vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); |
| 1359 } else { | 1354 } else { |
| 1360 *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); | 1355 if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { |
| 1356 CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, |
| 1357 vpx_memalign(32, sizeof(LFWorkerData))); |
| 1358 pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; |
| 1359 if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { |
| 1360 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, |
| 1361 "Loop filter thread creation failed"); |
| 1362 } |
| 1363 } |
| 1364 *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end, |
| 1365 do_loopfilter_inline); |
| 1366 if (!do_loopfilter_inline) |
| 1367 vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0); |
| 1361 } | 1368 } |
| 1362 | 1369 |
| 1363 new_fb->corrupted |= xd->corrupted; | 1370 new_fb->corrupted |= xd->corrupted; |
| 1364 | 1371 |
| 1365 if (!pbi->decoded_key_frame) { | 1372 if (!pbi->decoded_key_frame) { |
| 1366 if (keyframe && !new_fb->corrupted) | 1373 if (keyframe && !new_fb->corrupted) |
| 1367 pbi->decoded_key_frame = 1; | 1374 pbi->decoded_key_frame = 1; |
| 1368 else | 1375 else |
| 1369 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 1376 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
| 1370 "A stream must start with a complete key frame"); | 1377 "A stream must start with a complete key frame"); |
| 1371 } | 1378 } |
| 1372 | 1379 |
| 1373 if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode && | 1380 if (!new_fb->corrupted) { |
| 1374 !new_fb->corrupted) { | 1381 if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { |
| 1375 vp9_adapt_coef_probs(cm); | 1382 vp9_adapt_coef_probs(cm); |
| 1376 | 1383 |
| 1377 if (!frame_is_intra_only(cm)) { | 1384 if (!frame_is_intra_only(cm)) { |
| 1378 vp9_adapt_mode_probs(cm); | 1385 vp9_adapt_mode_probs(cm); |
| 1379 vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); | 1386 vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); |
| 1387 } |
| 1388 } else { |
| 1389 debug_check_frame_counts(cm); |
| 1380 } | 1390 } |
| 1381 } else { | |
| 1382 debug_check_frame_counts(cm); | |
| 1383 } | 1391 } |
| 1384 | 1392 |
| 1385 if (cm->refresh_frame_context) | 1393 if (cm->refresh_frame_context) |
| 1386 cm->frame_contexts[cm->frame_context_idx] = cm->fc; | 1394 cm->frame_contexts[cm->frame_context_idx] = cm->fc; |
| 1387 | 1395 |
| 1388 return 0; | 1396 return 0; |
| 1389 } | 1397 } |
| OLD | NEW |