OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 658 matching lines...) |
669 | 669 |
670 if (width <= 0 || height <= 0) | 670 if (width <= 0 || height <= 0) |
671 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 671 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
672 "Referenced frame with invalid size"); | 672 "Referenced frame with invalid size"); |
673 | 673 |
674 apply_frame_size(cm, width, height); | 674 apply_frame_size(cm, width, height); |
675 setup_display_size(cm, rb); | 675 setup_display_size(cm, rb); |
676 } | 676 } |
677 | 677 |
678 static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, | 678 static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, |
679 vp9_reader *r) { | 679 int do_loopfilter_inline, vp9_reader *r) { |
680 const int num_threads = pbi->oxcf.max_threads; | 680 const int num_threads = pbi->max_threads; |
681 VP9_COMMON *const cm = &pbi->common; | 681 VP9_COMMON *const cm = &pbi->common; |
682 int mi_row, mi_col; | 682 int mi_row, mi_col; |
683 MACROBLOCKD *xd = &pbi->mb; | 683 MACROBLOCKD *xd = &pbi->mb; |
684 | 684 |
685 if (pbi->do_loopfilter_inline) { | 685 if (do_loopfilter_inline) { |
686 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 686 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
687 lf_data->frame_buffer = get_frame_new_buffer(cm); | 687 lf_data->frame_buffer = get_frame_new_buffer(cm); |
688 lf_data->cm = cm; | 688 lf_data->cm = cm; |
689 lf_data->xd = pbi->mb; | 689 lf_data->xd = pbi->mb; |
690 lf_data->stop = 0; | 690 lf_data->stop = 0; |
691 lf_data->y_only = 0; | 691 lf_data->y_only = 0; |
692 vp9_loop_filter_frame_init(cm, cm->lf.filter_level); | 692 vp9_loop_filter_frame_init(cm, cm->lf.filter_level); |
693 } | 693 } |
694 | 694 |
695 for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; | 695 for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; |
696 mi_row += MI_BLOCK_SIZE) { | 696 mi_row += MI_BLOCK_SIZE) { |
697 // For a SB there are 2 left contexts, each pertaining to a MB row within | 697 // For a SB there are 2 left contexts, each pertaining to a MB row within |
698 vp9_zero(xd->left_context); | 698 vp9_zero(xd->left_context); |
699 vp9_zero(xd->left_seg_context); | 699 vp9_zero(xd->left_seg_context); |
700 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; | 700 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; |
701 mi_col += MI_BLOCK_SIZE) { | 701 mi_col += MI_BLOCK_SIZE) { |
702 decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); | 702 decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); |
703 } | 703 } |
704 | 704 |
705 if (pbi->do_loopfilter_inline) { | 705 if (do_loopfilter_inline) { |
706 const int lf_start = mi_row - MI_BLOCK_SIZE; | 706 const int lf_start = mi_row - MI_BLOCK_SIZE; |
707 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 707 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
708 | 708 |
709 // delay the loopfilter by 1 macroblock row. | 709 // delay the loopfilter by 1 macroblock row. |
710 if (lf_start < 0) continue; | 710 if (lf_start < 0) continue; |
711 | 711 |
712 // decoding has completed: finish up the loop filter in this thread. | 712 // decoding has completed: finish up the loop filter in this thread. |
713 if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; | 713 if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; |
714 | 714 |
715 vp9_worker_sync(&pbi->lf_worker); | 715 vp9_worker_sync(&pbi->lf_worker); |
716 lf_data->start = lf_start; | 716 lf_data->start = lf_start; |
717 lf_data->stop = mi_row; | 717 lf_data->stop = mi_row; |
718 if (num_threads > 1) { | 718 if (num_threads > 1) { |
719 vp9_worker_launch(&pbi->lf_worker); | 719 vp9_worker_launch(&pbi->lf_worker); |
720 } else { | 720 } else { |
721 vp9_worker_execute(&pbi->lf_worker); | 721 vp9_worker_execute(&pbi->lf_worker); |
722 } | 722 } |
723 } | 723 } |
724 } | 724 } |
725 | 725 |
726 if (pbi->do_loopfilter_inline) { | 726 if (do_loopfilter_inline) { |
727 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; | 727 LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; |
728 | 728 |
729 vp9_worker_sync(&pbi->lf_worker); | 729 vp9_worker_sync(&pbi->lf_worker); |
730 lf_data->start = lf_data->stop; | 730 lf_data->start = lf_data->stop; |
731 lf_data->stop = cm->mi_rows; | 731 lf_data->stop = cm->mi_rows; |
732 vp9_worker_execute(&pbi->lf_worker); | 732 vp9_worker_execute(&pbi->lf_worker); |
733 } | 733 } |
734 } | 734 } |
735 | 735 |
736 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { | 736 static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { |
737 int min_log2_tile_cols, max_log2_tile_cols, max_ones; | 737 int min_log2_tile_cols, max_log2_tile_cols, max_ones; |
738 vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); | 738 vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); |
739 | 739 |
740 // columns | 740 // columns |
741 max_ones = max_log2_tile_cols - min_log2_tile_cols; | 741 max_ones = max_log2_tile_cols - min_log2_tile_cols; |
742 cm->log2_tile_cols = min_log2_tile_cols; | 742 cm->log2_tile_cols = min_log2_tile_cols; |
743 while (max_ones-- && vp9_rb_read_bit(rb)) | 743 while (max_ones-- && vp9_rb_read_bit(rb)) |
744 cm->log2_tile_cols++; | 744 cm->log2_tile_cols++; |
745 | 745 |
746 // rows | 746 // rows |
747 cm->log2_tile_rows = vp9_rb_read_bit(rb); | 747 cm->log2_tile_rows = vp9_rb_read_bit(rb); |
748 if (cm->log2_tile_rows) | 748 if (cm->log2_tile_rows) |
749 cm->log2_tile_rows += vp9_rb_read_bit(rb); | 749 cm->log2_tile_rows += vp9_rb_read_bit(rb); |
750 } | 750 } |
751 | 751 |
| 752 typedef struct TileBuffer { |
| 753 const uint8_t *data; |
| 754 size_t size; |
| 755 int col; // only used with multi-threaded decoding |
| 756 } TileBuffer; |
| 757 |
752 // Reads the next tile returning its size and adjusting '*data' accordingly | 758 // Reads the next tile returning its size and adjusting '*data' accordingly |
753 // based on 'is_last'. | 759 // based on 'is_last'. |
754 static size_t get_tile(const uint8_t *const data_end, | 760 static void get_tile_buffer(const uint8_t *const data_end, |
755 int is_last, | 761 int is_last, |
756 struct vpx_internal_error_info *error_info, | 762 struct vpx_internal_error_info *error_info, |
757 const uint8_t **data, | 763 const uint8_t **data, |
758 vpx_decrypt_cb decrypt_cb, | 764 vpx_decrypt_cb decrypt_cb, void *decrypt_state, |
759 void *decrypt_state) { | 765 TileBuffer *buf) { |
760 size_t size; | 766 size_t size; |
761 | 767 |
762 if (!is_last) { | 768 if (!is_last) { |
763 if (!read_is_valid(*data, 4, data_end)) | 769 if (!read_is_valid(*data, 4, data_end)) |
764 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, | 770 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, |
765 "Truncated packet or corrupt tile length"); | 771 "Truncated packet or corrupt tile length"); |
766 | 772 |
767 if (decrypt_cb) { | 773 if (decrypt_cb) { |
768 uint8_t be_data[4]; | 774 uint8_t be_data[4]; |
769 decrypt_cb(decrypt_state, *data, be_data, 4); | 775 decrypt_cb(decrypt_state, *data, be_data, 4); |
770 size = mem_get_be32(be_data); | 776 size = mem_get_be32(be_data); |
771 } else { | 777 } else { |
772 size = mem_get_be32(*data); | 778 size = mem_get_be32(*data); |
773 } | 779 } |
774 *data += 4; | 780 *data += 4; |
775 | 781 |
776 if (size > (size_t)(data_end - *data)) | 782 if (size > (size_t)(data_end - *data)) |
777 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, | 783 vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, |
778 "Truncated packet or corrupt tile size"); | 784 "Truncated packet or corrupt tile size"); |
779 } else { | 785 } else { |
780 size = data_end - *data; | 786 size = data_end - *data; |
781 } | 787 } |
782 return size; | 788 |
| 789 buf->data = *data; |
| 790 buf->size = size; |
| 791 |
| 792 *data += size; |
783 } | 793 } |
784 | 794 |
785 typedef struct TileBuffer { | 795 static void get_tile_buffers(VP9Decoder *pbi, |
786 const uint8_t *data; | 796 const uint8_t *data, const uint8_t *data_end, |
787 size_t size; | 797 int tile_cols, int tile_rows, |
788 int col; // only used with multi-threaded decoding | 798 TileBuffer (*tile_buffers)[1 << 6]) { |
789 } TileBuffer; | 799 int r, c; |
| 800 |
| 801 for (r = 0; r < tile_rows; ++r) { |
| 802 for (c = 0; c < tile_cols; ++c) { |
| 803 const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); |
| 804 TileBuffer *const buf = &tile_buffers[r][c]; |
| 805 buf->col = c; |
| 806 get_tile_buffer(data_end, is_last, &pbi->common.error, &data, |
| 807 pbi->decrypt_cb, pbi->decrypt_state, buf); |
| 808 } |
| 809 } |
| 810 } |
790 | 811 |
791 static const uint8_t *decode_tiles(VP9Decoder *pbi, | 812 static const uint8_t *decode_tiles(VP9Decoder *pbi, |
792 const uint8_t *data, | 813 const uint8_t *data, |
793 const uint8_t *data_end) { | 814 const uint8_t *data_end, |
| 815 int do_loopfilter_inline) { |
794 VP9_COMMON *const cm = &pbi->common; | 816 VP9_COMMON *const cm = &pbi->common; |
795 const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); | 817 const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
796 const int tile_cols = 1 << cm->log2_tile_cols; | 818 const int tile_cols = 1 << cm->log2_tile_cols; |
797 const int tile_rows = 1 << cm->log2_tile_rows; | 819 const int tile_rows = 1 << cm->log2_tile_rows; |
798 TileBuffer tile_buffers[4][1 << 6]; | 820 TileBuffer tile_buffers[4][1 << 6]; |
799 int tile_row, tile_col; | 821 int tile_row, tile_col; |
800 const uint8_t *end = NULL; | 822 const uint8_t *end = NULL; |
801 vp9_reader r; | 823 vp9_reader r; |
802 | 824 |
803 assert(tile_rows <= 4); | 825 assert(tile_rows <= 4); |
804 assert(tile_cols <= (1 << 6)); | 826 assert(tile_cols <= (1 << 6)); |
805 | 827 |
806 // Note: this memset assumes above_context[0], [1] and [2] | 828 // Note: this memset assumes above_context[0], [1] and [2] |
807 // are allocated as part of the same buffer. | 829 // are allocated as part of the same buffer. |
808 vpx_memset(cm->above_context, 0, | 830 vpx_memset(cm->above_context, 0, |
809 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); | 831 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); |
810 | 832 |
811 vpx_memset(cm->above_seg_context, 0, | 833 vpx_memset(cm->above_seg_context, 0, |
812 sizeof(*cm->above_seg_context) * aligned_cols); | 834 sizeof(*cm->above_seg_context) * aligned_cols); |
813 | 835 |
814 // Load tile data into tile_buffers | 836 get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); |
815 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { | |
816 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { | |
817 const int last_tile = tile_row == tile_rows - 1 && | |
818 tile_col == tile_cols - 1; | |
819 const size_t size = get_tile(data_end, last_tile, &cm->error, &data, | |
820 pbi->decrypt_cb, pbi->decrypt_state); | |
821 TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; | |
822 buf->data = data; | |
823 buf->size = size; | |
824 data += size; | |
825 } | |
826 } | |
827 | 837 |
828 // Decode tiles using data from tile_buffers | 838 // Decode tiles using data from tile_buffers |
829 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { | 839 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { |
830 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { | 840 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { |
831 const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1 | 841 const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col; |
832 : tile_col; | |
833 const int last_tile = tile_row == tile_rows - 1 && | 842 const int last_tile = tile_row == tile_rows - 1 && |
834 col == tile_cols - 1; | 843 col == tile_cols - 1; |
835 const TileBuffer *const buf = &tile_buffers[tile_row][col]; | 844 const TileBuffer *const buf = &tile_buffers[tile_row][col]; |
836 TileInfo tile; | 845 TileInfo tile; |
837 | 846 |
838 vp9_tile_init(&tile, cm, tile_row, col); | 847 vp9_tile_init(&tile, cm, tile_row, col); |
839 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r, | 848 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r, |
840 pbi->decrypt_cb, pbi->decrypt_state); | 849 pbi->decrypt_cb, pbi->decrypt_state); |
841 decode_tile(pbi, &tile, &r); | 850 decode_tile(pbi, &tile, do_loopfilter_inline, &r); |
842 | 851 |
843 if (last_tile) | 852 if (last_tile) |
844 end = vp9_reader_find_end(&r); | 853 end = vp9_reader_find_end(&r); |
845 } | 854 } |
846 } | 855 } |
847 | 856 |
848 return end; | 857 return end; |
849 } | 858 } |
850 | 859 |
851 static int tile_worker_hook(void *arg1, void *arg2) { | 860 static int tile_worker_hook(void *arg1, void *arg2) { |
(...skipping 28 matching lines...) |
880 } | 889 } |
881 | 890 |
882 static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, | 891 static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, |
883 const uint8_t *data, | 892 const uint8_t *data, |
884 const uint8_t *data_end) { | 893 const uint8_t *data_end) { |
885 VP9_COMMON *const cm = &pbi->common; | 894 VP9_COMMON *const cm = &pbi->common; |
886 const uint8_t *bit_reader_end = NULL; | 895 const uint8_t *bit_reader_end = NULL; |
887 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); | 896 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); |
888 const int tile_cols = 1 << cm->log2_tile_cols; | 897 const int tile_cols = 1 << cm->log2_tile_cols; |
889 const int tile_rows = 1 << cm->log2_tile_rows; | 898 const int tile_rows = 1 << cm->log2_tile_rows; |
890 const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); | 899 const int num_workers = MIN(pbi->max_threads & ~1, tile_cols); |
891 TileBuffer tile_buffers[1 << 6]; | 900 TileBuffer tile_buffers[1][1 << 6]; |
892 int n; | 901 int n; |
893 int final_worker = -1; | 902 int final_worker = -1; |
894 | 903 |
895 assert(tile_cols <= (1 << 6)); | 904 assert(tile_cols <= (1 << 6)); |
896 assert(tile_rows == 1); | 905 assert(tile_rows == 1); |
897 (void)tile_rows; | 906 (void)tile_rows; |
898 | 907 |
899 // TODO(jzern): See if we can remove the restriction of passing in max | 908 // TODO(jzern): See if we can remove the restriction of passing in max |
900 // threads to the decoder. | 909 // threads to the decoder. |
901 if (pbi->num_tile_workers == 0) { | 910 if (pbi->num_tile_workers == 0) { |
902 const int num_threads = pbi->oxcf.max_threads & ~1; | 911 const int num_threads = pbi->max_threads & ~1; |
903 int i; | 912 int i; |
904 // TODO(jzern): Allocate one less worker, as in the current code we only | 913 // TODO(jzern): Allocate one less worker, as in the current code we only |
905 // use num_threads - 1 workers. | 914 // use num_threads - 1 workers. |
906 CHECK_MEM_ERROR(cm, pbi->tile_workers, | 915 CHECK_MEM_ERROR(cm, pbi->tile_workers, |
907 vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); | 916 vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); |
908 for (i = 0; i < num_threads; ++i) { | 917 for (i = 0; i < num_threads; ++i) { |
909 VP9Worker *const worker = &pbi->tile_workers[i]; | 918 VP9Worker *const worker = &pbi->tile_workers[i]; |
910 ++pbi->num_tile_workers; | 919 ++pbi->num_tile_workers; |
911 | 920 |
912 vp9_worker_init(worker); | 921 vp9_worker_init(worker); |
(...skipping 13 matching lines...) |
926 } | 935 } |
927 | 936 |
928 // Note: this memset assumes above_context[0], [1] and [2] | 937 // Note: this memset assumes above_context[0], [1] and [2] |
929 // are allocated as part of the same buffer. | 938 // are allocated as part of the same buffer. |
930 vpx_memset(cm->above_context, 0, | 939 vpx_memset(cm->above_context, 0, |
931 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); | 940 sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); |
932 vpx_memset(cm->above_seg_context, 0, | 941 vpx_memset(cm->above_seg_context, 0, |
933 sizeof(*cm->above_seg_context) * aligned_mi_cols); | 942 sizeof(*cm->above_seg_context) * aligned_mi_cols); |
934 | 943 |
935 // Load tile data into tile_buffers | 944 // Load tile data into tile_buffers |
936 for (n = 0; n < tile_cols; ++n) { | 945 get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); |
937 const size_t size = | |
938 get_tile(data_end, n == tile_cols - 1, &cm->error, &data, | |
939 pbi->decrypt_cb, pbi->decrypt_state); | |
940 TileBuffer *const buf = &tile_buffers[n]; | |
941 buf->data = data; | |
942 buf->size = size; | |
943 buf->col = n; | |
944 data += size; | |
945 } | |
946 | 946 |
947 // Sort the buffers based on size in descending order. | 947 // Sort the buffers based on size in descending order. |
948 qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers); | 948 qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]), |
| 949 compare_tile_buffers); |
949 | 950 |
950 // Rearrange the tile buffers such that per-tile group the largest, and | 951 // Rearrange the tile buffers such that per-tile group the largest, and |
951 // presumably the most difficult, tile will be decoded in the main thread. | 952 // presumably the most difficult, tile will be decoded in the main thread. |
952 // This should help minimize the number of instances where the main thread is | 953 // This should help minimize the number of instances where the main thread is |
953 // waiting for a worker to complete. | 954 // waiting for a worker to complete. |
954 { | 955 { |
955 int group_start = 0; | 956 int group_start = 0; |
956 while (group_start < tile_cols) { | 957 while (group_start < tile_cols) { |
957 const TileBuffer largest = tile_buffers[group_start]; | 958 const TileBuffer largest = tile_buffers[0][group_start]; |
958 const int group_end = MIN(group_start + num_workers, tile_cols) - 1; | 959 const int group_end = MIN(group_start + num_workers, tile_cols) - 1; |
959 memmove(tile_buffers + group_start, tile_buffers + group_start + 1, | 960 memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1, |
960 (group_end - group_start) * sizeof(tile_buffers[0])); | 961 (group_end - group_start) * sizeof(tile_buffers[0][0])); |
961 tile_buffers[group_end] = largest; | 962 tile_buffers[0][group_end] = largest; |
962 group_start = group_end + 1; | 963 group_start = group_end + 1; |
963 } | 964 } |
964 } | 965 } |
965 | 966 |
966 n = 0; | 967 n = 0; |
967 while (n < tile_cols) { | 968 while (n < tile_cols) { |
968 int i; | 969 int i; |
969 for (i = 0; i < num_workers && n < tile_cols; ++i) { | 970 for (i = 0; i < num_workers && n < tile_cols; ++i) { |
970 VP9Worker *const worker = &pbi->tile_workers[i]; | 971 VP9Worker *const worker = &pbi->tile_workers[i]; |
971 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; | 972 TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; |
972 TileInfo *const tile = (TileInfo*)worker->data2; | 973 TileInfo *const tile = (TileInfo*)worker->data2; |
973 TileBuffer *const buf = &tile_buffers[n]; | 974 TileBuffer *const buf = &tile_buffers[0][n]; |
974 | 975 |
975 tile_data->cm = cm; | 976 tile_data->cm = cm; |
976 tile_data->xd = pbi->mb; | 977 tile_data->xd = pbi->mb; |
977 tile_data->xd.corrupted = 0; | 978 tile_data->xd.corrupted = 0; |
978 vp9_tile_init(tile, tile_data->cm, 0, buf->col); | 979 vp9_tile_init(tile, tile_data->cm, 0, buf->col); |
979 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, | 980 setup_token_decoder(buf->data, data_end, buf->size, &cm->error, |
980 &tile_data->bit_reader, pbi->decrypt_cb, | 981 &tile_data->bit_reader, pbi->decrypt_cb, |
981 pbi->decrypt_state); | 982 pbi->decrypt_state); |
982 init_macroblockd(cm, &tile_data->xd); | 983 init_macroblockd(cm, &tile_data->xd); |
983 vp9_zero(tile_data->xd.dqcoeff); | 984 vp9_zero(tile_data->xd.dqcoeff); |
(...skipping 287 matching lines...) |
1271 assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); | 1272 assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); |
1272 } | 1273 } |
1273 #endif // NDEBUG | 1274 #endif // NDEBUG |
1274 | 1275 |
1275 static struct vp9_read_bit_buffer* init_read_bit_buffer( | 1276 static struct vp9_read_bit_buffer* init_read_bit_buffer( |
1276 VP9Decoder *pbi, | 1277 VP9Decoder *pbi, |
1277 struct vp9_read_bit_buffer *rb, | 1278 struct vp9_read_bit_buffer *rb, |
1278 const uint8_t *data, | 1279 const uint8_t *data, |
1279 const uint8_t *data_end, | 1280 const uint8_t *data_end, |
1280 uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { | 1281 uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) { |
| 1282 vp9_zero(*rb); |
1281 rb->bit_offset = 0; | 1283 rb->bit_offset = 0; |
1282 rb->error_handler = error_handler; | 1284 rb->error_handler = error_handler; |
1283 rb->error_handler_data = &pbi->common; | 1285 rb->error_handler_data = &pbi->common; |
1284 if (pbi->decrypt_cb) { | 1286 if (pbi->decrypt_cb) { |
1285 const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); | 1287 const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data); |
1286 pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); | 1288 pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); |
1287 rb->bit_buffer = clear_data; | 1289 rb->bit_buffer = clear_data; |
1288 rb->bit_buffer_end = clear_data + n; | 1290 rb->bit_buffer_end = clear_data + n; |
1289 } else { | 1291 } else { |
1290 rb->bit_buffer = data; | 1292 rb->bit_buffer = data; |
1291 rb->bit_buffer_end = data_end; | 1293 rb->bit_buffer_end = data_end; |
1292 } | 1294 } |
1293 return rb; | 1295 return rb; |
1294 } | 1296 } |
1295 | 1297 |
1296 int vp9_decode_frame(VP9Decoder *pbi, | 1298 int vp9_decode_frame(VP9Decoder *pbi, |
1297 const uint8_t *data, const uint8_t *data_end, | 1299 const uint8_t *data, const uint8_t *data_end, |
1298 const uint8_t **p_data_end) { | 1300 const uint8_t **p_data_end) { |
1299 VP9_COMMON *const cm = &pbi->common; | 1301 VP9_COMMON *const cm = &pbi->common; |
1300 MACROBLOCKD *const xd = &pbi->mb; | 1302 MACROBLOCKD *const xd = &pbi->mb; |
1301 struct vp9_read_bit_buffer rb = { 0 }; | 1303 struct vp9_read_bit_buffer rb; |
1302 uint8_t clear_data[MAX_VP9_HEADER_SIZE]; | 1304 uint8_t clear_data[MAX_VP9_HEADER_SIZE]; |
1303 const size_t first_partition_size = read_uncompressed_header(pbi, | 1305 const size_t first_partition_size = read_uncompressed_header(pbi, |
1304 init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); | 1306 init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); |
1305 const int keyframe = cm->frame_type == KEY_FRAME; | 1307 const int keyframe = cm->frame_type == KEY_FRAME; |
1306 const int tile_rows = 1 << cm->log2_tile_rows; | 1308 const int tile_rows = 1 << cm->log2_tile_rows; |
1307 const int tile_cols = 1 << cm->log2_tile_cols; | 1309 const int tile_cols = 1 << cm->log2_tile_cols; |
1308 YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); | 1310 YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); |
| 1311 const int do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 && |
| 1312 cm->lf.filter_level; |
1309 xd->cur_buf = new_fb; | 1313 xd->cur_buf = new_fb; |
1310 | 1314 |
1311 if (!first_partition_size) { | 1315 if (!first_partition_size) { |
1312 // showing a frame directly | 1316 // showing a frame directly |
1313 *p_data_end = data + 1; | 1317 *p_data_end = data + 1; |
1314 return 0; | 1318 return 0; |
1315 } | 1319 } |
1316 | 1320 |
1317 if (!pbi->decoded_key_frame && !keyframe) | 1321 if (!pbi->decoded_key_frame && !keyframe) |
1318 return -1; | 1322 return -1; |
1319 | 1323 |
1320 data += vp9_rb_bytes_read(&rb); | 1324 data += vp9_rb_bytes_read(&rb); |
1321 if (!read_is_valid(data, first_partition_size, data_end)) | 1325 if (!read_is_valid(data, first_partition_size, data_end)) |
1322 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 1326 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
1323 "Truncated packet or corrupt header length"); | 1327 "Truncated packet or corrupt header length"); |
1324 | 1328 |
1325 pbi->do_loopfilter_inline = | |
1326 (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; | |
1327 if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { | |
1328 CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, | |
1329 vpx_memalign(32, sizeof(LFWorkerData))); | |
1330 pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; | |
1331 if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { | |
1332 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, | |
1333 "Loop filter thread creation failed"); | |
1334 } | |
1335 } | |
1336 | |
1337 init_macroblockd(cm, &pbi->mb); | 1329 init_macroblockd(cm, &pbi->mb); |
1338 | 1330 |
1339 if (cm->coding_use_prev_mi) | 1331 if (cm->coding_use_prev_mi) |
1340 set_prev_mi(cm); | 1332 set_prev_mi(cm); |
1341 else | 1333 else |
1342 cm->prev_mi = NULL; | 1334 cm->prev_mi = NULL; |
1343 | 1335 |
1344 setup_plane_dequants(cm, xd, cm->base_qindex); | 1336 setup_plane_dequants(cm, xd, cm->base_qindex); |
1345 vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); | 1337 vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); |
1346 | 1338 |
1347 cm->fc = cm->frame_contexts[cm->frame_context_idx]; | 1339 cm->fc = cm->frame_contexts[cm->frame_context_idx]; |
1348 vp9_zero(cm->counts); | 1340 vp9_zero(cm->counts); |
1349 vp9_zero(xd->dqcoeff); | 1341 vp9_zero(xd->dqcoeff); |
1350 | 1342 |
1351 xd->corrupted = 0; | 1343 xd->corrupted = 0; |
1352 new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); | 1344 new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); |
1353 | 1345 |
1354 // TODO(jzern): remove frame_parallel_decoding_mode restriction for | 1346 // TODO(jzern): remove frame_parallel_decoding_mode restriction for |
1355 // single-frame tile decoding. | 1347 // single-frame tile decoding. |
1356 if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && | 1348 if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && |
1357 cm->frame_parallel_decoding_mode) { | 1349 cm->frame_parallel_decoding_mode) { |
1358 *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); | 1350 *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); |
| 1351 // If multiple threads are used to decode tiles, then we use those threads |
| 1352 // to do parallel loopfiltering. |
| 1353 vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0); |
1359 } else { | 1354 } else { |
1360 *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); | 1355 if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { |
| 1356 CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, |
| 1357 vpx_memalign(32, sizeof(LFWorkerData))); |
| 1358 pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; |
| 1359 if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { |
| 1360 vpx_internal_error(&cm->error, VPX_CODEC_ERROR, |
| 1361 "Loop filter thread creation failed"); |
| 1362 } |
| 1363 } |
| 1364 *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end, |
| 1365 do_loopfilter_inline); |
| 1366 if (!do_loopfilter_inline) |
| 1367 vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0); |
1361 } | 1368 } |
1362 | 1369 |
1363 new_fb->corrupted |= xd->corrupted; | 1370 new_fb->corrupted |= xd->corrupted; |
1364 | 1371 |
1365 if (!pbi->decoded_key_frame) { | 1372 if (!pbi->decoded_key_frame) { |
1366 if (keyframe && !new_fb->corrupted) | 1373 if (keyframe && !new_fb->corrupted) |
1367 pbi->decoded_key_frame = 1; | 1374 pbi->decoded_key_frame = 1; |
1368 else | 1375 else |
1369 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, | 1376 vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, |
1370 "A stream must start with a complete key frame"); | 1377 "A stream must start with a complete key frame"); |
1371 } | 1378 } |
1372 | 1379 |
1373 if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode && | 1380 if (!new_fb->corrupted) { |
1374 !new_fb->corrupted) { | 1381 if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { |
1375 vp9_adapt_coef_probs(cm); | 1382 vp9_adapt_coef_probs(cm); |
1376 | 1383 |
1377 if (!frame_is_intra_only(cm)) { | 1384 if (!frame_is_intra_only(cm)) { |
1378 vp9_adapt_mode_probs(cm); | 1385 vp9_adapt_mode_probs(cm); |
1379 vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); | 1386 vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); |
| 1387 } |
| 1388 } else { |
| 1389 debug_check_frame_counts(cm); |
1380 } | 1390 } |
1381 } else { | |
1382 debug_check_frame_counts(cm); | |
1383 } | 1391 } |
1384 | 1392 |
1385 if (cm->refresh_frame_context) | 1393 if (cm->refresh_frame_context) |
1386 cm->frame_contexts[cm->frame_context_idx] = cm->fc; | 1394 cm->frame_contexts[cm->frame_context_idx] = cm->fc; |
1387 | 1395 |
1388 return 0; | 1396 return 0; |
1389 } | 1397 } |
OLD | NEW |