Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "cc/resources/video_resource_updater.h" | 5 #include "cc/resources/video_resource_updater.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| (...skipping 66 matching lines...) | |
| 77 case media::PIXEL_FORMAT_RGB24: | 77 case media::PIXEL_FORMAT_RGB24: |
| 78 case media::PIXEL_FORMAT_RGB32: | 78 case media::PIXEL_FORMAT_RGB32: |
| 79 case media::PIXEL_FORMAT_MJPEG: | 79 case media::PIXEL_FORMAT_MJPEG: |
| 80 case media::PIXEL_FORMAT_MT21: | 80 case media::PIXEL_FORMAT_MT21: |
| 81 case media::PIXEL_FORMAT_YUV420P9: | 81 case media::PIXEL_FORMAT_YUV420P9: |
| 82 case media::PIXEL_FORMAT_YUV422P9: | 82 case media::PIXEL_FORMAT_YUV422P9: |
| 83 case media::PIXEL_FORMAT_YUV444P9: | 83 case media::PIXEL_FORMAT_YUV444P9: |
| 84 case media::PIXEL_FORMAT_YUV420P10: | 84 case media::PIXEL_FORMAT_YUV420P10: |
| 85 case media::PIXEL_FORMAT_YUV422P10: | 85 case media::PIXEL_FORMAT_YUV422P10: |
| 86 case media::PIXEL_FORMAT_YUV444P10: | 86 case media::PIXEL_FORMAT_YUV444P10: |
| 87 case media::PIXEL_FORMAT_YUV420P12: | |
| 88 case media::PIXEL_FORMAT_YUV422P12: | |
| 89 case media::PIXEL_FORMAT_YUV444P12: | |
| 87 case media::PIXEL_FORMAT_UNKNOWN: | 90 case media::PIXEL_FORMAT_UNKNOWN: |
| 88 break; | 91 break; |
| 89 } | 92 } |
| 90 return VideoFrameExternalResources::NONE; | 93 return VideoFrameExternalResources::NONE; |
| 91 } | 94 } |
| 92 | 95 |
| 93 class SyncTokenClientImpl : public media::VideoFrame::SyncTokenClient { | 96 class SyncTokenClientImpl : public media::VideoFrame::SyncTokenClient { |
| 94 public: | 97 public: |
| 95 SyncTokenClientImpl(gpu::gles2::GLES2Interface* gl, | 98 SyncTokenClientImpl(gpu::gles2::GLES2Interface* gl, |
| 96 const gpu::SyncToken& sync_token) | 99 const gpu::SyncToken& sync_token) |
| (...skipping 185 matching lines...) | |
| 282 if (software_compositor) | 285 if (software_compositor) |
| 283 return coded_size; | 286 return coded_size; |
| 284 | 287 |
| 285 int plane_width = media::VideoFrame::Columns( | 288 int plane_width = media::VideoFrame::Columns( |
| 286 plane_index, input_frame->format(), coded_size.width()); | 289 plane_index, input_frame->format(), coded_size.width()); |
| 287 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), | 290 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), |
| 288 coded_size.height()); | 291 coded_size.height()); |
| 289 return gfx::Size(plane_width, plane_height); | 292 return gfx::Size(plane_width, plane_height); |
| 290 } | 293 } |
| 291 | 294 |
| 295 void VideoResourceUpdater::MakeHalfFloats(const uint16_t* src, | |
| 296 int bits_per_channel, | |
| 297 size_t num, | |
| 298 uint16_t* dst) { | |
| 299 // TODO(hubbe): Make AVX and neon versions of this code. | |
| 300 | |
| 301 // This magic constant is 2^-112. Multiplying by this | |
| 302 // is the same as subtracting 112 from the exponent, which | |
| 303 // is the difference in exponent bias between 32-bit and | |
| 304 // 16-bit floats. Once we've done this subtraction, we can | |
| 305 // simply extract the low bits of the exponent and the high | |
| 306 // bits of the mantissa from our float and we're done. | |
| 307 float mult = 1.9259299444e-34f / ((1 << bits_per_channel) - 1); | |
| 308 for (size_t i = 0; i < num; i++) { | |
| 309 float value = src[i] * mult; | |
| 310 dst[i] = (*(uint32_t*)&value) >> 13; | |
| 311 } | |
| 312 } | |
| 313 | |
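A quick way to sanity-check the magic constant in `MakeHalfFloats` above is to run the conversion on a single sample by hand. The snippet below is a standalone illustrative sketch, not part of this CL: it assumes a 12-bit source (the new `YUV*P12` formats), uses `memcpy` instead of the pointer cast purely to keep the sketch self-contained and aliasing-safe, and for the maximum 12-bit sample should print `0x3C00`, the half-float bit pattern for 1.0.

```cpp
// Standalone sketch (illustrative, not part of this CL): verify that
// multiplying by 2^-112 and shifting right by 13 turns a normalized
// integer sample into its half-float encoding. 112 is the difference
// between the float32 exponent bias (127) and the float16 bias (15);
// 13 is the difference in mantissa widths (23 - 10).
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const int bits_per_channel = 12;                       // e.g. PIXEL_FORMAT_YUV420P12
  const float mult = 1.9259299444e-34f /                 // 2^-112
                     ((1 << bits_per_channel) - 1);
  const uint16_t src = (1 << bits_per_channel) - 1;      // maximum 12-bit sample
  const float value = src * mult;                        // rebiased float32
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));              // type-pun without aliasing UB
  const uint16_t half = static_cast<uint16_t>(bits >> 13);  // keep exponent + top 10 mantissa bits
  std::printf("half bits: 0x%04X\n", static_cast<unsigned>(half));  // expect 0x3C00 == 1.0
  return 0;
}
```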
| 292 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( | 314 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( |
| 293 scoped_refptr<media::VideoFrame> video_frame) { | 315 scoped_refptr<media::VideoFrame> video_frame) { |
| 294 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); | 316 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); |
| 295 const media::VideoPixelFormat input_frame_format = video_frame->format(); | 317 const media::VideoPixelFormat input_frame_format = video_frame->format(); |
| 296 | 318 |
| 297 // TODO(hubbe): Make this a video frame method. | 319 // TODO(hubbe): Make this a video frame method. |
| 298 int bits_per_channel = 0; | 320 int bits_per_channel = 0; |
| 299 switch (input_frame_format) { | 321 switch (input_frame_format) { |
| 300 case media::PIXEL_FORMAT_UNKNOWN: | 322 case media::PIXEL_FORMAT_UNKNOWN: |
| 301 NOTREACHED(); | 323 NOTREACHED(); |
| (...skipping 18 matching lines...) | |
| 320 case media::PIXEL_FORMAT_YUV420P9: | 342 case media::PIXEL_FORMAT_YUV420P9: |
| 321 case media::PIXEL_FORMAT_YUV422P9: | 343 case media::PIXEL_FORMAT_YUV422P9: |
| 322 case media::PIXEL_FORMAT_YUV444P9: | 344 case media::PIXEL_FORMAT_YUV444P9: |
| 323 bits_per_channel = 9; | 345 bits_per_channel = 9; |
| 324 break; | 346 break; |
| 325 case media::PIXEL_FORMAT_YUV420P10: | 347 case media::PIXEL_FORMAT_YUV420P10: |
| 326 case media::PIXEL_FORMAT_YUV422P10: | 348 case media::PIXEL_FORMAT_YUV422P10: |
| 327 case media::PIXEL_FORMAT_YUV444P10: | 349 case media::PIXEL_FORMAT_YUV444P10: |
| 328 bits_per_channel = 10; | 350 bits_per_channel = 10; |
| 329 break; | 351 break; |
| 352 case media::PIXEL_FORMAT_YUV420P12: | |
| 353 case media::PIXEL_FORMAT_YUV422P12: | |
| 354 case media::PIXEL_FORMAT_YUV444P12: | |
| 355 bits_per_channel = 12; | |
| 356 break; | |
| 330 } | 357 } |
| 331 | 358 |
| 332 // Only YUV software video frames are supported. | 359 // Only YUV software video frames are supported. |
| 333 if (!media::IsYuvPlanar(input_frame_format)) { | 360 if (!media::IsYuvPlanar(input_frame_format)) { |
| 334 NOTREACHED() << media::VideoPixelFormatToString(input_frame_format); | 361 NOTREACHED() << media::VideoPixelFormatToString(input_frame_format); |
| 335 return VideoFrameExternalResources(); | 362 return VideoFrameExternalResources(); |
| 336 } | 363 } |
| 337 | 364 |
| 338 const bool software_compositor = context_provider_ == NULL; | 365 const bool software_compositor = context_provider_ == NULL; |
| 339 | 366 |
| (...skipping 135 matching lines...) | |
| 475 // Assuming that GL_UNPACK_ALIGNMENT has not changed from default. | 502 // Assuming that GL_UNPACK_ALIGNMENT has not changed from default. |
| 476 size_t upload_image_stride = | 503 size_t upload_image_stride = |
| 477 MathUtil::CheckedRoundUp<size_t>(bytes_per_row, 4u); | 504 MathUtil::CheckedRoundUp<size_t>(bytes_per_row, 4u); |
| 478 | 505 |
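The stride computation above just pads each row out to the default 4-byte `GL_UNPACK_ALIGNMENT`. A minimal standalone sketch (hypothetical plane widths, not from this CL) showing the effect of the rounding:

```cpp
// Standalone sketch (illustrative, not part of this CL): the upload
// stride is bytes_per_row rounded up to a multiple of 4, so rows that
// are not already 4-byte multiples get padded.
#include <cstddef>
#include <cstdio>

static size_t RoundUpTo4(size_t n) {
  return (n + 3) & ~static_cast<size_t>(3);
}

int main() {
  // e.g. a 361-pixel-wide 8-bit plane needs padding, a 640-pixel one does not.
  std::printf("%zu -> %zu\n", static_cast<size_t>(361), RoundUpTo4(361));  // 361 -> 364
  std::printf("%zu -> %zu\n", static_cast<size_t>(640), RoundUpTo4(640));  // 640 -> 640
  return 0;
}
```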
| 479 bool needs_conversion = false; | 506 bool needs_conversion = false; |
| 480 int shift = 0; | 507 int shift = 0; |
| 481 | 508 |
| 482 // LUMINANCE_F16 uses half-floats, so we always need a conversion step. | 509 // LUMINANCE_F16 uses half-floats, so we always need a conversion step. |
| 483 if (plane_resource.resource_format() == LUMINANCE_F16) { | 510 if (plane_resource.resource_format() == LUMINANCE_F16) { |
| 484 needs_conversion = true; | 511 needs_conversion = true; |
| 485 // Note that the current method of converting integers to half-floats | |
| 486 // stops working if you have more than 10 bits of data. | |
| 487 DCHECK_LE(bits_per_channel, 10); | |
| 488 } else if (bits_per_channel > 8) { | 512 } else if (bits_per_channel > 8) { |
| 489 // If bits_per_channel > 8 and we can't use LUMINANCE_F16, we need to | 513 // If bits_per_channel > 8 and we can't use LUMINANCE_F16, we need to |
| 490 // shift the data down and create an 8-bit texture. | 514 // shift the data down and create an 8-bit texture. |
| 491 needs_conversion = true; | 515 needs_conversion = true; |
| 492 shift = bits_per_channel - 8; | 516 shift = bits_per_channel - 8; |
| 493 } | 517 } |
| 494 const uint8_t* pixels; | 518 const uint8_t* pixels; |
| 495 if (static_cast<int>(upload_image_stride) == video_stride_bytes && | 519 if (static_cast<int>(upload_image_stride) == video_stride_bytes && |
| 496 !needs_conversion) { | 520 !needs_conversion) { |
| 497 pixels = video_frame->data(i); | 521 pixels = video_frame->data(i); |
| 498 } else { | 522 } else { |
| 499 // Avoid malloc for each frame/plane if possible. | 523 // Avoid malloc for each frame/plane if possible. |
| 500 size_t needed_size = | 524 size_t needed_size = |
| 501 upload_image_stride * resource_size_pixels.height(); | 525 upload_image_stride * resource_size_pixels.height(); |
| 502 if (upload_pixels_.size() < needed_size) | 526 if (upload_pixels_.size() < needed_size) |
| 503 upload_pixels_.resize(needed_size); | 527 upload_pixels_.resize(needed_size); |
| 504 | 528 |
| 505 for (int row = 0; row < resource_size_pixels.height(); ++row) { | 529 for (int row = 0; row < resource_size_pixels.height(); ++row) { |
| 506 if (plane_resource.resource_format() == LUMINANCE_F16) { | 530 if (plane_resource.resource_format() == LUMINANCE_F16) { |
| 507 uint16_t* dst = reinterpret_cast<uint16_t*>( | 531 uint16_t* dst = reinterpret_cast<uint16_t*>( |
| 508 &upload_pixels_[upload_image_stride * row]); | 532 &upload_pixels_[upload_image_stride * row]); |
| 509 const uint16_t* src = reinterpret_cast<uint16_t*>( | 533 const uint16_t* src = reinterpret_cast<uint16_t*>( |
| 510 video_frame->data(i) + (video_stride_bytes * row)); | 534 video_frame->data(i) + (video_stride_bytes * row)); |
| 511 // Micro-benchmarking indicates that the compiler does | 535 if (bits_per_channel <= 10) { |
| 512 // a good enough job of optimizing this loop that trying | 536 // Micro-benchmarking indicates that the compiler does |
| 513 // to manually operate on one uint64 at a time is not | 537 // a good enough job of optimizing this loop that trying |
| 514 // actually helpful. | 538 // to manually operate on one uint64 at a time is not |
| 515 // Note to future optimizers: Benchmark your optimizations! | 539 // actually helpful. |
| 516 for (size_t i = 0; i < bytes_per_row / 2; i++) | 540 // Note to future optimizers: Benchmark your optimizations! |
| 517 dst[i] = src[i] | 0x3800; | 541 for (size_t i = 0; i < bytes_per_row / 2; i++) |
| 542 dst[i] = src[i] | 0x3800; | |
| 543 } else { | |
| 544 MakeHalfFloats(src, bits_per_channel, bytes_per_row / 2, dst); | |
| 545 } | |
| 518 } else if (shift != 0) { | 546 } else if (shift != 0) { |
| 519 // We have more-than-8-bit input which we need to shift | 547 // We have more-than-8-bit input which we need to shift |
| 520 // down to fit it into an 8-bit texture. | 548 // down to fit it into an 8-bit texture. |
| 521 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 549 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
| 522 const uint16_t* src = reinterpret_cast<uint16_t*>( | 550 const uint16_t* src = reinterpret_cast<uint16_t*>( |
| 523 video_frame->data(i) + (video_stride_bytes * row)); | 551 video_frame->data(i) + (video_stride_bytes * row)); |
| 524 for (size_t i = 0; i < bytes_per_row; i++) | 552 for (size_t i = 0; i < bytes_per_row; i++) |
| 525 dst[i] = src[i] >> shift; | 553 dst[i] = src[i] >> shift; |
| 526 } else { | 554 } else { |
| 527 // Input and output are the same size and format, but | 555 // Input and output are the same size and format, but |
| 528 // differ in stride, copy one row at a time. | 556 // differ in stride, copy one row at a time. |
| 529 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 557 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
| 530 const uint8_t* src = | 558 const uint8_t* src = |
| 531 video_frame->data(i) + (video_stride_bytes * row); | 559 video_frame->data(i) + (video_stride_bytes * row); |
| 532 memcpy(dst, src, bytes_per_row); | 560 memcpy(dst, src, bytes_per_row); |
| 533 } | 561 } |
| 534 } | 562 } |
| 535 pixels = &upload_pixels_[0]; | 563 pixels = &upload_pixels_[0]; |
| 536 } | 564 } |
| 537 | 565 |
| 538 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, | 566 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, |
| 539 resource_size_pixels); | 567 resource_size_pixels); |
| 540 plane_resource.SetUniqueId(video_frame->unique_id(), i); | 568 plane_resource.SetUniqueId(video_frame->unique_id(), i); |
| 541 } | 569 } |
| 542 | 570 |
| 543 if (plane_resource.resource_format() == LUMINANCE_F16) { | 571 // If the input data was 9 or 10 bit, and we output to half-floats, |
| 572 // then we used the OR path above, which means that we need to | |
| 573 // adjust the resource offset and multiplier accordingly. If the | |
| 574 // input data uses more than 10 bits, it will already be normalized | |
| 575 // to 0.0..1.0, so there is no need to do anything. | |
| 576 if (plane_resource.resource_format() == LUMINANCE_F16 && | |
|
danakj (2016/09/27 20:49:11): One more thought, we're doing this if thing twice
hubbe (2016/09/27 22:27:36): I don't really want to set a bool in every row, bu
danakj (2016/09/27 22:37:51): That's cool too if you agree it's more clear
| 577 bits_per_channel <= 10) { | |
| 544 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the | 578 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the |
| 545 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). | 579 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). |
| 546 // | 580 // |
| 547 // Half-floats are evaluated as: | 581 // Half-floats are evaluated as: |
| 548 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); | 582 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); |
| 549 // | 583 // |
| 550 // In our case the exponent is 14 (since we or with 0x3800) and | 584 // In our case the exponent is 14 (since we or with 0x3800) and |
| 551 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and | 585 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and |
| 552 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and | 586 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and |
| 553 // [0..0.24951171875] for 9-bit. | 587 // [0..0.24951171875] for 9-bit. |
| 554 // | 588 // |
| 555 // (https://en.wikipedia.org/wiki/Half-precision_floating-point_format) | 589 // (https://en.wikipedia.org/wiki/Half-precision_floating-point_format) |
| 556 // | 590 // |
| 557 // PLEASE NOTE: This doesn't work if bits_per_channel is > 10. | |
| 558 // PLEASE NOTE: All planes are assumed to use the same multiplier/offset. | 591 // PLEASE NOTE: All planes are assumed to use the same multiplier/offset. |
| 559 external_resources.offset = 0.5f; | 592 external_resources.offset = 0.5f; |
| 560 // Max value from input data. | 593 // Max value from input data. |
| 561 int max_input_value = (1 << bits_per_channel) - 1; | 594 int max_input_value = (1 << bits_per_channel) - 1; |
| 562 // 1 << 11 = 2048 would be 1.0 with our exponent. | 595 // 1 << 11 = 2048 would be 1.0 with our exponent. |
| 563 external_resources.multiplier = 2048.0 / max_input_value; | 596 external_resources.multiplier = 2048.0 / max_input_value; |
| 564 } | 597 } |
| 565 | 598 |
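As a sanity check on the offset and multiplier chosen here, the standalone sketch below (illustrative, not part of this CL) decodes the OR'd bit patterns with the half-float formula quoted in the comment and confirms that `(decoded - offset) * multiplier` reproduces `v / max_input_value`. The exact order in which the compositor applies the offset and multiplier is not visible in this file, so that ordering is an assumption of the sketch.

```cpp
// Standalone sketch (illustrative, not part of this CL): check the
// offset/multiplier math for the OR-with-0x3800 path.
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode a half-float bit pattern with a normal (non-zero) exponent,
// using the formula from the comment above:
//   value = pow(2.0, exponent - 25) * (0x400 + fraction)
static double DecodeHalf(uint16_t h) {
  const int exponent = (h >> 10) & 0x1F;
  const int fraction = h & 0x3FF;
  return std::pow(2.0, exponent - 25) * (0x400 + fraction);
}

int main() {
  const int bits_per_channel = 10;
  const int max_input_value = (1 << bits_per_channel) - 1;  // 1023
  const double multiplier = 2048.0 / max_input_value;
  const double offset = 0.5;
  for (int v : {0, 512, 1023}) {
    const double decoded = DecodeHalf(static_cast<uint16_t>(v | 0x3800));
    const double recovered = (decoded - offset) * multiplier;  // assumed ordering
    std::printf("v=%4d decoded=%.10f recovered=%.10f expected=%.10f\n",
                v, decoded, recovered,
                static_cast<double>(v) / max_input_value);
  }
  return 0;
}
```

For v = 1023 this gives decoded = 0.99951171875 and recovered = 1.0, matching the [0.5..1) range and the 2048 / max_input_value multiplier described in the comment.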
| 566 // VideoResourceUpdater shares a context with the compositor so a | 599 // VideoResourceUpdater shares a context with the compositor so a |
| 567 // sync token is not required. | 600 // sync token is not required. |
| (...skipping 154 matching lines...) | |
| 722 if (lost_resource) { | 755 if (lost_resource) { |
| 723 resource_it->clear_refs(); | 756 resource_it->clear_refs(); |
| 724 updater->DeleteResource(resource_it); | 757 updater->DeleteResource(resource_it); |
| 725 return; | 758 return; |
| 726 } | 759 } |
| 727 | 760 |
| 728 resource_it->remove_ref(); | 761 resource_it->remove_ref(); |
| 729 } | 762 } |
| 730 | 763 |
| 731 } // namespace cc | 764 } // namespace cc |