Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "cc/resources/video_resource_updater.h" | 5 #include "cc/resources/video_resource_updater.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| (...skipping 66 matching lines...) | |
| 77 case media::PIXEL_FORMAT_RGB24: | 77 case media::PIXEL_FORMAT_RGB24: |
| 78 case media::PIXEL_FORMAT_RGB32: | 78 case media::PIXEL_FORMAT_RGB32: |
| 79 case media::PIXEL_FORMAT_MJPEG: | 79 case media::PIXEL_FORMAT_MJPEG: |
| 80 case media::PIXEL_FORMAT_MT21: | 80 case media::PIXEL_FORMAT_MT21: |
| 81 case media::PIXEL_FORMAT_YUV420P9: | 81 case media::PIXEL_FORMAT_YUV420P9: |
| 82 case media::PIXEL_FORMAT_YUV422P9: | 82 case media::PIXEL_FORMAT_YUV422P9: |
| 83 case media::PIXEL_FORMAT_YUV444P9: | 83 case media::PIXEL_FORMAT_YUV444P9: |
| 84 case media::PIXEL_FORMAT_YUV420P10: | 84 case media::PIXEL_FORMAT_YUV420P10: |
| 85 case media::PIXEL_FORMAT_YUV422P10: | 85 case media::PIXEL_FORMAT_YUV422P10: |
| 86 case media::PIXEL_FORMAT_YUV444P10: | 86 case media::PIXEL_FORMAT_YUV444P10: |
| 87 case media::PIXEL_FORMAT_YUV420P12: | |
| 88 case media::PIXEL_FORMAT_YUV422P12: | |
| 89 case media::PIXEL_FORMAT_YUV444P12: | |
| 87 case media::PIXEL_FORMAT_UNKNOWN: | 90 case media::PIXEL_FORMAT_UNKNOWN: |
| 88 break; | 91 break; |
| 89 } | 92 } |
| 90 return VideoFrameExternalResources::NONE; | 93 return VideoFrameExternalResources::NONE; |
| 91 } | 94 } |
| 92 | 95 |
| 93 class SyncTokenClientImpl : public media::VideoFrame::SyncTokenClient { | 96 class SyncTokenClientImpl : public media::VideoFrame::SyncTokenClient { |
| 94 public: | 97 public: |
| 95 SyncTokenClientImpl(gpu::gles2::GLES2Interface* gl, | 98 SyncTokenClientImpl(gpu::gles2::GLES2Interface* gl, |
| 96 const gpu::SyncToken& sync_token) | 99 const gpu::SyncToken& sync_token) |
| (...skipping 185 matching lines...) | |
| 282 if (software_compositor) | 285 if (software_compositor) |
| 283 return coded_size; | 286 return coded_size; |
| 284 | 287 |
| 285 int plane_width = media::VideoFrame::Columns( | 288 int plane_width = media::VideoFrame::Columns( |
| 286 plane_index, input_frame->format(), coded_size.width()); | 289 plane_index, input_frame->format(), coded_size.width()); |
| 287 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), | 290 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), |
| 288 coded_size.height()); | 291 coded_size.height()); |
| 289 return gfx::Size(plane_width, plane_height); | 292 return gfx::Size(plane_width, plane_height); |
| 290 } | 293 } |
| 291 | 294 |
| 295 void VideoResourceUpdater::MakeHalfFloats(const uint16_t* src, | |
| 296 int bits_per_channel, | |
| 297 size_t num, | |
| 298 uint16_t* dst) { | |
| 299 // TODO(hubbe): Make AVX and neon versions of this code. | |
| 300 | |
| 301 // This magic constant is 2^-112. Multiplying by this | |
| 302 // is the same as subtracting 112 from the exponent, which | |
| 303 // is the difference in exponent bias between 32-bit and | |
| 304 // 16-bit floats. Once we've done this subtraction, we can | |
| 305 // simply extract the low bits of the exponent and the high | |
| 306 // bits of the mantissa from our float and we're done. | |
| 307 float mult = 1.9259299444e-34f / ((1 << bits_per_channel) - 1); | |
| 308 for (size_t i = 0; i < num; i++) { | |
| 309 float value = src[i] * mult; | |
| 310 dst[i] = (*(uint32_t*)&value) >> 13; | |
| 311 } | |
| 312 } | |
| 313 | |
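A quick way to sanity-check the magic constant in `MakeHalfFloats` above is to run the conversion on a single sample by hand. The snippet below is a standalone illustrative sketch, not part of this CL: it assumes a 12-bit source (the new `YUV*P12` formats), uses `memcpy` instead of the pointer cast purely to keep the sketch self-contained and aliasing-safe, and for the maximum 12-bit sample should print `0x3C00`, the half-float bit pattern for 1.0.

```cpp
// Standalone sketch (illustrative, not part of this CL): verify that
// multiplying by 2^-112 and shifting right by 13 turns a normalized
// integer sample into its half-float encoding. 112 is the difference
// between the float32 exponent bias (127) and the float16 bias (15);
// 13 is the difference in mantissa widths (23 - 10).
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const int bits_per_channel = 12;                       // e.g. PIXEL_FORMAT_YUV420P12
  const float mult = 1.9259299444e-34f /                 // 2^-112
                     ((1 << bits_per_channel) - 1);
  const uint16_t src = (1 << bits_per_channel) - 1;      // maximum 12-bit sample
  const float value = src * mult;                        // rebiased float32
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));              // type-pun without aliasing UB
  const uint16_t half = static_cast<uint16_t>(bits >> 13);  // keep exponent + top 10 mantissa bits
  std::printf("half bits: 0x%04X\n", static_cast<unsigned>(half));  // expect 0x3C00 == 1.0
  return 0;
}
```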
| 292 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( | 314 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( |
| 293 scoped_refptr<media::VideoFrame> video_frame) { | 315 scoped_refptr<media::VideoFrame> video_frame) { |
| 294 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); | 316 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); |
| 295 const media::VideoPixelFormat input_frame_format = video_frame->format(); | 317 const media::VideoPixelFormat input_frame_format = video_frame->format(); |
| 296 | 318 |
| 297 // TODO(hubbe): Make this a video frame method. | 319 // TODO(hubbe): Make this a video frame method. |
| 298 int bits_per_channel = 0; | 320 int bits_per_channel = 0; |
| 299 switch (input_frame_format) { | 321 switch (input_frame_format) { |
| 300 case media::PIXEL_FORMAT_UNKNOWN: | 322 case media::PIXEL_FORMAT_UNKNOWN: |
| 301 NOTREACHED(); | 323 NOTREACHED(); |
| (...skipping 18 matching lines...) | |
| 320 case media::PIXEL_FORMAT_YUV420P9: | 342 case media::PIXEL_FORMAT_YUV420P9: |
| 321 case media::PIXEL_FORMAT_YUV422P9: | 343 case media::PIXEL_FORMAT_YUV422P9: |
| 322 case media::PIXEL_FORMAT_YUV444P9: | 344 case media::PIXEL_FORMAT_YUV444P9: |
| 323 bits_per_channel = 9; | 345 bits_per_channel = 9; |
| 324 break; | 346 break; |
| 325 case media::PIXEL_FORMAT_YUV420P10: | 347 case media::PIXEL_FORMAT_YUV420P10: |
| 326 case media::PIXEL_FORMAT_YUV422P10: | 348 case media::PIXEL_FORMAT_YUV422P10: |
| 327 case media::PIXEL_FORMAT_YUV444P10: | 349 case media::PIXEL_FORMAT_YUV444P10: |
| 328 bits_per_channel = 10; | 350 bits_per_channel = 10; |
| 329 break; | 351 break; |
| 352 case media::PIXEL_FORMAT_YUV420P12: | |
| 353 case media::PIXEL_FORMAT_YUV422P12: | |
| 354 case media::PIXEL_FORMAT_YUV444P12: | |
| 355 bits_per_channel = 12; | |
| 356 break; | |
| 330 } | 357 } |
| 331 | 358 |
| 332 // Only YUV software video frames are supported. | 359 // Only YUV software video frames are supported. |
| 333 if (!media::IsYuvPlanar(input_frame_format)) { | 360 if (!media::IsYuvPlanar(input_frame_format)) { |
| 334 NOTREACHED() << media::VideoPixelFormatToString(input_frame_format); | 361 NOTREACHED() << media::VideoPixelFormatToString(input_frame_format); |
| 335 return VideoFrameExternalResources(); | 362 return VideoFrameExternalResources(); |
| 336 } | 363 } |
| 337 | 364 |
| 338 const bool software_compositor = context_provider_ == NULL; | 365 const bool software_compositor = context_provider_ == NULL; |
| 339 | 366 |
| (...skipping 135 matching lines...) | |
| 475 // Assuming that GL_UNPACK_ALIGNMENT has not changed from default. | 502 // Assuming that GL_UNPACK_ALIGNMENT has not changed from default. |
| 476 size_t upload_image_stride = | 503 size_t upload_image_stride = |
| 477 MathUtil::CheckedRoundUp<size_t>(bytes_per_row, 4u); | 504 MathUtil::CheckedRoundUp<size_t>(bytes_per_row, 4u); |
| 478 | 505 |
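The stride computation above just pads each row out to the default 4-byte `GL_UNPACK_ALIGNMENT`. A minimal standalone sketch (hypothetical plane widths, not from this CL) showing the effect of the rounding:

```cpp
// Standalone sketch (illustrative, not part of this CL): the upload
// stride is bytes_per_row rounded up to a multiple of 4, so rows that
// are not already 4-byte multiples get padded.
#include <cstddef>
#include <cstdio>

static size_t RoundUpTo4(size_t n) {
  return (n + 3) & ~static_cast<size_t>(3);
}

int main() {
  // e.g. a 361-pixel-wide 8-bit plane needs padding, a 640-pixel one does not.
  std::printf("%zu -> %zu\n", static_cast<size_t>(361), RoundUpTo4(361));  // 361 -> 364
  std::printf("%zu -> %zu\n", static_cast<size_t>(640), RoundUpTo4(640));  // 640 -> 640
  return 0;
}
```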
| 479 bool needs_conversion = false; | 506 bool needs_conversion = false; |
| 480 int shift = 0; | 507 int shift = 0; |
| 481 | 508 |
| 482 // LUMINANCE_F16 uses half-floats, so we always need a conversion step. | 509 // LUMINANCE_F16 uses half-floats, so we always need a conversion step. |
| 483 if (plane_resource.resource_format() == LUMINANCE_F16) { | 510 if (plane_resource.resource_format() == LUMINANCE_F16) { |
| 484 needs_conversion = true; | 511 needs_conversion = true; |
| 485 // Note that the current method of converting integers to half-floats | |
| 486 // stops working if you have more than 10 bits of data. | |
| 487 DCHECK_LE(bits_per_channel, 10); | |
| 488 } else if (bits_per_channel > 8) { | 512 } else if (bits_per_channel > 8) { |
| 489 // If bits_per_channel > 8 and we can't use LUMINANCE_F16, we need to | 513 // If bits_per_channel > 8 and we can't use LUMINANCE_F16, we need to |
| 490 // shift the data down and create an 8-bit texture. | 514 // shift the data down and create an 8-bit texture. |
| 491 needs_conversion = true; | 515 needs_conversion = true; |
| 492 shift = bits_per_channel - 8; | 516 shift = bits_per_channel - 8; |
| 493 } | 517 } |
| 494 const uint8_t* pixels; | 518 const uint8_t* pixels; |
| 495 if (static_cast<int>(upload_image_stride) == video_stride_bytes && | 519 if (static_cast<int>(upload_image_stride) == video_stride_bytes && |
| 496 !needs_conversion) { | 520 !needs_conversion) { |
| 497 pixels = video_frame->data(i); | 521 pixels = video_frame->data(i); |
| 498 } else { | 522 } else { |
| 499 // Avoid malloc for each frame/plane if possible. | 523 // Avoid malloc for each frame/plane if possible. |
| 500 size_t needed_size = | 524 size_t needed_size = |
| 501 upload_image_stride * resource_size_pixels.height(); | 525 upload_image_stride * resource_size_pixels.height(); |
| 502 if (upload_pixels_.size() < needed_size) | 526 if (upload_pixels_.size() < needed_size) |
| 503 upload_pixels_.resize(needed_size); | 527 upload_pixels_.resize(needed_size); |
| 504 | 528 |
| 505 for (int row = 0; row < resource_size_pixels.height(); ++row) { | 529 for (int row = 0; row < resource_size_pixels.height(); ++row) { |
| 506 if (plane_resource.resource_format() == LUMINANCE_F16) { | 530 if (plane_resource.resource_format() == LUMINANCE_F16) { |
| 507 uint16_t* dst = reinterpret_cast<uint16_t*>( | 531 uint16_t* dst = reinterpret_cast<uint16_t*>( |
| 508 &upload_pixels_[upload_image_stride * row]); | 532 &upload_pixels_[upload_image_stride * row]); |
| 509 const uint16_t* src = reinterpret_cast<uint16_t*>( | 533 const uint16_t* src = reinterpret_cast<uint16_t*>( |
| 510 video_frame->data(i) + (video_stride_bytes * row)); | 534 video_frame->data(i) + (video_stride_bytes * row)); |
| 511 // Micro-benchmarking indicates that the compiler does | 535 if (bits_per_channel <= 10) { |
| 512 // a good enough job of optimizing this loop that trying | 536 // Micro-benchmarking indicates that the compiler does |
| 513 // to manually operate on one uint64 at a time is not | 537 // a good enough job of optimizing this loop that trying |
| 514 // actually helpful. | 538 // to manually operate on one uint64 at a time is not |
| 515 // Note to future optimizers: Benchmark your optimizations! | 539 // actually helpful. |
| 516 for (size_t i = 0; i < bytes_per_row / 2; i++) | 540 // Note to future optimizers: Benchmark your optimizations! |
| 517 dst[i] = src[i] | 0x3800; | 541 for (size_t i = 0; i < bytes_per_row / 2; i++) |
| 542 dst[i] = src[i] | 0x3800; | |
| 543 } else { | |
| 544 MakeHalfFloats(src, bits_per_channel, bytes_per_row / 2, dst); | |
| 545 } | |
| 518 } else if (shift != 0) { | 546 } else if (shift != 0) { |
| 519 // We have more-than-8-bit input which we need to shift | 547 // We have more-than-8-bit input which we need to shift |
| 520 // down to fit it into an 8-bit texture. | 548 // down to fit it into an 8-bit texture. |
| 521 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 549 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
| 522 const uint16_t* src = reinterpret_cast<uint16_t*>( | 550 const uint16_t* src = reinterpret_cast<uint16_t*>( |
| 523 video_frame->data(i) + (video_stride_bytes * row)); | 551 video_frame->data(i) + (video_stride_bytes * row)); |
| 524 for (size_t i = 0; i < bytes_per_row; i++) | 552 for (size_t i = 0; i < bytes_per_row; i++) |
| 525 dst[i] = src[i] >> shift; | 553 dst[i] = src[i] >> shift; |
| 526 } else { | 554 } else { |
| 527 // Input and output are the same size and format, but | 555 // Input and output are the same size and format, but |
| 528 // differ in stride, copy one row at a time. | 556 // differ in stride, copy one row at a time. |
| 529 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 557 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
| 530 const uint8_t* src = | 558 const uint8_t* src = |
| 531 video_frame->data(i) + (video_stride_bytes * row); | 559 video_frame->data(i) + (video_stride_bytes * row); |
| 532 memcpy(dst, src, bytes_per_row); | 560 memcpy(dst, src, bytes_per_row); |
| 533 } | 561 } |
| 534 } | 562 } |
| 535 pixels = &upload_pixels_[0]; | 563 pixels = &upload_pixels_[0]; |
| 536 } | 564 } |
| 537 | 565 |
| 538 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, | 566 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, |
| 539 resource_size_pixels); | 567 resource_size_pixels); |
| 540 plane_resource.SetUniqueId(video_frame->unique_id(), i); | 568 plane_resource.SetUniqueId(video_frame->unique_id(), i); |
| 541 } | 569 } |
| 542 | 570 |
| 543 if (plane_resource.resource_format() == LUMINANCE_F16) { | 571 // If the input data was 9 or 10 bit, and we output to half-floats, |
| 572 // then we used the OR path above, which means that we need to | |
| 573 // adjust the resource offset and multiplier accordingly. If the | |
| 574 // input data uses more than 10 bits, it will already be normalized | |
| 575 // to 0.0..1.0, so there is no need to do anything. | |
| 576 if (plane_resource.resource_format() == LUMINANCE_F16 && | |
|
danakj (2016/09/27 20:49:11): One more thought, we're doing this if thing twice
hubbe (2016/09/27 22:27:36): I don't really want to set a bool in every row, bu
danakj (2016/09/27 22:37:51): That's cool too if you agree it's more clear
| 577 bits_per_channel <= 10) { | |
| 544 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the | 578 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the |
| 545 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). | 579 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). |
| 546 // | 580 // |
| 547 // Half-floats are evaluated as: | 581 // Half-floats are evaluated as: |
| 548 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); | 582 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); |
| 549 // | 583 // |
| 550 // In our case the exponent is 14 (since we or with 0x3800) and | 584 // In our case the exponent is 14 (since we or with 0x3800) and |
| 551 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and | 585 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and |
| 552 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and | 586 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and |
| 553 // [0..0.24951171875] for 9-bit. | 587 // [0..0.24951171875] for 9-bit. |
| 554 // | 588 // |
| 555 // (https://en.wikipedia.org/wiki/Half-precision_floating-point_format) | 589 // (https://en.wikipedia.org/wiki/Half-precision_floating-point_format) |
| 556 // | 590 // |
| 557 // PLEASE NOTE: This doesn't work if bits_per_channel is > 10. | |
| 558 // PLEASE NOTE: All planes are assumed to use the same multiplier/offset. | 591 // PLEASE NOTE: All planes are assumed to use the same multiplier/offset. |
| 559 external_resources.offset = 0.5f; | 592 external_resources.offset = 0.5f; |
| 560 // Max value from input data. | 593 // Max value from input data. |
| 561 int max_input_value = (1 << bits_per_channel) - 1; | 594 int max_input_value = (1 << bits_per_channel) - 1; |
| 562 // 1 << 11 = 2048 would be 1.0 with our exponent. | 595 // 1 << 11 = 2048 would be 1.0 with our exponent. |
| 563 external_resources.multiplier = 2048.0 / max_input_value; | 596 external_resources.multiplier = 2048.0 / max_input_value; |
| 564 } | 597 } |
| 565 | 598 |
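As a sanity check on the offset and multiplier chosen here, the standalone sketch below (illustrative, not part of this CL) decodes the OR'd bit patterns with the half-float formula quoted in the comment and confirms that `(decoded - offset) * multiplier` reproduces `v / max_input_value`. The exact order in which the compositor applies the offset and multiplier is not visible in this file, so that ordering is an assumption of the sketch.

```cpp
// Standalone sketch (illustrative, not part of this CL): check the
// offset/multiplier math for the OR-with-0x3800 path.
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode a half-float bit pattern with a normal (non-zero) exponent,
// using the formula from the comment above:
//   value = pow(2.0, exponent - 25) * (0x400 + fraction)
static double DecodeHalf(uint16_t h) {
  const int exponent = (h >> 10) & 0x1F;
  const int fraction = h & 0x3FF;
  return std::pow(2.0, exponent - 25) * (0x400 + fraction);
}

int main() {
  const int bits_per_channel = 10;
  const int max_input_value = (1 << bits_per_channel) - 1;  // 1023
  const double multiplier = 2048.0 / max_input_value;
  const double offset = 0.5;
  for (int v : {0, 512, 1023}) {
    const double decoded = DecodeHalf(static_cast<uint16_t>(v | 0x3800));
    const double recovered = (decoded - offset) * multiplier;  // assumed ordering
    std::printf("v=%4d decoded=%.10f recovered=%.10f expected=%.10f\n",
                v, decoded, recovered,
                static_cast<double>(v) / max_input_value);
  }
  return 0;
}
```

For v = 1023 this gives decoded = 0.99951171875 and recovered = 1.0, matching the [0.5..1) range and the 2048 / max_input_value multiplier described in the comment.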
| 566 // VideoResourceUpdater shares a context with the compositor so a | 599 // VideoResourceUpdater shares a context with the compositor so a |
| 567 // sync token is not required. | 600 // sync token is not required. |
| (...skipping 154 matching lines...) | |
| 722 if (lost_resource) { | 755 if (lost_resource) { |
| 723 resource_it->clear_refs(); | 756 resource_it->clear_refs(); |
| 724 updater->DeleteResource(resource_it); | 757 updater->DeleteResource(resource_it); |
| 725 return; | 758 return; |
| 726 } | 759 } |
| 727 | 760 |
| 728 resource_it->remove_ref(); | 761 resource_it->remove_ref(); |
| 729 } | 762 } |
| 730 | 763 |
| 731 } // namespace cc | 764 } // namespace cc |