| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <stdint.h> | |
| 6 | |
| 7 #if defined(_MSC_VER) | |
| 8 #include <intrin.h> | |
| 9 #else | |
| 10 #include <mmintrin.h> | |
| 11 #include <emmintrin.h> | |
| 12 #endif | |
| 13 | |
| 14 #include "media/base/simd/filter_yuv.h" | |
| 15 | |
namespace media {

namespace {

// Scalar reference blend for a single byte: weights |a| by (256 - fraction)
// and |b| by |fraction|, then drops the low 8 bits of the 8.8 fixed-point sum.
inline uint8_t BlendPixel(uint8_t a, uint8_t b, int fraction) {
  return static_cast<uint8_t>((a * (256 - fraction) + b * fraction) >> 8);
}

}  // namespace

// Linearly blends two rows of bytes into |dest| using SSE2:
//   dest[i] = (src0[i] * (256 - fraction) + src1[i] * fraction) >> 8
// for i in [0, width).
//
// |dest| may have any alignment; leading bytes up to the next 16-byte boundary
// and trailing bytes that do not fill a full 16-byte vector are handled by the
// scalar path. |src0| and |src1| are read with unaligned loads. Nothing is
// read or written when |width| is zero or negative.
void FilterYUVRows_SSE2(uint8_t* dest,
                        const uint8_t* src0,
                        const uint8_t* src1,
                        int width,
                        uint8_t fraction) {
  // Bail out early so no pointer arithmetic is performed with a non-positive
  // width.
  if (width <= 0)
    return;

  int pixel = 0;

  // Scalar head: advance until |dest + pixel| sits on a 16-byte boundary (or
  // the row ends first), so the vector loop can use aligned stores.
  const int unaligned_width = static_cast<int>(
      (16 - (reinterpret_cast<uintptr_t>(dest) & 15)) & 15);
  const int head_end = unaligned_width < width ? unaligned_width : width;
  for (; pixel < head_end; ++pixel)
    dest[pixel] = BlendPixel(src0[pixel], src1[pixel], fraction);

  const __m128i zero = _mm_setzero_si128();
  const __m128i weight1 = _mm_set1_epi16(static_cast<short>(fraction));
  const __m128i weight0 = _mm_set1_epi16(static_cast<short>(256 - fraction));

  // Number of pixels coverable by whole 16-byte vectors. If the head loop
  // consumed the entire row this is zero and the vector loop is skipped, so
  // the aligned store below only ever runs on an aligned |dest + pixel|.
  const int simd_end = pixel + ((width - pixel) & ~15);
  for (; pixel < simd_end; pixel += 16) {
    const __m128i row0 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(src0 + pixel));
    const __m128i row1 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(src1 + pixel));

    // Widen each group of 8 bytes to 16-bit lanes by interleaving with zero.
    __m128i lo0 = _mm_unpacklo_epi8(row0, zero);
    __m128i lo1 = _mm_unpacklo_epi8(row1, zero);
    __m128i hi0 = _mm_unpackhi_epi8(row0, zero);
    __m128i hi1 = _mm_unpackhi_epi8(row1, zero);

    // Weighted sum per lane. The weights add up to 256, so the sum is at most
    // 255 * 256 = 65280 and fits in an unsigned 16-bit lane.
    lo0 = _mm_mullo_epi16(lo0, weight0);
    lo1 = _mm_mullo_epi16(lo1, weight1);
    hi0 = _mm_mullo_epi16(hi0, weight0);
    hi1 = _mm_mullo_epi16(hi1, weight1);
    __m128i lo = _mm_add_epi16(lo0, lo1);
    __m128i hi = _mm_add_epi16(hi0, hi1);

    // Logical shift drops the fractional byte; results are in [0, 255], so the
    // saturating pack back to bytes never clamps.
    lo = _mm_srli_epi16(lo, 8);
    hi = _mm_srli_epi16(hi, 8);
    _mm_store_si128(reinterpret_cast<__m128i*>(dest + pixel),
                    _mm_packus_epi16(lo, hi));
  }

  // Scalar tail: whatever is left after the last full vector.
  for (; pixel < width; ++pixel)
    dest[pixel] = BlendPixel(src0[pixel], src1[pixel], fraction);
}

}  // namespace media
| OLD | NEW |