OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <stdint.h> | |
6 | |
7 #if defined(_MSC_VER) | |
8 #include <intrin.h> | |
9 #else | |
10 #include <mmintrin.h> | |
11 #include <emmintrin.h> | |
12 #endif | |
13 | |
14 #include "media/base/simd/filter_yuv.h" | |
15 | |
16 namespace media { | |
17 | |
// Linearly blends two rows of bytes into |dest|:
//
//   dest[i] = (src0[i] * (256 - fraction) + src1[i] * fraction) >> 8
//
// for every i in [0, width).
//
// The row is handled in three phases:
//   1. a scalar prologue that runs until |dest| reaches a 16-byte boundary
//      (or the row ends),
//   2. an SSE2 loop that produces 16 output bytes per iteration using
//      unaligned loads from the sources and aligned stores to |dest|,
//   3. a scalar epilogue for whatever is left past the last full 16-byte
//      chunk.
void FilterYUVRows_SSE2(uint8_t* dest,
                        const uint8_t* src0,
                        const uint8_t* src1,
                        int width,
                        uint8_t fraction) {
  // Scalar blend weights; together they always add up to 256.
  const int weight0 = 256 - fraction;
  const int weight1 = fraction;

  // Count of leading bytes that sit before the first 16-byte boundary of
  // |dest| (0 when |dest| is already aligned).
  const int head_bytes = static_cast<int>(
      (16 - (reinterpret_cast<uintptr_t>(dest) & 15)) & 15);

  // Phase 1: scalar prologue.
  int i = 0;
  for (; i < width && i < head_bytes; ++i) {
    dest[i] =
        static_cast<uint8_t>((src0[i] * weight0 + src1[i] * weight1) >> 8);
  }

  // Phase 2: vector body. The weights are replicated into all eight 16-bit
  // lanes so that each widened pixel can be scaled with a single multiply.
  const __m128i zero = _mm_setzero_si128();
  const __m128i lanes_weight1 = _mm_set1_epi16(fraction);
  const __m128i lanes_weight0 = _mm_set1_epi16(256 - fraction);

  const __m128i* in0 = reinterpret_cast<const __m128i*>(src0 + i);
  const __m128i* in1 = reinterpret_cast<const __m128i*>(src1 + i);
  __m128i* out = reinterpret_cast<__m128i*>(dest + i);
  // End of the row rounded down to a 16-byte boundary. If the row ended
  // inside the prologue this is not above |out|, so the loop is skipped.
  __m128i* const out_end = reinterpret_cast<__m128i*>(
      reinterpret_cast<uintptr_t>(dest + width) & ~15);

  for (; out < out_end; ++out, ++in0, ++in1, i += 16) {
    // The sources carry no alignment guarantee, hence the unaligned loads.
    const __m128i row0 = _mm_loadu_si128(in0);
    const __m128i row1 = _mm_loadu_si128(in1);

    // Zero-extend the low and high eight bytes of each row to 16 bits.
    const __m128i row0_lo = _mm_unpacklo_epi8(row0, zero);
    const __m128i row1_lo = _mm_unpacklo_epi8(row1, zero);
    const __m128i row0_hi = _mm_unpackhi_epi8(row0, zero);
    const __m128i row1_hi = _mm_unpackhi_epi8(row1, zero);

    // Weighted sums. The largest possible value is 255 * 256, which still
    // fits in an unsigned 16-bit lane.
    const __m128i sum_lo =
        _mm_add_epi16(_mm_mullo_epi16(row0_lo, lanes_weight0),
                      _mm_mullo_epi16(row1_lo, lanes_weight1));
    const __m128i sum_hi =
        _mm_add_epi16(_mm_mullo_epi16(row0_hi, lanes_weight0),
                      _mm_mullo_epi16(row1_hi, lanes_weight1));

    // Drop the 8 fractional bits and narrow back to bytes. |out| is 16-byte
    // aligned whenever this loop runs, so a plain store is sufficient.
    *out = _mm_packus_epi16(_mm_srli_epi16(sum_lo, 8),
                            _mm_srli_epi16(sum_hi, 8));
  }

  // Phase 3: scalar epilogue.
  for (; i < width; ++i) {
    dest[i] =
        static_cast<uint8_t>((src0[i] * weight0 + src1[i] * weight1) >> 8);
  }
}
73 | |
74 } // namespace media | |
OLD | NEW |