OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #if defined(_MSC_VER) | 5 #if defined(_MSC_VER) |
6 #include <intrin.h> | 6 #include <intrin.h> |
7 #else | 7 #else |
8 #include <mmintrin.h> | 8 #include <mmintrin.h> |
9 #include <emmintrin.h> | 9 #include <emmintrin.h> |
10 #endif | 10 #endif |
11 | 11 |
12 #include "remoting/host/differ_block.h" | 12 #include "remoting/host/differ_block.h" |
13 #include "remoting/host/differ_block_internal.h" | 13 #include "remoting/host/differ_block_internal.h" |
14 | 14 |
15 namespace remoting { | 15 namespace remoting { |
16 | 16 |
17 extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, | 17 extern int BlockDifference_SSE2_W16(const uint8* image1, const uint8* image2, |
18 int stride) { | 18 int stride) { |
19 __m128i acc = _mm_setzero_si128(); | 19 __m128i acc = _mm_setzero_si128(); |
20 __m128i v0; | 20 __m128i v0; |
21 __m128i v1; | 21 __m128i v1; |
22 __m128i sad; | 22 __m128i sad; |
23 for (int y = 0; y < kBlockHeight; ++y) { | 23 for (int y = 0; y < kBlockSize; ++y) { |
24 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); | 24 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); |
25 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); | 25 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); |
26 v0 = _mm_loadu_si128(i1); | 26 v0 = _mm_loadu_si128(i1); |
27 v1 = _mm_loadu_si128(i2); | 27 v1 = _mm_loadu_si128(i2); |
28 sad = _mm_sad_epu8(v0, v1); | 28 sad = _mm_sad_epu8(v0, v1); |
29 acc = _mm_adds_epu16(acc, sad); | 29 acc = _mm_adds_epu16(acc, sad); |
30 v0 = _mm_loadu_si128(i1 + 1); | 30 v0 = _mm_loadu_si128(i1 + 1); |
31 v1 = _mm_loadu_si128(i2 + 1); | 31 v1 = _mm_loadu_si128(i2 + 1); |
32 sad = _mm_sad_epu8(v0, v1); | 32 sad = _mm_sad_epu8(v0, v1); |
33 acc = _mm_adds_epu16(acc, sad); | 33 acc = _mm_adds_epu16(acc, sad); |
(...skipping 18 matching lines...) Expand all Loading... |
52 } | 52 } |
53 return 0; | 53 return 0; |
54 } | 54 } |
55 | 55 |
56 extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, | 56 extern int BlockDifference_SSE2_W32(const uint8* image1, const uint8* image2, |
57 int stride) { | 57 int stride) { |
58 __m128i acc = _mm_setzero_si128(); | 58 __m128i acc = _mm_setzero_si128(); |
59 __m128i v0; | 59 __m128i v0; |
60 __m128i v1; | 60 __m128i v1; |
61 __m128i sad; | 61 __m128i sad; |
62 for (int y = 0; y < kBlockHeight; ++y) { | 62 for (int y = 0; y < kBlockSize; ++y) { |
63 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); | 63 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); |
64 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); | 64 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); |
65 v0 = _mm_loadu_si128(i1); | 65 v0 = _mm_loadu_si128(i1); |
66 v1 = _mm_loadu_si128(i2); | 66 v1 = _mm_loadu_si128(i2); |
67 sad = _mm_sad_epu8(v0, v1); | 67 sad = _mm_sad_epu8(v0, v1); |
68 acc = _mm_adds_epu16(acc, sad); | 68 acc = _mm_adds_epu16(acc, sad); |
69 v0 = _mm_loadu_si128(i1 + 1); | 69 v0 = _mm_loadu_si128(i1 + 1); |
70 v1 = _mm_loadu_si128(i2 + 1); | 70 v1 = _mm_loadu_si128(i2 + 1); |
71 sad = _mm_sad_epu8(v0, v1); | 71 sad = _mm_sad_epu8(v0, v1); |
72 acc = _mm_adds_epu16(acc, sad); | 72 acc = _mm_adds_epu16(acc, sad); |
(...skipping 29 matching lines...) Expand all Loading... |
102 int diff = _mm_cvtsi128_si32(sad); | 102 int diff = _mm_cvtsi128_si32(sad); |
103 if (diff) | 103 if (diff) |
104 return 1; | 104 return 1; |
105 image1 += stride; | 105 image1 += stride; |
106 image2 += stride; | 106 image2 += stride; |
107 } | 107 } |
108 return 0; | 108 return 0; |
109 } | 109 } |
110 | 110 |
111 } // namespace remoting | 111 } // namespace remoting |
OLD | NEW |