OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "./vpx_config.h" | 11 #include "./vpx_config.h" |
12 #include "vp9/encoder/vp9_variance.h" | 12 #include "vp9/encoder/vp9_variance.h" |
13 #include "vp9/common/vp9_pragmas.h" | |
14 #include "vpx_ports/mem.h" | 13 #include "vpx_ports/mem.h" |
15 | 14 |
16 extern unsigned int vp9_get8x8var_mmx | 15 unsigned int vp9_get8x8var_mmx(const uint8_t *src, int src_stride, |
17 ( | 16 const uint8_t *ref, int ref_stride, |
18 const unsigned char *src_ptr, | 17 unsigned int *sse, int *sum); |
19 int source_stride, | |
20 const unsigned char *ref_ptr, | |
21 int recon_stride, | |
22 unsigned int *SSE, | |
23 int *Sum | |
24 ); | |
25 extern unsigned int vp9_get4x4var_mmx | |
26 ( | |
27 const unsigned char *src_ptr, | |
28 int source_stride, | |
29 const unsigned char *ref_ptr, | |
30 int recon_stride, | |
31 unsigned int *SSE, | |
32 int *Sum | |
33 ); | |
34 | 18 |
35 unsigned int vp9_variance4x4_mmx( | 19 unsigned int vp9_get4x4var_mmx(const uint8_t *src, int src_stride, |
36 const unsigned char *src_ptr, | 20 const uint8_t *ref, int ref_stride, |
37   int source_stride,                                           |  21                                unsigned int *sse, int *sum);
38 const unsigned char *ref_ptr, | |
39 int recon_stride, | |
40 unsigned int *sse) { | |
41 unsigned int var; | |
42 int avg; | |
43 | 22 |
44 vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); | 23 unsigned int vp9_variance4x4_mmx(const uint8_t *src, int src_stride, |
45 *sse = var; | 24 const uint8_t *ref, int ref_stride, |
46 return (var - (((unsigned int)avg * avg) >> 4)); | 25 unsigned int *sse) { |
| 26 int sum; |
| 27 vp9_get4x4var_mmx(src, src_stride, ref, ref_stride, sse, &sum); |
| 28 return *sse - (((unsigned int)sum * sum) >> 4); |
47 } | 29 } |
48 | 30 |
49 unsigned int vp9_variance8x8_mmx( | 31 unsigned int vp9_variance8x8_mmx(const uint8_t *src, int src_stride, |
50 const unsigned char *src_ptr, | 32 const uint8_t *ref, int ref_stride, |
51 int source_stride, | 33 unsigned int *sse) { |
52 const unsigned char *ref_ptr, | 34 int sum; |
53 int recon_stride, | 35 vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, sse, &sum); |
54 unsigned int *sse) { | 36 return *sse - (((unsigned int)sum * sum) >> 6); |
55 unsigned int var; | |
56 int avg; | |
57 | |
58 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg); | |
59 *sse = var; | |
60 | |
61 return (var - (((unsigned int)avg * avg) >> 6)); | |
62 } | 37 } |
63 | 38 |
64 unsigned int vp9_mse16x16_mmx( | 39 unsigned int vp9_mse16x16_mmx(const uint8_t *src, int src_stride, |
65 const unsigned char *src_ptr, | 40 const uint8_t *ref, int ref_stride, |
66 int source_stride, | 41 unsigned int *sse) { |
67 const unsigned char *ref_ptr, | 42 unsigned int sse0, sse1, sse2, sse3; |
68 int recon_stride, | |
69 unsigned int *sse) { | |
70 unsigned int sse0, sse1, sse2, sse3, var; | |
71 int sum0, sum1, sum2, sum3; | 43 int sum0, sum1, sum2, sum3; |
72 | 44 |
| 45 vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); |
| 46 vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); |
| 47 vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, |
| 48 ref + 8 * ref_stride, ref_stride, &sse2, &sum2); |
| 49 vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride, |
| 50 ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3); |
73 | 51 |
74 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, | 52 *sse = sse0 + sse1 + sse2 + sse3; |
75 &sum0); | 53 return *sse; |
76 vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, | |
77 &sse1, &sum1); | |
78 vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, | |
79 ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); | |
80 vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, | |
81 ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); | |
82 | |
83 var = sse0 + sse1 + sse2 + sse3; | |
84 *sse = var; | |
85 return var; | |
86 } | 54 } |
87 | 55 |
88 | 56 |
89 unsigned int vp9_variance16x16_mmx( | 57 unsigned int vp9_variance16x16_mmx(const uint8_t *src, int src_stride, |
90 const unsigned char *src_ptr, | 58 const uint8_t *ref, int ref_stride, |
91 int source_stride, | 59 unsigned int *sse) { |
92 const unsigned char *ref_ptr, | 60 unsigned int sse0, sse1, sse2, sse3; |
93 int recon_stride, | 61 int sum0, sum1, sum2, sum3, sum; |
94 unsigned int *sse) { | |
95 unsigned int sse0, sse1, sse2, sse3, var; | |
96 int sum0, sum1, sum2, sum3, avg; | |
97 | 62 |
98 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, | 63 vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); |
99 &sum0); | 64 vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); |
100 vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, | 65 vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, |
101 &sse1, &sum1); | 66 ref + 8 * ref_stride, ref_stride, &sse2, &sum2); |
102 vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, | 67 vp9_get8x8var_mmx(src + 8 * src_stride + 8, src_stride, |
103 ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2); | 68 ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3); |
104 vp9_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, | |
105 ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); | |
106 | 69 |
107 var = sse0 + sse1 + sse2 + sse3; | 70 *sse = sse0 + sse1 + sse2 + sse3; |
108 avg = sum0 + sum1 + sum2 + sum3; | 71 sum = sum0 + sum1 + sum2 + sum3; |
109 *sse = var; | 72 return *sse - (((unsigned int)sum * sum) >> 8); |
110 return (var - (((unsigned int)avg * avg) >> 8)); | |
111 } | 73 } |
112 | 74 |
113 unsigned int vp9_variance16x8_mmx( | 75 unsigned int vp9_variance16x8_mmx(const uint8_t *src, int src_stride, |
114 const unsigned char *src_ptr, | 76 const uint8_t *ref, int ref_stride, |
115 int source_stride, | 77 unsigned int *sse) { |
116 const unsigned char *ref_ptr, | 78 unsigned int sse0, sse1; |
117 int recon_stride, | 79 int sum0, sum1, sum; |
118 unsigned int *sse) { | |
119 unsigned int sse0, sse1, var; | |
120 int sum0, sum1, avg; | |
121 | 80 |
122 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, | 81 vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); |
123 &sum0); | 82 vp9_get8x8var_mmx(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1); |
124 vp9_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, | |
125 &sse1, &sum1); | |
126 | 83 |
127 var = sse0 + sse1; | 84 *sse = sse0 + sse1; |
128 avg = sum0 + sum1; | 85 sum = sum0 + sum1; |
129 *sse = var; | 86 return *sse - (((unsigned int)sum * sum) >> 7); |
130 return (var - (((unsigned int)avg * avg) >> 7)); | |
131 } | 87 } |
132 | 88 |
133 | 89 |
134 unsigned int vp9_variance8x16_mmx( | 90 unsigned int vp9_variance8x16_mmx(const uint8_t *src, int src_stride, |
135 const unsigned char *src_ptr, | 91 const uint8_t *ref, int ref_stride, |
136 int source_stride, | 92 unsigned int *sse) { |
137 const unsigned char *ref_ptr, | 93 unsigned int sse0, sse1; |
138 int recon_stride, | 94 int sum0, sum1, sum; |
139 unsigned int *sse) { | |
140 unsigned int sse0, sse1, var; | |
141 int sum0, sum1, avg; | |
142 | 95 |
143 vp9_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, | 96 vp9_get8x8var_mmx(src, src_stride, ref, ref_stride, &sse0, &sum0); |
144 &sum0); | 97 vp9_get8x8var_mmx(src + 8 * src_stride, src_stride, |
145 vp9_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, | 98 ref + 8 * ref_stride, ref_stride, &sse1, &sum1); |
146 ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1); | |
147 | 99 |
148 var = sse0 + sse1; | 100 *sse = sse0 + sse1; |
149 avg = sum0 + sum1; | 101 sum = sum0 + sum1; |
150 *sse = var; | 102 return *sse - (((unsigned int)sum * sum) >> 7); |
151 | |
152 return (var - (((unsigned int)avg * avg) >> 7)); | |
153 } | 103 } |
OLD | NEW |