OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 #include "./vpx_config.h" | 10 #include "./vpx_config.h" |
11 #include "vp9/common/vp9_common.h" | 11 #include "vp9/common/vp9_common.h" |
12 | 12 |
13 #include "vp9/encoder/vp9_variance.h" | 13 #include "vp9/encoder/vp9_variance.h" |
14 #include "vpx_ports/mem.h" | 14 #include "vpx_ports/mem.h" |
15 | 15 |
/*
 * Signature shared by the fixed-size kernels declared below. A kernel reads
 * one tile of src and ref (16-bit samples, strides in samples) and reports
 * its results through *sse and *sum. The uint32_t return value is not used
 * by the helpers in this file.
 */
typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,
                                        const uint16_t *ref, int ref_stride,
                                        uint32_t *sse, int *sum);

/* Fixed-size kernels; only declared here, defined outside this file. */
uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
                                    const uint16_t *ref, int ref_stride,
                                    uint32_t *sse, int *sum);

uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
                                      const uint16_t *ref, int ref_stride,
                                      uint32_t *sse, int *sum);

/*
 * Accumulates kernel results over a w x h area without any rescaling.
 *
 * The area is walked in block_size x block_size tiles; var_fn is called on
 * each tile and its outputs are added into *sse and *sum, which are zeroed
 * first. When w or h is not positive no tile is visited, var_fn is never
 * called, and both outputs are left at 0.
 */
static void highbd_variance_sse2(const uint16_t *src, int src_stride,
                                 const uint16_t *ref, int ref_stride,
                                 int w, int h, uint32_t *sse, int *sum,
                                 high_variance_fn_t var_fn, int block_size) {
  int row, col;

  *sse = 0;
  *sum = 0;

  for (row = 0; row < h; row += block_size) {
    for (col = 0; col < w; col += block_size) {
      /* Same types as the callback's output parameters, so no pointer
       * conversion is involved when passing their addresses. */
      uint32_t tile_sse;
      int tile_sum;
      var_fn(src + src_stride * row + col, src_stride,
             ref + ref_stride * row + col, ref_stride, &tile_sse, &tile_sum);
      *sse += tile_sse;
      *sum += tile_sum;
    }
  }
}
48 | |
49 static void highbd_10_variance_sse2(const uint16_t *src, int src_stride, | |
50 const uint16_t *ref, int ref_stride, | |
51 int w, int h, uint32_t *sse, int *sum, | |
52 high_variance_fn_t var_fn, int block_size) { | |
53 int i, j; | |
54 uint64_t sse_long = 0; | |
55 int64_t sum_long = 0; | |
56 | |
57 for (i = 0; i < h; i += block_size) { | |
58 for (j = 0; j < w; j += block_size) { | |
59 unsigned int sse0; | |
60 int sum0; | |
61 var_fn(src + src_stride * i + j, src_stride, | |
62 ref + ref_stride * i + j, ref_stride, &sse0, &sum0); | |
63 sse_long += sse0; | |
64 sum_long += sum0; | |
65 } | |
66 } | |
67 *sum = ROUND_POWER_OF_TWO(sum_long, 2); | |
68 *sse = ROUND_POWER_OF_TWO(sse_long, 4); | |
69 } | |
70 | |
71 static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, | |
72 const uint16_t *ref, int ref_stride, | |
73 int w, int h, uint32_t *sse, int *sum, | |
74 high_variance_fn_t var_fn, int block_size) { | |
75 int i, j; | |
76 uint64_t sse_long = 0; | |
77 int64_t sum_long = 0; | |
78 | |
79 for (i = 0; i < h; i += block_size) { | |
80 for (j = 0; j < w; j += block_size) { | |
81 unsigned int sse0; | |
82 int sum0; | |
83 var_fn(src + src_stride * i + j, src_stride, | |
84 ref + ref_stride * i + j, ref_stride, &sse0, &sum0); | |
85 sse_long += sse0; | |
86 sum_long += sum0; | |
87 } | |
88 } | |
89 *sum = ROUND_POWER_OF_TWO(sum_long, 4); | |
90 *sse = ROUND_POWER_OF_TWO(sse_long, 8); | |
91 } | |
92 | |
93 | |
/*
 * HIGH_GET_VAR(S) defines three public wrappers around the SxS kernel
 * vp9_highbd_calc<S>x<S>var_sse2():
 *   vp9_highbd_get<S>x<S>var_sse2     - kernel outputs stored unchanged;
 *   vp9_highbd_10_get<S>x<S>var_sse2  - *sum rounded by 2 bits, *sse by 4;
 *   vp9_highbd_12_get<S>x<S>var_sse2  - *sum rounded by 4 bits, *sse by 8.
 * Each wrapper takes uint8_t pointers and turns them into uint16_t pointers
 * with CONVERT_TO_SHORTPTR before calling the kernel.
 *
 * The macro expands to complete function definitions, so it is instantiated
 * without a trailing ';' (a lone ';' at file scope is not valid ISO C).
 */
#define HIGH_GET_VAR(S) \
void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                       const uint8_t *ref8, int ref_stride, \
                                       uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
} \
\
void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                          const uint8_t *ref8, int ref_stride, \
                                          uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
  *sum = ROUND_POWER_OF_TWO(*sum, 2); \
  *sse = ROUND_POWER_OF_TWO(*sse, 4); \
} \
\
void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                          const uint8_t *ref8, int ref_stride, \
                                          uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
  *sum = ROUND_POWER_OF_TWO(*sum, 4); \
  *sse = ROUND_POWER_OF_TWO(*sse, 8); \
}

HIGH_GET_VAR(16)
HIGH_GET_VAR(8)

#undef HIGH_GET_VAR
130 | |
131 #define VAR_FN(w, h, block_size, shift) \ | |
132 uint32_t vp9_highbd_variance##w##x##h##_sse2( \ | |
133 const uint8_t *src8, int src_stride, \ | |
134 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ | |
135 int sum; \ | |
136 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ | |
137 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ | |
138 highbd_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \ | |
139 vp9_highbd_calc##block_size##x##block_size##var_sse2, \ | |
140 block_size); \ | |
141 return *sse - (((int64_t)sum * sum) >> shift); \ | |
142 } \ | |
143 \ | |
144 uint32_t vp9_highbd_10_variance##w##x##h##_sse2( \ | |
145 const uint8_t *src8, int src_stride, \ | |
146 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ | |
147 int sum; \ | |
148 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ | |
149 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ | |
150 highbd_10_variance_sse2( \ | |
151 src, src_stride, ref, ref_stride, w, h, sse, &sum, \ | |
152 vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ | |
153 return *sse - (((int64_t)sum * sum) >> shift); \ | |
154 } \ | |
155 \ | |
156 uint32_t vp9_highbd_12_variance##w##x##h##_sse2( \ | |
157 const uint8_t *src8, int src_stride, \ | |
158 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \ | |
159 int sum; \ | |
160 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ | |
161 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ | |
162 highbd_12_variance_sse2( \ | |
163 src, src_stride, ref, ref_stride, w, h, sse, &sum, \ | |
164 vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ | |
165 return *sse - (((int64_t)sum * sum) >> shift); \ | |
166 } | |
167 | |
168 VAR_FN(64, 64, 16, 12); | |
169 VAR_FN(64, 32, 16, 11); | |
170 VAR_FN(32, 64, 16, 11); | |
171 VAR_FN(32, 32, 16, 10); | |
172 VAR_FN(32, 16, 16, 9); | |
173 VAR_FN(16, 32, 16, 9); | |
174 VAR_FN(16, 16, 16, 8); | |
175 VAR_FN(16, 8, 8, 7); | |
176 VAR_FN(8, 16, 8, 7); | |
177 VAR_FN(8, 8, 8, 6); | |
178 | |
179 #undef VAR_FN | |
180 | |
181 unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride, | |
182 const uint8_t *ref8, int ref_stride, | |
183 unsigned int *sse) { | |
184 int sum; | |
185 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
186 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
187 highbd_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, | |
188 sse, &sum, vp9_highbd_calc16x16var_sse2, 16); | |
189 return *sse; | |
190 } | |
191 | |
192 unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, | |
193 const uint8_t *ref8, int ref_stride, | |
194 unsigned int *sse) { | |
195 int sum; | |
196 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
197 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
198 highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, | |
199 sse, &sum, vp9_highbd_calc16x16var_sse2, 16); | |
200 return *sse; | |
201 } | |
202 | |
203 unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, | |
204 const uint8_t *ref8, int ref_stride, | |
205 unsigned int *sse) { | |
206 int sum; | |
207 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
208 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
209 highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, | |
210 sse, &sum, vp9_highbd_calc16x16var_sse2, 16); | |
211 return *sse; | |
212 } | |
213 | |
214 unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride, | |
215 const uint8_t *ref8, int ref_stride, | |
216 unsigned int *sse) { | |
217 int sum; | |
218 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
219 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
220 highbd_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, | |
221 sse, &sum, vp9_highbd_calc8x8var_sse2, 8); | |
222 return *sse; | |
223 } | |
224 | |
225 unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, | |
226 const uint8_t *ref8, int ref_stride, | |
227 unsigned int *sse) { | |
228 int sum; | |
229 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
230 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
231 highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, | |
232 sse, &sum, vp9_highbd_calc8x8var_sse2, 8); | |
233 return *sse; | |
234 } | |
235 | |
236 unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, | |
237 const uint8_t *ref8, int ref_stride, | |
238 unsigned int *sse) { | |
239 int sum; | |
240 uint16_t *src = CONVERT_TO_SHORTPTR(src8); | |
241 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
242 highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, | |
243 sse, &sum, vp9_highbd_calc8x8var_sse2, 8); | |
244 return *sse; | |
245 } | |
246 | |
/*
 * DECL(width, isa) expands to the prototype of
 * vp9_highbd_sub_pixel_variance<width>xh_<isa>(), including its terminating
 * ';'. The routine itself is only declared here, not defined.
 */
#define DECL(width, isa) \
int vp9_highbd_sub_pixel_variance##width##xh_##isa( \
    const uint16_t *src, ptrdiff_t src_stride, \
    int x_offset, int y_offset, \
    const uint16_t *dst, ptrdiff_t dst_stride, \
    int height, unsigned int *sse);

/*
 * DECLS(isa, isa_alt) declares the width-8 and width-16 routines for isa.
 * The second argument is accepted but not used by the expansion.
 * NOTE(review): DECL already ends in ';', so every use here produces a
 * doubled ';' -- harmless to common compilers, but worth tidying once all
 * users of DECL are in view.
 */
#define DECLS(isa, isa_alt) \
DECL(8, isa); \
DECL(16, isa)
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
571 FN(16, 16, 16, 4, 4, opt1, (int64_t)); \ | 340 FN(16, 16, 16, 4, 4, opt1, (int64_t)); \ |
572 FN(16, 8, 16, 4, 3, opt1, (int64_t)); \ | 341 FN(16, 8, 16, 4, 3, opt1, (int64_t)); \ |
573 FN(8, 16, 8, 4, 3, opt1, (int64_t)); \ | 342 FN(8, 16, 8, 4, 3, opt1, (int64_t)); \ |
574 FN(8, 8, 8, 3, 3, opt1, (int64_t)); \ | 343 FN(8, 8, 8, 3, 3, opt1, (int64_t)); \ |
575 FN(8, 4, 8, 3, 2, opt1, (int64_t)); | 344 FN(8, 4, 8, 3, 2, opt1, (int64_t)); |
576 | 345 |
577 FNS(sse2); | 346 FNS(sse2); |
578 | 347 |
579 #undef FNS | 348 #undef FNS |
580 #undef FN | 349 #undef FN |
OLD | NEW |