Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1342)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_variance.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_variance.h ('k') | source/libvpx/vp9/encoder/vp9_writer.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "./vp9_rtcd.h" 11 #include "./vp9_rtcd.h"
12 #include "./vpx_dsp_rtcd.h"
12 13
13 #include "vpx_ports/mem.h" 14 #include "vpx_ports/mem.h"
14 #include "vpx/vpx_integer.h" 15 #include "vpx/vpx_integer.h"
15 16
16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_filter.h" 18 #include "vp9/common/vp9_filter.h"
18 19
19 #include "vp9/encoder/vp9_variance.h" 20 #include "vp9/encoder/vp9_variance.h"
20 21
21 void variance(const uint8_t *a, int a_stride,
22 const uint8_t *b, int b_stride,
23 int w, int h, unsigned int *sse, int *sum) {
24 int i, j;
25
26 *sum = 0;
27 *sse = 0;
28
29 for (i = 0; i < h; i++) {
30 for (j = 0; j < w; j++) {
31 const int diff = a[j] - b[j];
32 *sum += diff;
33 *sse += diff * diff;
34 }
35
36 a += a_stride;
37 b += b_stride;
38 }
39 }
40
41 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal 22 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
42 // or vertical direction to produce the filtered output block. Used to implement 23 // or vertical direction to produce the filtered output block. Used to implement
43 // first-pass of 2-D separable filter. 24 // first-pass of 2-D separable filter.
44 // 25 //
45 // Produces int32_t output to retain precision for next pass. Two filter taps 26 // Produces int32_t output to retain precision for next pass. Two filter taps
46 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is 27 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
47 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It 28 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
48 // defines the offset required to move from one input to the next. 29 // defines the offset required to move from one input to the next.
49 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, 30 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
50 uint16_t *output_ptr, 31 uint16_t *output_ptr,
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 (int)src_ptr[pixel_step] * vp9_filter[1], 74 (int)src_ptr[pixel_step] * vp9_filter[1],
94 FILTER_BITS); 75 FILTER_BITS);
95 src_ptr++; 76 src_ptr++;
96 } 77 }
97 78
98 src_ptr += src_pixels_per_line - output_width; 79 src_ptr += src_pixels_per_line - output_width;
99 output_ptr += output_width; 80 output_ptr += output_width;
100 } 81 }
101 } 82 }
102 83
103 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
104 unsigned int i, sum = 0;
105
106 for (i = 0; i < 256; ++i) {
107 sum += src_ptr[i] * src_ptr[i];
108 }
109
110 return sum;
111 }
112
113 #define VAR(W, H) \
114 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
115 const uint8_t *b, int b_stride, \
116 unsigned int *sse) { \
117 int sum; \
118 variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
119 return *sse - (((int64_t)sum * sum) / (W * H)); \
120 }
121
122 #define SUBPIX_VAR(W, H) \ 84 #define SUBPIX_VAR(W, H) \
123 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ 85 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
124 const uint8_t *src, int src_stride, \ 86 const uint8_t *src, int src_stride, \
125 int xoffset, int yoffset, \ 87 int xoffset, int yoffset, \
126 const uint8_t *dst, int dst_stride, \ 88 const uint8_t *dst, int dst_stride, \
127 unsigned int *sse) { \ 89 unsigned int *sse) { \
128 uint16_t fdata3[(H + 1) * W]; \ 90 uint16_t fdata3[(H + 1) * W]; \
129 uint8_t temp2[H * W]; \ 91 uint8_t temp2[H * W]; \
130 \ 92 \
131 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ 93 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
132 BILINEAR_FILTERS_2TAP(xoffset)); \ 94 BILINEAR_FILTERS_2TAP(xoffset)); \
133 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 95 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
134 BILINEAR_FILTERS_2TAP(yoffset)); \ 96 BILINEAR_FILTERS_2TAP(yoffset)); \
135 \ 97 \
136 return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ 98 return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
137 } 99 }
138 100
139 #define SUBPIX_AVG_VAR(W, H) \ 101 #define SUBPIX_AVG_VAR(W, H) \
140 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ 102 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
141 const uint8_t *src, int src_stride, \ 103 const uint8_t *src, int src_stride, \
142 int xoffset, int yoffset, \ 104 int xoffset, int yoffset, \
143 const uint8_t *dst, int dst_stride, \ 105 const uint8_t *dst, int dst_stride, \
144 unsigned int *sse, \ 106 unsigned int *sse, \
145 const uint8_t *second_pred) { \ 107 const uint8_t *second_pred) { \
146 uint16_t fdata3[(H + 1) * W]; \ 108 uint16_t fdata3[(H + 1) * W]; \
147 uint8_t temp2[H * W]; \ 109 uint8_t temp2[H * W]; \
148 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ 110 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
149 \ 111 \
150 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ 112 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
151 BILINEAR_FILTERS_2TAP(xoffset)); \ 113 BILINEAR_FILTERS_2TAP(xoffset)); \
152 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 114 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
153 BILINEAR_FILTERS_2TAP(yoffset)); \ 115 BILINEAR_FILTERS_2TAP(yoffset)); \
154 \ 116 \
155 vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ 117 vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
156 \ 118 \
157 return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ 119 return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
158 } 120 }
159 121
160 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
161 const uint8_t *ref_ptr, int ref_stride,
162 unsigned int *sse, int *sum) {
163 variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
164 }
165
166 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
167 const uint8_t *ref_ptr, int ref_stride,
168 unsigned int *sse, int *sum) {
169 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
170 }
171
172 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
173 const uint8_t *ref, int ref_stride,
174 unsigned int *sse) {
175 int sum;
176 variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
177 return *sse;
178 }
179
180 unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
181 const uint8_t *ref, int ref_stride,
182 unsigned int *sse) {
183 int sum;
184 variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
185 return *sse;
186 }
187
188 unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
189 const uint8_t *ref, int ref_stride,
190 unsigned int *sse) {
191 int sum;
192 variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
193 return *sse;
194 }
195
196 unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
197 const uint8_t *ref, int ref_stride,
198 unsigned int *sse) {
199 int sum;
200 variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
201 return *sse;
202 }
203
204 VAR(4, 4)
205 SUBPIX_VAR(4, 4) 122 SUBPIX_VAR(4, 4)
206 SUBPIX_AVG_VAR(4, 4) 123 SUBPIX_AVG_VAR(4, 4)
207 124
208 VAR(4, 8)
209 SUBPIX_VAR(4, 8) 125 SUBPIX_VAR(4, 8)
210 SUBPIX_AVG_VAR(4, 8) 126 SUBPIX_AVG_VAR(4, 8)
211 127
212 VAR(8, 4)
213 SUBPIX_VAR(8, 4) 128 SUBPIX_VAR(8, 4)
214 SUBPIX_AVG_VAR(8, 4) 129 SUBPIX_AVG_VAR(8, 4)
215 130
216 VAR(8, 8)
217 SUBPIX_VAR(8, 8) 131 SUBPIX_VAR(8, 8)
218 SUBPIX_AVG_VAR(8, 8) 132 SUBPIX_AVG_VAR(8, 8)
219 133
220 VAR(8, 16)
221 SUBPIX_VAR(8, 16) 134 SUBPIX_VAR(8, 16)
222 SUBPIX_AVG_VAR(8, 16) 135 SUBPIX_AVG_VAR(8, 16)
223 136
224 VAR(16, 8)
225 SUBPIX_VAR(16, 8) 137 SUBPIX_VAR(16, 8)
226 SUBPIX_AVG_VAR(16, 8) 138 SUBPIX_AVG_VAR(16, 8)
227 139
228 VAR(16, 16)
229 SUBPIX_VAR(16, 16) 140 SUBPIX_VAR(16, 16)
230 SUBPIX_AVG_VAR(16, 16) 141 SUBPIX_AVG_VAR(16, 16)
231 142
232 VAR(16, 32)
233 SUBPIX_VAR(16, 32) 143 SUBPIX_VAR(16, 32)
234 SUBPIX_AVG_VAR(16, 32) 144 SUBPIX_AVG_VAR(16, 32)
235 145
236 VAR(32, 16)
237 SUBPIX_VAR(32, 16) 146 SUBPIX_VAR(32, 16)
238 SUBPIX_AVG_VAR(32, 16) 147 SUBPIX_AVG_VAR(32, 16)
239 148
240 VAR(32, 32)
241 SUBPIX_VAR(32, 32) 149 SUBPIX_VAR(32, 32)
242 SUBPIX_AVG_VAR(32, 32) 150 SUBPIX_AVG_VAR(32, 32)
243 151
244 VAR(32, 64)
245 SUBPIX_VAR(32, 64) 152 SUBPIX_VAR(32, 64)
246 SUBPIX_AVG_VAR(32, 64) 153 SUBPIX_AVG_VAR(32, 64)
247 154
248 VAR(64, 32)
249 SUBPIX_VAR(64, 32) 155 SUBPIX_VAR(64, 32)
250 SUBPIX_AVG_VAR(64, 32) 156 SUBPIX_AVG_VAR(64, 32)
251 157
252 VAR(64, 64)
253 SUBPIX_VAR(64, 64) 158 SUBPIX_VAR(64, 64)
254 SUBPIX_AVG_VAR(64, 64) 159 SUBPIX_AVG_VAR(64, 64)
255 160
256 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
257 int height, const uint8_t *ref, int ref_stride) {
258 int i, j;
259
260 for (i = 0; i < height; i++) {
261 for (j = 0; j < width; j++) {
262 const int tmp = pred[j] + ref[j];
263 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
264 }
265 comp_pred += width;
266 pred += width;
267 ref += ref_stride;
268 }
269 }
270
271 #if CONFIG_VP9_HIGHBITDEPTH 161 #if CONFIG_VP9_HIGHBITDEPTH
272 void highbd_variance64(const uint8_t *a8, int a_stride,
273 const uint8_t *b8, int b_stride,
274 int w, int h, uint64_t *sse,
275 uint64_t *sum) {
276 int i, j;
277
278 uint16_t *a = CONVERT_TO_SHORTPTR(a8);
279 uint16_t *b = CONVERT_TO_SHORTPTR(b8);
280 *sum = 0;
281 *sse = 0;
282
283 for (i = 0; i < h; i++) {
284 for (j = 0; j < w; j++) {
285 const int diff = a[j] - b[j];
286 *sum += diff;
287 *sse += diff * diff;
288 }
289 a += a_stride;
290 b += b_stride;
291 }
292 }
293
294 void highbd_variance(const uint8_t *a8, int a_stride,
295 const uint8_t *b8, int b_stride,
296 int w, int h, unsigned int *sse,
297 int *sum) {
298 uint64_t sse_long = 0;
299 uint64_t sum_long = 0;
300 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
301 *sse = (unsigned int)sse_long;
302 *sum = (int)sum_long;
303 }
304
305 void highbd_10_variance(const uint8_t *a8, int a_stride,
306 const uint8_t *b8, int b_stride,
307 int w, int h, unsigned int *sse,
308 int *sum) {
309 uint64_t sse_long = 0;
310 uint64_t sum_long = 0;
311 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
312 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
313 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);
314 }
315
316 void highbd_12_variance(const uint8_t *a8, int a_stride,
317 const uint8_t *b8, int b_stride,
318 int w, int h, unsigned int *sse,
319 int *sum) {
320 uint64_t sse_long = 0;
321 uint64_t sum_long = 0;
322 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
323 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
324 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);
325 }
326
327 static void highbd_var_filter_block2d_bil_first_pass( 162 static void highbd_var_filter_block2d_bil_first_pass(
328 const uint8_t *src_ptr8, 163 const uint8_t *src_ptr8,
329 uint16_t *output_ptr, 164 uint16_t *output_ptr,
330 unsigned int src_pixels_per_line, 165 unsigned int src_pixels_per_line,
331 int pixel_step, 166 int pixel_step,
332 unsigned int output_height, 167 unsigned int output_height,
333 unsigned int output_width, 168 unsigned int output_width,
334 const int16_t *vp9_filter) { 169 const int16_t *vp9_filter) {
335 unsigned int i, j; 170 unsigned int i, j;
336 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); 171 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
(...skipping 30 matching lines...) Expand all
367 (int)src_ptr[pixel_step] * vp9_filter[1], 202 (int)src_ptr[pixel_step] * vp9_filter[1],
368 FILTER_BITS); 203 FILTER_BITS);
369 src_ptr++; 204 src_ptr++;
370 } 205 }
371 206
372 src_ptr += src_pixels_per_line - output_width; 207 src_ptr += src_pixels_per_line - output_width;
373 output_ptr += output_width; 208 output_ptr += output_width;
374 } 209 }
375 } 210 }
376 211
377 #define HIGHBD_VAR(W, H) \
378 unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
379 const uint8_t *b, int b_stride, \
380 unsigned int *sse) { \
381 int sum; \
382 highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
383 return *sse - (((int64_t)sum * sum) / (W * H)); \
384 } \
385 \
386 unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \
387 int a_stride, \
388 const uint8_t *b, \
389 int b_stride, \
390 unsigned int *sse) { \
391 int sum; \
392 highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
393 return *sse - (((int64_t)sum * sum) / (W * H)); \
394 } \
395 \
396 unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
397 int a_stride, \
398 const uint8_t *b, \
399 int b_stride, \
400 unsigned int *sse) { \
401 int sum; \
402 highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
403 return *sse - (((int64_t)sum * sum) / (W * H)); \
404 }
405
406 #define HIGHBD_SUBPIX_VAR(W, H) \ 212 #define HIGHBD_SUBPIX_VAR(W, H) \
407 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ 213 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
408 const uint8_t *src, int src_stride, \ 214 const uint8_t *src, int src_stride, \
409 int xoffset, int yoffset, \ 215 int xoffset, int yoffset, \
410 const uint8_t *dst, int dst_stride, \ 216 const uint8_t *dst, int dst_stride, \
411 unsigned int *sse) { \ 217 unsigned int *sse) { \
412 uint16_t fdata3[(H + 1) * W]; \ 218 uint16_t fdata3[(H + 1) * W]; \
413 uint16_t temp2[H * W]; \ 219 uint16_t temp2[H * W]; \
414 \ 220 \
415 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 221 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
416 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 222 W, BILINEAR_FILTERS_2TAP(xoffset)); \
417 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 223 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
418 BILINEAR_FILTERS_2TAP(yoffset)); \ 224 BILINEAR_FILTERS_2TAP(yoffset)); \
419 \ 225 \
420 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ 226 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
421 dst_stride, sse); \ 227 dst_stride, sse); \
422 } \ 228 } \
423 \ 229 \
424 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ 230 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
425 const uint8_t *src, int src_stride, \ 231 const uint8_t *src, int src_stride, \
426 int xoffset, int yoffset, \ 232 int xoffset, int yoffset, \
427 const uint8_t *dst, int dst_stride, \ 233 const uint8_t *dst, int dst_stride, \
428 unsigned int *sse) { \ 234 unsigned int *sse) { \
429 uint16_t fdata3[(H + 1) * W]; \ 235 uint16_t fdata3[(H + 1) * W]; \
430 uint16_t temp2[H * W]; \ 236 uint16_t temp2[H * W]; \
431 \ 237 \
432 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 238 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
433 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 239 W, BILINEAR_FILTERS_2TAP(xoffset)); \
434 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 240 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
435 BILINEAR_FILTERS_2TAP(yoffset)); \ 241 BILINEAR_FILTERS_2TAP(yoffset)); \
436 \ 242 \
437 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ 243 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
438 W, dst, dst_stride, sse); \ 244 W, dst, dst_stride, sse); \
439 } \ 245 } \
440 \ 246 \
441 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ 247 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
442 const uint8_t *src, int src_stride, \ 248 const uint8_t *src, int src_stride, \
443 int xoffset, int yoffset, \ 249 int xoffset, int yoffset, \
444 const uint8_t *dst, int dst_stride, \ 250 const uint8_t *dst, int dst_stride, \
445 unsigned int *sse) { \ 251 unsigned int *sse) { \
446 uint16_t fdata3[(H + 1) * W]; \ 252 uint16_t fdata3[(H + 1) * W]; \
447 uint16_t temp2[H * W]; \ 253 uint16_t temp2[H * W]; \
448 \ 254 \
449 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 255 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
450 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 256 W, BILINEAR_FILTERS_2TAP(xoffset)); \
451 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 257 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
452 BILINEAR_FILTERS_2TAP(yoffset)); \ 258 BILINEAR_FILTERS_2TAP(yoffset)); \
453 \ 259 \
454 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ 260 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
455 W, dst, dst_stride, sse); \ 261 W, dst, dst_stride, sse); \
456 } 262 }
457 263
458 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \ 264 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \
459 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ 265 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
460 const uint8_t *src, int src_stride, \ 266 const uint8_t *src, int src_stride, \
461 int xoffset, int yoffset, \ 267 int xoffset, int yoffset, \
462 const uint8_t *dst, int dst_stride, \ 268 const uint8_t *dst, int dst_stride, \
463 unsigned int *sse, \ 269 unsigned int *sse, \
464 const uint8_t *second_pred) { \ 270 const uint8_t *second_pred) { \
465 uint16_t fdata3[(H + 1) * W]; \ 271 uint16_t fdata3[(H + 1) * W]; \
466 uint16_t temp2[H * W]; \ 272 uint16_t temp2[H * W]; \
467 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 273 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
468 \ 274 \
469 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 275 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
470 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 276 W, BILINEAR_FILTERS_2TAP(xoffset)); \
471 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 277 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
472 BILINEAR_FILTERS_2TAP(yoffset)); \ 278 BILINEAR_FILTERS_2TAP(yoffset)); \
473 \ 279 \
474 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 280 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
475 CONVERT_TO_BYTEPTR(temp2), W); \ 281 CONVERT_TO_BYTEPTR(temp2), W); \
476 \ 282 \
477 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ 283 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
478 dst_stride, sse); \ 284 dst_stride, sse); \
479 } \ 285 } \
480 \ 286 \
481 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ 287 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
482 const uint8_t *src, int src_stride, \ 288 const uint8_t *src, int src_stride, \
483 int xoffset, int yoffset, \ 289 int xoffset, int yoffset, \
484 const uint8_t *dst, int dst_stride, \ 290 const uint8_t *dst, int dst_stride, \
485 unsigned int *sse, \ 291 unsigned int *sse, \
486 const uint8_t *second_pred) { \ 292 const uint8_t *second_pred) { \
487 uint16_t fdata3[(H + 1) * W]; \ 293 uint16_t fdata3[(H + 1) * W]; \
488 uint16_t temp2[H * W]; \ 294 uint16_t temp2[H * W]; \
489 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 295 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
490 \ 296 \
491 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 297 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
492 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 298 W, BILINEAR_FILTERS_2TAP(xoffset)); \
493 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 299 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
494 BILINEAR_FILTERS_2TAP(yoffset)); \ 300 BILINEAR_FILTERS_2TAP(yoffset)); \
495 \ 301 \
496 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 302 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
497 CONVERT_TO_BYTEPTR(temp2), W); \ 303 CONVERT_TO_BYTEPTR(temp2), W); \
498 \ 304 \
499 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ 305 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
500 W, dst, dst_stride, sse); \ 306 W, dst, dst_stride, sse); \
501 } \ 307 } \
502 \ 308 \
503 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ 309 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
504 const uint8_t *src, int src_stride, \ 310 const uint8_t *src, int src_stride, \
505 int xoffset, int yoffset, \ 311 int xoffset, int yoffset, \
506 const uint8_t *dst, int dst_stride, \ 312 const uint8_t *dst, int dst_stride, \
507 unsigned int *sse, \ 313 unsigned int *sse, \
508 const uint8_t *second_pred) { \ 314 const uint8_t *second_pred) { \
509 uint16_t fdata3[(H + 1) * W]; \ 315 uint16_t fdata3[(H + 1) * W]; \
510 uint16_t temp2[H * W]; \ 316 uint16_t temp2[H * W]; \
511 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ 317 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
512 \ 318 \
513 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ 319 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
514 W, BILINEAR_FILTERS_2TAP(xoffset)); \ 320 W, BILINEAR_FILTERS_2TAP(xoffset)); \
515 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ 321 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
516 BILINEAR_FILTERS_2TAP(yoffset)); \ 322 BILINEAR_FILTERS_2TAP(yoffset)); \
517 \ 323 \
518 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ 324 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
519 CONVERT_TO_BYTEPTR(temp2), W); \ 325 CONVERT_TO_BYTEPTR(temp2), W); \
520 \ 326 \
521 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ 327 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
522 W, dst, dst_stride, sse); \ 328 W, dst, dst_stride, sse); \
523 } 329 }
524 330
525 #define HIGHBD_GET_VAR(S) \
526 void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
527 const uint8_t *ref, int ref_stride, \
528 unsigned int *sse, int *sum) { \
529 highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
530 } \
531 \
532 void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
533 const uint8_t *ref, int ref_stride, \
534 unsigned int *sse, int *sum) { \
535 highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
536 } \
537 \
538 void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
539 const uint8_t *ref, int ref_stride, \
540 unsigned int *sse, int *sum) { \
541 highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
542 }
543
544 #define HIGHBD_MSE(W, H) \
545 unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \
546 int src_stride, \
547 const uint8_t *ref, \
548 int ref_stride, \
549 unsigned int *sse) { \
550 int sum; \
551 highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
552 return *sse; \
553 } \
554 \
555 unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \
556 int src_stride, \
557 const uint8_t *ref, \
558 int ref_stride, \
559 unsigned int *sse) { \
560 int sum; \
561 highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
562 return *sse; \
563 } \
564 \
565 unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \
566 int src_stride, \
567 const uint8_t *ref, \
568 int ref_stride, \
569 unsigned int *sse) { \
570 int sum; \
571 highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
572 return *sse; \
573 }
574
575 HIGHBD_GET_VAR(8)
576 HIGHBD_GET_VAR(16)
577
578 HIGHBD_MSE(16, 16)
579 HIGHBD_MSE(16, 8)
580 HIGHBD_MSE(8, 16)
581 HIGHBD_MSE(8, 8)
582
583 HIGHBD_VAR(4, 4)
584 HIGHBD_SUBPIX_VAR(4, 4) 331 HIGHBD_SUBPIX_VAR(4, 4)
585 HIGHBD_SUBPIX_AVG_VAR(4, 4) 332 HIGHBD_SUBPIX_AVG_VAR(4, 4)
586 333
587 HIGHBD_VAR(4, 8)
588 HIGHBD_SUBPIX_VAR(4, 8) 334 HIGHBD_SUBPIX_VAR(4, 8)
589 HIGHBD_SUBPIX_AVG_VAR(4, 8) 335 HIGHBD_SUBPIX_AVG_VAR(4, 8)
590 336
591 HIGHBD_VAR(8, 4)
592 HIGHBD_SUBPIX_VAR(8, 4) 337 HIGHBD_SUBPIX_VAR(8, 4)
593 HIGHBD_SUBPIX_AVG_VAR(8, 4) 338 HIGHBD_SUBPIX_AVG_VAR(8, 4)
594 339
595 HIGHBD_VAR(8, 8)
596 HIGHBD_SUBPIX_VAR(8, 8) 340 HIGHBD_SUBPIX_VAR(8, 8)
597 HIGHBD_SUBPIX_AVG_VAR(8, 8) 341 HIGHBD_SUBPIX_AVG_VAR(8, 8)
598 342
599 HIGHBD_VAR(8, 16)
600 HIGHBD_SUBPIX_VAR(8, 16) 343 HIGHBD_SUBPIX_VAR(8, 16)
601 HIGHBD_SUBPIX_AVG_VAR(8, 16) 344 HIGHBD_SUBPIX_AVG_VAR(8, 16)
602 345
603 HIGHBD_VAR(16, 8)
604 HIGHBD_SUBPIX_VAR(16, 8) 346 HIGHBD_SUBPIX_VAR(16, 8)
605 HIGHBD_SUBPIX_AVG_VAR(16, 8) 347 HIGHBD_SUBPIX_AVG_VAR(16, 8)
606 348
607 HIGHBD_VAR(16, 16)
608 HIGHBD_SUBPIX_VAR(16, 16) 349 HIGHBD_SUBPIX_VAR(16, 16)
609 HIGHBD_SUBPIX_AVG_VAR(16, 16) 350 HIGHBD_SUBPIX_AVG_VAR(16, 16)
610 351
611 HIGHBD_VAR(16, 32)
612 HIGHBD_SUBPIX_VAR(16, 32) 352 HIGHBD_SUBPIX_VAR(16, 32)
613 HIGHBD_SUBPIX_AVG_VAR(16, 32) 353 HIGHBD_SUBPIX_AVG_VAR(16, 32)
614 354
615 HIGHBD_VAR(32, 16)
616 HIGHBD_SUBPIX_VAR(32, 16) 355 HIGHBD_SUBPIX_VAR(32, 16)
617 HIGHBD_SUBPIX_AVG_VAR(32, 16) 356 HIGHBD_SUBPIX_AVG_VAR(32, 16)
618 357
619 HIGHBD_VAR(32, 32)
620 HIGHBD_SUBPIX_VAR(32, 32) 358 HIGHBD_SUBPIX_VAR(32, 32)
621 HIGHBD_SUBPIX_AVG_VAR(32, 32) 359 HIGHBD_SUBPIX_AVG_VAR(32, 32)
622 360
623 HIGHBD_VAR(32, 64)
624 HIGHBD_SUBPIX_VAR(32, 64) 361 HIGHBD_SUBPIX_VAR(32, 64)
625 HIGHBD_SUBPIX_AVG_VAR(32, 64) 362 HIGHBD_SUBPIX_AVG_VAR(32, 64)
626 363
627 HIGHBD_VAR(64, 32)
628 HIGHBD_SUBPIX_VAR(64, 32) 364 HIGHBD_SUBPIX_VAR(64, 32)
629 HIGHBD_SUBPIX_AVG_VAR(64, 32) 365 HIGHBD_SUBPIX_AVG_VAR(64, 32)
630 366
631 HIGHBD_VAR(64, 64)
632 HIGHBD_SUBPIX_VAR(64, 64) 367 HIGHBD_SUBPIX_VAR(64, 64)
633 HIGHBD_SUBPIX_AVG_VAR(64, 64) 368 HIGHBD_SUBPIX_AVG_VAR(64, 64)
634
635 void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
636 int width, int height, const uint8_t *ref8,
637 int ref_stride) {
638 int i, j;
639 uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
640 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
641 for (i = 0; i < height; i++) {
642 for (j = 0; j < width; j++) {
643 const int tmp = pred[j] + ref[j];
644 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
645 }
646 comp_pred += width;
647 pred += width;
648 ref += ref_stride;
649 }
650 }
651 #endif // CONFIG_VP9_HIGHBITDEPTH 369 #endif // CONFIG_VP9_HIGHBITDEPTH
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_variance.h ('k') | source/libvpx/vp9/encoder/vp9_writer.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698