Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(326)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 #include "./vpx_config.h" 10 #include "./vpx_config.h"
11 #include "vp9/common/vp9_common.h" 11 #include "vp9/common/vp9_common.h"
12 12
13 #include "vp9/encoder/vp9_variance.h" 13 #include "vp9/encoder/vp9_variance.h"
14 #include "vpx_ports/mem.h" 14 #include "vpx_ports/mem.h"
15 15
/*
 * Common signature of the fixed-size block kernels declared below. A kernel
 * is handed a source and a reference pointer with their strides and stores
 * its two results through the sse and sum pointers (the callers in this file
 * read both after every call and ignore the return value).
 */
typedef uint32_t (*high_variance_fn_t)(const uint16_t *src, int src_stride,
                                       const uint16_t *ref, int ref_stride,
                                       uint32_t *sse, int *sum);

/* 8x8 kernel; defined outside this file. */
uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
                                    const uint16_t *ref, int ref_stride,
                                    uint32_t *sse, int *sum);

/* 16x16 kernel; defined outside this file. */
uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
                                      const uint16_t *ref, int ref_stride,
                                      uint32_t *sse, int *sum);
27
28 static void highbd_variance_sse2(const uint16_t *src, int src_stride,
29 const uint16_t *ref, int ref_stride,
30 int w, int h, uint32_t *sse, int *sum,
31 high_variance_fn_t var_fn, int block_size) {
32 int i, j;
33
34 *sse = 0;
35 *sum = 0;
36
37 for (i = 0; i < h; i += block_size) {
38 for (j = 0; j < w; j += block_size) {
39 unsigned int sse0;
40 int sum0;
41 var_fn(src + src_stride * i + j, src_stride,
42 ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
43 *sse += sse0;
44 *sum += sum0;
45 }
46 }
47 }
48
49 static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
50 const uint16_t *ref, int ref_stride,
51 int w, int h, uint32_t *sse, int *sum,
52 high_variance_fn_t var_fn, int block_size) {
53 int i, j;
54 uint64_t sse_long = 0;
55 int64_t sum_long = 0;
56
57 for (i = 0; i < h; i += block_size) {
58 for (j = 0; j < w; j += block_size) {
59 unsigned int sse0;
60 int sum0;
61 var_fn(src + src_stride * i + j, src_stride,
62 ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
63 sse_long += sse0;
64 sum_long += sum0;
65 }
66 }
67 *sum = ROUND_POWER_OF_TWO(sum_long, 2);
68 *sse = ROUND_POWER_OF_TWO(sse_long, 4);
69 }
70
71 static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
72 const uint16_t *ref, int ref_stride,
73 int w, int h, uint32_t *sse, int *sum,
74 high_variance_fn_t var_fn, int block_size) {
75 int i, j;
76 uint64_t sse_long = 0;
77 int64_t sum_long = 0;
78
79 for (i = 0; i < h; i += block_size) {
80 for (j = 0; j < w; j += block_size) {
81 unsigned int sse0;
82 int sum0;
83 var_fn(src + src_stride * i + j, src_stride,
84 ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
85 sse_long += sse0;
86 sum_long += sum0;
87 }
88 }
89 *sum = ROUND_POWER_OF_TWO(sum_long, 4);
90 *sse = ROUND_POWER_OF_TWO(sse_long, 8);
91 }
92
93
/*
 * HIGH_GET_VAR(S) defines three public SxS entry points that take the
 * uint8_t-typed pointers of the public interface, convert them with
 * CONVERT_TO_SHORTPTR and forward to vp9_highbd_calc<S>x<S>var_sse2():
 *
 *   vp9_highbd_get<S>x<S>var_sse2     stores the kernel's *sse / *sum as is.
 *   vp9_highbd_10_get<S>x<S>var_sse2  then rounds *sum by 2 bits, *sse by 4.
 *   vp9_highbd_12_get<S>x<S>var_sse2  then rounds *sum by 4 bits, *sse by 8.
 *
 * The macro expands to complete function definitions, so it is invoked
 * without a trailing ';' (a stray ';' at file scope is not valid ISO C and
 * is reported by pedantic builds).
 */
#define HIGH_GET_VAR(S) \
void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                       const uint8_t *ref8, int ref_stride, \
                                       uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
} \
\
void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                          const uint8_t *ref8, int ref_stride, \
                                          uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
  *sum = ROUND_POWER_OF_TWO(*sum, 2); \
  *sse = ROUND_POWER_OF_TWO(*sse, 4); \
} \
\
void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
                                          const uint8_t *ref8, int ref_stride, \
                                          uint32_t *sse, int *sum) { \
  uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
  vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                     sse, sum); \
  *sum = ROUND_POWER_OF_TWO(*sum, 4); \
  *sse = ROUND_POWER_OF_TWO(*sse, 8); \
}

HIGH_GET_VAR(16)
HIGH_GET_VAR(8)

#undef HIGH_GET_VAR
130
131 #define VAR_FN(w, h, block_size, shift) \
132 uint32_t vp9_highbd_variance##w##x##h##_sse2( \
133 const uint8_t *src8, int src_stride, \
134 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
135 int sum; \
136 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
137 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
138 highbd_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \
139 vp9_highbd_calc##block_size##x##block_size##var_sse2, \
140 block_size); \
141 return *sse - (((int64_t)sum * sum) >> shift); \
142 } \
143 \
144 uint32_t vp9_highbd_10_variance##w##x##h##_sse2( \
145 const uint8_t *src8, int src_stride, \
146 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
147 int sum; \
148 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
149 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
150 highbd_10_variance_sse2( \
151 src, src_stride, ref, ref_stride, w, h, sse, &sum, \
152 vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
153 return *sse - (((int64_t)sum * sum) >> shift); \
154 } \
155 \
156 uint32_t vp9_highbd_12_variance##w##x##h##_sse2( \
157 const uint8_t *src8, int src_stride, \
158 const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
159 int sum; \
160 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
161 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
162 highbd_12_variance_sse2( \
163 src, src_stride, ref, ref_stride, w, h, sse, &sum, \
164 vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
165 return *sse - (((int64_t)sum * sum) >> shift); \
166 }
167
168 VAR_FN(64, 64, 16, 12);
169 VAR_FN(64, 32, 16, 11);
170 VAR_FN(32, 64, 16, 11);
171 VAR_FN(32, 32, 16, 10);
172 VAR_FN(32, 16, 16, 9);
173 VAR_FN(16, 32, 16, 9);
174 VAR_FN(16, 16, 16, 8);
175 VAR_FN(16, 8, 8, 7);
176 VAR_FN(8, 16, 8, 7);
177 VAR_FN(8, 8, 8, 6);
178
179 #undef VAR_FN
180
181 unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride,
182 const uint8_t *ref8, int ref_stride,
183 unsigned int *sse) {
184 int sum;
185 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
186 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
187 highbd_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
188 sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
189 return *sse;
190 }
191
192 unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride,
193 const uint8_t *ref8, int ref_stride,
194 unsigned int *sse) {
195 int sum;
196 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
197 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
198 highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
199 sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
200 return *sse;
201 }
202
203 unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride,
204 const uint8_t *ref8, int ref_stride,
205 unsigned int *sse) {
206 int sum;
207 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
208 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
209 highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
210 sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
211 return *sse;
212 }
213
214 unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride,
215 const uint8_t *ref8, int ref_stride,
216 unsigned int *sse) {
217 int sum;
218 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
219 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
220 highbd_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
221 sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
222 return *sse;
223 }
224
225 unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride,
226 const uint8_t *ref8, int ref_stride,
227 unsigned int *sse) {
228 int sum;
229 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
230 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
231 highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
232 sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
233 return *sse;
234 }
235
236 unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
237 const uint8_t *ref8, int ref_stride,
238 unsigned int *sse) {
239 int sum;
240 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
241 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
242 highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
243 sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
244 return *sse;
245 }
246
247 #define DECL(w, opt) \ 16 #define DECL(w, opt) \
248 int vp9_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \ 17 int vp9_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
249 ptrdiff_t src_stride, \ 18 ptrdiff_t src_stride, \
250 int x_offset, int y_offset, \ 19 int x_offset, int y_offset, \
251 const uint16_t *dst, \ 20 const uint16_t *dst, \
252 ptrdiff_t dst_stride, \ 21 ptrdiff_t dst_stride, \
253 int height, unsigned int *sse); 22 int height, unsigned int *sse);
254 #define DECLS(opt1, opt2) \ 23 #define DECLS(opt1, opt2) \
255 DECL(8, opt1); \ 24 DECL(8, opt1); \
256 DECL(16, opt1) 25 DECL(16, opt1)
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after
571 FN(16, 16, 16, 4, 4, opt1, (int64_t)); \ 340 FN(16, 16, 16, 4, 4, opt1, (int64_t)); \
572 FN(16, 8, 16, 4, 3, opt1, (int64_t)); \ 341 FN(16, 8, 16, 4, 3, opt1, (int64_t)); \
573 FN(8, 16, 8, 4, 3, opt1, (int64_t)); \ 342 FN(8, 16, 8, 4, 3, opt1, (int64_t)); \
574 FN(8, 8, 8, 3, 3, opt1, (int64_t)); \ 343 FN(8, 8, 8, 3, 3, opt1, (int64_t)); \
575 FN(8, 4, 8, 3, 2, opt1, (int64_t)); 344 FN(8, 4, 8, 3, 2, opt1, (int64_t));
576 345
577 FNS(sse2); 346 FNS(sse2);
578 347
579 #undef FNS 348 #undef FNS
580 #undef FN 349 #undef FN
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698