source/libvpx/vp9/encoder/vp9_variance.c - Issue 1162573005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/vp9_variance.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "./vp9_rtcd.h"	11 #include "./vp9_rtcd.h"

	12 #include "./vpx_dsp_rtcd.h"

12	13

13 #include "vpx_ports/mem.h"	14 #include "vpx_ports/mem.h"

14 #include "vpx/vpx_integer.h"	15 #include "vpx/vpx_integer.h"

15	16

16 #include "vp9/common/vp9_common.h"	17 #include "vp9/common/vp9_common.h"

17 #include "vp9/common/vp9_filter.h"	18 #include "vp9/common/vp9_filter.h"

18	19

19 #include "vp9/encoder/vp9_variance.h"	20 #include "vp9/encoder/vp9_variance.h"

20	21

21 void variance(const uint8_t *a, int a_stride,

22 const uint8_t *b, int b_stride,

23 int w, int h, unsigned int *sse, int *sum) {

24 int i, j;

25

26 *sum = 0;

27 *sse = 0;

28

29 for (i = 0; i < h; i++) {

30 for (j = 0; j < w; j++) {

31 const int diff = a[j] - b[j];

32 *sum += diff;

33 *sse += diff * diff;

34 }

35

36 a += a_stride;

37 b += b_stride;

38 }

39 }

40

41 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal	22 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal

42 // or vertical direction to produce the filtered output block. Used to implement	23 // or vertical direction to produce the filtered output block. Used to implement

43 // first-pass of 2-D separable filter.	24 // first-pass of 2-D separable filter.

44 //	25 //

45 // Produces int32_t output to retain precision for next pass. Two filter taps	26 // Produces int32_t output to retain precision for next pass. Two filter taps

46 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is	27 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is

47 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It	28 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It

48 // defines the offset required to move from one input to the next.	29 // defines the offset required to move from one input to the next.

49 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,	30 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,

50 uint16_t *output_ptr,	31 uint16_t *output_ptr,

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
93 (int)src_ptr[pixel_step] * vp9_filter[1],	74 (int)src_ptr[pixel_step] * vp9_filter[1],

94 FILTER_BITS);	75 FILTER_BITS);

95 src_ptr++;	76 src_ptr++;

96 }	77 }

97	78

98 src_ptr += src_pixels_per_line - output_width;	79 src_ptr += src_pixels_per_line - output_width;

99 output_ptr += output_width;	80 output_ptr += output_width;

100 }	81 }

101 }	82 }

102	83

103 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {

104 unsigned int i, sum = 0;

105

106 for (i = 0; i < 256; ++i) {

107 sum += src_ptr[i] * src_ptr[i];

108 }

109

110 return sum;

111 }

112

113 #define VAR(W, H) \

114 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \

115 const uint8_t *b, int b_stride, \

116 unsigned int *sse) { \

117 int sum; \

118 variance(a, a_stride, b, b_stride, W, H, sse, &sum); \

119 return *sse - (((int64_t)sum * sum) / (W * H)); \

120 }

121

122 #define SUBPIX_VAR(W, H) \	84 #define SUBPIX_VAR(W, H) \

123 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \	85 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \

124 const uint8_t *src, int src_stride, \	86 const uint8_t *src, int src_stride, \

125 int xoffset, int yoffset, \	87 int xoffset, int yoffset, \

126 const uint8_t *dst, int dst_stride, \	88 const uint8_t *dst, int dst_stride, \

127 unsigned int *sse) { \	89 unsigned int *sse) { \

128 uint16_t fdata3[(H + 1) * W]; \	90 uint16_t fdata3[(H + 1) * W]; \

129 uint8_t temp2[H * W]; \	91 uint8_t temp2[H * W]; \

130 \	92 \

131 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \	93 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \

132 BILINEAR_FILTERS_2TAP(xoffset)); \	94 BILINEAR_FILTERS_2TAP(xoffset)); \

133 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	95 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

134 BILINEAR_FILTERS_2TAP(yoffset)); \	96 BILINEAR_FILTERS_2TAP(yoffset)); \

135 \	97 \

136 return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \	98 return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \

137 }	99 }

138	100

139 #define SUBPIX_AVG_VAR(W, H) \	101 #define SUBPIX_AVG_VAR(W, H) \

140 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \	102 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \

141 const uint8_t *src, int src_stride, \	103 const uint8_t *src, int src_stride, \

142 int xoffset, int yoffset, \	104 int xoffset, int yoffset, \

143 const uint8_t *dst, int dst_stride, \	105 const uint8_t *dst, int dst_stride, \

144 unsigned int *sse, \	106 unsigned int *sse, \

145 const uint8_t *second_pred) { \	107 const uint8_t *second_pred) { \

146 uint16_t fdata3[(H + 1) * W]; \	108 uint16_t fdata3[(H + 1) * W]; \

147 uint8_t temp2[H * W]; \	109 uint8_t temp2[H * W]; \

148 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \	110 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \

149 \	111 \

150 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \	112 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \

151 BILINEAR_FILTERS_2TAP(xoffset)); \	113 BILINEAR_FILTERS_2TAP(xoffset)); \

152 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	114 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

153 BILINEAR_FILTERS_2TAP(yoffset)); \	115 BILINEAR_FILTERS_2TAP(yoffset)); \

154 \	116 \

155 vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \	117 vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \

156 \	118 \

157 return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \	119 return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \

158 }	120 }

159	121

160 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,

161 const uint8_t *ref_ptr, int ref_stride,

162 unsigned int *sse, int *sum) {

163 variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);

164 }

165

166 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,

167 const uint8_t *ref_ptr, int ref_stride,

168 unsigned int *sse, int *sum) {

169 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);

170 }

171

172 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,

173 const uint8_t *ref, int ref_stride,

174 unsigned int *sse) {

175 int sum;

176 variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);

177 return *sse;

178 }

179

180 unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,

181 const uint8_t *ref, int ref_stride,

182 unsigned int *sse) {

183 int sum;

184 variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);

185 return *sse;

186 }

187

188 unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,

189 const uint8_t *ref, int ref_stride,

190 unsigned int *sse) {

191 int sum;

192 variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);

193 return *sse;

194 }

195

196 unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,

197 const uint8_t *ref, int ref_stride,

198 unsigned int *sse) {

199 int sum;

200 variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);

201 return *sse;

202 }

203

204 VAR(4, 4)

205 SUBPIX_VAR(4, 4)	122 SUBPIX_VAR(4, 4)

206 SUBPIX_AVG_VAR(4, 4)	123 SUBPIX_AVG_VAR(4, 4)

207	124

208 VAR(4, 8)

209 SUBPIX_VAR(4, 8)	125 SUBPIX_VAR(4, 8)

210 SUBPIX_AVG_VAR(4, 8)	126 SUBPIX_AVG_VAR(4, 8)

211	127

212 VAR(8, 4)

213 SUBPIX_VAR(8, 4)	128 SUBPIX_VAR(8, 4)

214 SUBPIX_AVG_VAR(8, 4)	129 SUBPIX_AVG_VAR(8, 4)

215	130

216 VAR(8, 8)

217 SUBPIX_VAR(8, 8)	131 SUBPIX_VAR(8, 8)

218 SUBPIX_AVG_VAR(8, 8)	132 SUBPIX_AVG_VAR(8, 8)

219	133

220 VAR(8, 16)

221 SUBPIX_VAR(8, 16)	134 SUBPIX_VAR(8, 16)

222 SUBPIX_AVG_VAR(8, 16)	135 SUBPIX_AVG_VAR(8, 16)

223	136

224 VAR(16, 8)

225 SUBPIX_VAR(16, 8)	137 SUBPIX_VAR(16, 8)

226 SUBPIX_AVG_VAR(16, 8)	138 SUBPIX_AVG_VAR(16, 8)

227	139

228 VAR(16, 16)

229 SUBPIX_VAR(16, 16)	140 SUBPIX_VAR(16, 16)

230 SUBPIX_AVG_VAR(16, 16)	141 SUBPIX_AVG_VAR(16, 16)

231	142

232 VAR(16, 32)

233 SUBPIX_VAR(16, 32)	143 SUBPIX_VAR(16, 32)

234 SUBPIX_AVG_VAR(16, 32)	144 SUBPIX_AVG_VAR(16, 32)

235	145

236 VAR(32, 16)

237 SUBPIX_VAR(32, 16)	146 SUBPIX_VAR(32, 16)

238 SUBPIX_AVG_VAR(32, 16)	147 SUBPIX_AVG_VAR(32, 16)

239	148

240 VAR(32, 32)

241 SUBPIX_VAR(32, 32)	149 SUBPIX_VAR(32, 32)

242 SUBPIX_AVG_VAR(32, 32)	150 SUBPIX_AVG_VAR(32, 32)

243	151

244 VAR(32, 64)

245 SUBPIX_VAR(32, 64)	152 SUBPIX_VAR(32, 64)

246 SUBPIX_AVG_VAR(32, 64)	153 SUBPIX_AVG_VAR(32, 64)

247	154

248 VAR(64, 32)

249 SUBPIX_VAR(64, 32)	155 SUBPIX_VAR(64, 32)

250 SUBPIX_AVG_VAR(64, 32)	156 SUBPIX_AVG_VAR(64, 32)

251	157

252 VAR(64, 64)

253 SUBPIX_VAR(64, 64)	158 SUBPIX_VAR(64, 64)

254 SUBPIX_AVG_VAR(64, 64)	159 SUBPIX_AVG_VAR(64, 64)

255	160

256 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,

257 int height, const uint8_t *ref, int ref_stride) {

258 int i, j;

259

260 for (i = 0; i < height; i++) {

261 for (j = 0; j < width; j++) {

262 const int tmp = pred[j] + ref[j];

263 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);

264 }

265 comp_pred += width;

266 pred += width;

267 ref += ref_stride;

268 }

269 }

270

271 #if CONFIG_VP9_HIGHBITDEPTH	161 #if CONFIG_VP9_HIGHBITDEPTH

272 void highbd_variance64(const uint8_t *a8, int a_stride,

273 const uint8_t *b8, int b_stride,

274 int w, int h, uint64_t *sse,

275 uint64_t *sum) {

276 int i, j;

277

278 uint16_t *a = CONVERT_TO_SHORTPTR(a8);

279 uint16_t *b = CONVERT_TO_SHORTPTR(b8);

280 *sum = 0;

281 *sse = 0;

282

283 for (i = 0; i < h; i++) {

284 for (j = 0; j < w; j++) {

285 const int diff = a[j] - b[j];

286 *sum += diff;

287 *sse += diff * diff;

288 }

289 a += a_stride;

290 b += b_stride;

291 }

292 }

293

294 void highbd_variance(const uint8_t *a8, int a_stride,

295 const uint8_t *b8, int b_stride,

296 int w, int h, unsigned int *sse,

297 int *sum) {

298 uint64_t sse_long = 0;

299 uint64_t sum_long = 0;

300 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

301 *sse = (unsigned int)sse_long;

302 *sum = (int)sum_long;

303 }

304

305 void highbd_10_variance(const uint8_t *a8, int a_stride,

306 const uint8_t *b8, int b_stride,

307 int w, int h, unsigned int *sse,

308 int *sum) {

309 uint64_t sse_long = 0;

310 uint64_t sum_long = 0;

311 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

312 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);

313 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);

314 }

315

316 void highbd_12_variance(const uint8_t *a8, int a_stride,

317 const uint8_t *b8, int b_stride,

318 int w, int h, unsigned int *sse,

319 int *sum) {

320 uint64_t sse_long = 0;

321 uint64_t sum_long = 0;

322 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

323 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);

324 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);

325 }

326

327 static void highbd_var_filter_block2d_bil_first_pass(	162 static void highbd_var_filter_block2d_bil_first_pass(

328 const uint8_t *src_ptr8,	163 const uint8_t *src_ptr8,

329 uint16_t *output_ptr,	164 uint16_t *output_ptr,

330 unsigned int src_pixels_per_line,	165 unsigned int src_pixels_per_line,

331 int pixel_step,	166 int pixel_step,

332 unsigned int output_height,	167 unsigned int output_height,

333 unsigned int output_width,	168 unsigned int output_width,

334 const int16_t *vp9_filter) {	169 const int16_t *vp9_filter) {

335 unsigned int i, j;	170 unsigned int i, j;

336 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);	171 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);

(...skipping 30 matching lines...) Expand all Loading...
367 (int)src_ptr[pixel_step] * vp9_filter[1],	202 (int)src_ptr[pixel_step] * vp9_filter[1],

368 FILTER_BITS);	203 FILTER_BITS);

369 src_ptr++;	204 src_ptr++;

370 }	205 }

371	206

372 src_ptr += src_pixels_per_line - output_width;	207 src_ptr += src_pixels_per_line - output_width;

373 output_ptr += output_width;	208 output_ptr += output_width;

374 }	209 }

375 }	210 }

376	211

377 #define HIGHBD_VAR(W, H) \

378 unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \

379 const uint8_t *b, int b_stride, \

380 unsigned int *sse) { \

381 int sum; \

382 highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \

383 return *sse - (((int64_t)sum * sum) / (W * H)); \

384 } \

385 \

386 unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \

387 int a_stride, \

388 const uint8_t *b, \

389 int b_stride, \

390 unsigned int *sse) { \

391 int sum; \

392 highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \

393 return *sse - (((int64_t)sum * sum) / (W * H)); \

394 } \

395 \

396 unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \

397 int a_stride, \

398 const uint8_t *b, \

399 int b_stride, \

400 unsigned int *sse) { \

401 int sum; \

402 highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \

403 return *sse - (((int64_t)sum * sum) / (W * H)); \

404 }

405

406 #define HIGHBD_SUBPIX_VAR(W, H) \	212 #define HIGHBD_SUBPIX_VAR(W, H) \

407 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \	213 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \

408 const uint8_t *src, int src_stride, \	214 const uint8_t *src, int src_stride, \

409 int xoffset, int yoffset, \	215 int xoffset, int yoffset, \

410 const uint8_t *dst, int dst_stride, \	216 const uint8_t *dst, int dst_stride, \

411 unsigned int *sse) { \	217 unsigned int *sse) { \

412 uint16_t fdata3[(H + 1) * W]; \	218 uint16_t fdata3[(H + 1) * W]; \

413 uint16_t temp2[H * W]; \	219 uint16_t temp2[H * W]; \

414 \	220 \

415 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	221 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

416 W, BILINEAR_FILTERS_2TAP(xoffset)); \	222 W, BILINEAR_FILTERS_2TAP(xoffset)); \

417 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	223 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

418 BILINEAR_FILTERS_2TAP(yoffset)); \	224 BILINEAR_FILTERS_2TAP(yoffset)); \

419 \	225 \

420 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \	226 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \

421 dst_stride, sse); \	227 dst_stride, sse); \

422 } \	228 } \

423 \	229 \

424 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \	230 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \

425 const uint8_t *src, int src_stride, \	231 const uint8_t *src, int src_stride, \

426 int xoffset, int yoffset, \	232 int xoffset, int yoffset, \

427 const uint8_t *dst, int dst_stride, \	233 const uint8_t *dst, int dst_stride, \

428 unsigned int *sse) { \	234 unsigned int *sse) { \

429 uint16_t fdata3[(H + 1) * W]; \	235 uint16_t fdata3[(H + 1) * W]; \

430 uint16_t temp2[H * W]; \	236 uint16_t temp2[H * W]; \

431 \	237 \

432 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	238 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

433 W, BILINEAR_FILTERS_2TAP(xoffset)); \	239 W, BILINEAR_FILTERS_2TAP(xoffset)); \

434 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	240 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

435 BILINEAR_FILTERS_2TAP(yoffset)); \	241 BILINEAR_FILTERS_2TAP(yoffset)); \

436 \	242 \

437 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \	243 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \

438 W, dst, dst_stride, sse); \	244 W, dst, dst_stride, sse); \

439 } \	245 } \

440 \	246 \

441 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \	247 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \

442 const uint8_t *src, int src_stride, \	248 const uint8_t *src, int src_stride, \

443 int xoffset, int yoffset, \	249 int xoffset, int yoffset, \

444 const uint8_t *dst, int dst_stride, \	250 const uint8_t *dst, int dst_stride, \

445 unsigned int *sse) { \	251 unsigned int *sse) { \

446 uint16_t fdata3[(H + 1) * W]; \	252 uint16_t fdata3[(H + 1) * W]; \

447 uint16_t temp2[H * W]; \	253 uint16_t temp2[H * W]; \

448 \	254 \

449 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	255 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

450 W, BILINEAR_FILTERS_2TAP(xoffset)); \	256 W, BILINEAR_FILTERS_2TAP(xoffset)); \

451 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	257 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

452 BILINEAR_FILTERS_2TAP(yoffset)); \	258 BILINEAR_FILTERS_2TAP(yoffset)); \

453 \	259 \

454 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \	260 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \

455 W, dst, dst_stride, sse); \	261 W, dst, dst_stride, sse); \

456 }	262 }

457	263

458 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \	264 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \

459 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \	265 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \

460 const uint8_t *src, int src_stride, \	266 const uint8_t *src, int src_stride, \

461 int xoffset, int yoffset, \	267 int xoffset, int yoffset, \

462 const uint8_t *dst, int dst_stride, \	268 const uint8_t *dst, int dst_stride, \

463 unsigned int *sse, \	269 unsigned int *sse, \

464 const uint8_t *second_pred) { \	270 const uint8_t *second_pred) { \

465 uint16_t fdata3[(H + 1) * W]; \	271 uint16_t fdata3[(H + 1) * W]; \

466 uint16_t temp2[H * W]; \	272 uint16_t temp2[H * W]; \

467 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \	273 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \

468 \	274 \

469 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	275 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

470 W, BILINEAR_FILTERS_2TAP(xoffset)); \	276 W, BILINEAR_FILTERS_2TAP(xoffset)); \

471 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	277 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

472 BILINEAR_FILTERS_2TAP(yoffset)); \	278 BILINEAR_FILTERS_2TAP(yoffset)); \

473 \	279 \

474 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \	280 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \

475 CONVERT_TO_BYTEPTR(temp2), W); \	281 CONVERT_TO_BYTEPTR(temp2), W); \

476 \	282 \

477 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \	283 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \

478 dst_stride, sse); \	284 dst_stride, sse); \

479 } \	285 } \

480 \	286 \

481 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \	287 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \

482 const uint8_t *src, int src_stride, \	288 const uint8_t *src, int src_stride, \

483 int xoffset, int yoffset, \	289 int xoffset, int yoffset, \

484 const uint8_t *dst, int dst_stride, \	290 const uint8_t *dst, int dst_stride, \

485 unsigned int *sse, \	291 unsigned int *sse, \

486 const uint8_t *second_pred) { \	292 const uint8_t *second_pred) { \

487 uint16_t fdata3[(H + 1) * W]; \	293 uint16_t fdata3[(H + 1) * W]; \

488 uint16_t temp2[H * W]; \	294 uint16_t temp2[H * W]; \

489 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \	295 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \

490 \	296 \

491 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	297 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

492 W, BILINEAR_FILTERS_2TAP(xoffset)); \	298 W, BILINEAR_FILTERS_2TAP(xoffset)); \

493 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	299 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

494 BILINEAR_FILTERS_2TAP(yoffset)); \	300 BILINEAR_FILTERS_2TAP(yoffset)); \

495 \	301 \

496 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \	302 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \

497 CONVERT_TO_BYTEPTR(temp2), W); \	303 CONVERT_TO_BYTEPTR(temp2), W); \

498 \	304 \

499 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \	305 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \

500 W, dst, dst_stride, sse); \	306 W, dst, dst_stride, sse); \

501 } \	307 } \

502 \	308 \

503 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \	309 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \

504 const uint8_t *src, int src_stride, \	310 const uint8_t *src, int src_stride, \

505 int xoffset, int yoffset, \	311 int xoffset, int yoffset, \

506 const uint8_t *dst, int dst_stride, \	312 const uint8_t *dst, int dst_stride, \

507 unsigned int *sse, \	313 unsigned int *sse, \

508 const uint8_t *second_pred) { \	314 const uint8_t *second_pred) { \

509 uint16_t fdata3[(H + 1) * W]; \	315 uint16_t fdata3[(H + 1) * W]; \

510 uint16_t temp2[H * W]; \	316 uint16_t temp2[H * W]; \

511 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \	317 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \

512 \	318 \

513 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \	319 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \

514 W, BILINEAR_FILTERS_2TAP(xoffset)); \	320 W, BILINEAR_FILTERS_2TAP(xoffset)); \

515 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \	321 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \

516 BILINEAR_FILTERS_2TAP(yoffset)); \	322 BILINEAR_FILTERS_2TAP(yoffset)); \

517 \	323 \

518 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \	324 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \

519 CONVERT_TO_BYTEPTR(temp2), W); \	325 CONVERT_TO_BYTEPTR(temp2), W); \

520 \	326 \

521 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \	327 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \

522 W, dst, dst_stride, sse); \	328 W, dst, dst_stride, sse); \

523 }	329 }

524	330

525 #define HIGHBD_GET_VAR(S) \

526 void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \

527 const uint8_t *ref, int ref_stride, \

528 unsigned int *sse, int *sum) { \

529 highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \

530 } \

531 \

532 void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \

533 const uint8_t *ref, int ref_stride, \

534 unsigned int *sse, int *sum) { \

535 highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \

536 } \

537 \

538 void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \

539 const uint8_t *ref, int ref_stride, \

540 unsigned int *sse, int *sum) { \

541 highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \

542 }

543

544 #define HIGHBD_MSE(W, H) \

545 unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \

546 int src_stride, \

547 const uint8_t *ref, \

548 int ref_stride, \

549 unsigned int *sse) { \

550 int sum; \

551 highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \

552 return *sse; \

553 } \

554 \

555 unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \

556 int src_stride, \

557 const uint8_t *ref, \

558 int ref_stride, \

559 unsigned int *sse) { \

560 int sum; \

561 highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \

562 return *sse; \

563 } \

564 \

565 unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \

566 int src_stride, \

567 const uint8_t *ref, \

568 int ref_stride, \

569 unsigned int *sse) { \

570 int sum; \

571 highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \

572 return *sse; \

573 }

574

575 HIGHBD_GET_VAR(8)

576 HIGHBD_GET_VAR(16)

577

578 HIGHBD_MSE(16, 16)

579 HIGHBD_MSE(16, 8)

580 HIGHBD_MSE(8, 16)

581 HIGHBD_MSE(8, 8)

582

583 HIGHBD_VAR(4, 4)

584 HIGHBD_SUBPIX_VAR(4, 4)	331 HIGHBD_SUBPIX_VAR(4, 4)

585 HIGHBD_SUBPIX_AVG_VAR(4, 4)	332 HIGHBD_SUBPIX_AVG_VAR(4, 4)

586	333

587 HIGHBD_VAR(4, 8)

588 HIGHBD_SUBPIX_VAR(4, 8)	334 HIGHBD_SUBPIX_VAR(4, 8)

589 HIGHBD_SUBPIX_AVG_VAR(4, 8)	335 HIGHBD_SUBPIX_AVG_VAR(4, 8)

590	336

591 HIGHBD_VAR(8, 4)

592 HIGHBD_SUBPIX_VAR(8, 4)	337 HIGHBD_SUBPIX_VAR(8, 4)

593 HIGHBD_SUBPIX_AVG_VAR(8, 4)	338 HIGHBD_SUBPIX_AVG_VAR(8, 4)

594	339

595 HIGHBD_VAR(8, 8)

596 HIGHBD_SUBPIX_VAR(8, 8)	340 HIGHBD_SUBPIX_VAR(8, 8)

597 HIGHBD_SUBPIX_AVG_VAR(8, 8)	341 HIGHBD_SUBPIX_AVG_VAR(8, 8)

598	342

599 HIGHBD_VAR(8, 16)

600 HIGHBD_SUBPIX_VAR(8, 16)	343 HIGHBD_SUBPIX_VAR(8, 16)

601 HIGHBD_SUBPIX_AVG_VAR(8, 16)	344 HIGHBD_SUBPIX_AVG_VAR(8, 16)

602	345

603 HIGHBD_VAR(16, 8)

604 HIGHBD_SUBPIX_VAR(16, 8)	346 HIGHBD_SUBPIX_VAR(16, 8)

605 HIGHBD_SUBPIX_AVG_VAR(16, 8)	347 HIGHBD_SUBPIX_AVG_VAR(16, 8)

606	348

607 HIGHBD_VAR(16, 16)

608 HIGHBD_SUBPIX_VAR(16, 16)	349 HIGHBD_SUBPIX_VAR(16, 16)

609 HIGHBD_SUBPIX_AVG_VAR(16, 16)	350 HIGHBD_SUBPIX_AVG_VAR(16, 16)

610	351

611 HIGHBD_VAR(16, 32)

612 HIGHBD_SUBPIX_VAR(16, 32)	352 HIGHBD_SUBPIX_VAR(16, 32)

613 HIGHBD_SUBPIX_AVG_VAR(16, 32)	353 HIGHBD_SUBPIX_AVG_VAR(16, 32)

614	354

615 HIGHBD_VAR(32, 16)

616 HIGHBD_SUBPIX_VAR(32, 16)	355 HIGHBD_SUBPIX_VAR(32, 16)

617 HIGHBD_SUBPIX_AVG_VAR(32, 16)	356 HIGHBD_SUBPIX_AVG_VAR(32, 16)

618	357

619 HIGHBD_VAR(32, 32)

620 HIGHBD_SUBPIX_VAR(32, 32)	358 HIGHBD_SUBPIX_VAR(32, 32)

621 HIGHBD_SUBPIX_AVG_VAR(32, 32)	359 HIGHBD_SUBPIX_AVG_VAR(32, 32)

622	360

623 HIGHBD_VAR(32, 64)

624 HIGHBD_SUBPIX_VAR(32, 64)	361 HIGHBD_SUBPIX_VAR(32, 64)

625 HIGHBD_SUBPIX_AVG_VAR(32, 64)	362 HIGHBD_SUBPIX_AVG_VAR(32, 64)

626	363

627 HIGHBD_VAR(64, 32)

628 HIGHBD_SUBPIX_VAR(64, 32)	364 HIGHBD_SUBPIX_VAR(64, 32)

629 HIGHBD_SUBPIX_AVG_VAR(64, 32)	365 HIGHBD_SUBPIX_AVG_VAR(64, 32)

630	366

631 HIGHBD_VAR(64, 64)

632 HIGHBD_SUBPIX_VAR(64, 64)	367 HIGHBD_SUBPIX_VAR(64, 64)

633 HIGHBD_SUBPIX_AVG_VAR(64, 64)	368 HIGHBD_SUBPIX_AVG_VAR(64, 64)

634

635 void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,

636 int width, int height, const uint8_t *ref8,

637 int ref_stride) {

638 int i, j;

639 uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);

640 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

641 for (i = 0; i < height; i++) {

642 for (j = 0; j < width; j++) {

643 const int tmp = pred[j] + ref[j];

644 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);

645 }

646 comp_pred += width;

647 pred += width;

648 ref += ref_stride;

649 }

650 }

651 #endif // CONFIG_VP9_HIGHBITDEPTH	369 #endif // CONFIG_VP9_HIGHBITDEPTH

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_variance.h ('k') | source/libvpx/vp9/encoder/vp9_writer.h » ('j') | no next file with comments »