source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c - Issue 1162573005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

	10 #include "./vp9_rtcd.h"

10 #include "./vpx_config.h"	11 #include "./vpx_config.h"

11	12

12 #include "vp9/encoder/vp9_variance.h"	13 #include "vp9/encoder/vp9_variance.h"

13 #include "vpx_ports/mem.h"	14 #include "vpx_ports/mem.h"

14	15

15 typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,

16 const uint8_t *ref, int ref_stride,

17 unsigned int *sse, int *sum);

18

19 void vp9_get16x16var_avx2(const uint8_t *src, int src_stride,

20 const uint8_t *ref, int ref_stride,

21 unsigned int *sse, int *sum);

22

23 void vp9_get32x32var_avx2(const uint8_t *src, int src_stride,

24 const uint8_t *ref, int ref_stride,

25 unsigned int *sse, int *sum);

26

27 unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,	16 unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,

28 int x_offset, int y_offset,	17 int x_offset, int y_offset,

29 const uint8_t *dst, int dst_stride,	18 const uint8_t *dst, int dst_stride,

30 int height,	19 int height,

31 unsigned int *sse);	20 unsigned int *sse);

32	21

33 unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,	22 unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,

34 int src_stride,	23 int src_stride,

35 int x_offset,	24 int x_offset,

36 int y_offset,	25 int y_offset,

37 const uint8_t *dst,	26 const uint8_t *dst,

38 int dst_stride,	27 int dst_stride,

39 const uint8_t *sec,	28 const uint8_t *sec,

40 int sec_stride,	29 int sec_stride,

41 int height,	30 int height,

42 unsigned int *sseptr);	31 unsigned int *sseptr);

43	32

44 static void variance_avx2(const uint8_t *src, int src_stride,

45 const uint8_t *ref, int ref_stride,

46 int w, int h, unsigned int *sse, int *sum,

47 get_var_avx2 var_fn, int block_size) {

48 int i, j;

49

50 *sse = 0;

51 *sum = 0;

52

53 for (i = 0; i < h; i += 16) {

54 for (j = 0; j < w; j += block_size) {

55 unsigned int sse0;

56 int sum0;

57 var_fn(&src[src_stride * i + j], src_stride,

58 &ref[ref_stride * i + j], ref_stride, &sse0, &sum0);

59 *sse += sse0;

60 *sum += sum0;

61 }

62 }

63 }

64

65

66 unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride,

67 const uint8_t *ref, int ref_stride,

68 unsigned int *sse) {

69 int sum;

70 variance_avx2(src, src_stride, ref, ref_stride, 16, 16,

71 sse, &sum, vp9_get16x16var_avx2, 16);

72 return *sse - (((unsigned int)sum * sum) >> 8);

73 }

74

75 unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride,

76 const uint8_t *ref, int ref_stride,

77 unsigned int *sse) {

78 int sum;

79 vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum);

80 return *sse;

81 }

82

83 unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride,

84 const uint8_t *ref, int ref_stride,

85 unsigned int *sse) {

86 int sum;

87 variance_avx2(src, src_stride, ref, ref_stride, 32, 16,

88 sse, &sum, vp9_get32x32var_avx2, 32);

89 return *sse - (((int64_t)sum * sum) >> 9);

90 }

91

92 unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride,

93 const uint8_t *ref, int ref_stride,

94 unsigned int *sse) {

95 int sum;

96 variance_avx2(src, src_stride, ref, ref_stride, 32, 32,

97 sse, &sum, vp9_get32x32var_avx2, 32);

98 return *sse - (((int64_t)sum * sum) >> 10);

99 }

100

101 unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride,

102 const uint8_t *ref, int ref_stride,

103 unsigned int *sse) {

104 int sum;

105 variance_avx2(src, src_stride, ref, ref_stride, 64, 64,

106 sse, &sum, vp9_get32x32var_avx2, 32);

107 return *sse - (((int64_t)sum * sum) >> 12);

108 }

109

110 unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride,

111 const uint8_t *ref, int ref_stride,

112 unsigned int *sse) {

113 int sum;

114 variance_avx2(src, src_stride, ref, ref_stride, 64, 32,

115 sse, &sum, vp9_get32x32var_avx2, 32);

116 return *sse - (((int64_t)sum * sum) >> 11);

117 }

118

119 unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,	33 unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,

120 int src_stride,	34 int src_stride,

121 int x_offset,	35 int x_offset,

122 int y_offset,	36 int y_offset,

123 const uint8_t *dst,	37 const uint8_t *dst,

124 int dst_stride,	38 int dst_stride,

125 unsigned int *sse) {	39 unsigned int *sse) {

126 unsigned int sse1;	40 unsigned int sse1;

127 const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,	41 const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,

128 y_offset, dst, dst_stride,	42 y_offset, dst, dst_stride,

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
181 const uint8_t *dst,	95 const uint8_t *dst,

182 int dst_stride,	96 int dst_stride,

183 unsigned int *sse,	97 unsigned int *sse,

184 const uint8_t *sec) {	98 const uint8_t *sec) {

185 // processing 32 element in parallel	99 // processing 32 element in parallel

186 const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,	100 const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,

187 y_offset, dst, dst_stride,	101 y_offset, dst, dst_stride,

188 sec, 32, 32, sse);	102 sec, 32, 32, sse);

189 return *sse - (((int64_t)se * se) >> 10);	103 return *sse - (((int64_t)se * se) >> 10);

190 }	104 }

OLD	NEW