source/libvpx/vp9/common/vp9_convolve.c - Issue 111463005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 111463005: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <assert.h>	11 #include <assert.h>

12	12

13 #include "./vpx_config.h"	13 #include "./vpx_config.h"

14 #include "./vp9_rtcd.h"	14 #include "./vp9_rtcd.h"

15 #include "vp9/common/vp9_common.h"	15 #include "vp9/common/vp9_common.h"

16 #include "vp9/common/vp9_convolve.h"	16 #include "vp9/common/vp9_convolve.h"

17 #include "vp9/common/vp9_filter.h"	17 #include "vp9/common/vp9_filter.h"

18 #include "vpx/vpx_integer.h"	18 #include "vpx/vpx_integer.h"

19 #include "vpx_ports/mem.h"	19 #include "vpx_ports/mem.h"

20	20

21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,

22 uint8_t *dst, ptrdiff_t dst_stride,	22 uint8_t *dst, ptrdiff_t dst_stride,

23 const int16_t *filter_x0, int x_step_q4,	23 const subpel_kernel *x_filters,

24 const int16_t *filter_y, int y_step_q4,	24 int x0_q4, int x_step_q4, int w, int h) {

25 int w, int h, int taps) {	25 int x, y;

26 int x, y, k;	26 src -= SUBPEL_TAPS / 2 - 1;

27

28 /* NOTE: This assumes that the filter table is 256-byte aligned. */

29 /* TODO(agrange) Modify to make independent of table alignment. */

30 const int16_t *const filter_x_base =

31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

32

33 /* Adjust base pointer address for this source line */

34 src -= taps / 2 - 1;

35

36 for (y = 0; y < h; ++y) {	27 for (y = 0; y < h; ++y) {

37 /* Initial phase offset */	28 int x_q4 = x0_q4;

38 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;

39

40 for (x = 0; x < w; ++x) {	29 for (x = 0; x < w; ++x) {

41 /* Per-pixel src offset */	30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

42 const int src_x = x_q4 >> SUBPEL_BITS;	31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

43 int sum = 0;	32 int k, sum = 0;

44	33 for (k = 0; k < SUBPEL_TAPS; ++k)

45 /* Pointer to filter to use */	34 sum += src_x[k] * x_filter[k];

46 const int16_t *const filter_x = filter_x_base +

47 (x_q4 & SUBPEL_MASK) * taps;

48

49 for (k = 0; k < taps; ++k)

50 sum += src[src_x + k] * filter_x[k];

51

52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));	35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

53

54 /* Move to the next source pixel */

55 x_q4 += x_step_q4;	36 x_q4 += x_step_q4;

56 }	37 }

57 src += src_stride;	38 src += src_stride;

58 dst += dst_stride;	39 dst += dst_stride;

59 }	40 }

60 }	41 }

61	42

62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,

63 uint8_t *dst, ptrdiff_t dst_stride,	44 uint8_t *dst, ptrdiff_t dst_stride,

64 const int16_t *filter_x0, int x_step_q4,	45 const subpel_kernel *x_filters,

65 const int16_t *filter_y, int y_step_q4,	46 int x0_q4, int x_step_q4, int w, int h) {

66 int w, int h, int taps) {	47 int x, y;

67 int x, y, k;	48 src -= SUBPEL_TAPS / 2 - 1;

68

69 /* NOTE: This assumes that the filter table is 256-byte aligned. */

70 /* TODO(agrange) Modify to make independent of table alignment. */

71 const int16_t *const filter_x_base =

72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

73

74 /* Adjust base pointer address for this source line */

75 src -= taps / 2 - 1;

76

77 for (y = 0; y < h; ++y) {	49 for (y = 0; y < h; ++y) {

78 /* Initial phase offset */	50 int x_q4 = x0_q4;

79 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;

80

81 for (x = 0; x < w; ++x) {	51 for (x = 0; x < w; ++x) {

82 /* Per-pixel src offset */	52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

83 const int src_x = x_q4 >> SUBPEL_BITS;	53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

84 int sum = 0;	54 int k, sum = 0;

85	55 for (k = 0; k < SUBPEL_TAPS; ++k)

86 /* Pointer to filter to use */	56 sum += src_x[k] * x_filter[k];

87 const int16_t *const filter_x = filter_x_base +

88 (x_q4 & SUBPEL_MASK) * taps;

89

90 for (k = 0; k < taps; ++k)

91 sum += src[src_x + k] * filter_x[k];

92

93 dst[x] = ROUND_POWER_OF_TWO(dst[x] +	57 dst[x] = ROUND_POWER_OF_TWO(dst[x] +

94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);	58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

95

96 /* Move to the next source pixel */

97 x_q4 += x_step_q4;	59 x_q4 += x_step_q4;

98 }	60 }

99 src += src_stride;	61 src += src_stride;

100 dst += dst_stride;	62 dst += dst_stride;

101 }	63 }

102 }	64 }

103	65

104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,	66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,

105 uint8_t *dst, ptrdiff_t dst_stride,	67 uint8_t *dst, ptrdiff_t dst_stride,

106 const int16_t *filter_x, int x_step_q4,	68 const subpel_kernel *y_filters,

107 const int16_t *filter_y0, int y_step_q4,	69 int y0_q4, int y_step_q4, int w, int h) {

108 int w, int h, int taps) {	70 int x, y;

109 int x, y, k;	71 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

110

111 /* NOTE: This assumes that the filter table is 256-byte aligned. */

112 /* TODO(agrange) Modify to make independent of table alignment. */

113 const int16_t *const filter_y_base =

114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

115

116 /* Adjust base pointer address for this source column */

117 src -= src_stride * (taps / 2 - 1);

118	72

119 for (x = 0; x < w; ++x) {	73 for (x = 0; x < w; ++x) {

120 /* Initial phase offset */	74 int y_q4 = y0_q4;

121 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;

122

123 for (y = 0; y < h; ++y) {	75 for (y = 0; y < h; ++y) {

124 /* Per-pixel src offset */	76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

125 const int src_y = y_q4 >> SUBPEL_BITS;	77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

126 int sum = 0;	78 int k, sum = 0;

127	79 for (k = 0; k < SUBPEL_TAPS; ++k)

128 /* Pointer to filter to use */	80 sum += src_y[k * src_stride] * y_filter[k];

129 const int16_t *const filter_y = filter_y_base +	81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

130 (y_q4 & SUBPEL_MASK) * taps;

131

132 for (k = 0; k < taps; ++k)

133 sum += src[(src_y + k) * src_stride] * filter_y[k];

134

135 dst[y * dst_stride] =

136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

137

138 /* Move to the next source pixel */

139 y_q4 += y_step_q4;	82 y_q4 += y_step_q4;

140 }	83 }

141 ++src;	84 ++src;

142 ++dst;	85 ++dst;

143 }	86 }

144 }	87 }

145	88

146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,	89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,

147 uint8_t *dst, ptrdiff_t dst_stride,	90 uint8_t *dst, ptrdiff_t dst_stride,

148 const int16_t *filter_x, int x_step_q4,	91 const subpel_kernel *y_filters,

149 const int16_t *filter_y0, int y_step_q4,	92 int y0_q4, int y_step_q4, int w, int h) {

150 int w, int h, int taps) {	93 int x, y;

151 int x, y, k;	94 src -= src_stride * (SUBPEL_TAPS / 2 - 1);

152

153 /* NOTE: This assumes that the filter table is 256-byte aligned. */

154 /* TODO(agrange) Modify to make independent of table alignment. */

155 const int16_t *const filter_y_base =

156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

157

158 /* Adjust base pointer address for this source column */

159 src -= src_stride * (taps / 2 - 1);

160	95

161 for (x = 0; x < w; ++x) {	96 for (x = 0; x < w; ++x) {

162 /* Initial phase offset */	97 int y_q4 = y0_q4;

163 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;

164

165 for (y = 0; y < h; ++y) {	98 for (y = 0; y < h; ++y) {

166 /* Per-pixel src offset */	99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

167 const int src_y = y_q4 >> SUBPEL_BITS;	100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

168 int sum = 0;	101 int k, sum = 0;

169	102 for (k = 0; k < SUBPEL_TAPS; ++k)

170 /* Pointer to filter to use */	103 sum += src_y[k * src_stride] * y_filter[k];

171 const int16_t *const filter_y = filter_y_base +

172 (y_q4 & SUBPEL_MASK) * taps;

173

174 for (k = 0; k < taps; ++k)

175 sum += src[(src_y + k) * src_stride] * filter_y[k];

176

177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +	104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +

178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);	105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

179

180 /* Move to the next source pixel */

181 y_q4 += y_step_q4;	106 y_q4 += y_step_q4;

182 }	107 }

183 ++src;	108 ++src;

184 ++dst;	109 ++dst;

185 }	110 }

186 }	111 }

187	112

188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,	113 static void convolve(const uint8_t *src, ptrdiff_t src_stride,

189 uint8_t *dst, ptrdiff_t dst_stride,	114 uint8_t *dst, ptrdiff_t dst_stride,

190 const int16_t *filter_x, int x_step_q4,	115 const subpel_kernel *const x_filters,

191 const int16_t *filter_y, int y_step_q4,	116 int x0_q4, int x_step_q4,

192 int w, int h, int taps) {	117 const subpel_kernel *const y_filters,

193 /* Fixed size intermediate buffer places limits on parameters.	118 int y0_q4, int y_step_q4,

194 * Maximum intermediate_height is 324, for y_step_q4 == 80,	119 int w, int h) {

195 * h == 64, taps == 8.	120 // Fixed size intermediate buffer places limits on parameters.

196 * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc	121 // Maximum intermediate_height is 324, for y_step_q4 == 80,

197 */	122 // h == 64, taps == 8.

	123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc

198 uint8_t temp[64 * 324];	124 uint8_t temp[64 * 324];

199 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;	125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;

200	126

201 assert(w <= 64);	127 assert(w <= 64);

202 assert(h <= 64);	128 assert(h <= 64);

203 assert(taps <= 8);

204 assert(y_step_q4 <= 80);	129 assert(y_step_q4 <= 80);

205 assert(x_step_q4 <= 80);	130 assert(x_step_q4 <= 80);

206	131

207 if (intermediate_height < h)	132 if (intermediate_height < h)

208 intermediate_height = h;	133 intermediate_height = h;

209	134

210 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,	135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,

211 filter_x, x_step_q4, filter_y, y_step_q4, w,	136 x_filters, x0_q4, x_step_q4, w, intermediate_height);

212 intermediate_height, taps);	137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,

213 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,	138 y_filters, y0_q4, y_step_q4, w, h);

214 x_step_q4, filter_y, y_step_q4, w, h, taps);	139 }

	140

	141 static const subpel_kernel *get_filter_base(const int16_t *filter) {

	142 // NOTE: This assumes that the filter table is 256-byte aligned.

	143 // TODO(agrange) Modify to make independent of table alignment.

	144 return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));

	145 }

	146

	147 static int get_filter_offset(const int16_t *f, const subpel_kernel *base) {

	148 return (const subpel_kernel *)(intptr_t)f - base;

215 }	149 }

216	150

217 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

218 uint8_t *dst, ptrdiff_t dst_stride,	152 uint8_t *dst, ptrdiff_t dst_stride,

219 const int16_t *filter_x, int x_step_q4,	153 const int16_t *filter_x, int x_step_q4,

220 const int16_t *filter_y, int y_step_q4,	154 const int16_t *filter_y, int y_step_q4,

221 int w, int h) {	155 int w, int h) {

222 convolve_horiz_c(src, src_stride, dst, dst_stride,	156 const subpel_kernel *const filters_x = get_filter_base(filter_x);

223 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);	157 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	158

	159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x,

	160 x0_q4, x_step_q4, w, h);

224 }	161 }

225	162

226 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

227 uint8_t *dst, ptrdiff_t dst_stride,	164 uint8_t *dst, ptrdiff_t dst_stride,

228 const int16_t *filter_x, int x_step_q4,	165 const int16_t *filter_x, int x_step_q4,

229 const int16_t *filter_y, int y_step_q4,	166 const int16_t *filter_y, int y_step_q4,

230 int w, int h) {	167 int w, int h) {

231 convolve_avg_horiz_c(src, src_stride, dst, dst_stride,	168 const subpel_kernel *const filters_x = get_filter_base(filter_x);

232 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);	169 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	170

	171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,

	172 x0_q4, x_step_q4, w, h);

233 }	173 }

234	174

235 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,	175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

236 uint8_t *dst, ptrdiff_t dst_stride,	176 uint8_t *dst, ptrdiff_t dst_stride,

237 const int16_t *filter_x, int x_step_q4,	177 const int16_t *filter_x, int x_step_q4,

238 const int16_t *filter_y, int y_step_q4,	178 const int16_t *filter_y, int y_step_q4,

239 int w, int h) {	179 int w, int h) {

240 convolve_vert_c(src, src_stride, dst, dst_stride,	180 const subpel_kernel *const filters_y = get_filter_base(filter_y);

241 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);	181 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	182 convolve_vert(src, src_stride, dst, dst_stride, filters_y,

	183 y0_q4, y_step_q4, w, h);

242 }	184 }

243	185

244 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,	186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

245 uint8_t *dst, ptrdiff_t dst_stride,	187 uint8_t *dst, ptrdiff_t dst_stride,

246 const int16_t *filter_x, int x_step_q4,	188 const int16_t *filter_x, int x_step_q4,

247 const int16_t *filter_y, int y_step_q4,	189 const int16_t *filter_y, int y_step_q4,

248 int w, int h) {	190 int w, int h) {

249 convolve_avg_vert_c(src, src_stride, dst, dst_stride,	191 const subpel_kernel *const filters_y = get_filter_base(filter_y);

250 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);	192 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,

	194 y0_q4, y_step_q4, w, h);

251 }	195 }

252	196

253 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,	197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,

254 uint8_t *dst, ptrdiff_t dst_stride,	198 uint8_t *dst, ptrdiff_t dst_stride,

255 const int16_t *filter_x, int x_step_q4,	199 const int16_t *filter_x, int x_step_q4,

256 const int16_t *filter_y, int y_step_q4,	200 const int16_t *filter_y, int y_step_q4,

257 int w, int h) {	201 int w, int h) {

258 convolve_c(src, src_stride, dst, dst_stride,	202 const subpel_kernel *const filters_x = get_filter_base(filter_x);

259 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);	203 const int x0_q4 = get_filter_offset(filter_x, filters_x);

	204

	205 const subpel_kernel *const filters_y = get_filter_base(filter_y);

	206 const int y0_q4 = get_filter_offset(filter_y, filters_y);

	207

	208 convolve(src, src_stride, dst, dst_stride,

	209 filters_x, x0_q4, x_step_q4,

	210 filters_y, y0_q4, y_step_q4, w, h);

260 }	211 }

261	212

262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,	213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

263 uint8_t *dst, ptrdiff_t dst_stride,	214 uint8_t *dst, ptrdiff_t dst_stride,

264 const int16_t *filter_x, int x_step_q4,	215 const int16_t *filter_x, int x_step_q4,

265 const int16_t *filter_y, int y_step_q4,	216 const int16_t *filter_y, int y_step_q4,

266 int w, int h) {	217 int w, int h) {

267 /* Fixed size intermediate buffer places limits on parameters. */	218 /* Fixed size intermediate buffer places limits on parameters. */

268 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);	219 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);

269 assert(w <= 64);	220 assert(w <= 64);

270 assert(h <= 64);	221 assert(h <= 64);

271	222

272 vp9_convolve8(src, src_stride, temp, 64,	223 vp9_convolve8_c(src, src_stride, temp, 64,

273 filter_x, x_step_q4, filter_y, y_step_q4, w, h);	224 filter_x, x_step_q4, filter_y, y_step_q4, w, h);

274 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);	225 vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);

275 }	226 }

276	227

277 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,	228 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,

278 uint8_t *dst, ptrdiff_t dst_stride,	229 uint8_t *dst, ptrdiff_t dst_stride,

279 const int16_t *filter_x, int filter_x_stride,	230 const int16_t *filter_x, int filter_x_stride,

280 const int16_t *filter_y, int filter_y_stride,	231 const int16_t *filter_y, int filter_y_stride,

281 int w, int h) {	232 int w, int h) {

282 int r;	233 int r;

283	234

284 for (r = h; r > 0; --r) {	235 for (r = h; r > 0; --r) {

(...skipping 11 matching lines...) Expand all Loading...
296 int x, y;	247 int x, y;

297	248

298 for (y = 0; y < h; ++y) {	249 for (y = 0; y < h; ++y) {

299 for (x = 0; x < w; ++x)	250 for (x = 0; x < w; ++x)

300 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);	251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

301	252

302 src += src_stride;	253 src += src_stride;

303 dst += dst_stride;	254 dst += dst_stride;

304 }	255 }

305 }	256 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_common_data.c ('k') | source/libvpx/vp9/common/vp9_default_coef_probs.h » ('j') | no next file with comments »