source/libvpx/vp9/common/vp9_convolve.c - Issue 23600008: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 23600008: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10 #include "vp9/common/vp9_convolve.h"	10 #include "vp9/common/vp9_convolve.h"

11	11

12 #include <assert.h>	12 #include <assert.h>

13	13

14 #include "./vpx_config.h"	14 #include "./vpx_config.h"

15 #include "./vp9_rtcd.h"	15 #include "./vp9_rtcd.h"

16 #include "vp9/common/vp9_common.h"	16 #include "vp9/common/vp9_common.h"

	17 #include "vp9/common/vp9_filter.h"

17 #include "vpx/vpx_integer.h"	18 #include "vpx/vpx_integer.h"

18 #include "vpx_ports/mem.h"	19 #include "vpx_ports/mem.h"

19	20

20 #define VP9_FILTER_WEIGHT 128

21 #define VP9_FILTER_SHIFT 7

22

23 /* Assume a bank of 16 filters to choose from. There are two implementations

24 * for filter wrapping behavior, since we want to be able to pick which filter

25 * to start with. We could either:

26 *

27 * 1) make filter_ a pointer to the base of the filter array, and then add an

28 * additional offset parameter, to choose the starting filter.

29 * 2) use a pointer to 2 periods worth of filters, so that even if the original

30 * phase offset is at 15/16, we'll have valid data to read. The filter

31 * tables become [32][8], and the second half is duplicated.

32 * 3) fix the alignment of the filter tables, so that we know the 0/16 is

33 * always 256 byte aligned.

34 *

35 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2

36 * parameters, and switching between them is trivial, with the

37 * ALIGN_FILTERS_256 macro, below.

38 */

39 #define ALIGN_FILTERS_256 1

40

41 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

42 uint8_t *dst, ptrdiff_t dst_stride,	22 uint8_t *dst, ptrdiff_t dst_stride,

43 const int16_t *filter_x0, int x_step_q4,	23 const int16_t *filter_x0, int x_step_q4,

44 const int16_t *filter_y, int y_step_q4,	24 const int16_t *filter_y, int y_step_q4,

45 int w, int h, int taps) {	25 int w, int h, int taps) {

46 int x, y, k, sum;	26 int x, y, k;

47 const int16_t *filter_x_base = filter_x0;

48	27

49 #if ALIGN_FILTERS_256	28 /* NOTE: This assumes that the filter table is 256-byte aligned. */

50 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);	29 /* TODO(agrange) Modify to make independent of table alignment. */

51 #endif	30 const int16_t *const filter_x_base =

	31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

52	32

53 /* Adjust base pointer address for this source line */	33 /* Adjust base pointer address for this source line */

54 src -= taps / 2 - 1;	34 src -= taps / 2 - 1;

55	35

56 for (y = 0; y < h; ++y) {	36 for (y = 0; y < h; ++y) {

57 /* Pointer to filter to use */

58 const int16_t *filter_x = filter_x0;

59

60 /* Initial phase offset */	37 /* Initial phase offset */

61 int x0_q4 = (filter_x - filter_x_base) / taps;	38 int x_q4 = (filter_x0 - filter_x_base) / taps;

62 int x_q4 = x0_q4;

63	39

64 for (x = 0; x < w; ++x) {	40 for (x = 0; x < w; ++x) {

65 /* Per-pixel src offset */	41 /* Per-pixel src offset */

66 int src_x = (x_q4 - x0_q4) >> 4;	42 const int src_x = x_q4 >> SUBPEL_BITS;

	43 int sum = 0;

67	44

68 for (sum = 0, k = 0; k < taps; ++k) {	45 /* Pointer to filter to use */

	46 const int16_t *const filter_x = filter_x_base +

	47 (x_q4 & SUBPEL_MASK) * taps;

	48

	49 for (k = 0; k < taps; ++k)

69 sum += src[src_x + k] * filter_x[k];	50 sum += src[src_x + k] * filter_x[k];

70 }

71 sum += (VP9_FILTER_WEIGHT >> 1);

72 dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);

73	51

74 /* Adjust source and filter to use for the next pixel */	52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

	53

	54 /* Move to the next source pixel */

75 x_q4 += x_step_q4;	55 x_q4 += x_step_q4;

76 filter_x = filter_x_base + (x_q4 & 0xf) * taps;

77 }	56 }

78 src += src_stride;	57 src += src_stride;

79 dst += dst_stride;	58 dst += dst_stride;

80 }	59 }

81 }	60 }

82	61

83 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

84 uint8_t *dst, ptrdiff_t dst_stride,	63 uint8_t *dst, ptrdiff_t dst_stride,

85 const int16_t *filter_x0, int x_step_q4,	64 const int16_t *filter_x0, int x_step_q4,

86 const int16_t *filter_y, int y_step_q4,	65 const int16_t *filter_y, int y_step_q4,

87 int w, int h, int taps) {	66 int w, int h, int taps) {

88 int x, y, k, sum;	67 int x, y, k;

89 const int16_t *filter_x_base = filter_x0;

90	68

91 #if ALIGN_FILTERS_256	69 /* NOTE: This assumes that the filter table is 256-byte aligned. */

92 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);	70 /* TODO(agrange) Modify to make independent of table alignment. */

93 #endif	71 const int16_t *const filter_x_base =

	72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

94	73

95 /* Adjust base pointer address for this source line */	74 /* Adjust base pointer address for this source line */

96 src -= taps / 2 - 1;	75 src -= taps / 2 - 1;

97	76

98 for (y = 0; y < h; ++y) {	77 for (y = 0; y < h; ++y) {

99 /* Pointer to filter to use */

100 const int16_t *filter_x = filter_x0;

101

102 /* Initial phase offset */	78 /* Initial phase offset */

103 int x0_q4 = (filter_x - filter_x_base) / taps;	79 int x_q4 = (filter_x0 - filter_x_base) / taps;

104 int x_q4 = x0_q4;

105	80

106 for (x = 0; x < w; ++x) {	81 for (x = 0; x < w; ++x) {

107 /* Per-pixel src offset */	82 /* Per-pixel src offset */

108 int src_x = (x_q4 - x0_q4) >> 4;	83 const int src_x = x_q4 >> SUBPEL_BITS;

	84 int sum = 0;

109	85

110 for (sum = 0, k = 0; k < taps; ++k) {	86 /* Pointer to filter to use */

	87 const int16_t *const filter_x = filter_x_base +

	88 (x_q4 & SUBPEL_MASK) * taps;

	89

	90 for (k = 0; k < taps; ++k)

111 sum += src[src_x + k] * filter_x[k];	91 sum += src[src_x + k] * filter_x[k];

112 }

113 sum += (VP9_FILTER_WEIGHT >> 1);

114 dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;

115	92

116 /* Adjust source and filter to use for the next pixel */	93 dst[x] = ROUND_POWER_OF_TWO(dst[x] +

	94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

	95

	96 /* Move to the next source pixel */

117 x_q4 += x_step_q4;	97 x_q4 += x_step_q4;

118 filter_x = filter_x_base + (x_q4 & 0xf) * taps;

119 }	98 }

120 src += src_stride;	99 src += src_stride;

121 dst += dst_stride;	100 dst += dst_stride;

122 }	101 }

123 }	102 }

124	103

125 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,	104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,

126 uint8_t *dst, ptrdiff_t dst_stride,	105 uint8_t *dst, ptrdiff_t dst_stride,

127 const int16_t *filter_x, int x_step_q4,	106 const int16_t *filter_x, int x_step_q4,

128 const int16_t *filter_y0, int y_step_q4,	107 const int16_t *filter_y0, int y_step_q4,

129 int w, int h, int taps) {	108 int w, int h, int taps) {

130 int x, y, k, sum;	109 int x, y, k;

131	110

132 const int16_t *filter_y_base = filter_y0;	111 /* NOTE: This assumes that the filter table is 256-byte aligned. */

133	112 /* TODO(agrange) Modify to make independent of table alignment. */

134 #if ALIGN_FILTERS_256	113 const int16_t *const filter_y_base =

135 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);	114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

136 #endif

137	115

138 /* Adjust base pointer address for this source column */	116 /* Adjust base pointer address for this source column */

139 src -= src_stride * (taps / 2 - 1);	117 src -= src_stride * (taps / 2 - 1);

	118

140 for (x = 0; x < w; ++x) {	119 for (x = 0; x < w; ++x) {

141 /* Pointer to filter to use */

142 const int16_t *filter_y = filter_y0;

143

144 /* Initial phase offset */	120 /* Initial phase offset */

145 int y0_q4 = (filter_y - filter_y_base) / taps;	121 int y_q4 = (filter_y0 - filter_y_base) / taps;

146 int y_q4 = y0_q4;

147	122

148 for (y = 0; y < h; ++y) {	123 for (y = 0; y < h; ++y) {

149 /* Per-pixel src offset */	124 /* Per-pixel src offset */

150 int src_y = (y_q4 - y0_q4) >> 4;	125 const int src_y = y_q4 >> SUBPEL_BITS;

	126 int sum = 0;

151	127

152 for (sum = 0, k = 0; k < taps; ++k) {	128 /* Pointer to filter to use */

	129 const int16_t *const filter_y = filter_y_base +

	130 (y_q4 & SUBPEL_MASK) * taps;

	131

	132 for (k = 0; k < taps; ++k)

153 sum += src[(src_y + k) * src_stride] * filter_y[k];	133 sum += src[(src_y + k) * src_stride] * filter_y[k];

154 }

155 sum += (VP9_FILTER_WEIGHT >> 1);

156 dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);

157	134

158 /* Adjust source and filter to use for the next pixel */	135 dst[y * dst_stride] =

	136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

	137

	138 /* Move to the next source pixel */

159 y_q4 += y_step_q4;	139 y_q4 += y_step_q4;

160 filter_y = filter_y_base + (y_q4 & 0xf) * taps;

161 }	140 }

162 ++src;	141 ++src;

163 ++dst;	142 ++dst;

164 }	143 }

165 }	144 }

166	145

167 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,	146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

168 uint8_t *dst, ptrdiff_t dst_stride,	147 uint8_t *dst, ptrdiff_t dst_stride,

169 const int16_t *filter_x, int x_step_q4,	148 const int16_t *filter_x, int x_step_q4,

170 const int16_t *filter_y0, int y_step_q4,	149 const int16_t *filter_y0, int y_step_q4,

171 int w, int h, int taps) {	150 int w, int h, int taps) {

172 int x, y, k, sum;	151 int x, y, k;

173	152

174 const int16_t *filter_y_base = filter_y0;	153 /* NOTE: This assumes that the filter table is 256-byte aligned. */

175	154 /* TODO(agrange) Modify to make independent of table alignment. */

176 #if ALIGN_FILTERS_256	155 const int16_t *const filter_y_base =

177 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);	156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

178 #endif

179	157

180 /* Adjust base pointer address for this source column */	158 /* Adjust base pointer address for this source column */

181 src -= src_stride * (taps / 2 - 1);	159 src -= src_stride * (taps / 2 - 1);

	160

182 for (x = 0; x < w; ++x) {	161 for (x = 0; x < w; ++x) {

183 /* Pointer to filter to use */

184 const int16_t *filter_y = filter_y0;

185

186 /* Initial phase offset */	162 /* Initial phase offset */

187 int y0_q4 = (filter_y - filter_y_base) / taps;	163 int y_q4 = (filter_y0 - filter_y_base) / taps;

188 int y_q4 = y0_q4;

189	164

190 for (y = 0; y < h; ++y) {	165 for (y = 0; y < h; ++y) {

191 /* Per-pixel src offset */	166 /* Per-pixel src offset */

192 int src_y = (y_q4 - y0_q4) >> 4;	167 const int src_y = y_q4 >> SUBPEL_BITS;

	168 int sum = 0;

193	169

194 for (sum = 0, k = 0; k < taps; ++k) {	170 /* Pointer to filter to use */

	171 const int16_t *const filter_y = filter_y_base +

	172 (y_q4 & SUBPEL_MASK) * taps;

	173

	174 for (k = 0; k < taps; ++k)

195 sum += src[(src_y + k) * src_stride] * filter_y[k];	175 sum += src[(src_y + k) * src_stride] * filter_y[k];

196 }

197 sum += (VP9_FILTER_WEIGHT >> 1);

198 dst[y * dst_stride] =

199 (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;

200	176

201 /* Adjust source and filter to use for the next pixel */	177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +

	178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

	179

	180 /* Move to the next source pixel */

202 y_q4 += y_step_q4;	181 y_q4 += y_step_q4;

203 filter_y = filter_y_base + (y_q4 & 0xf) * taps;

204 }	182 }

205 ++src;	183 ++src;

206 ++dst;	184 ++dst;

207 }	185 }

208 }	186 }

209	187

210 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,	188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,

211 uint8_t *dst, ptrdiff_t dst_stride,	189 uint8_t *dst, ptrdiff_t dst_stride,

212 const int16_t *filter_x, int x_step_q4,	190 const int16_t *filter_x, int x_step_q4,

213 const int16_t *filter_y, int y_step_q4,	191 const int16_t *filter_y, int y_step_q4,

214 int w, int h, int taps) {	192 int w, int h, int taps) {

215 /* Fixed size intermediate buffer places limits on parameters.	193 /* Fixed size intermediate buffer places limits on parameters.

216 * Maximum intermediate_height is 135, for y_step_q4 == 32,	194 * Maximum intermediate_height is 135, for y_step_q4 == 32,

217 * h == 64, taps == 8.	195 * h == 64, taps == 8.

218 */	196 */

219 uint8_t temp[64 * 135];	197 uint8_t temp[64 * 135];

220 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;	198 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;

221	199

222 assert(w <= 64);	200 assert(w <= 64);

223 assert(h <= 64);	201 assert(h <= 64);

224 assert(taps <= 8);	202 assert(taps <= 8);

225 assert(y_step_q4 <= 32);	203 assert(y_step_q4 <= 32);

226 assert(x_step_q4 <= 32);	204 assert(x_step_q4 <= 32);

227	205

228 if (intermediate_height < h)	206 if (intermediate_height < h)

229 intermediate_height = h;	207 intermediate_height = h;

230	208

231 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,	209 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,

232 temp, 64,	210 filter_x, x_step_q4, filter_y, y_step_q4, w,

233 filter_x, x_step_q4, filter_y, y_step_q4,	211 intermediate_height, taps);

234 w, intermediate_height, taps);	212 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,

235 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,	213 x_step_q4, filter_y, y_step_q4, w, h, taps);

236 filter_x, x_step_q4, filter_y, y_step_q4,

237 w, h, taps);

238 }

239

240 static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,

241 uint8_t *dst, ptrdiff_t dst_stride,

242 const int16_t *filter_x, int x_step_q4,

243 const int16_t *filter_y, int y_step_q4,

244 int w, int h, int taps) {

245 /* Fixed size intermediate buffer places limits on parameters.

246 * Maximum intermediate_height is 135, for y_step_q4 == 32,

247 * h == 64, taps == 8.

248 */

249 uint8_t temp[64 * 135];

250 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;

251

252 assert(w <= 64);

253 assert(h <= 64);

254 assert(taps <= 8);

255 assert(y_step_q4 <= 32);

256 assert(x_step_q4 <= 32);

257

258 if (intermediate_height < h)

259 intermediate_height = h;

260

261 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,

262 temp, 64,

263 filter_x, x_step_q4, filter_y, y_step_q4,

264 w, intermediate_height, taps);

265 convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,

266 filter_x, x_step_q4, filter_y, y_step_q4,

267 w, h, taps);

268 }	214 }

269	215

270 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	216 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

271 uint8_t *dst, ptrdiff_t dst_stride,	217 uint8_t *dst, ptrdiff_t dst_stride,

272 const int16_t *filter_x, int x_step_q4,	218 const int16_t *filter_x, int x_step_q4,

273 const int16_t *filter_y, int y_step_q4,	219 const int16_t *filter_y, int y_step_q4,

274 int w, int h) {	220 int w, int h) {

275 convolve_horiz_c(src, src_stride, dst, dst_stride,	221 convolve_horiz_c(src, src_stride, dst, dst_stride,

276 filter_x, x_step_q4, filter_y, y_step_q4,	222 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);

277 w, h, 8);

278 }	223 }

279	224

280 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,	225 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

281 uint8_t *dst, ptrdiff_t dst_stride,	226 uint8_t *dst, ptrdiff_t dst_stride,

282 const int16_t *filter_x, int x_step_q4,	227 const int16_t *filter_x, int x_step_q4,

283 const int16_t *filter_y, int y_step_q4,	228 const int16_t *filter_y, int y_step_q4,

284 int w, int h) {	229 int w, int h) {

285 convolve_avg_horiz_c(src, src_stride, dst, dst_stride,	230 convolve_avg_horiz_c(src, src_stride, dst, dst_stride,

286 filter_x, x_step_q4, filter_y, y_step_q4,	231 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);

287 w, h, 8);

288 }	232 }

289	233

290 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,	234 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

291 uint8_t *dst, ptrdiff_t dst_stride,	235 uint8_t *dst, ptrdiff_t dst_stride,

292 const int16_t *filter_x, int x_step_q4,	236 const int16_t *filter_x, int x_step_q4,

293 const int16_t *filter_y, int y_step_q4,	237 const int16_t *filter_y, int y_step_q4,

294 int w, int h) {	238 int w, int h) {

295 convolve_vert_c(src, src_stride, dst, dst_stride,	239 convolve_vert_c(src, src_stride, dst, dst_stride,

296 filter_x, x_step_q4, filter_y, y_step_q4,	240 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);

297 w, h, 8);

298 }	241 }

299	242

300 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,	243 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

301 uint8_t *dst, ptrdiff_t dst_stride,	244 uint8_t *dst, ptrdiff_t dst_stride,

302 const int16_t *filter_x, int x_step_q4,	245 const int16_t *filter_x, int x_step_q4,

303 const int16_t *filter_y, int y_step_q4,	246 const int16_t *filter_y, int y_step_q4,

304 int w, int h) {	247 int w, int h) {

305 convolve_avg_vert_c(src, src_stride, dst, dst_stride,	248 convolve_avg_vert_c(src, src_stride, dst, dst_stride,

306 filter_x, x_step_q4, filter_y, y_step_q4,	249 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);

307 w, h, 8);

308 }	250 }

309	251

310 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,	252 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,

311 uint8_t *dst, ptrdiff_t dst_stride,	253 uint8_t *dst, ptrdiff_t dst_stride,

312 const int16_t *filter_x, int x_step_q4,	254 const int16_t *filter_x, int x_step_q4,

313 const int16_t *filter_y, int y_step_q4,	255 const int16_t *filter_y, int y_step_q4,

314 int w, int h) {	256 int w, int h) {

315 convolve_c(src, src_stride, dst, dst_stride,	257 convolve_c(src, src_stride, dst, dst_stride,

316 filter_x, x_step_q4, filter_y, y_step_q4,	258 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);

317 w, h, 8);

318 }	259 }

319	260

320 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,	261 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

321 uint8_t *dst, ptrdiff_t dst_stride,	262 uint8_t *dst, ptrdiff_t dst_stride,

322 const int16_t *filter_x, int x_step_q4,	263 const int16_t *filter_x, int x_step_q4,

323 const int16_t *filter_y, int y_step_q4,	264 const int16_t *filter_y, int y_step_q4,

324 int w, int h) {	265 int w, int h) {

325 /* Fixed size intermediate buffer places limits on parameters. */	266 /* Fixed size intermediate buffer places limits on parameters. */

326 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);	267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);

327 assert(w <= 64);	268 assert(w <= 64);

328 assert(h <= 64);	269 assert(h <= 64);

329	270

330 vp9_convolve8(src, src_stride,	271 vp9_convolve8(src, src_stride, temp, 64,

331 temp, 64,	272 filter_x, x_step_q4, filter_y, y_step_q4, w, h);

332 filter_x, x_step_q4,	273 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);

333 filter_y, y_step_q4,

334 w, h);

335 vp9_convolve_avg(temp, 64,

336 dst, dst_stride,

337 NULL, 0, /* These unused parameter should be removed! */

338 NULL, 0, /* These unused parameter should be removed! */

339 w, h);

340 }	274 }

341	275

342 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,	276 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,

343 uint8_t *dst, ptrdiff_t dst_stride,	277 uint8_t *dst, ptrdiff_t dst_stride,

344 const int16_t *filter_x, int filter_x_stride,	278 const int16_t *filter_x, int filter_x_stride,

345 const int16_t *filter_y, int filter_y_stride,	279 const int16_t *filter_y, int filter_y_stride,

346 int w, int h) {	280 int w, int h) {

347 int r;	281 int r;

348	282

349 for (r = h; r > 0; --r) {	283 for (r = h; r > 0; --r) {

350 memcpy(dst, src, w);	284 memcpy(dst, src, w);

351 src += src_stride;	285 src += src_stride;

352 dst += dst_stride;	286 dst += dst_stride;

353 }	287 }

354 }	288 }

355	289

356 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,	290 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,

357 uint8_t *dst, ptrdiff_t dst_stride,	291 uint8_t *dst, ptrdiff_t dst_stride,

358 const int16_t *filter_x, int filter_x_stride,	292 const int16_t *filter_x, int filter_x_stride,

359 const int16_t *filter_y, int filter_y_stride,	293 const int16_t *filter_y, int filter_y_stride,

360 int w, int h) {	294 int w, int h) {

361 int x, y;	295 int x, y;

362	296

363 for (y = 0; y < h; ++y) {	297 for (y = 0; y < h; ++y) {

364 for (x = 0; x < w; ++x) {	298 for (x = 0; x < w; ++x)

365 dst[x] = (dst[x] + src[x] + 1) >> 1;	299 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

366 }	300

367 src += src_stride;	301 src += src_stride;

368 dst += dst_stride;	302 dst += dst_stride;

369 }	303 }

370 }	304 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_convolve.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »