source/libvpx/vp9/common/vp9_filter.c - Issue 11974002: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/vp9_filter.c

Issue 11974002: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11	11

12 #include <stdlib.h>	12 #include <stdlib.h>

13 #include "vp9/common/vp9_filter.h"	13 #include "vp9/common/vp9_filter.h"

14 #include "vpx_ports/mem.h"	14 #include "vpx_ports/mem.h"

15 #include "vp9_rtcd.h"	15 #include "vp9_rtcd.h"

	16 #include "vp9/common/vp9_common.h"

16	17

17 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = {	18 DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = {

18 { 128, 0 },	19 { 128, 0 },

19 { 120, 8 },	20 { 120, 8 },

20 { 112, 16 },	21 { 112, 16 },

21 { 104, 24 },	22 { 104, 24 },

22 { 96, 32 },	23 { 96, 32 },

23 { 88, 40 },	24 { 88, 40 },

24 { 80, 48 },	25 { 80, 48 },

25 { 72, 56 },	26 { 72, 56 },

26 { 64, 64 },	27 { 64, 64 },

27 { 56, 72 },	28 { 56, 72 },

28 { 48, 80 },	29 { 48, 80 },

29 { 40, 88 },	30 { 40, 88 },

30 { 32, 96 },	31 { 32, 96 },

31 { 24, 104 },	32 { 24, 104 },

32 { 16, 112 },	33 { 16, 112 },

33 { 8, 120 }	34 { 8, 120 }

34 };	35 };

35	36

36 #define FILTER_ALPHA 0	37 #define FILTER_ALPHA 0

37 #define FILTER_ALPHA_SHARP 1	38 #define FILTER_ALPHA_SHARP 1

38 DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {	39 DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {

39 #if FILTER_ALPHA == 0	40 #if FILTER_ALPHA == 0

40 /* Lagrangian interpolation filter */	41 /* Lagrangian interpolation filter */

41 { 0, 0, 0, 128, 0, 0, 0, 0},	42 { 0, 0, 0, 128, 0, 0, 0, 0},

42 { 0, 1, -5, 126, 8, -3, 1, 0},	43 { 0, 1, -5, 126, 8, -3, 1, 0},

43 { -1, 3, -10, 122, 18, -6, 2, 0},	44 { -1, 3, -10, 122, 18, -6, 2, 0},

44 { -1, 4, -13, 118, 27, -9, 3, -1},	45 { -1, 4, -13, 118, 27, -9, 3, -1},

45 { -1, 4, -16, 112, 37, -11, 4, -1},	46 { -1, 4, -16, 112, 37, -11, 4, -1},

46 { -1, 5, -18, 105, 48, -14, 4, -1},	47 { -1, 5, -18, 105, 48, -14, 4, -1},

47 { -1, 5, -19, 97, 58, -16, 5, -1},	48 { -1, 5, -19, 97, 58, -16, 5, -1},

48 { -1, 6, -19, 88, 68, -18, 5, -1},	49 { -1, 6, -19, 88, 68, -18, 5, -1},

(...skipping 25 matching lines...) Expand all Loading...
74 { -1, 5, -17, 68, 88, -19, 5, -1},	75 { -1, 5, -17, 68, 88, -19, 5, -1},

75 { -1, 5, -16, 58, 96, -18, 5, -1},	76 { -1, 5, -16, 58, 96, -18, 5, -1},

76 { -1, 4, -14, 48, 104, -17, 5, -1},	77 { -1, 4, -14, 48, 104, -17, 5, -1},

77 { 0, 3, -11, 37, 112, -16, 4, -1},	78 { 0, 3, -11, 37, 112, -16, 4, -1},

78 { 0, 3, -9, 27, 118, -13, 3, -1},	79 { 0, 3, -9, 27, 118, -13, 3, -1},

79 { 0, 2, -6, 18, 122, -10, 2, 0},	80 { 0, 2, -6, 18, 122, -10, 2, 0},

80 { 0, 1, -3, 8, 126, -5, 1, 0}	81 { 0, 1, -3, 8, 126, -5, 1, 0}

81 #endif /* FILTER_ALPHA */	82 #endif /* FILTER_ALPHA */

82 };	83 };

83	84

84 DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {	85 DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {

85 #if FILTER_ALPHA_SHARP == 1	86 #if FILTER_ALPHA_SHARP == 1

86 /* dct based filter */	87 /* dct based filter */

87 {0, 0, 0, 128, 0, 0, 0, 0},	88 {0, 0, 0, 128, 0, 0, 0, 0},

88 {-1, 3, -7, 127, 8, -3, 1, 0},	89 {-1, 3, -7, 127, 8, -3, 1, 0},

89 {-2, 5, -13, 125, 17, -6, 3, -1},	90 {-2, 5, -13, 125, 17, -6, 3, -1},

90 {-3, 7, -17, 121, 27, -10, 5, -2},	91 {-3, 7, -17, 121, 27, -10, 5, -2},

91 {-4, 9, -20, 115, 37, -13, 6, -2},	92 {-4, 9, -20, 115, 37, -13, 6, -2},

92 {-4, 10, -23, 108, 48, -16, 8, -3},	93 {-4, 10, -23, 108, 48, -16, 8, -3},

93 {-4, 10, -24, 100, 59, -19, 9, -3},	94 {-4, 10, -24, 100, 59, -19, 9, -3},

94 {-4, 11, -24, 90, 70, -21, 10, -4},	95 {-4, 11, -24, 90, 70, -21, 10, -4},

(...skipping 19 matching lines...) Expand all Loading...
114 {-3, 9, -21, 70, 90, -23, 9, -3},	115 {-3, 9, -21, 70, 90, -23, 9, -3},

115 {-3, 8, -19, 59, 99, -22, 9, -3},	116 {-3, 8, -19, 59, 99, -22, 9, -3},

116 {-2, 7, -16, 49, 106, -21, 8, -3},	117 {-2, 7, -16, 49, 106, -21, 8, -3},

117 {-2, 6, -13, 38, 113, -19, 7, -2},	118 {-2, 6, -13, 38, 113, -19, 7, -2},

118 {-2, 5, -10, 28, 119, -16, 6, -2},	119 {-2, 5, -10, 28, 119, -16, 6, -2},

119 {-1, 3, -7, 18, 123, -11, 4, -1},	120 {-1, 3, -7, 18, 123, -11, 4, -1},

120 {-1, 2, -3, 9, 126, -6, 2, -1}	121 {-1, 2, -3, 9, 126, -6, 2, -1}

121 #endif /* FILTER_ALPHA_SHARP */	122 #endif /* FILTER_ALPHA_SHARP */

122 };	123 };

123	124

124 DECLARE_ALIGNED(16, const short, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = {	125 DECLARE_ALIGNED(16, const int16_t,

	126 vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {

	127 /* 8-tap lowpass filter */

	128 /* Hamming window */

	129 {-1, -7, 32, 80, 32, -7, -1, 0},

	130 {-1, -8, 28, 80, 37, -7, -2, 1},

	131 { 0, -8, 24, 79, 41, -7, -2, 1},

	132 { 0, -8, 20, 78, 45, -5, -3, 1},

	133 { 0, -8, 16, 76, 50, -4, -3, 1},

	134 { 0, -7, 13, 74, 54, -3, -4, 1},

	135 { 1, -7, 9, 71, 58, -1, -4, 1},

	136 { 1, -6, 6, 68, 62, 1, -5, 1},

	137 { 1, -6, 4, 65, 65, 4, -6, 1},

	138 { 1, -5, 1, 62, 68, 6, -6, 1},

	139 { 1, -4, -1, 58, 71, 9, -7, 1},

	140 { 1, -4, -3, 54, 74, 13, -7, 0},

	141 { 1, -3, -4, 50, 76, 16, -8, 0},

	142 { 1, -3, -5, 45, 78, 20, -8, 0},

	143 { 1, -2, -7, 41, 79, 24, -8, 0},

	144 { 1, -2, -7, 37, 80, 28, -8, -1}

	145 };

	146

	147 DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = {

125 {0, 0, 128, 0, 0, 0},	148 {0, 0, 128, 0, 0, 0},

126 {1, -5, 125, 8, -2, 1},	149 {1, -5, 125, 8, -2, 1},

127 {1, -8, 122, 17, -5, 1},	150 {1, -8, 122, 17, -5, 1},

128 {2, -11, 116, 27, -8, 2},	151 {2, -11, 116, 27, -8, 2},

129 {3, -14, 110, 37, -10, 2},	152 {3, -14, 110, 37, -10, 2},

130 {3, -15, 103, 47, -12, 2},	153 {3, -15, 103, 47, -12, 2},

131 {3, -16, 95, 57, -14, 3},	154 {3, -16, 95, 57, -14, 3},

132 {3, -16, 86, 67, -15, 3},	155 {3, -16, 86, 67, -15, 3},

133 {3, -16, 77, 77, -16, 3},	156 {3, -16, 77, 77, -16, 3},

134 {3, -15, 67, 86, -16, 3},	157 {3, -15, 67, 86, -16, 3},

135 {3, -14, 57, 95, -16, 3},	158 {3, -14, 57, 95, -16, 3},

136 {2, -12, 47, 103, -15, 3},	159 {2, -12, 47, 103, -15, 3},

137 {2, -10, 37, 110, -14, 3},	160 {2, -10, 37, 110, -14, 3},

138 {2, -8, 27, 116, -11, 2},	161 {2, -8, 27, 116, -11, 2},

139 {1, -5, 17, 122, -8, 1},	162 {1, -5, 17, 122, -8, 1},

140 {1, -2, 8, 125, -5, 1}	163 {1, -2, 8, 125, -5, 1}

141 };	164 };

142	165

143 static void filter_block2d_first_pass_6(unsigned char *src_ptr,	166 static void filter_block2d_first_pass_6(uint8_t *src_ptr,

144 int *output_ptr,	167 int *output_ptr,

145 unsigned int src_pixels_per_line,	168 unsigned int src_pixels_per_line,

146 unsigned int pixel_step,	169 unsigned int pixel_step,

147 unsigned int output_height,	170 unsigned int output_height,

148 unsigned int output_width,	171 unsigned int output_width,

149 const short *vp9_filter) {	172 const int16_t *vp9_filter) {

150 unsigned int i, j;	173 unsigned int i, j;

151 int Temp;	174 int temp;

152	175

153 for (i = 0; i < output_height; i++) {	176 for (i = 0; i < output_height; i++) {

154 for (j = 0; j < output_width; j++) {	177 for (j = 0; j < output_width; j++) {

155 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +	178 temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +

156 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +	179 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +

157 ((int)src_ptr[0] * vp9_filter[2]) +	180 ((int)src_ptr[0] * vp9_filter[2]) +

158 ((int)src_ptr[pixel_step] * vp9_filter[3]) +	181 ((int)src_ptr[pixel_step] * vp9_filter[3]) +

159 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +	182 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +

160 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +	183 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +

161 (VP9_FILTER_WEIGHT >> 1); /* Rounding */	184 (VP9_FILTER_WEIGHT >> 1); /* Rounding */

162	185

163 /* Normalize back to 0-255 */	186 /* Normalize back to 0-255 */

164 Temp = Temp >> VP9_FILTER_SHIFT;	187 output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT);

165

166 if (Temp < 0)

167 Temp = 0;

168 else if (Temp > 255)

169 Temp = 255;

170

171 output_ptr[j] = Temp;

172 src_ptr++;	188 src_ptr++;

173 }	189 }

174	190

175 /* Next row... */	191 /* Next row... */

176 src_ptr += src_pixels_per_line - output_width;	192 src_ptr += src_pixels_per_line - output_width;

177 output_ptr += output_width;	193 output_ptr += output_width;

178 }	194 }

179 }	195 }

180	196

181 static void filter_block2d_second_pass_6(int *src_ptr,	197 static void filter_block2d_second_pass_6(int *src_ptr,

182 unsigned char *output_ptr,	198 uint8_t *output_ptr,

183 int output_pitch,	199 int output_pitch,

184 unsigned int src_pixels_per_line,	200 unsigned int src_pixels_per_line,

185 unsigned int pixel_step,	201 unsigned int pixel_step,

186 unsigned int output_height,	202 unsigned int output_height,

187 unsigned int output_width,	203 unsigned int output_width,

188 const short *vp9_filter) {	204 const int16_t *vp9_filter) {

189 unsigned int i, j;	205 unsigned int i, j;

190 int Temp;	206 int temp;

191	207

192 for (i = 0; i < output_height; i++) {	208 for (i = 0; i < output_height; i++) {

193 for (j = 0; j < output_width; j++) {	209 for (j = 0; j < output_width; j++) {

194 /* Apply filter */	210 /* Apply filter */

195 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +	211 temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +

196 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +	212 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +

197 ((int)src_ptr[0] * vp9_filter[2]) +	213 ((int)src_ptr[0] * vp9_filter[2]) +

198 ((int)src_ptr[pixel_step] * vp9_filter[3]) +	214 ((int)src_ptr[pixel_step] * vp9_filter[3]) +

199 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +	215 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +

200 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +	216 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +

201 (VP9_FILTER_WEIGHT >> 1); /* Rounding */	217 (VP9_FILTER_WEIGHT >> 1); /* Rounding */

202	218

203 /* Normalize back to 0-255 */	219 /* Normalize back to 0-255 */

204 Temp = Temp >> VP9_FILTER_SHIFT;	220 output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT);

205

206 if (Temp < 0)

207 Temp = 0;

208 else if (Temp > 255)

209 Temp = 255;

210

211 output_ptr[j] = (unsigned char)Temp;

212 src_ptr++;	221 src_ptr++;

213 }	222 }

214	223

215 /* Start next row */	224 /* Start next row */

216 src_ptr += src_pixels_per_line - output_width;	225 src_ptr += src_pixels_per_line - output_width;

217 output_ptr += output_pitch;	226 output_ptr += output_pitch;

218 }	227 }

219 }	228 }

220	229

221 /*	230 /*

222 * The only functional difference between filter_block2d_second_pass()	231 * The only functional difference between filter_block2d_second_pass()

223 * and this function is that filter_block2d_second_pass() does a sixtap	232 * and this function is that filter_block2d_second_pass() does a sixtap

224 * filter on the input and stores it in the output. This function	233 * filter on the input and stores it in the output. This function

225 * (filter_block2d_second_pass_avg()) does a sixtap filter on the input,	234 * (filter_block2d_second_pass_avg()) does a sixtap filter on the input,

226 * and then averages that with the content already present in the output	235 * and then averages that with the content already present in the output

227 * ((filter_result + dest + 1) >> 1) and stores that in the output.	236 * ((filter_result + dest + 1) >> 1) and stores that in the output.

228 */	237 */

229 static void filter_block2d_second_pass_avg_6(int *src_ptr,	238 static void filter_block2d_second_pass_avg_6(int *src_ptr,

230 unsigned char *output_ptr,	239 uint8_t *output_ptr,

231 int output_pitch,	240 int output_pitch,

232 unsigned int src_pixels_per_line,	241 unsigned int src_pixels_per_line,

233 unsigned int pixel_step,	242 unsigned int pixel_step,

234 unsigned int output_height,	243 unsigned int output_height,

235 unsigned int output_width,	244 unsigned int output_width,

236 const short *vp9_filter) {	245 const int16_t *vp9_filter) {

237 unsigned int i, j;	246 unsigned int i, j;

238 int Temp;	247 int temp;

239	248

240 for (i = 0; i < output_height; i++) {	249 for (i = 0; i < output_height; i++) {

241 for (j = 0; j < output_width; j++) {	250 for (j = 0; j < output_width; j++) {

242 /* Apply filter */	251 /* Apply filter */

243 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +	252 temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +

244 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +	253 ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +

245 ((int)src_ptr[0] * vp9_filter[2]) +	254 ((int)src_ptr[0] * vp9_filter[2]) +

246 ((int)src_ptr[pixel_step] * vp9_filter[3]) +	255 ((int)src_ptr[pixel_step] * vp9_filter[3]) +

247 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +	256 ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +

248 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +	257 ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +

249 (VP9_FILTER_WEIGHT >> 1); /* Rounding */	258 (VP9_FILTER_WEIGHT >> 1); /* Rounding */

250	259

251 /* Normalize back to 0-255 */	260 /* Normalize back to 0-255 */

252 Temp = Temp >> VP9_FILTER_SHIFT;	261 output_ptr[j] = (clip_pixel(temp >> VP9_FILTER_SHIFT) +

253	262 output_ptr[j] + 1) >> 1;

254 if (Temp < 0)

255 Temp = 0;

256 else if (Temp > 255)

257 Temp = 255;

258

259 output_ptr[j] = (unsigned char)((output_ptr[j] + Temp + 1) >> 1);

260 src_ptr++;	263 src_ptr++;

261 }	264 }

262	265

263 /* Start next row */	266 /* Start next row */

264 src_ptr += src_pixels_per_line - output_width;	267 src_ptr += src_pixels_per_line - output_width;

265 output_ptr += output_pitch;	268 output_ptr += output_pitch;

266 }	269 }

267 }	270 }

268	271

269 #define Interp_Extend 3	272 #define Interp_Extend 3

270 static void filter_block2d_6(unsigned char *src_ptr,	273 static void filter_block2d_6(uint8_t *src_ptr,

271 unsigned char *output_ptr,	274 uint8_t *output_ptr,

272 unsigned int src_pixels_per_line,	275 unsigned int src_pixels_per_line,

273 int output_pitch,	276 int output_pitch,

274 const short *HFilter,	277 const int16_t *HFilter,

275 const short *VFilter) {	278 const int16_t *VFilter) {

276 int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */	279 int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */

277	280

278 /* First filter 1-D horizontally... */	281 /* First filter 1-D horizontally... */

279 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,	282 filter_block2d_first_pass_6(

280 3 + Interp_Extend * 2, 4, HFilter);	283 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	284 src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter);

281	285

282 /* then filter verticaly... */	286 /* then filter vertically... */

283 filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr, output_pitch, 4, 4, 4, 4, VFilter);	287 filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr,

	288 output_pitch, 4, 4, 4, 4, VFilter);

284 }	289 }

285	290

286	291

287 void vp9_sixtap_predict_c(unsigned char *src_ptr,	292 void vp9_sixtap_predict4x4_c(uint8_t *src_ptr,

288 int src_pixels_per_line,	293 int src_pixels_per_line,

289 int xoffset,	294 int xoffset,

290 int yoffset,	295 int yoffset,

291 unsigned char *dst_ptr,	296 uint8_t *dst_ptr,

292 int dst_pitch) {	297 int dst_pitch) {

293 const short *HFilter;	298 const int16_t *HFilter;

294 const short *VFilter;	299 const int16_t *VFilter;

295	300

296 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	301 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

297 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	302 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

298	303

299 filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);	304 filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,

	305 VFilter);

300 }	306 }

301	307

302 /*	308 /*

303 * The difference between filter_block2d_6() and filter_block2d_avg_6 is	309 * The difference between filter_block2d_6() and filter_block2d_avg_6 is

304 * that filter_block2d_6() does a 6-tap filter and stores it in the output	310 * that filter_block2d_6() does a 6-tap filter and stores it in the output

305 * buffer, whereas filter_block2d_avg_6() does the same 6-tap filter, and	311 * buffer, whereas filter_block2d_avg_6() does the same 6-tap filter, and

306 * then averages that with the content already present in the output	312 * then averages that with the content already present in the output

307 * ((filter_result + dest + 1) >> 1) and stores that in the output.	313 * ((filter_result + dest + 1) >> 1) and stores that in the output.

308 */	314 */

309 static void filter_block2d_avg_6(unsigned char *src_ptr,	315 static void filter_block2d_avg_6(uint8_t *src_ptr,

310 unsigned char *output_ptr,	316 uint8_t *output_ptr,

311 unsigned int src_pixels_per_line,	317 unsigned int src_pixels_per_line,

312 int output_pitch,	318 int output_pitch,

313 const short *HFilter,	319 const int16_t *HFilter,

314 const short *VFilter) {	320 const int16_t *VFilter) {

315 int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer used in filtering */	321 int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */

316	322

317 /* First filter 1-D horizontally... */	323 /* First filter 1-D horizontally... */

318 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line),	324 filter_block2d_first_pass_6(

319 FData, src_pixels_per_line, 1,	325 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

320 3 + Interp_Extend * 2, 4, HFilter);	326 src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter);

321	327

322 /* then filter verticaly... */	328 /* then filter vertically... */

323 filter_block2d_second_pass_avg_6(FData + 4 * (Interp_Extend - 1), output_ptr,	329 filter_block2d_second_pass_avg_6(FData + 4 * (Interp_Extend - 1), output_ptr,

324 output_pitch, 4, 4, 4, 4, VFilter);	330 output_pitch, 4, 4, 4, 4, VFilter);

325 }	331 }

326	332

327 void vp9_sixtap_predict_avg_c	333 void vp9_sixtap_predict_avg4x4_c(uint8_t *src_ptr,

328 (	334 int src_pixels_per_line,

329 unsigned char *src_ptr,	335 int xoffset,

330 int src_pixels_per_line,	336 int yoffset,

331 int xoffset,	337 uint8_t *dst_ptr,

332 int yoffset,	338 int dst_pitch) {

333 unsigned char *dst_ptr,	339 const int16_t *HFilter;

334 int dst_pitch	340 const int16_t *VFilter;

335 ) {

336 const short *HFilter;

337 const short *VFilter;

338	341

339 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	342 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

340 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	343 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

341	344

342 filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line,	345 filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,

343 dst_pitch, HFilter, VFilter);	346 HFilter, VFilter);

344 }	347 }

345	348

346 void vp9_sixtap_predict8x8_c	349 void vp9_sixtap_predict8x8_c(uint8_t *src_ptr,

347 (	350 int src_pixels_per_line,

348 unsigned char *src_ptr,	351 int xoffset,

349 int src_pixels_per_line,	352 int yoffset,

350 int xoffset,	353 uint8_t *dst_ptr,

351 int yoffset,	354 int dst_pitch) {

352 unsigned char *dst_ptr,	355 const int16_t *HFilter;

353 int dst_pitch	356 const int16_t *VFilter;

354 ) {	357 int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */

355 const short *HFilter;

356 const short *VFilter;

357 // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */

358 int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */

359	358

360 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	359 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

361 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	360 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

362	361

363 /* First filter 1-D horizontally... */	362 /* First filter 1-D horizontally... */

364 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,	363 filter_block2d_first_pass_6(

365 7 + Interp_Extend * 2, 8, HFilter);	364 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	365 src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter);

366	366

367	367 /* then filter vertically... */

368 /* then filter verticaly... */	368 filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr,

369 filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);	369 dst_pitch, 8, 8, 8, 8, VFilter);

370	370

371 }	371 }

372	372

373 void vp9_sixtap_predict_avg8x8_c	373 void vp9_sixtap_predict_avg8x8_c(uint8_t *src_ptr,

374 (	374 int src_pixels_per_line,

375 unsigned char *src_ptr,	375 int xoffset,

376 int src_pixels_per_line,	376 int yoffset,

377 int xoffset,	377 uint8_t *dst_ptr,

378 int yoffset,	378 int dst_pitch) {

379 unsigned char *dst_ptr,	379 const int16_t *HFilter;

380 int dst_pitch	380 const int16_t *VFilter;

381 ) {	381 int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */

382 const short *HFilter;

383 const short *VFilter;

384 // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */

385 int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */

386	382

387 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	383 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

388 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	384 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

389	385

390 /* First filter 1-D horizontally... */	386 /* First filter 1-D horizontally... */

391 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,	387 filter_block2d_first_pass_6(

392 7 + Interp_Extend * 2, 8, HFilter);	388 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	389 src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter);

393	390

394 /* then filter verticaly... */	391 /* then filter vertically... */

395 filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);	392 filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr,

	393 dst_pitch, 8, 8, 8, 8, VFilter);

396 }	394 }

397	395

398 void vp9_sixtap_predict8x4_c	396 void vp9_sixtap_predict8x4_c(uint8_t *src_ptr,

399 (	397 int src_pixels_per_line,

400 unsigned char *src_ptr,	398 int xoffset,

401 int src_pixels_per_line,	399 int yoffset,

402 int xoffset,	400 uint8_t *dst_ptr,

403 int yoffset,	401 int dst_pitch) {

404 unsigned char *dst_ptr,	402 const int16_t *HFilter;

405 int dst_pitch	403 const int16_t *VFilter;

406 ) {	404 int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer */

407 const short *HFilter;

408 const short *VFilter;

409 // int FData[(7+Interp_Extend*2)*16]; /* Temp data buffer used in filtering */

410 int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer used in filtering */

411	405

412 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	406 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

413 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	407 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

414	408

415 /* First filter 1-D horizontally... */	409 /* First filter 1-D horizontally... */

416 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,	410 filter_block2d_first_pass_6(

417 3 + Interp_Extend * 2, 8, HFilter);	411 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	412 src_pixels_per_line, 1, 3 + Interp_Extend * 2, 8, HFilter);

418	413

419	414 /* then filter vertically... */

420 /* then filter verticaly... */	415 filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr,

421 filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);	416 dst_pitch, 8, 8, 4, 8, VFilter);

422

423 }	417 }

424	418

425 void vp9_sixtap_predict16x16_c	419 void vp9_sixtap_predict16x16_c(uint8_t *src_ptr,

426 (	420 int src_pixels_per_line,

427 unsigned char *src_ptr,	421 int xoffset,

428 int src_pixels_per_line,	422 int yoffset,

429 int xoffset,	423 uint8_t *dst_ptr,

430 int yoffset,	424 int dst_pitch) {

431 unsigned char *dst_ptr,	425 const int16_t *HFilter;

432 int dst_pitch	426 const int16_t *VFilter;

433 ) {	427 int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */

434 const short *HFilter;

435 const short *VFilter;

436 // int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */

437 int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */

438

439	428

440 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	429 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

441 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	430 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

442	431

443 /* First filter 1-D horizontally... */	432 /* First filter 1-D horizontally... */

444 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData, src_pixels_per_line, 1,	433 filter_block2d_first_pass_6(

445 15 + Interp_Extend * 2, 16, HFilter);	434 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	435 src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);

446	436

447 /* then filter verticaly... */	437 /* then filter vertically... */

448 filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);	438 filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr,

449	439 dst_pitch, 16, 16, 16, 16, VFilter);

450 }	440 }

451	441

452 void vp9_sixtap_predict_avg16x16_c	442 void vp9_sixtap_predict_avg16x16_c(uint8_t *src_ptr,

453 (	443 int src_pixels_per_line,

454 unsigned char *src_ptr,	444 int xoffset,

455 int src_pixels_per_line,	445 int yoffset,

456 int xoffset,	446 uint8_t *dst_ptr,

457 int yoffset,	447 int dst_pitch) {

458 unsigned char *dst_ptr,	448 const int16_t *HFilter;

459 int dst_pitch	449 const int16_t *VFilter;

460 ) {	450 int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */

461 const short *HFilter;

462 const short *VFilter;

463 // int FData[(15+Interp_Extend*2)*24]; /* Temp data buffer used in filtering */

464 int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer used in filtering */

465	451

466 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */	452 HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */

467 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */	453 VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */

468	454

469 /* First filter 1-D horizontally... */	455 /* First filter 1-D horizontally... */

470 filter_block2d_first_pass_6(src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,	456 filter_block2d_first_pass_6(

471 src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);	457 src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,

	458 src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);

472	459

473 /* then filter verticaly... */	460 /* then filter vertically... */

474 filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr, dst_pitch,	461 filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr,

475 16, 16, 16, 16, VFilter);	462 dst_pitch, 16, 16, 16, 16, VFilter);

476 }	463 }

477	464

478 typedef enum {	465 typedef enum {

479 VPX_FILTER_4x4 = 0,	466 VPX_FILTER_4x4 = 0,

480 VPX_FILTER_8x8 = 1,	467 VPX_FILTER_8x8 = 1,

481 VPX_FILTER_8x4 = 2,	468 VPX_FILTER_8x4 = 2,

482 VPX_FILTER_16x16 = 3,	469 VPX_FILTER_16x16 = 3,

483 } filter_size_t;	470 } filter_size_t;

484	471

485 static const unsigned int filter_size_to_wh[][2] = {	472 static const unsigned int filter_size_to_wh[][2] = {

486 {4, 4},	473 {4, 4},

487 {8, 8},	474 {8, 8},

488 {8, 4},	475 {8, 4},

489 {16,16},	476 {16,16},

490 };	477 };

491	478

492 static const unsigned int filter_max_height = 16;	479 static void filter_block2d_8_c(const uint8_t *src_ptr,

493 static const unsigned int filter_max_width = 16;	480 const unsigned int src_stride,

494	481 const int16_t *HFilter,

495 static void filter_block2d_8_c(const unsigned char *src_ptr,	482 const int16_t *VFilter,

496 const unsigned int src_stride,

497 const short *HFilter,

498 const short *VFilter,

499 const filter_size_t filter_size,	483 const filter_size_t filter_size,

500 unsigned char *dst_ptr,	484 uint8_t *dst_ptr,

501 unsigned int dst_stride) {	485 unsigned int dst_stride) {

502 const unsigned int output_width = filter_size_to_wh[filter_size][0];	486 const unsigned int output_width = filter_size_to_wh[filter_size][0];

503 const unsigned int output_height = filter_size_to_wh[filter_size][1];	487 const unsigned int output_height = filter_size_to_wh[filter_size][1];

504	488

505 // Between passes, we use an intermediate buffer whose height is extended to	489 // Between passes, we use an intermediate buffer whose height is extended to

506 // have enough horizontally filtered values as input for the vertical pass.	490 // have enough horizontally filtered values as input for the vertical pass.

507 // This buffer is allocated to be big enough for the largest block type we	491 // This buffer is allocated to be big enough for the largest block type we

508 // support.	492 // support.

509 const int kInterp_Extend = 4;	493 const int kInterp_Extend = 4;

510 const unsigned int intermediate_height =	494 const unsigned int intermediate_height =

511 (kInterp_Extend - 1) + output_height + kInterp_Extend;	495 (kInterp_Extend - 1) + output_height + kInterp_Extend;

512 const unsigned int max_intermediate_height =	496

513 (kInterp_Extend - 1) + filter_max_height + kInterp_Extend;	497 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,

514 #ifdef _MSC_VER	498 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height

515 // MSVC does not support C99 style declaration	499 * + kInterp_Extend

516 unsigned char intermediate_buffer[23 * 16];	500 * = 3 + 16 + 4

517 #else	501 * = 23

518 unsigned char intermediate_buffer[max_intermediate_height * filter_max_width];	502 * and filter_max_width = 16

519 #endif	503 */

	504 uint8_t intermediate_buffer[23 * 16];

520 const int intermediate_next_stride = 1 - intermediate_height * output_width;	505 const int intermediate_next_stride = 1 - intermediate_height * output_width;

521	506

522 // Horizontal pass (src -> transposed intermediate).	507 // Horizontal pass (src -> transposed intermediate).

523 {	508 {

524 unsigned char *output_ptr = intermediate_buffer;	509 uint8_t *output_ptr = intermediate_buffer;

525 const int src_next_row_stride = src_stride - output_width;	510 const int src_next_row_stride = src_stride - output_width;

526 unsigned int i, j;	511 unsigned int i, j;

527 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);	512 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);

528 for (i = 0; i < intermediate_height; i++) {	513 for (i = 0; i < intermediate_height; i++) {

529 for (j = 0; j < output_width; j++) {	514 for (j = 0; j < output_width; j++) {

530 // Apply filter...	515 // Apply filter...

531 int temp = ((int)src_ptr[0] * HFilter[0]) +	516 int temp = ((int)src_ptr[0] * HFilter[0]) +

532 ((int)src_ptr[1] * HFilter[1]) +	517 ((int)src_ptr[1] * HFilter[1]) +

533 ((int)src_ptr[2] * HFilter[2]) +	518 ((int)src_ptr[2] * HFilter[2]) +

534 ((int)src_ptr[3] * HFilter[3]) +	519 ((int)src_ptr[3] * HFilter[3]) +

535 ((int)src_ptr[4] * HFilter[4]) +	520 ((int)src_ptr[4] * HFilter[4]) +

536 ((int)src_ptr[5] * HFilter[5]) +	521 ((int)src_ptr[5] * HFilter[5]) +

537 ((int)src_ptr[6] * HFilter[6]) +	522 ((int)src_ptr[6] * HFilter[6]) +

538 ((int)src_ptr[7] * HFilter[7]) +	523 ((int)src_ptr[7] * HFilter[7]) +

539 (VP9_FILTER_WEIGHT >> 1); // Rounding	524 (VP9_FILTER_WEIGHT >> 1); // Rounding

540	525

541 // Normalize back to 0-255...	526 // Normalize back to 0-255...

542 temp >>= VP9_FILTER_SHIFT;	527 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);

543 if (temp < 0) {

544 temp = 0;

545 } else if (temp > 255) {

546 temp = 255;

547 }

548 src_ptr++;	528 src_ptr++;

549 *output_ptr = temp;

550 output_ptr += intermediate_height;	529 output_ptr += intermediate_height;

551 }	530 }

552 src_ptr += src_next_row_stride;	531 src_ptr += src_next_row_stride;

553 output_ptr += intermediate_next_stride;	532 output_ptr += intermediate_next_stride;

554 }	533 }

555 }	534 }

556	535

557 // Vertical pass (transposed intermediate -> dst).	536 // Vertical pass (transposed intermediate -> dst).

558 {	537 {

559 unsigned char *src_ptr = intermediate_buffer;	538 uint8_t *src_ptr = intermediate_buffer;

560 const int dst_next_row_stride = dst_stride - output_width;	539 const int dst_next_row_stride = dst_stride - output_width;

561 unsigned int i, j;	540 unsigned int i, j;

562 for (i = 0; i < output_height; i++) {	541 for (i = 0; i < output_height; i++) {

563 for (j = 0; j < output_width; j++) {	542 for (j = 0; j < output_width; j++) {

564 // Apply filter...	543 // Apply filter...

565 int temp = ((int)src_ptr[0] * VFilter[0]) +	544 int temp = ((int)src_ptr[0] * VFilter[0]) +

566 ((int)src_ptr[1] * VFilter[1]) +	545 ((int)src_ptr[1] * VFilter[1]) +

567 ((int)src_ptr[2] * VFilter[2]) +	546 ((int)src_ptr[2] * VFilter[2]) +

568 ((int)src_ptr[3] * VFilter[3]) +	547 ((int)src_ptr[3] * VFilter[3]) +

569 ((int)src_ptr[4] * VFilter[4]) +	548 ((int)src_ptr[4] * VFilter[4]) +

570 ((int)src_ptr[5] * VFilter[5]) +	549 ((int)src_ptr[5] * VFilter[5]) +

571 ((int)src_ptr[6] * VFilter[6]) +	550 ((int)src_ptr[6] * VFilter[6]) +

572 ((int)src_ptr[7] * VFilter[7]) +	551 ((int)src_ptr[7] * VFilter[7]) +

573 (VP9_FILTER_WEIGHT >> 1); // Rounding	552 (VP9_FILTER_WEIGHT >> 1); // Rounding

574	553

575 // Normalize back to 0-255...	554 // Normalize back to 0-255...

576 temp >>= VP9_FILTER_SHIFT;	555 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);

577 if (temp < 0) {

578 temp = 0;

579 } else if (temp > 255) {

580 temp = 255;

581 }

582

583 src_ptr += intermediate_height;	556 src_ptr += intermediate_height;

584 *dst_ptr++ = (unsigned char)temp;

585 }	557 }

586 src_ptr += intermediate_next_stride;	558 src_ptr += intermediate_next_stride;

587 dst_ptr += dst_next_row_stride;	559 dst_ptr += dst_next_row_stride;

588 }	560 }

589 }	561 }

590 }	562 }

591	563

592 void vp9_filter_block2d_4x4_8_c(const unsigned char *src_ptr,	564 void vp9_filter_block2d_4x4_8_c(const uint8_t *src_ptr,

593 const unsigned int src_stride,	565 const unsigned int src_stride,

594 const short *HFilter_aligned16,	566 const int16_t *HFilter_aligned16,

595 const short *VFilter_aligned16,	567 const int16_t *VFilter_aligned16,

596 unsigned char *dst_ptr,	568 uint8_t *dst_ptr,

597 unsigned int dst_stride) {	569 unsigned int dst_stride) {

598 filter_block2d_8_c(src_ptr, src_stride,	570 filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,

599 HFilter_aligned16, VFilter_aligned16,

600 VPX_FILTER_4x4, dst_ptr, dst_stride);	571 VPX_FILTER_4x4, dst_ptr, dst_stride);

601 }	572 }

602	573

603 void vp9_filter_block2d_8x4_8_c(const unsigned char *src_ptr,	574 void vp9_filter_block2d_8x4_8_c(const uint8_t *src_ptr,

604 const unsigned int src_stride,	575 const unsigned int src_stride,

605 const short *HFilter_aligned16,	576 const int16_t *HFilter_aligned16,

606 const short *VFilter_aligned16,	577 const int16_t *VFilter_aligned16,

607 unsigned char *dst_ptr,	578 uint8_t *dst_ptr,

608 unsigned int dst_stride) {	579 unsigned int dst_stride) {

609 filter_block2d_8_c(src_ptr, src_stride,	580 filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,

610 HFilter_aligned16, VFilter_aligned16,

611 VPX_FILTER_8x4, dst_ptr, dst_stride);	581 VPX_FILTER_8x4, dst_ptr, dst_stride);

612 }	582 }

613	583

614 void vp9_filter_block2d_8x8_8_c(const unsigned char *src_ptr,	584 void vp9_filter_block2d_8x8_8_c(const uint8_t *src_ptr,

615 const unsigned int src_stride,	585 const unsigned int src_stride,

616 const short *HFilter_aligned16,	586 const int16_t *HFilter_aligned16,

617 const short *VFilter_aligned16,	587 const int16_t *VFilter_aligned16,

618 unsigned char *dst_ptr,	588 uint8_t *dst_ptr,

619 unsigned int dst_stride) {	589 unsigned int dst_stride) {

620 filter_block2d_8_c(src_ptr, src_stride,	590 filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,

621 HFilter_aligned16, VFilter_aligned16,

622 VPX_FILTER_8x8, dst_ptr, dst_stride);	591 VPX_FILTER_8x8, dst_ptr, dst_stride);

623 }	592 }

624	593

625 void vp9_filter_block2d_16x16_8_c(const unsigned char *src_ptr,	594 void vp9_filter_block2d_16x16_8_c(const uint8_t *src_ptr,

626 const unsigned int src_stride,	595 const unsigned int src_stride,

627 const short *HFilter_aligned16,	596 const int16_t *HFilter_aligned16,

628 const short *VFilter_aligned16,	597 const int16_t *VFilter_aligned16,

629 unsigned char *dst_ptr,	598 uint8_t *dst_ptr,

630 unsigned int dst_stride) {	599 unsigned int dst_stride) {

631 filter_block2d_8_c(src_ptr, src_stride,	600 filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,

632 HFilter_aligned16, VFilter_aligned16,

633 VPX_FILTER_16x16, dst_ptr, dst_stride);	601 VPX_FILTER_16x16, dst_ptr, dst_stride);

634 }	602 }

635	603

636 static void block2d_average_c(unsigned char *src,	604 static void block2d_average_c(uint8_t *src,

637 unsigned int src_stride,	605 unsigned int src_stride,

638 unsigned char *output_ptr,	606 uint8_t *output_ptr,

639 unsigned int output_stride,	607 unsigned int output_stride,

640 const filter_size_t filter_size) {	608 const filter_size_t filter_size) {

641 const unsigned int output_width = filter_size_to_wh[filter_size][0];	609 const unsigned int output_width = filter_size_to_wh[filter_size][0];

642 const unsigned int output_height = filter_size_to_wh[filter_size][1];	610 const unsigned int output_height = filter_size_to_wh[filter_size][1];

643	611

644 unsigned int i, j;	612 unsigned int i, j;

645 for (i = 0; i < output_height; i++) {	613 for (i = 0; i < output_height; i++) {

646 for (j = 0; j < output_width; j++) {	614 for (j = 0; j < output_width; j++) {

647 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;	615 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;

648 }	616 }

649 output_ptr += output_stride;	617 output_ptr += output_stride;

650 }	618 }

651 }	619 }

652	620

653 #define block2d_average block2d_average_c	621 #define block2d_average block2d_average_c

654	622

655 void vp9_eighttap_predict_c(unsigned char *src_ptr,	623 void vp9_eighttap_predict4x4_c(uint8_t *src_ptr,

656 int src_pixels_per_line,	624 int src_pixels_per_line,

657 int xoffset,	625 int xoffset,

658 int yoffset,	626 int yoffset,

659 unsigned char *dst_ptr,	627 uint8_t *dst_ptr,

660 int dst_pitch) {	628 int dst_pitch) {

661 const short *HFilter;	629 const int16_t *HFilter;

662 const short *VFilter;	630 const int16_t *VFilter;

663	631

664 HFilter = vp9_sub_pel_filters_8[xoffset];	632 HFilter = vp9_sub_pel_filters_8[xoffset];

665 VFilter = vp9_sub_pel_filters_8[yoffset];	633 VFilter = vp9_sub_pel_filters_8[yoffset];

666	634

	635 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	636 dst_ptr, dst_pitch);

	637 }

	638

	639 void vp9_eighttap_predict_avg4x4_c(uint8_t *src_ptr,

	640 int src_pixels_per_line,

	641 int xoffset,

	642 int yoffset,

	643 uint8_t *dst_ptr,

	644 int dst_pitch) {

	645 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

	646 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

	647 uint8_t tmp[4 * 4];

	648

	649 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

	650 4);

	651 block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);

	652 }

	653

	654 void vp9_eighttap_predict4x4_sharp_c(uint8_t *src_ptr,

	655 int src_pixels_per_line,

	656 int xoffset,

	657 int yoffset,

	658 uint8_t *dst_ptr,

	659 int dst_pitch) {

	660 const int16_t *HFilter;

	661 const int16_t *VFilter;

	662

	663 HFilter = vp9_sub_pel_filters_8s[xoffset];

	664 VFilter = vp9_sub_pel_filters_8s[yoffset];

	665

	666 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	667 dst_ptr, dst_pitch);

	668 }

	669

	670 void vp9_eighttap_predict4x4_smooth_c(uint8_t *src_ptr,

	671 int src_pixels_per_line,

	672 int xoffset,

	673 int yoffset,

	674 uint8_t *dst_ptr,

	675 int dst_pitch) {

	676 const int16_t *HFilter;

	677 const int16_t *VFilter;

	678

	679 HFilter = vp9_sub_pel_filters_8lp[xoffset];

	680 VFilter = vp9_sub_pel_filters_8lp[yoffset];

	681

667 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,	682 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,

668 HFilter, VFilter,	683 HFilter, VFilter,

669 dst_ptr, dst_pitch);	684 dst_ptr, dst_pitch);

670 }	685 }

671	686

672 void vp9_eighttap_predict_avg4x4_c(unsigned char *src_ptr,	687 void vp9_eighttap_predict_avg4x4_sharp_c(uint8_t *src_ptr,

673 int src_pixels_per_line,	688 int src_pixels_per_line,

674 int xoffset,	689 int xoffset,

675 int yoffset,	690 int yoffset,

676 unsigned char *dst_ptr,	691 uint8_t *dst_ptr,

	692 int dst_pitch) {

	693 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

	694 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

	695 uint8_t tmp[4 * 4];

	696

	697 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

	698 4);

	699 block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);

	700 }

	701

	702 void vp9_eighttap_predict_avg4x4_smooth_c(uint8_t *src_ptr,

	703 int src_pixels_per_line,

	704 int xoffset,

	705 int yoffset,

	706 uint8_t *dst_ptr,

	707 int dst_pitch) {

	708 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

	709 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

	710 uint8_t tmp[4 * 4];

	711

	712 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

	713 4);

	714 block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);

	715 }

	716

	717

	718 void vp9_eighttap_predict8x8_c(uint8_t *src_ptr,

	719 int src_pixels_per_line,

	720 int xoffset,

	721 int yoffset,

	722 uint8_t *dst_ptr,

	723 int dst_pitch) {

	724 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

	725 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

	726

	727 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	728 dst_ptr, dst_pitch);

	729 }

	730

	731 void vp9_eighttap_predict8x8_sharp_c(uint8_t *src_ptr,

	732 int src_pixels_per_line,

	733 int xoffset,

	734 int yoffset,

	735 uint8_t *dst_ptr,

	736 int dst_pitch) {

	737 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

	738 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

	739

	740 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	741 dst_ptr, dst_pitch);

	742 }

	743

	744 void vp9_eighttap_predict8x8_smooth_c(uint8_t *src_ptr,

	745 int src_pixels_per_line,

	746 int xoffset,

	747 int yoffset,

	748 uint8_t *dst_ptr,

	749 int dst_pitch) {

	750 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

	751 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

	752

	753 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	754 dst_ptr, dst_pitch);

	755 }

	756

	757 void vp9_eighttap_predict_avg8x8_c(uint8_t *src_ptr,

	758 int src_pixels_per_line,

	759 int xoffset,

	760 int yoffset,

	761 uint8_t *dst_ptr,

677 int dst_pitch) {	762 int dst_pitch) {

678 const short *HFilter = vp9_sub_pel_filters_8[xoffset];	763 uint8_t tmp[8 * 8];

679 const short *VFilter = vp9_sub_pel_filters_8[yoffset];	764 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

680 unsigned char tmp[4 * 4];	765 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

681	766

682 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,	767 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

683 HFilter, VFilter,	768 8);

684 tmp, 4);	769 block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);

685 block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);	770 }

686 }	771

687	772 void vp9_eighttap_predict_avg8x8_sharp_c(uint8_t *src_ptr,

688 void vp9_eighttap_predict_sharp_c(unsigned char *src_ptr,	773 int src_pixels_per_line,

689 int src_pixels_per_line,	774 int xoffset,

690 int xoffset,	775 int yoffset,

691 int yoffset,	776 uint8_t *dst_ptr,

692 unsigned char *dst_ptr,

693 int dst_pitch) {

694 const short *HFilter;

695 const short *VFilter;

696

697 HFilter = vp9_sub_pel_filters_8s[xoffset];

698 VFilter = vp9_sub_pel_filters_8s[yoffset];

699

700 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,

701 HFilter, VFilter,

702 dst_ptr, dst_pitch);

703 }

704

705 void vp9_eighttap_predict_avg4x4_sharp_c(unsigned char *src_ptr,

706 int src_pixels_per_line,

707 int xoffset,

708 int yoffset,

709 unsigned char *dst_ptr,

710 int dst_pitch) {	777 int dst_pitch) {

711 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];	778 uint8_t tmp[8 * 8];

712 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];	779 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

713 unsigned char tmp[4 * 4];	780 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

714	781

715 vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,	782 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

716 HFilter, VFilter,	783 8);

717 tmp, 4);

718 block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);

719 }

720

721 void vp9_eighttap_predict8x8_c(unsigned char *src_ptr,

722 int src_pixels_per_line,

723 int xoffset,

724 int yoffset,

725 unsigned char *dst_ptr,

726 int dst_pitch) {

727 const short *HFilter = vp9_sub_pel_filters_8[xoffset];

728 const short *VFilter = vp9_sub_pel_filters_8[yoffset];

729

730 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,

731 HFilter, VFilter,

732 dst_ptr, dst_pitch);

733 }

734

735 void vp9_eighttap_predict8x8_sharp_c(unsigned char *src_ptr,

736 int src_pixels_per_line,

737 int xoffset,

738 int yoffset,

739 unsigned char *dst_ptr,

740 int dst_pitch) {

741 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];

742 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];

743

744 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,

745 HFilter, VFilter,

746 dst_ptr, dst_pitch);

747 }

748

749 void vp9_eighttap_predict_avg8x8_c(unsigned char *src_ptr,

750 int src_pixels_per_line,

751 int xoffset,

752 int yoffset,

753 unsigned char *dst_ptr,

754 int dst_pitch) {

755 unsigned char tmp[8 * 8];

756 const short *HFilter = vp9_sub_pel_filters_8[xoffset];

757 const short *VFilter = vp9_sub_pel_filters_8[yoffset];

758

759 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,

760 HFilter, VFilter,

761 tmp, 8);

762 block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);	784 block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);

763 }	785 }

764	786

765 void vp9_eighttap_predict_avg8x8_sharp_c(unsigned char *src_ptr,	787 void vp9_eighttap_predict_avg8x8_smooth_c(uint8_t *src_ptr,

766 int src_pixels_per_line,	788 int src_pixels_per_line,

767 int xoffset,	789 int xoffset,

768 int yoffset,	790 int yoffset,

769 unsigned char *dst_ptr,	791 uint8_t *dst_ptr,

770 int dst_pitch) {	792 int dst_pitch) {

771 unsigned char tmp[8 * 8];	793 uint8_t tmp[8 * 8];

772 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];	794 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

773 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];	795 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

774	796

775 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line,	797 vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,

776 HFilter, VFilter,	798 8);

777 tmp, 8);

778 block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);	799 block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);

779 }	800 }

780	801

781 void vp9_eighttap_predict8x4_c(unsigned char *src_ptr,	802 void vp9_eighttap_predict8x4_c(uint8_t *src_ptr,

782 int src_pixels_per_line,	803 int src_pixels_per_line,

783 int xoffset,	804 int xoffset,

784 int yoffset,	805 int yoffset,

785 unsigned char *dst_ptr,	806 uint8_t *dst_ptr,

786 int dst_pitch) {	807 int dst_pitch) {

787 const short *HFilter = vp9_sub_pel_filters_8[xoffset];	808 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

788 const short *VFilter = vp9_sub_pel_filters_8[yoffset];	809 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

789	810

790 vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,	811 vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

791 HFilter, VFilter,	812 dst_ptr, dst_pitch);

792 dst_ptr, dst_pitch);	813 }

793 }	814

794	815 void vp9_eighttap_predict8x4_sharp_c(uint8_t *src_ptr,

795 void vp9_eighttap_predict8x4_sharp_c(unsigned char *src_ptr,	816 int src_pixels_per_line,

796 int src_pixels_per_line,	817 int xoffset,

797 int xoffset,	818 int yoffset,

798 int yoffset,	819 uint8_t *dst_ptr,

799 unsigned char *dst_ptr,	820 int dst_pitch) {

800 int dst_pitch) {	821 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

801 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];	822 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

802 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];	823

803	824 vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

804 vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line,	825 dst_ptr, dst_pitch);

805 HFilter, VFilter,	826 }

806 dst_ptr, dst_pitch);	827

807 }	828 void vp9_eighttap_predict8x4_smooth_c(uint8_t *src_ptr,

808	829 int src_pixels_per_line,

809 void vp9_eighttap_predict16x16_c(unsigned char *src_ptr,	830 int xoffset,

810 int src_pixels_per_line,	831 int yoffset,

811 int xoffset,	832 uint8_t *dst_ptr,

812 int yoffset,	833 int dst_pitch) {

813 unsigned char *dst_ptr,	834 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

814 int dst_pitch) {	835 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

815 const short *HFilter = vp9_sub_pel_filters_8[xoffset];	836

816 const short *VFilter = vp9_sub_pel_filters_8[yoffset];	837 vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

817	838 dst_ptr, dst_pitch);

818 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,	839 }

819 HFilter, VFilter,	840

820 dst_ptr, dst_pitch);	841 void vp9_eighttap_predict16x16_c(uint8_t *src_ptr,

821 }	842 int src_pixels_per_line,

822	843 int xoffset,

823 void vp9_eighttap_predict16x16_sharp_c(unsigned char *src_ptr,	844 int yoffset,

824 int src_pixels_per_line,	845 uint8_t *dst_ptr,

825 int xoffset,	846 int dst_pitch) {

826 int yoffset,	847 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

827 unsigned char *dst_ptr,	848 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

828 int dst_pitch) {	849

829 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];	850 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

830 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];	851 dst_ptr, dst_pitch);

831	852 }

832 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,	853

833 HFilter, VFilter,	854 void vp9_eighttap_predict16x16_sharp_c(uint8_t *src_ptr,

834 dst_ptr, dst_pitch);	855 int src_pixels_per_line,

835 }	856 int xoffset,

836	857 int yoffset,

837 void vp9_eighttap_predict_avg16x16_c(unsigned char *src_ptr,	858 uint8_t *dst_ptr,

838 int src_pixels_per_line,	859 int dst_pitch) {

839 int xoffset,	860 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

840 int yoffset,	861 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

841 unsigned char *dst_ptr,	862

842 int dst_pitch) {	863 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

843 DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16);	864 dst_ptr, dst_pitch);

844 const short *HFilter = vp9_sub_pel_filters_8[xoffset];	865 }

845 const short *VFilter = vp9_sub_pel_filters_8[yoffset];	866

846	867 void vp9_eighttap_predict16x16_smooth_c(uint8_t *src_ptr,

847 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,	868 int src_pixels_per_line,

848 HFilter, VFilter,	869 int xoffset,

849 tmp, 16);	870 int yoffset,

	871 uint8_t *dst_ptr,

	872 int dst_pitch) {

	873 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

	874 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

	875

	876 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	877 dst_ptr, dst_pitch);

	878 }

	879

	880 void vp9_eighttap_predict_avg16x16_c(uint8_t *src_ptr,

	881 int src_pixels_per_line,

	882 int xoffset,

	883 int yoffset,

	884 uint8_t *dst_ptr,

	885 int dst_pitch) {

	886 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);

	887 const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];

	888 const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];

	889

	890 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	891 tmp, 16);

850 block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);	892 block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);

851 }	893 }

852	894

853 void vp9_eighttap_predict_avg16x16_sharp_c(unsigned char *src_ptr,	895 void vp9_eighttap_predict_avg16x16_sharp_c(uint8_t *src_ptr,

854 int src_pixels_per_line,	896 int src_pixels_per_line,

855 int xoffset,	897 int xoffset,

856 int yoffset,	898 int yoffset,

857 unsigned char *dst_ptr,	899 uint8_t *dst_ptr,

858 int dst_pitch) {	900 int dst_pitch) {

859 DECLARE_ALIGNED_ARRAY(16, unsigned char, tmp, 16 * 16);	901 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);

860 const short *HFilter = vp9_sub_pel_filters_8s[xoffset];	902 const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];

861 const short *VFilter = vp9_sub_pel_filters_8s[yoffset];	903 const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];

862	904

863 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line,	905 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

864 HFilter, VFilter,	906 tmp, 16);

865 tmp, 16);

866 block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);	907 block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);

867 }	908 }

868	909

	910 void vp9_eighttap_predict_avg16x16_smooth_c(uint8_t *src_ptr,

	911 int src_pixels_per_line,

	912 int xoffset,

	913 int yoffset,

	914 uint8_t *dst_ptr,

	915 int dst_pitch) {

	916 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);

	917 const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];

	918 const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];

	919

	920 vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,

	921 tmp, 16);

	922 block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);

	923 }

	924

869 /****************************************************************************	925 /****************************************************************************

870 *	926 *

871 * ROUTINE : filter_block2d_bil_first_pass	927 * ROUTINE : filter_block2d_bil_first_pass

872 *	928 *

873 * INPUTS : UINT8 *src_ptr : Pointer to source block.	929 * INPUTS : uint8_t *src_ptr : Pointer to source block.

874 * UINT32 src_stride : Stride of source block.	930 * uint32_t src_stride : Stride of source block.

875 * UINT32 height : Block height.	931 * uint32_t height : Block height.

876 * UINT32 width : Block width.	932 * uint32_t width : Block width.

877 * INT32 *vp9_filter : Array of 2 bi-linear filter taps.	933 * int32_t *vp9_filter : Array of 2 bi-linear filter taps.

878 *	934 *

879 * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.	935 * OUTPUTS : int32_t *dst_ptr : Pointer to filtered block.

880 *	936 *

881 * RETURNS : void	937 * RETURNS : void

882 *	938 *

883 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block	939 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block

884 * in the horizontal direction to produce the filtered output	940 * in the horizontal direction to produce the filtered output

885 * block. Used to implement first-pass of 2-D separable filter.	941 * block. Used to implement first-pass of 2-D separable filter.

886 *	942 *

887 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass.	943 * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.

888 * Two filter taps should sum to VP9_FILTER_WEIGHT.	944 * Two filter taps should sum to VP9_FILTER_WEIGHT.

889 *	945 *

890 ****************************************************************************/	946 ****************************************************************************/

891 static void filter_block2d_bil_first_pass(unsigned char *src_ptr,	947 static void filter_block2d_bil_first_pass(uint8_t *src_ptr,

892 unsigned short *dst_ptr,	948 uint16_t *dst_ptr,

893 unsigned int src_stride,	949 unsigned int src_stride,

894 unsigned int height,	950 unsigned int height,

895 unsigned int width,	951 unsigned int width,

896 const short *vp9_filter) {	952 const int16_t *vp9_filter) {

897 unsigned int i, j;	953 unsigned int i, j;

898	954

899 for (i = 0; i < height; i++) {	955 for (i = 0; i < height; i++) {

900 for (j = 0; j < width; j++) {	956 for (j = 0; j < width; j++) {

901 /* Apply bilinear filter */	957 /* Apply bilinear filter */

902 dst_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) +	958 dst_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) +

903 ((int)src_ptr[1] * vp9_filter[1]) +	959 ((int)src_ptr[1] * vp9_filter[1]) +

904 (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;	960 (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;

905 src_ptr++;	961 src_ptr++;

906 }	962 }

907	963

908 /* Next row... */	964 /* Next row... */

909 src_ptr += src_stride - width;	965 src_ptr += src_stride - width;

910 dst_ptr += width;	966 dst_ptr += width;

911 }	967 }

912 }	968 }

913	969

914 /****************************************************************************	970 /****************************************************************************

915 *	971 *

916 * ROUTINE : filter_block2d_bil_second_pass	972 * ROUTINE : filter_block2d_bil_second_pass

917 *	973 *

918 * INPUTS : INT32 *src_ptr : Pointer to source block.	974 * INPUTS : int32_t *src_ptr : Pointer to source block.

919 * UINT32 dst_pitch : Destination block pitch.	975 * uint32_t dst_pitch : Destination block pitch.

920 * UINT32 height : Block height.	976 * uint32_t height : Block height.

921 * UINT32 width : Block width.	977 * uint32_t width : Block width.

922 * INT32 *vp9_filter : Array of 2 bi-linear filter taps.	978 * int32_t *vp9_filter : Array of 2 bi-linear filter taps.

923 *	979 *

924 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.	980 * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block.

925 *	981 *

926 * RETURNS : void	982 * RETURNS : void

927 *	983 *

928 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block	984 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block

929 * in the vertical direction to produce the filtered output	985 * in the vertical direction to produce the filtered output

930 * block. Used to implement second-pass of 2-D separable filter.	986 * block. Used to implement second-pass of 2-D separable filter.

931 *	987 *

932 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.	988 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.

933 * Two filter taps should sum to VP9_FILTER_WEIGHT.	989 * Two filter taps should sum to VP9_FILTER_WEIGHT.

934 *	990 *

935 ****************************************************************************/	991 ****************************************************************************/

936 static void filter_block2d_bil_second_pass(unsigned short *src_ptr,	992 static void filter_block2d_bil_second_pass(uint16_t *src_ptr,

937 unsigned char *dst_ptr,	993 uint8_t *dst_ptr,

938 int dst_pitch,	994 int dst_pitch,

939 unsigned int height,	995 unsigned int height,

940 unsigned int width,	996 unsigned int width,

941 const short *vp9_filter) {	997 const int16_t *vp9_filter) {

942 unsigned int i, j;	998 unsigned int i, j;

943 int Temp;	999 int temp;

944	1000

945 for (i = 0; i < height; i++) {	1001 for (i = 0; i < height; i++) {

946 for (j = 0; j < width; j++) {	1002 for (j = 0; j < width; j++) {

947 /* Apply filter */	1003 /* Apply filter */

948 Temp = ((int)src_ptr[0] * vp9_filter[0]) +	1004 temp = ((int)src_ptr[0] * vp9_filter[0]) +

949 ((int)src_ptr[width] * vp9_filter[1]) +	1005 ((int)src_ptr[width] * vp9_filter[1]) +

950 (VP9_FILTER_WEIGHT / 2);	1006 (VP9_FILTER_WEIGHT / 2);

951 dst_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT);	1007 dst_ptr[j] = (unsigned int)(temp >> VP9_FILTER_SHIFT);

952 src_ptr++;	1008 src_ptr++;

953 }	1009 }

954	1010

955 /* Next row... */	1011 /* Next row... */

956 dst_ptr += dst_pitch;	1012 dst_ptr += dst_pitch;

957 }	1013 }

958 }	1014 }

959	1015

960 /*	1016 /*

961 * As before for filter_block2d_second_pass_avg(), the functional difference	1017 * As before for filter_block2d_second_pass_avg(), the functional difference

962 * between filter_block2d_bil_second_pass() and filter_block2d_bil_second_pass_avg()	1018 * between filter_block2d_bil_second_pass() and filter_block2d_bil_second_pass_avg()

963 * is that filter_block2d_bil_second_pass() does a bilinear filter on input	1019 * is that filter_block2d_bil_second_pass() does a bilinear filter on input

964 * and stores the result in output; filter_block2d_bil_second_pass_avg(),	1020 * and stores the result in output; filter_block2d_bil_second_pass_avg(),

965 * instead, does a bilinear filter on input, averages the resulting value	1021 * instead, does a bilinear filter on input, averages the resulting value

966 * with the values already present in the output and stores the result of	1022 * with the values already present in the output and stores the result of

967 * that back into the output ((filter_result + dest + 1) >> 1).	1023 * that back into the output ((filter_result + dest + 1) >> 1).

968 */	1024 */

969 static void filter_block2d_bil_second_pass_avg(unsigned short *src_ptr,	1025 static void filter_block2d_bil_second_pass_avg(uint16_t *src_ptr,

970 unsigned char *dst_ptr,	1026 uint8_t *dst_ptr,

971 int dst_pitch,	1027 int dst_pitch,

972 unsigned int height,	1028 unsigned int height,

973 unsigned int width,	1029 unsigned int width,

974 const short *vp9_filter) {	1030 const int16_t *vp9_filter) {

975 unsigned int i, j;	1031 unsigned int i, j;

976 int Temp;	1032 int temp;

977	1033

978 for (i = 0; i < height; i++) {	1034 for (i = 0; i < height; i++) {

979 for (j = 0; j < width; j++) {	1035 for (j = 0; j < width; j++) {

980 /* Apply filter */	1036 /* Apply filter */

981 Temp = ((int)src_ptr[0] * vp9_filter[0]) +	1037 temp = (((int)src_ptr[0] * vp9_filter[0]) +

982 ((int)src_ptr[width] * vp9_filter[1]) +	1038 ((int)src_ptr[width] * vp9_filter[1]) +

983 (VP9_FILTER_WEIGHT / 2);	1039 (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;

984 dst_ptr[j] = (unsigned int)(((Temp >> VP9_FILTER_SHIFT) + dst_ptr[j] + 1) >> 1);	1040 dst_ptr[j] = (unsigned int)((temp + dst_ptr[j] + 1) >> 1);

985 src_ptr++;	1041 src_ptr++;

986 }	1042 }

987	1043

988 /* Next row... */	1044 /* Next row... */

989 dst_ptr += dst_pitch;	1045 dst_ptr += dst_pitch;

990 }	1046 }

991 }	1047 }

992	1048

993 /****************************************************************************	1049 /****************************************************************************

994 *	1050 *

995 * ROUTINE : filter_block2d_bil	1051 * ROUTINE : filter_block2d_bil

996 *	1052 *

997 * INPUTS : UINT8 *src_ptr : Pointer to source block.	1053 * INPUTS : uint8_t *src_ptr : Pointer to source block.

998 * UINT32 src_pitch : Stride of source block.	1054 * uint32_t src_pitch : Stride of source block.

999 * UINT32 dst_pitch : Stride of destination block.	1055 * uint32_t dst_pitch : Stride of destination block.

1000 * INT32 *HFilter : Array of 2 horizontal filter taps.	1056 * int32_t *HFilter : Array of 2 horizontal filter taps.

1001 * INT32 *VFilter : Array of 2 vertical filter taps.	1057 * int32_t *VFilter : Array of 2 vertical filter taps.

1002 * INT32 Width : Block width	1058 * int32_t Width : Block width

1003 * INT32 Height : Block height	1059 * int32_t Height : Block height

1004 *	1060 *

1005 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.	1061 * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block.

1006 *	1062 *

1007 * RETURNS : void	1063 * RETURNS : void

1008 *	1064 *

1009 * FUNCTION : 2-D filters an input block by applying a 2-tap	1065 * FUNCTION : 2-D filters an input block by applying a 2-tap

1010 * bi-linear filter horizontally followed by a 2-tap	1066 * bi-linear filter horizontally followed by a 2-tap

1011 * bi-linear filter vertically on the result.	1067 * bi-linear filter vertically on the result.

1012 *	1068 *

1013 * SPECIAL NOTES : The largest block size can be handled here is 16x16	1069 * SPECIAL NOTES : The largest block size can be handled here is 16x16

1014 *	1070 *

1015 ****************************************************************************/	1071 ****************************************************************************/

1016 static void filter_block2d_bil(unsigned char *src_ptr,	1072 static void filter_block2d_bil(uint8_t *src_ptr,

1017 unsigned char *dst_ptr,	1073 uint8_t *dst_ptr,

1018 unsigned int src_pitch,	1074 unsigned int src_pitch,

1019 unsigned int dst_pitch,	1075 unsigned int dst_pitch,

1020 const short *HFilter,	1076 const int16_t *HFilter,

1021 const short *VFilter,	1077 const int16_t *VFilter,

1022 int Width,	1078 int Width,

1023 int Height) {	1079 int Height) {

1024	1080

1025 unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */	1081 uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */

1026	1082

1027 /* First filter 1-D horizontally... */	1083 /* First filter 1-D horizontally... */

1028 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);	1084 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);

1029	1085

1030 /* then 1-D vertically... */	1086 /* then 1-D vertically... */

1031 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);	1087 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);

1032 }	1088 }

1033	1089

1034 static void filter_block2d_bil_avg(unsigned char *src_ptr,	1090 static void filter_block2d_bil_avg(uint8_t *src_ptr,

1035 unsigned char *dst_ptr,	1091 uint8_t *dst_ptr,

1036 unsigned int src_pitch,	1092 unsigned int src_pitch,

1037 unsigned int dst_pitch,	1093 unsigned int dst_pitch,

1038 const short *HFilter,	1094 const int16_t *HFilter,

1039 const short *VFilter,	1095 const int16_t *VFilter,

1040 int Width,	1096 int Width,

1041 int Height) {	1097 int Height) {

1042 unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */	1098 uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */

1043	1099

1044 /* First filter 1-D horizontally... */	1100 /* First filter 1-D horizontally... */

1045 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);	1101 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);

1046	1102

1047 /* then 1-D vertically... */	1103 /* then 1-D vertically... */

1048 filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter);	1104 filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter);

1049 }	1105 }

1050	1106

1051 void vp9_bilinear_predict4x4_c(unsigned char *src_ptr,	1107 void vp9_bilinear_predict4x4_c(uint8_t *src_ptr,

1052 int src_pixels_per_line,	1108 int src_pixels_per_line,

1053 int xoffset,	1109 int xoffset,

1054 int yoffset,	1110 int yoffset,

1055 unsigned char *dst_ptr,	1111 uint8_t *dst_ptr,

1056 int dst_pitch) {	1112 int dst_pitch) {

1057 const short *HFilter;	1113 const int16_t *HFilter;

1058 const short *VFilter;	1114 const int16_t *VFilter;

1059	1115

1060 HFilter = vp9_bilinear_filters[xoffset];	1116 HFilter = vp9_bilinear_filters[xoffset];

1061 VFilter = vp9_bilinear_filters[yoffset];	1117 VFilter = vp9_bilinear_filters[yoffset];

1062	1118

1063 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);	1119 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);

1064 }	1120 }

1065	1121

1066 void vp9_bilinear_predict_avg4x4_c(unsigned char *src_ptr,	1122 void vp9_bilinear_predict_avg4x4_c(uint8_t *src_ptr,

1067 int src_pixels_per_line,	1123 int src_pixels_per_line,

1068 int xoffset,	1124 int xoffset,

1069 int yoffset,	1125 int yoffset,

1070 unsigned char *dst_ptr,	1126 uint8_t *dst_ptr,

1071 int dst_pitch) {	1127 int dst_pitch) {

1072 const short *HFilter;	1128 const int16_t *HFilter;

1073 const short *VFilter;	1129 const int16_t *VFilter;

1074	1130

1075 HFilter = vp9_bilinear_filters[xoffset];	1131 HFilter = vp9_bilinear_filters[xoffset];

1076 VFilter = vp9_bilinear_filters[yoffset];	1132 VFilter = vp9_bilinear_filters[yoffset];

1077	1133

1078 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,	1134 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,

1079 dst_pitch, HFilter, VFilter, 4, 4);	1135 dst_pitch, HFilter, VFilter, 4, 4);

1080 }	1136 }

1081	1137

1082 void vp9_bilinear_predict8x8_c(unsigned char *src_ptr,	1138 void vp9_bilinear_predict8x8_c(uint8_t *src_ptr,

1083 int src_pixels_per_line,	1139 int src_pixels_per_line,

1084 int xoffset,	1140 int xoffset,

1085 int yoffset,	1141 int yoffset,

1086 unsigned char *dst_ptr,	1142 uint8_t *dst_ptr,

1087 int dst_pitch) {	1143 int dst_pitch) {

1088 const short *HFilter;	1144 const int16_t *HFilter;

1089 const short *VFilter;	1145 const int16_t *VFilter;

1090	1146

1091 HFilter = vp9_bilinear_filters[xoffset];	1147 HFilter = vp9_bilinear_filters[xoffset];

1092 VFilter = vp9_bilinear_filters[yoffset];	1148 VFilter = vp9_bilinear_filters[yoffset];

1093	1149

1094 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);	1150 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);

1095	1151

1096 }	1152 }

1097	1153

1098 void vp9_bilinear_predict_avg8x8_c(unsigned char *src_ptr,	1154 void vp9_bilinear_predict_avg8x8_c(uint8_t *src_ptr,

1099 int src_pixels_per_line,	1155 int src_pixels_per_line,

1100 int xoffset,	1156 int xoffset,

1101 int yoffset,	1157 int yoffset,

1102 unsigned char *dst_ptr,	1158 uint8_t *dst_ptr,

1103 int dst_pitch) {	1159 int dst_pitch) {

1104 const short *HFilter;	1160 const int16_t *HFilter;

1105 const short *VFilter;	1161 const int16_t *VFilter;

1106	1162

1107 HFilter = vp9_bilinear_filters[xoffset];	1163 HFilter = vp9_bilinear_filters[xoffset];

1108 VFilter = vp9_bilinear_filters[yoffset];	1164 VFilter = vp9_bilinear_filters[yoffset];

1109	1165

1110 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,	1166 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,

1111 dst_pitch, HFilter, VFilter, 8, 8);	1167 dst_pitch, HFilter, VFilter, 8, 8);

1112 }	1168 }

1113	1169

1114 void vp9_bilinear_predict8x4_c(unsigned char *src_ptr,	1170 void vp9_bilinear_predict8x4_c(uint8_t *src_ptr,

1115 int src_pixels_per_line,	1171 int src_pixels_per_line,

1116 int xoffset,	1172 int xoffset,

1117 int yoffset,	1173 int yoffset,

1118 unsigned char *dst_ptr,	1174 uint8_t *dst_ptr,

1119 int dst_pitch) {	1175 int dst_pitch) {

1120 const short *HFilter;	1176 const int16_t *HFilter;

1121 const short *VFilter;	1177 const int16_t *VFilter;

1122	1178

1123 HFilter = vp9_bilinear_filters[xoffset];	1179 HFilter = vp9_bilinear_filters[xoffset];

1124 VFilter = vp9_bilinear_filters[yoffset];	1180 VFilter = vp9_bilinear_filters[yoffset];

1125	1181

1126 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);	1182 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);

1127	1183

1128 }	1184 }

1129	1185

1130 void vp9_bilinear_predict16x16_c(unsigned char *src_ptr,	1186 void vp9_bilinear_predict16x16_c(uint8_t *src_ptr,

1131 int src_pixels_per_line,	1187 int src_pixels_per_line,

1132 int xoffset,	1188 int xoffset,

1133 int yoffset,	1189 int yoffset,

1134 unsigned char *dst_ptr,	1190 uint8_t *dst_ptr,

1135 int dst_pitch) {	1191 int dst_pitch) {

1136 const short *HFilter;	1192 const int16_t *HFilter;

1137 const short *VFilter;	1193 const int16_t *VFilter;

1138	1194

1139 HFilter = vp9_bilinear_filters[xoffset];	1195 HFilter = vp9_bilinear_filters[xoffset];

1140 VFilter = vp9_bilinear_filters[yoffset];	1196 VFilter = vp9_bilinear_filters[yoffset];

1141	1197

1142 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);	1198 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);

1143 }	1199 }

1144	1200

1145 void vp9_bilinear_predict_avg16x16_c(unsigned char *src_ptr,	1201 void vp9_bilinear_predict_avg16x16_c(uint8_t *src_ptr,

1146 int src_pixels_per_line,	1202 int src_pixels_per_line,

1147 int xoffset,	1203 int xoffset,

1148 int yoffset,	1204 int yoffset,

1149 unsigned char *dst_ptr,	1205 uint8_t *dst_ptr,

1150 int dst_pitch) {	1206 int dst_pitch) {

1151 const short *HFilter;	1207 const int16_t *HFilter;

1152 const short *VFilter;	1208 const int16_t *VFilter;

1153	1209

1154 HFilter = vp9_bilinear_filters[xoffset];	1210 HFilter = vp9_bilinear_filters[xoffset];

1155 VFilter = vp9_bilinear_filters[yoffset];	1211 VFilter = vp9_bilinear_filters[yoffset];

1156	1212

1157 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,	1213 filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,

1158 dst_pitch, HFilter, VFilter, 16, 16);	1214 dst_pitch, HFilter, VFilter, 16, 16);

1159 }	1215 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_filter.h ('k') | source/libvpx/vp9/common/vp9_findnearmv.h » ('j') | no next file with comments »