OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
132 w, h + 1); \ | 132 w, h + 1); \ |
133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ | 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ |
134 filter_x, x_step_q4, filter_y, \ | 134 filter_x, x_step_q4, filter_y, \ |
135 y_step_q4, w, h); \ | 135 y_step_q4, w, h); \ |
136 } \ | 136 } \ |
137 } else { \ | 137 } else { \ |
138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ | 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ |
140 } \ | 140 } \ |
141 } | 141 } |
142 #if HAVE_AVX2 | 142 |
| 143 #if CONFIG_VP9_HIGHBITDEPTH |
| 144 |
| 145 typedef void high_filter8_1dfunction ( |
| 146 const uint16_t *src_ptr, |
| 147 const ptrdiff_t src_pitch, |
| 148 uint16_t *output_ptr, |
| 149 ptrdiff_t out_pitch, |
| 150 unsigned int output_height, |
| 151 const int16_t *filter, |
| 152 int bd |
| 153 ); |
| 154 |
| 155 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ |
| 156 void vp9_high_convolve8_##name##_##opt(const uint8_t *src8, \ |
| 157 ptrdiff_t src_stride, \ |
| 158 uint8_t *dst8, ptrdiff_t dst_stride, \ |
| 159 const int16_t *filter_x, \ |
| 160 int x_step_q4, \ |
| 161 const int16_t *filter_y, \ |
| 162 int y_step_q4, \ |
| 163 int w, int h, int bd) { \ |
| 164 if (step_q4 == 16 && filter[3] != 128) { \ |
| 165 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ |
| 166 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ |
| 167 if (filter[0] || filter[1] || filter[2]) { \ |
| 168 while (w >= 16) { \ |
| 169 vp9_high_filter_block1d16_##dir##8_##avg##opt(src_start, \ |
| 170 src_stride, \ |
| 171 dst, \ |
| 172 dst_stride, \ |
| 173 h, \ |
| 174 filter, \ |
| 175 bd); \ |
| 176 src += 16; \ |
| 177 dst += 16; \ |
| 178 w -= 16; \ |
| 179 } \ |
| 180 while (w >= 8) { \ |
| 181 vp9_high_filter_block1d8_##dir##8_##avg##opt(src_start, \ |
| 182 src_stride, \ |
| 183 dst, \ |
| 184 dst_stride, \ |
| 185 h, \ |
| 186 filter, \ |
| 187 bd); \ |
| 188 src += 8; \ |
| 189 dst += 8; \ |
| 190 w -= 8; \ |
| 191 } \ |
| 192 while (w >= 4) { \ |
| 193 vp9_high_filter_block1d4_##dir##8_##avg##opt(src_start, \ |
| 194 src_stride, \ |
| 195 dst, \ |
| 196 dst_stride, \ |
| 197 h, \ |
| 198 filter, \ |
| 199 bd); \ |
| 200 src += 4; \ |
| 201 dst += 4; \ |
| 202 w -= 4; \ |
| 203 } \ |
| 204 } else { \ |
| 205 while (w >= 16) { \ |
| 206 vp9_high_filter_block1d16_##dir##2_##avg##opt(src, \ |
| 207 src_stride, \ |
| 208 dst, \ |
| 209 dst_stride, \ |
| 210 h, \ |
| 211 filter, \ |
| 212 bd); \ |
| 213 src += 16; \ |
| 214 dst += 16; \ |
| 215 w -= 16; \ |
| 216 } \ |
| 217 while (w >= 8) { \ |
| 218 vp9_high_filter_block1d8_##dir##2_##avg##opt(src, \ |
| 219 src_stride, \ |
| 220 dst, \ |
| 221 dst_stride, \ |
| 222 h, \ |
| 223 filter, \ |
| 224 bd); \ |
| 225 src += 8; \ |
| 226 dst += 8; \ |
| 227 w -= 8; \ |
| 228 } \ |
| 229 while (w >= 4) { \ |
| 230 vp9_high_filter_block1d4_##dir##2_##avg##opt(src, \ |
| 231 src_stride, \ |
| 232 dst, \ |
| 233 dst_stride, \ |
| 234 h, \ |
| 235 filter, \ |
| 236 bd); \ |
| 237 src += 4; \ |
| 238 dst += 4; \ |
| 239 w -= 4; \ |
| 240 } \ |
| 241 } \ |
| 242 } \ |
| 243 if (w) { \ |
| 244 vp9_high_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ |
| 245 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 246 w, h, bd); \ |
| 247 } \ |
| 248 } |
| 249 |
| 250 #define HIGH_FUN_CONV_2D(avg, opt) \ |
| 251 void vp9_high_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ |
| 252 uint8_t *dst, ptrdiff_t dst_stride, \ |
| 253 const int16_t *filter_x, int x_step_q4, \ |
| 254 const int16_t *filter_y, int y_step_q4, \ |
| 255 int w, int h, int bd) { \ |
| 256 assert(w <= 64); \ |
| 257 assert(h <= 64); \ |
| 258 if (x_step_q4 == 16 && y_step_q4 == 16) { \ |
| 259 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ |
| 260 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ |
| 261 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \ |
| 262 vp9_high_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ |
| 263 CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 264 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 265 w, h + 7, bd); \ |
| 266 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ |
| 267 64, dst, dst_stride, \ |
| 268 filter_x, x_step_q4, filter_y, \ |
| 269 y_step_q4, w, h, bd); \ |
| 270 } else { \ |
| 271 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \ |
| 272 vp9_high_convolve8_horiz_##opt(src, src_stride, \ |
| 273 CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 274 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 275 w, h + 1, bd); \ |
| 276 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ |
| 277 dst, dst_stride, \ |
| 278 filter_x, x_step_q4, filter_y, \ |
| 279 y_step_q4, w, h, bd); \ |
| 280 } \ |
| 281 } else { \ |
| 282 vp9_high_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
| 283 filter_x, x_step_q4, filter_y, y_step_q4, w, \ |
| 284 h, bd); \ |
| 285 } \ |
| 286 } |
| 287 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 288 |
| 289 #if HAVE_AVX2 && HAVE_SSSE3 |
143 filter8_1dfunction vp9_filter_block1d16_v8_avx2; | 290 filter8_1dfunction vp9_filter_block1d16_v8_avx2; |
144 filter8_1dfunction vp9_filter_block1d16_h8_avx2; | 291 filter8_1dfunction vp9_filter_block1d16_h8_avx2; |
145 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 292 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
146 #if (ARCH_X86_64) | 293 #if ARCH_X86_64 |
147 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | 294 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
148 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | 295 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
149 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | 296 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
150 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 | 297 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 |
151 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 | 298 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 |
152 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 | 299 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 |
153 #else | 300 #else // ARCH_X86 |
154 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 301 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
155 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 302 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
156 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 303 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
157 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 | 304 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 |
158 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 | 305 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 |
159 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 | 306 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 |
160 #endif | 307 #endif // ARCH_X86_64 / ARCH_X86 |
161 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 308 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
162 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 309 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
163 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 310 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
164 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; | 311 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; |
165 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; | 312 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; |
166 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; | 313 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; |
167 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 | 314 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 |
168 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 | 315 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 |
169 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 | 316 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 |
170 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 | 317 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 |
(...skipping 12 matching lines...) Expand all Loading... |
183 // int w, int h); | 330 // int w, int h); |
184 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); | 331 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); |
185 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); | 332 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); |
186 | 333 |
187 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, | 334 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, |
188 // uint8_t *dst, ptrdiff_t dst_stride, | 335 // uint8_t *dst, ptrdiff_t dst_stride, |
189 // const int16_t *filter_x, int x_step_q4, | 336 // const int16_t *filter_x, int x_step_q4, |
190 // const int16_t *filter_y, int y_step_q4, | 337 // const int16_t *filter_y, int y_step_q4, |
191 // int w, int h); | 338 // int w, int h); |
192 FUN_CONV_2D(, avx2); | 339 FUN_CONV_2D(, avx2); |
193 #endif | 340 #endif // HAVE_AVX2 && HAVE_SSSE3 |
194 #if HAVE_SSSE3 | 341 #if HAVE_SSSE3 |
195 #if (ARCH_X86_64) | 342 #if ARCH_X86_64 |
196 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; | 343 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; |
197 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; | 344 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; |
198 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; | 345 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
199 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; | 346 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
200 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 347 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
201 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; | 348 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
202 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 | 349 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 |
203 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 | 350 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 |
204 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 | 351 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 |
205 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 | 352 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 |
206 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 | 353 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 |
207 #else | 354 #else // ARCH_X86 |
208 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; | 355 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; |
209 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; | 356 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; |
210 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 357 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
211 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 358 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
212 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 359 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
213 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 360 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
214 #endif | 361 #endif // ARCH_X86_64 / ARCH_X86 |
215 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; | 362 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; |
216 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; | 363 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; |
217 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; | 364 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; |
218 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; | 365 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; |
219 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; | 366 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; |
220 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; | 367 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; |
221 | 368 |
222 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 369 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
223 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 370 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
224 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 371 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 // const int16_t *filter_x, int x_step_q4, | 410 // const int16_t *filter_x, int x_step_q4, |
264 // const int16_t *filter_y, int y_step_q4, | 411 // const int16_t *filter_y, int y_step_q4, |
265 // int w, int h); | 412 // int w, int h); |
266 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, | 413 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, |
267 // uint8_t *dst, ptrdiff_t dst_stride, | 414 // uint8_t *dst, ptrdiff_t dst_stride, |
268 // const int16_t *filter_x, int x_step_q4, | 415 // const int16_t *filter_x, int x_step_q4, |
269 // const int16_t *filter_y, int y_step_q4, | 416 // const int16_t *filter_y, int y_step_q4, |
270 // int w, int h); | 417 // int w, int h); |
271 FUN_CONV_2D(, ssse3); | 418 FUN_CONV_2D(, ssse3); |
272 FUN_CONV_2D(avg_ , ssse3); | 419 FUN_CONV_2D(avg_ , ssse3); |
273 #endif | 420 #endif // HAVE_SSSE3 |
274 | 421 |
275 #if HAVE_SSE2 | 422 #if HAVE_SSE2 |
276 filter8_1dfunction vp9_filter_block1d16_v8_sse2; | 423 filter8_1dfunction vp9_filter_block1d16_v8_sse2; |
277 filter8_1dfunction vp9_filter_block1d16_h8_sse2; | 424 filter8_1dfunction vp9_filter_block1d16_h8_sse2; |
278 filter8_1dfunction vp9_filter_block1d8_v8_sse2; | 425 filter8_1dfunction vp9_filter_block1d8_v8_sse2; |
279 filter8_1dfunction vp9_filter_block1d8_h8_sse2; | 426 filter8_1dfunction vp9_filter_block1d8_h8_sse2; |
280 filter8_1dfunction vp9_filter_block1d4_v8_sse2; | 427 filter8_1dfunction vp9_filter_block1d4_v8_sse2; |
281 filter8_1dfunction vp9_filter_block1d4_h8_sse2; | 428 filter8_1dfunction vp9_filter_block1d4_h8_sse2; |
282 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; | 429 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; |
283 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; | 430 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
329 // const int16_t *filter_x, int x_step_q4, | 476 // const int16_t *filter_x, int x_step_q4, |
330 // const int16_t *filter_y, int y_step_q4, | 477 // const int16_t *filter_y, int y_step_q4, |
331 // int w, int h); | 478 // int w, int h); |
332 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, | 479 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
333 // uint8_t *dst, ptrdiff_t dst_stride, | 480 // uint8_t *dst, ptrdiff_t dst_stride, |
334 // const int16_t *filter_x, int x_step_q4, | 481 // const int16_t *filter_x, int x_step_q4, |
335 // const int16_t *filter_y, int y_step_q4, | 482 // const int16_t *filter_y, int y_step_q4, |
336 // int w, int h); | 483 // int w, int h); |
337 FUN_CONV_2D(, sse2); | 484 FUN_CONV_2D(, sse2); |
338 FUN_CONV_2D(avg_ , sse2); | 485 FUN_CONV_2D(avg_ , sse2); |
339 #endif | 486 |
| 487 #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 |
| 488 high_filter8_1dfunction vp9_high_filter_block1d16_v8_sse2; |
| 489 high_filter8_1dfunction vp9_high_filter_block1d16_h8_sse2; |
| 490 high_filter8_1dfunction vp9_high_filter_block1d8_v8_sse2; |
| 491 high_filter8_1dfunction vp9_high_filter_block1d8_h8_sse2; |
| 492 high_filter8_1dfunction vp9_high_filter_block1d4_v8_sse2; |
| 493 high_filter8_1dfunction vp9_high_filter_block1d4_h8_sse2; |
| 494 high_filter8_1dfunction vp9_high_filter_block1d16_v8_avg_sse2; |
| 495 high_filter8_1dfunction vp9_high_filter_block1d16_h8_avg_sse2; |
| 496 high_filter8_1dfunction vp9_high_filter_block1d8_v8_avg_sse2; |
| 497 high_filter8_1dfunction vp9_high_filter_block1d8_h8_avg_sse2; |
| 498 high_filter8_1dfunction vp9_high_filter_block1d4_v8_avg_sse2; |
| 499 high_filter8_1dfunction vp9_high_filter_block1d4_h8_avg_sse2; |
| 500 |
| 501 high_filter8_1dfunction vp9_high_filter_block1d16_v2_sse2; |
| 502 high_filter8_1dfunction vp9_high_filter_block1d16_h2_sse2; |
| 503 high_filter8_1dfunction vp9_high_filter_block1d8_v2_sse2; |
| 504 high_filter8_1dfunction vp9_high_filter_block1d8_h2_sse2; |
| 505 high_filter8_1dfunction vp9_high_filter_block1d4_v2_sse2; |
| 506 high_filter8_1dfunction vp9_high_filter_block1d4_h2_sse2; |
| 507 high_filter8_1dfunction vp9_high_filter_block1d16_v2_avg_sse2; |
| 508 high_filter8_1dfunction vp9_high_filter_block1d16_h2_avg_sse2; |
| 509 high_filter8_1dfunction vp9_high_filter_block1d8_v2_avg_sse2; |
| 510 high_filter8_1dfunction vp9_high_filter_block1d8_h2_avg_sse2; |
| 511 high_filter8_1dfunction vp9_high_filter_block1d4_v2_avg_sse2; |
| 512 high_filter8_1dfunction vp9_high_filter_block1d4_h2_avg_sse2; |
| 513 |
| 514 // void vp9_high_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 515 // uint8_t *dst, ptrdiff_t dst_stride, |
| 516 // const int16_t *filter_x, int x_step_q4, |
| 517 // const int16_t *filter_y, int y_step_q4, |
| 518 // int w, int h, int bd); |
| 519 // void vp9_high_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 520 // uint8_t *dst, ptrdiff_t dst_stride, |
| 521 // const int16_t *filter_x, int x_step_q4, |
| 522 // const int16_t *filter_y, int y_step_q4, |
| 523 // int w, int h, int bd); |
| 524 // void vp9_high_convolve8_avg_horiz_sse2(const uint8_t *src, |
| 525 // ptrdiff_t src_stride, |
| 526 // uint8_t *dst, ptrdiff_t dst_stride, |
| 527 // const int16_t *filter_x, |
| 528 // int x_step_q4, |
| 529 // const int16_t *filter_y, |
| 530 // int y_step_q4, |
| 531 // int w, int h, int bd); |
| 532 // void vp9_high_convolve8_avg_vert_sse2(const uint8_t *src, |
| 533 // ptrdiff_t src_stride, |
| 534 // uint8_t *dst, ptrdiff_t dst_stride, |
| 535 // const int16_t *filter_x, int x_step_q4, |
| 536 // const int16_t *filter_y, int y_step_q4, |
| 537 // int w, int h, int bd); |
| 538 HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); |
| 539 HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); |
| 540 HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); |
| 541 HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, |
| 542 sse2); |
| 543 |
| 544 // void vp9_high_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 545 // uint8_t *dst, ptrdiff_t dst_stride, |
| 546 // const int16_t *filter_x, int x_step_q4, |
| 547 // const int16_t *filter_y, int y_step_q4, |
| 548 // int w, int h, int bd); |
| 549 // void vp9_high_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 550 // uint8_t *dst, ptrdiff_t dst_stride, |
| 551 // const int16_t *filter_x, int x_step_q4, |
| 552 // const int16_t *filter_y, int y_step_q4, |
| 553 // int w, int h, int bd); |
| 554 HIGH_FUN_CONV_2D(, sse2); |
| 555 HIGH_FUN_CONV_2D(avg_ , sse2); |
| 556 #endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 |
| 557 #endif // HAVE_SSE2 |
OLD | NEW |