| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 135 y_step_q4, w, h); \ | 135 y_step_q4, w, h); \ |
| 136 } \ | 136 } \ |
| 137 } else { \ | 137 } else { \ |
| 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
| 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ | 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ |
| 140 } \ | 140 } \ |
| 141 } | 141 } |
| 142 #if HAVE_AVX2 | 142 #if HAVE_AVX2 |
| 143 filter8_1dfunction vp9_filter_block1d16_v8_avx2; | 143 filter8_1dfunction vp9_filter_block1d16_v8_avx2; |
| 144 filter8_1dfunction vp9_filter_block1d16_h8_avx2; | 144 filter8_1dfunction vp9_filter_block1d16_h8_avx2; |
| 145 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 146 #if (ARCH_X86_64) |
| 147 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
| 148 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
| 149 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
| 150 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 |
| 151 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 |
| 152 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 |
| 153 #else |
| 145 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 154 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
| 146 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 155 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
| 147 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | |
| 148 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 156 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
| 157 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 |
| 158 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 |
| 159 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 |
| 160 #endif |
| 149 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 161 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
| 150 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 162 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
| 151 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 163 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
| 152 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; | 164 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; |
| 153 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; | 165 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; |
| 154 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; | 166 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; |
| 155 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 | |
| 156 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 | |
| 157 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 | 167 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 |
| 158 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 | |
| 159 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 | 168 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 |
| 160 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 | 169 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 |
| 161 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 | 170 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 |
| 162 #define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 | 171 #define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 |
| 163 #define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 | 172 #define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 |
| 164 #define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 | 173 #define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 |
| 165 // void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, | 174 // void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, |
| 166 // uint8_t *dst, ptrdiff_t dst_stride, | 175 // uint8_t *dst, ptrdiff_t dst_stride, |
| 167 // const int16_t *filter_x, int x_step_q4, | 176 // const int16_t *filter_x, int x_step_q4, |
| 168 // const int16_t *filter_y, int y_step_q4, | 177 // const int16_t *filter_y, int y_step_q4, |
| 169 // int w, int h); | 178 // int w, int h); |
| 170 // void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, | 179 // void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, |
| 171 // uint8_t *dst, ptrdiff_t dst_stride, | 180 // uint8_t *dst, ptrdiff_t dst_stride, |
| 172 // const int16_t *filter_x, int x_step_q4, | 181 // const int16_t *filter_x, int x_step_q4, |
| 173 // const int16_t *filter_y, int y_step_q4, | 182 // const int16_t *filter_y, int y_step_q4, |
| 174 // int w, int h); | 183 // int w, int h); |
| 175 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); | 184 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); |
| 176 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); | 185 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); |
| 177 | 186 |
| 178 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, | 187 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, |
| 179 // uint8_t *dst, ptrdiff_t dst_stride, | 188 // uint8_t *dst, ptrdiff_t dst_stride, |
| 180 // const int16_t *filter_x, int x_step_q4, | 189 // const int16_t *filter_x, int x_step_q4, |
| 181 // const int16_t *filter_y, int y_step_q4, | 190 // const int16_t *filter_y, int y_step_q4, |
| 182 // int w, int h); | 191 // int w, int h); |
| 183 FUN_CONV_2D(, avx2); | 192 FUN_CONV_2D(, avx2); |
| 184 #endif | 193 #endif |
| 185 #if HAVE_SSSE3 | 194 #if HAVE_SSSE3 |
| 195 #if (ARCH_X86_64) |
| 196 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; |
| 197 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; |
| 198 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; |
| 199 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; |
| 200 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 201 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; |
| 202 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 |
| 203 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 |
| 204 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 |
| 205 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 |
| 206 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 |
| 207 #else |
| 186 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; | 208 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; |
| 187 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; | 209 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; |
| 188 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; | 210 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; |
| 189 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; | 211 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; |
| 190 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; | 212 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; |
| 191 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; | 213 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; |
| 214 #endif |
| 192 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; | 215 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; |
| 193 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; | 216 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; |
| 194 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; | 217 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; |
| 195 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; | 218 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; |
| 196 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; | 219 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; |
| 197 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; | 220 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; |
| 198 | 221 |
| 199 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; | 222 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; |
| 200 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; | 223 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; |
| 201 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; | 224 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 307 // const int16_t *filter_y, int y_step_q4, | 330 // const int16_t *filter_y, int y_step_q4, |
| 308 // int w, int h); | 331 // int w, int h); |
| 309 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, | 332 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, |
| 310 // uint8_t *dst, ptrdiff_t dst_stride, | 333 // uint8_t *dst, ptrdiff_t dst_stride, |
| 311 // const int16_t *filter_x, int x_step_q4, | 334 // const int16_t *filter_x, int x_step_q4, |
| 312 // const int16_t *filter_y, int y_step_q4, | 335 // const int16_t *filter_y, int y_step_q4, |
| 313 // int w, int h); | 336 // int w, int h); |
| 314 FUN_CONV_2D(, sse2); | 337 FUN_CONV_2D(, sse2); |
| 315 FUN_CONV_2D(avg_ , sse2); | 338 FUN_CONV_2D(avg_ , sse2); |
| 316 #endif | 339 #endif |
| OLD | NEW |