| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 #ifndef VPX_DSP_X86_CONVOLVE_H_ | 10 #ifndef VPX_DSP_X86_CONVOLVE_H_ |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 uint32_t output_height, | 24 uint32_t output_height, |
| 25 const int16_t *filter | 25 const int16_t *filter |
| 26 ); | 26 ); |
| 27 | 27 |
| 28 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ | 28 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ |
| 29 void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ | 29 void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ |
| 30 uint8_t *dst, ptrdiff_t dst_stride, \ | 30 uint8_t *dst, ptrdiff_t dst_stride, \ |
| 31 const int16_t *filter_x, int x_step_q4, \ | 31 const int16_t *filter_x, int x_step_q4, \ |
| 32 const int16_t *filter_y, int y_step_q4, \ | 32 const int16_t *filter_y, int y_step_q4, \ |
| 33 int w, int h) { \ | 33 int w, int h) { \ |
| 34 if (step_q4 == 16 && filter[3] != 128) { \ | 34 assert(filter[3] != 128); \ |
| 35 if (filter[0] || filter[1] || filter[2]) { \ | 35 assert(step_q4 == 16); \ |
| 36 while (w >= 16) { \ | 36 if (filter[0] || filter[1] || filter[2]) { \ |
| 37 vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ | 37 while (w >= 16) { \ |
| 38 src_stride, \ | 38 vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ |
| 39 dst, \ | 39 src_stride, \ |
| 40 dst_stride, \ | 40 dst, \ |
| 41 h, \ | 41 dst_stride, \ |
| 42 filter); \ | 42 h, \ |
| 43 src += 16; \ | 43 filter); \ |
| 44 dst += 16; \ | 44 src += 16; \ |
| 45 w -= 16; \ | 45 dst += 16; \ |
| 46 } \ | 46 w -= 16; \ |
| 47 while (w >= 8) { \ | |
| 48 vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ | |
| 49 src_stride, \ | |
| 50 dst, \ | |
| 51 dst_stride, \ | |
| 52 h, \ | |
| 53 filter); \ | |
| 54 src += 8; \ | |
| 55 dst += 8; \ | |
| 56 w -= 8; \ | |
| 57 } \ | |
| 58 while (w >= 4) { \ | |
| 59 vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ | |
| 60 src_stride, \ | |
| 61 dst, \ | |
| 62 dst_stride, \ | |
| 63 h, \ | |
| 64 filter); \ | |
| 65 src += 4; \ | |
| 66 dst += 4; \ | |
| 67 w -= 4; \ | |
| 68 } \ | |
| 69 } else { \ | |
| 70 while (w >= 16) { \ | |
| 71 vpx_filter_block1d16_##dir##2_##avg##opt(src, \ | |
| 72 src_stride, \ | |
| 73 dst, \ | |
| 74 dst_stride, \ | |
| 75 h, \ | |
| 76 filter); \ | |
| 77 src += 16; \ | |
| 78 dst += 16; \ | |
| 79 w -= 16; \ | |
| 80 } \ | |
| 81 while (w >= 8) { \ | |
| 82 vpx_filter_block1d8_##dir##2_##avg##opt(src, \ | |
| 83 src_stride, \ | |
| 84 dst, \ | |
| 85 dst_stride, \ | |
| 86 h, \ | |
| 87 filter); \ | |
| 88 src += 8; \ | |
| 89 dst += 8; \ | |
| 90 w -= 8; \ | |
| 91 } \ | |
| 92 while (w >= 4) { \ | |
| 93 vpx_filter_block1d4_##dir##2_##avg##opt(src, \ | |
| 94 src_stride, \ | |
| 95 dst, \ | |
| 96 dst_stride, \ | |
| 97 h, \ | |
| 98 filter); \ | |
| 99 src += 4; \ | |
| 100 dst += 4; \ | |
| 101 w -= 4; \ | |
| 102 } \ | |
| 103 } \ | 47 } \ |
| 104 } \ | 48 while (w >= 8) { \ |
| 105 if (w) { \ | 49 vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ |
| 106 vpx_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ | 50 src_stride, \ |
| 107 filter_x, x_step_q4, filter_y, y_step_q4, \ | 51 dst, \ |
| 108 w, h); \ | 52 dst_stride, \ |
| 53 h, \ |
| 54 filter); \ |
| 55 src += 8; \ |
| 56 dst += 8; \ |
| 57 w -= 8; \ |
| 58 } \ |
| 59 while (w >= 4) { \ |
| 60 vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ |
| 61 src_stride, \ |
| 62 dst, \ |
| 63 dst_stride, \ |
| 64 h, \ |
| 65 filter); \ |
| 66 src += 4; \ |
| 67 dst += 4; \ |
| 68 w -= 4; \ |
| 69 } \ |
| 70 } else { \ |
| 71 while (w >= 16) { \ |
| 72 vpx_filter_block1d16_##dir##2_##avg##opt(src, \ |
| 73 src_stride, \ |
| 74 dst, \ |
| 75 dst_stride, \ |
| 76 h, \ |
| 77 filter); \ |
| 78 src += 16; \ |
| 79 dst += 16; \ |
| 80 w -= 16; \ |
| 81 } \ |
| 82 while (w >= 8) { \ |
| 83 vpx_filter_block1d8_##dir##2_##avg##opt(src, \ |
| 84 src_stride, \ |
| 85 dst, \ |
| 86 dst_stride, \ |
| 87 h, \ |
| 88 filter); \ |
| 89 src += 8; \ |
| 90 dst += 8; \ |
| 91 w -= 8; \ |
| 92 } \ |
| 93 while (w >= 4) { \ |
| 94 vpx_filter_block1d4_##dir##2_##avg##opt(src, \ |
| 95 src_stride, \ |
| 96 dst, \ |
| 97 dst_stride, \ |
| 98 h, \ |
| 99 filter); \ |
| 100 src += 4; \ |
| 101 dst += 4; \ |
| 102 w -= 4; \ |
| 103 } \ |
| 109 } \ | 104 } \ |
| 110 } | 105 } |
| 111 | 106 |
| 112 #define FUN_CONV_2D(avg, opt) \ | 107 #define FUN_CONV_2D(avg, opt) \ |
| 113 void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ | 108 void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ |
| 114 uint8_t *dst, ptrdiff_t dst_stride, \ | 109 uint8_t *dst, ptrdiff_t dst_stride, \ |
| 115 const int16_t *filter_x, int x_step_q4, \ | 110 const int16_t *filter_x, int x_step_q4, \ |
| 116 const int16_t *filter_y, int y_step_q4, \ | 111 const int16_t *filter_y, int y_step_q4, \ |
| 117 int w, int h) { \ | 112 int w, int h) { \ |
| 113 assert(filter_x[3] != 128); \ |
| 114 assert(filter_y[3] != 128); \ |
| 118 assert(w <= 64); \ | 115 assert(w <= 64); \ |
| 119 assert(h <= 64); \ | 116 assert(h <= 64); \ |
| 120 if (x_step_q4 == 16 && y_step_q4 == 16) { \ | 117 assert(x_step_q4 == 16); \ |
| 121 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ | 118 assert(y_step_q4 == 16); \ |
| 122 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ | 119 if (filter_x[0] || filter_x[1] || filter_x[2]|| \ |
| 123 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ | 120 filter_y[0] || filter_y[1] || filter_y[2]) { \ |
| 124 vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ | 121 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ |
| 125 filter_x, x_step_q4, filter_y, y_step_q4, \ | 122 vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ |
| 126 w, h + 7); \ | 123 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 127 vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ | 124 w, h + 7); \ |
| 128 filter_x, x_step_q4, filter_y, \ | 125 vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ |
| 129 y_step_q4, w, h); \ | 126 filter_x, x_step_q4, filter_y, \ |
| 130 } else { \ | 127 y_step_q4, w, h); \ |
| 131 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ | |
| 132 vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ | |
| 133 filter_x, x_step_q4, filter_y, y_step_q4, \ | |
| 134 w, h + 1); \ | |
| 135 vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ | |
| 136 filter_x, x_step_q4, filter_y, \ | |
| 137 y_step_q4, w, h); \ | |
| 138 } \ | |
| 139 } else { \ | 128 } else { \ |
| 140 vpx_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 129 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ |
| 141 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ | 130 vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ |
| 131 filter_x, x_step_q4, filter_y, y_step_q4, \ |
| 132 w, h + 1); \ |
| 133 vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ |
| 134 filter_x, x_step_q4, filter_y, \ |
| 135 y_step_q4, w, h); \ |
| 142 } \ | 136 } \ |
| 143 } | 137 } |
| 144 | 138 |
| 145 #if CONFIG_VP9_HIGHBITDEPTH | 139 #if CONFIG_VP9_HIGHBITDEPTH |
| 146 | 140 |
| 147 typedef void highbd_filter8_1dfunction ( | 141 typedef void highbd_filter8_1dfunction ( |
| 148 const uint16_t *src_ptr, | 142 const uint16_t *src_ptr, |
| 149 const ptrdiff_t src_pitch, | 143 const ptrdiff_t src_pitch, |
| 150 uint16_t *output_ptr, | 144 uint16_t *output_ptr, |
| 151 ptrdiff_t out_pitch, | 145 ptrdiff_t out_pitch, |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 287 } \ | 281 } \ |
| 288 } else { \ | 282 } else { \ |
| 289 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 283 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
| 290 filter_x, x_step_q4, filter_y, y_step_q4, w, \ | 284 filter_x, x_step_q4, filter_y, y_step_q4, w, \ |
| 291 h, bd); \ | 285 h, bd); \ |
| 292 } \ | 286 } \ |
| 293 } | 287 } |
| 294 #endif // CONFIG_VP9_HIGHBITDEPTH | 288 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 295 | 289 |
| 296 #endif // VPX_DSP_X86_CONVOLVE_H_ | 290 #endif // VPX_DSP_X86_CONVOLVE_H_ |
| OLD | NEW |