OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 #ifndef VPX_DSP_X86_CONVOLVE_H_ | 10 #ifndef VPX_DSP_X86_CONVOLVE_H_ |
(...skipping 13 matching lines...) Expand all Loading... |
24 uint32_t output_height, | 24 uint32_t output_height, |
25 const int16_t *filter | 25 const int16_t *filter |
26 ); | 26 ); |
27 | 27 |
/* FUN_CONV_1D: generates an x86-optimized 1-D (horizontal or vertical)
 * convolution wrapper named vpx_convolve8_<name>_<opt>.
 *
 *   name      - suffix for the generated function name (e.g. horiz, avg_vert)
 *   step_q4   - x_step_q4 or y_step_q4; must be 16 (unscaled convolution)
 *   filter    - filter_x or filter_y, an 8-tap filter in Q4 precision
 *   dir       - kernel direction token, h or v
 *   src_start - first source pixel the kernel reads (src, or src offset by
 *               the filter's leading taps for the vertical pass)
 *   avg       - empty or avg_ to select the averaging kernel variants
 *   opt       - ISA suffix of the underlying kernels (e.g. sse2, ssse3)
 *
 * The wrapper slices the block into 16-, 8- and 4-wide columns and hands
 * each column to the matching vpx_filter_block1d{16,8,4}_<dir>{8,2}_ kernel.
 * The 8-tap kernels run when any outer tap is nonzero; otherwise the cheaper
 * 2-tap (bilinear) kernels run on src directly.
 *
 * NOTE(review): unlike the previous revision there is no runtime fallback to
 * the C path; unscaled input (step_q4 == 16) and filter[3] != 128 are now
 * hard preconditions enforced by assert. Callers must honor them.
 * Assumes w is a multiple of 4 (true for all codec block sizes), since any
 * remainder after the w >= 4 loop is silently left unprocessed.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
  void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                    uint8_t *dst, ptrdiff_t dst_stride, \
                                    const int16_t *filter_x, int x_step_q4, \
                                    const int16_t *filter_y, int y_step_q4, \
                                    int w, int h) { \
    assert(filter[3] != 128); \
    assert(step_q4 == 16); \
    if (filter[0] || filter[1] || filter[2]) { \
      /* At least one outer tap is active: use the full 8-tap kernels. */ \
      while (w >= 16) { \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                 dst_stride, h, filter); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } else { \
      /* Only the two center taps matter: use the 2-tap kernels on src. */ \
      while (w >= 16) { \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \
                                                 dst_stride, h, filter); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } \
  }
111 | 106 |
/* FUN_CONV_2D: generates an x86-optimized 2-D convolution wrapper named
 * vpx_convolve8_<avg><opt>, built as a horizontal pass into an aligned
 * intermediate buffer followed by a vertical pass into dst.
 *
 *   avg - empty or avg_ to select the averaging vertical pass
 *   opt - ISA suffix of the 1-D wrappers this composes (e.g. sse2, ssse3)
 *
 * When either filter has an active outer tap, the horizontal pass filters
 * h + 7 rows starting 3 rows above src so the 8-tap vertical pass has its
 * context rows; otherwise (2-tap case) only h + 1 rows are needed and no
 * offset is applied. The intermediate buffer is 64 pixels wide, hence the
 * w <= 64 / h <= 64 preconditions.
 *
 * NOTE(review): unlike the previous revision there is no runtime fallback to
 * vpx_convolve8_<avg>c; unscaled input (both step_q4 == 16) and
 * filter_*[3] != 128 are now hard preconditions enforced by assert.
 */
#define FUN_CONV_2D(avg, opt) \
  void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                uint8_t *dst, ptrdiff_t dst_stride, \
                                const int16_t *filter_x, int x_step_q4, \
                                const int16_t *filter_y, int y_step_q4, \
                                int w, int h) { \
    assert(filter_x[3] != 128); \
    assert(filter_y[3] != 128); \
    assert(w <= 64); \
    assert(h <= 64); \
    assert(x_step_q4 == 16); \
    assert(y_step_q4 == 16); \
    if (filter_x[0] || filter_x[1] || filter_x[2] || \
        filter_y[0] || filter_y[1] || filter_y[2]) { \
      /* 8-tap: filter 3 extra rows above and 4 below for vertical context. */ \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
                                filter_x, x_step_q4, filter_y, y_step_q4, \
                                w, h + 7); \
      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
                                      filter_x, x_step_q4, filter_y, \
                                      y_step_q4, w, h); \
    } else { \
      /* 2-tap: only one extra row of context is required. */ \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
                                filter_x, x_step_q4, filter_y, y_step_q4, \
                                w, h + 1); \
      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
                                      filter_x, x_step_q4, filter_y, \
                                      y_step_q4, w, h); \
    } \
  }
144 | 138 |
145 #if CONFIG_VP9_HIGHBITDEPTH | 139 #if CONFIG_VP9_HIGHBITDEPTH |
146 | 140 |
147 typedef void highbd_filter8_1dfunction ( | 141 typedef void highbd_filter8_1dfunction ( |
148 const uint16_t *src_ptr, | 142 const uint16_t *src_ptr, |
149 const ptrdiff_t src_pitch, | 143 const ptrdiff_t src_pitch, |
150 uint16_t *output_ptr, | 144 uint16_t *output_ptr, |
151 ptrdiff_t out_pitch, | 145 ptrdiff_t out_pitch, |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
287 } \ | 281 } \ |
288 } else { \ | 282 } else { \ |
289 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 283 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
290 filter_x, x_step_q4, filter_y, y_step_q4, w, \ | 284 filter_x, x_step_q4, filter_y, y_step_q4, w, \ |
291 h, bd); \ | 285 h, bd); \ |
292 } \ | 286 } \ |
293 } | 287 } |
294 #endif // CONFIG_VP9_HIGHBITDEPTH | 288 #endif // CONFIG_VP9_HIGHBITDEPTH |
295 | 289 |
296 #endif // VPX_DSP_X86_CONVOLVE_H_ | 290 #endif // VPX_DSP_X86_CONVOLVE_H_ |
OLD | NEW |