OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 #include "vp9/common/vp9_convolve.h" | 10 #include "vp9/common/vp9_convolve.h" |
11 | 11 |
12 #include <assert.h> | 12 #include <assert.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_filter.h" |
17 #include "vpx/vpx_integer.h" | 18 #include "vpx/vpx_integer.h" |
18 #include "vpx_ports/mem.h" | 19 #include "vpx_ports/mem.h" |
19 | 20 |
20 #define VP9_FILTER_WEIGHT 128 | |
21 #define VP9_FILTER_SHIFT 7 | |
22 | |
23 /* Assume a bank of 16 filters to choose from. There are two implementations | |
24 * for filter wrapping behavior, since we want to be able to pick which filter | |
25 * to start with. We could either: | |
26 * | |
27 * 1) make filter_ a pointer to the base of the filter array, and then add an | |
28 * additional offset parameter, to choose the starting filter. | |
29 * 2) use a pointer to 2 periods worth of filters, so that even if the original | |
30 * phase offset is at 15/16, we'll have valid data to read. The filter | |
31 * tables become [32][8], and the second half is duplicated. | |
32 * 3) fix the alignment of the filter tables, so that we know the 0/16 is | |
33 * always 256 byte aligned. | |
34 * | |
35 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2 | |
36 * parameters, and switching between them is trivial, with the | |
37 * ALIGN_FILTERS_256 macro, below. | |
38 */ | |
39 #define ALIGN_FILTERS_256 1 | |
40 | |
/*
 * Horizontal subpel convolution (C reference).
 *
 * src/dst:   pixel planes with the given strides.
 * filter_x0: pointer into a 256-byte-aligned bank of 16 subpel filters,
 *            each `taps` coefficients long; its offset within the bank
 *            encodes the starting phase.
 * x_step_q4: per-output-pixel source step in Q4 fixed point (16 == 1:1).
 * filter_y / y_step_q4 are unused here; kept for a uniform signature.
 *
 * NOTE: assumes the filter table is 256-byte aligned, so the bank base can
 * be recovered by masking the low 8 address bits.
 * TODO(agrange) Modify to make independent of table alignment.
 */
static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x0, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
                             int w, int h, int taps) {
  /* Fixed-point filter constants: 16 subpel phases, Q7 coefficients. */
  enum { kSubpelBits = 4, kSubpelMask = (1 << kSubpelBits) - 1,
         kFilterBits = 7 };
  /* Recover the bank base from the aligned table address. */
  const int16_t *const bank =
      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
  /* Initial phase, in Q4, implied by filter_x0's offset into the bank. */
  const int initial_q4 = (int)((filter_x0 - bank) / taps);
  int row;

  /* Center the tap window on the output pixel. */
  src -= taps / 2 - 1;

  for (row = 0; row < h; ++row) {
    int x_q4 = initial_q4;
    int col;

    for (col = 0; col < w; ++col) {
      /* Integer source position and subpel phase for this pixel. */
      const uint8_t *const s = src + (x_q4 >> kSubpelBits);
      const int16_t *const coeffs = bank + (x_q4 & kSubpelMask) * taps;
      int acc = 0;
      int k;

      for (k = 0; k < taps; ++k)
        acc += s[k] * coeffs[k];

      /* Round, descale and clamp to the 8-bit pixel range. */
      acc = (acc + (1 << (kFilterBits - 1))) >> kFilterBits;
      dst[col] = (uint8_t)(acc < 0 ? 0 : (acc > 255 ? 255 : acc));

      /* Step to the next source position. */
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
82 | 61 |
/*
 * Horizontal subpel convolution, averaged into dst (C reference).
 * Identical to convolve_horiz_c except each filtered pixel is combined
 * with the existing dst pixel as a rounded average.
 *
 * NOTE: assumes the filter table is 256-byte aligned, so the bank base can
 * be recovered by masking the low 8 address bits.
 * TODO(agrange) Modify to make independent of table alignment.
 */
static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x0, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
                                 int w, int h, int taps) {
  /* Fixed-point filter constants: 16 subpel phases, Q7 coefficients. */
  enum { kSubpelBits = 4, kSubpelMask = (1 << kSubpelBits) - 1,
         kFilterBits = 7 };
  const int16_t *const bank =
      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
  /* Initial phase, in Q4, implied by filter_x0's offset into the bank. */
  const int initial_q4 = (int)((filter_x0 - bank) / taps);
  int row;

  /* Center the tap window on the output pixel. */
  src -= taps / 2 - 1;

  for (row = 0; row < h; ++row) {
    int x_q4 = initial_q4;
    int col;

    for (col = 0; col < w; ++col) {
      const uint8_t *const s = src + (x_q4 >> kSubpelBits);
      const int16_t *const coeffs = bank + (x_q4 & kSubpelMask) * taps;
      int acc = 0;
      int clipped;
      int k;

      for (k = 0; k < taps; ++k)
        acc += s[k] * coeffs[k];

      /* Round, descale and clamp, then average with the current dst. */
      acc = (acc + (1 << (kFilterBits - 1))) >> kFilterBits;
      clipped = acc < 0 ? 0 : (acc > 255 ? 255 : acc);
      dst[col] = (uint8_t)((dst[col] + clipped + 1) >> 1);

      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
124 | 103 |
/*
 * Vertical subpel convolution (C reference).
 *
 * filter_y0: pointer into a 256-byte-aligned bank of 16 subpel filters,
 *            each `taps` coefficients long; its offset encodes the phase.
 * y_step_q4: per-output-pixel source step in Q4 fixed point (16 == 1:1).
 * filter_x / x_step_q4 are unused here; kept for a uniform signature.
 *
 * NOTE: assumes the filter table is 256-byte aligned, so the bank base can
 * be recovered by masking the low 8 address bits.
 * TODO(agrange) Modify to make independent of table alignment.
 */
static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y0, int y_step_q4,
                            int w, int h, int taps) {
  /* Fixed-point filter constants: 16 subpel phases, Q7 coefficients. */
  enum { kSubpelBits = 4, kSubpelMask = (1 << kSubpelBits) - 1,
         kFilterBits = 7 };
  const int16_t *const bank =
      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
  /* Initial phase, in Q4, implied by filter_y0's offset into the bank. */
  const int initial_q4 = (int)((filter_y0 - bank) / taps);
  int col;

  /* Center the tap window on the output pixel (vertically). */
  src -= src_stride * (taps / 2 - 1);

  for (col = 0; col < w; ++col) {
    int y_q4 = initial_q4;
    int row;

    for (row = 0; row < h; ++row) {
      const uint8_t *const s = src + (y_q4 >> kSubpelBits) * src_stride;
      const int16_t *const coeffs = bank + (y_q4 & kSubpelMask) * taps;
      int acc = 0;
      int k;

      for (k = 0; k < taps; ++k)
        acc += s[k * src_stride] * coeffs[k];

      /* Round, descale and clamp to the 8-bit pixel range. */
      acc = (acc + (1 << (kFilterBits - 1))) >> kFilterBits;
      dst[row * dst_stride] =
          (uint8_t)(acc < 0 ? 0 : (acc > 255 ? 255 : acc));

      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
166 | 145 |
/*
 * Vertical subpel convolution, averaged into dst (C reference).
 * Identical to convolve_vert_c except each filtered pixel is combined
 * with the existing dst pixel as a rounded average.
 *
 * NOTE: assumes the filter table is 256-byte aligned, so the bank base can
 * be recovered by masking the low 8 address bits.
 * TODO(agrange) Modify to make independent of table alignment.
 */
static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y0, int y_step_q4,
                                int w, int h, int taps) {
  /* Fixed-point filter constants: 16 subpel phases, Q7 coefficients. */
  enum { kSubpelBits = 4, kSubpelMask = (1 << kSubpelBits) - 1,
         kFilterBits = 7 };
  const int16_t *const bank =
      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
  /* Initial phase, in Q4, implied by filter_y0's offset into the bank. */
  const int initial_q4 = (int)((filter_y0 - bank) / taps);
  int col;

  /* Center the tap window on the output pixel (vertically). */
  src -= src_stride * (taps / 2 - 1);

  for (col = 0; col < w; ++col) {
    int y_q4 = initial_q4;
    int row;

    for (row = 0; row < h; ++row) {
      const uint8_t *const s = src + (y_q4 >> kSubpelBits) * src_stride;
      const int16_t *const coeffs = bank + (y_q4 & kSubpelMask) * taps;
      int acc = 0;
      int clipped;
      int k;

      for (k = 0; k < taps; ++k)
        acc += s[k * src_stride] * coeffs[k];

      /* Round, descale and clamp, then average with the current dst. */
      acc = (acc + (1 << (kFilterBits - 1))) >> kFilterBits;
      clipped = acc < 0 ? 0 : (acc > 255 ? 255 : acc);
      dst[row * dst_stride] =
          (uint8_t)((dst[row * dst_stride] + clipped + 1) >> 1);

      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}
209 | 187 |
/*
 * General 2-D subpel convolution: horizontal pass into a fixed-size
 * intermediate buffer, then vertical pass from that buffer into dst.
 * The fixed buffer size bounds the supported parameters (asserted below):
 * maximum intermediate height is 135, for y_step_q4 == 32, h == 64,
 * taps == 8.
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  uint8_t temp[64 * 135];
  /* Rows the vertical pass will consume, plus the filter border. */
  int intermediate_height = (h * y_step_q4) >> 4;

  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  if (intermediate_height < 1)
    intermediate_height = 1;
  intermediate_height += taps - 1;
  if (intermediate_height < h)
    intermediate_height = h;

  /* Horizontal pass: starts (taps/2 - 1) rows above src so the vertical
   * pass has its top border available in temp. */
  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4, w,
                   intermediate_height, taps);
  /* Vertical pass: skip the border rows at the top of temp. */
  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
                  x_step_q4, filter_y, y_step_q4, w, h, taps);
}
269 | 215 |
/* Public entry point: 8-tap horizontal-only subpel convolution. */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  convolve_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                   filter_y, y_step_q4, w, h, /*taps=*/8);
}
279 | 224 |
/* Public entry point: 8-tap horizontal subpel convolution, averaged
 * into the existing dst contents. */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  convolve_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                       filter_y, y_step_q4, w, h, /*taps=*/8);
}
289 | 233 |
/* Public entry point: 8-tap vertical-only subpel convolution. */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  convolve_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h, /*taps=*/8);
}
299 | 242 |
/* Public entry point: 8-tap vertical subpel convolution, averaged
 * into the existing dst contents. */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  convolve_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                      filter_y, y_step_q4, w, h, /*taps=*/8);
}
309 | 251 |
/* Public entry point: 8-tap 2-D (horizontal then vertical) subpel
 * convolution. */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  convolve_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
             filter_y, y_step_q4, w, h, /*taps=*/8);
}
319 | 260 |
320 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 261 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
321 uint8_t *dst, ptrdiff_t dst_stride, | 262 uint8_t *dst, ptrdiff_t dst_stride, |
322 const int16_t *filter_x, int x_step_q4, | 263 const int16_t *filter_x, int x_step_q4, |
323 const int16_t *filter_y, int y_step_q4, | 264 const int16_t *filter_y, int y_step_q4, |
324 int w, int h) { | 265 int w, int h) { |
325 /* Fixed size intermediate buffer places limits on parameters. */ | 266 /* Fixed size intermediate buffer places limits on parameters. */ |
326 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); | 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); |
327 assert(w <= 64); | 268 assert(w <= 64); |
328 assert(h <= 64); | 269 assert(h <= 64); |
329 | 270 |
330 vp9_convolve8(src, src_stride, | 271 vp9_convolve8(src, src_stride, temp, 64, |
331 temp, 64, | 272 filter_x, x_step_q4, filter_y, y_step_q4, w, h); |
332 filter_x, x_step_q4, | 273 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); |
333 filter_y, y_step_q4, | |
334 w, h); | |
335 vp9_convolve_avg(temp, 64, | |
336 dst, dst_stride, | |
337 NULL, 0, /* These unused parameter should be removed! */ | |
338 NULL, 0, /* These unused parameter should be removed! */ | |
339 w, h); | |
340 } | 274 } |
341 | 275 |
/*
 * Copy a w x h block of pixels from src to dst.
 * The filter/stride arguments exist only to match the common convolve
 * prototype and are ignored.
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int row;

  for (row = 0; row < h; ++row) {
    memcpy(dst + row * dst_stride, src + row * src_stride, w);
  }
}
355 | 289 |
/*
 * Rounded pixel-wise average of a w x h src block into dst:
 * dst = (dst + src + 1) >> 1.
 * The filter/stride arguments exist only to match the common convolve
 * prototype and are ignored.
 */
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int row;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col)
      dst[col] = (uint8_t)((dst[col] + src[col] + 1) >> 1);

    src += src_stride;
    dst += dst_stride;
  }
}
OLD | NEW |