Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 111463005: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include <assert.h>
12 12
13 #include "./vpx_config.h" 13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h" 14 #include "./vp9_rtcd.h"
15 #include "vp9/common/vp9_common.h" 15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_convolve.h" 16 #include "vp9/common/vp9_convolve.h"
17 #include "vp9/common/vp9_filter.h" 17 #include "vp9/common/vp9_filter.h"
18 #include "vpx/vpx_integer.h" 18 #include "vpx/vpx_integer.h"
19 #include "vpx_ports/mem.h" 19 #include "vpx_ports/mem.h"
20 20
21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
22 uint8_t *dst, ptrdiff_t dst_stride, 22 uint8_t *dst, ptrdiff_t dst_stride,
23 const int16_t *filter_x0, int x_step_q4, 23 const subpel_kernel *x_filters,
24 const int16_t *filter_y, int y_step_q4, 24 int x0_q4, int x_step_q4, int w, int h) {
25 int w, int h, int taps) { 25 int x, y;
26 int x, y, k; 26 src -= SUBPEL_TAPS / 2 - 1;
27
28 /* NOTE: This assumes that the filter table is 256-byte aligned. */
29 /* TODO(agrange) Modify to make independent of table alignment. */
30 const int16_t *const filter_x_base =
31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
32
33 /* Adjust base pointer address for this source line */
34 src -= taps / 2 - 1;
35
36 for (y = 0; y < h; ++y) { 27 for (y = 0; y < h; ++y) {
37 /* Initial phase offset */ 28 int x_q4 = x0_q4;
38 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
39
40 for (x = 0; x < w; ++x) { 29 for (x = 0; x < w; ++x) {
41 /* Per-pixel src offset */ 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
42 const int src_x = x_q4 >> SUBPEL_BITS; 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
43 int sum = 0; 32 int k, sum = 0;
44 33 for (k = 0; k < SUBPEL_TAPS; ++k)
45 /* Pointer to filter to use */ 34 sum += src_x[k] * x_filter[k];
46 const int16_t *const filter_x = filter_x_base +
47 (x_q4 & SUBPEL_MASK) * taps;
48
49 for (k = 0; k < taps; ++k)
50 sum += src[src_x + k] * filter_x[k];
51
52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
53
54 /* Move to the next source pixel */
55 x_q4 += x_step_q4; 36 x_q4 += x_step_q4;
56 } 37 }
57 src += src_stride; 38 src += src_stride;
58 dst += dst_stride; 39 dst += dst_stride;
59 } 40 }
60 } 41 }
61 42
62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
63 uint8_t *dst, ptrdiff_t dst_stride, 44 uint8_t *dst, ptrdiff_t dst_stride,
64 const int16_t *filter_x0, int x_step_q4, 45 const subpel_kernel *x_filters,
65 const int16_t *filter_y, int y_step_q4, 46 int x0_q4, int x_step_q4, int w, int h) {
66 int w, int h, int taps) { 47 int x, y;
67 int x, y, k; 48 src -= SUBPEL_TAPS / 2 - 1;
68
69 /* NOTE: This assumes that the filter table is 256-byte aligned. */
70 /* TODO(agrange) Modify to make independent of table alignment. */
71 const int16_t *const filter_x_base =
72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
73
74 /* Adjust base pointer address for this source line */
75 src -= taps / 2 - 1;
76
77 for (y = 0; y < h; ++y) { 49 for (y = 0; y < h; ++y) {
78 /* Initial phase offset */ 50 int x_q4 = x0_q4;
79 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
80
81 for (x = 0; x < w; ++x) { 51 for (x = 0; x < w; ++x) {
82 /* Per-pixel src offset */ 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
83 const int src_x = x_q4 >> SUBPEL_BITS; 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
84 int sum = 0; 54 int k, sum = 0;
85 55 for (k = 0; k < SUBPEL_TAPS; ++k)
86 /* Pointer to filter to use */ 56 sum += src_x[k] * x_filter[k];
87 const int16_t *const filter_x = filter_x_base +
88 (x_q4 & SUBPEL_MASK) * taps;
89
90 for (k = 0; k < taps; ++k)
91 sum += src[src_x + k] * filter_x[k];
92
93 dst[x] = ROUND_POWER_OF_TWO(dst[x] + 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] +
94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
95
96 /* Move to the next source pixel */
97 x_q4 += x_step_q4; 59 x_q4 += x_step_q4;
98 } 60 }
99 src += src_stride; 61 src += src_stride;
100 dst += dst_stride; 62 dst += dst_stride;
101 } 63 }
102 } 64 }
103 65
104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride, 66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
105 uint8_t *dst, ptrdiff_t dst_stride, 67 uint8_t *dst, ptrdiff_t dst_stride,
106 const int16_t *filter_x, int x_step_q4, 68 const subpel_kernel *y_filters,
107 const int16_t *filter_y0, int y_step_q4, 69 int y0_q4, int y_step_q4, int w, int h) {
108 int w, int h, int taps) { 70 int x, y;
109 int x, y, k; 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
110
111 /* NOTE: This assumes that the filter table is 256-byte aligned. */
112 /* TODO(agrange) Modify to make independent of table alignment. */
113 const int16_t *const filter_y_base =
114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
115
116 /* Adjust base pointer address for this source column */
117 src -= src_stride * (taps / 2 - 1);
118 72
119 for (x = 0; x < w; ++x) { 73 for (x = 0; x < w; ++x) {
120 /* Initial phase offset */ 74 int y_q4 = y0_q4;
121 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
122
123 for (y = 0; y < h; ++y) { 75 for (y = 0; y < h; ++y) {
124 /* Per-pixel src offset */ 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
125 const int src_y = y_q4 >> SUBPEL_BITS; 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
126 int sum = 0; 78 int k, sum = 0;
127 79 for (k = 0; k < SUBPEL_TAPS; ++k)
128 /* Pointer to filter to use */ 80 sum += src_y[k * src_stride] * y_filter[k];
129 const int16_t *const filter_y = filter_y_base + 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
130 (y_q4 & SUBPEL_MASK) * taps;
131
132 for (k = 0; k < taps; ++k)
133 sum += src[(src_y + k) * src_stride] * filter_y[k];
134
135 dst[y * dst_stride] =
136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
137
138 /* Move to the next source pixel */
139 y_q4 += y_step_q4; 82 y_q4 += y_step_q4;
140 } 83 }
141 ++src; 84 ++src;
142 ++dst; 85 ++dst;
143 } 86 }
144 } 87 }
145 88
146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, 89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
147 uint8_t *dst, ptrdiff_t dst_stride, 90 uint8_t *dst, ptrdiff_t dst_stride,
148 const int16_t *filter_x, int x_step_q4, 91 const subpel_kernel *y_filters,
149 const int16_t *filter_y0, int y_step_q4, 92 int y0_q4, int y_step_q4, int w, int h) {
150 int w, int h, int taps) { 93 int x, y;
151 int x, y, k; 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
152
153 /* NOTE: This assumes that the filter table is 256-byte aligned. */
154 /* TODO(agrange) Modify to make independent of table alignment. */
155 const int16_t *const filter_y_base =
156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
157
158 /* Adjust base pointer address for this source column */
159 src -= src_stride * (taps / 2 - 1);
160 95
161 for (x = 0; x < w; ++x) { 96 for (x = 0; x < w; ++x) {
162 /* Initial phase offset */ 97 int y_q4 = y0_q4;
163 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
164
165 for (y = 0; y < h; ++y) { 98 for (y = 0; y < h; ++y) {
166 /* Per-pixel src offset */ 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
167 const int src_y = y_q4 >> SUBPEL_BITS; 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
168 int sum = 0; 101 int k, sum = 0;
169 102 for (k = 0; k < SUBPEL_TAPS; ++k)
170 /* Pointer to filter to use */ 103 sum += src_y[k * src_stride] * y_filter[k];
171 const int16_t *const filter_y = filter_y_base +
172 (y_q4 & SUBPEL_MASK) * taps;
173
174 for (k = 0; k < taps; ++k)
175 sum += src[(src_y + k) * src_stride] * filter_y[k];
176
177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
179
180 /* Move to the next source pixel */
181 y_q4 += y_step_q4; 106 y_q4 += y_step_q4;
182 } 107 }
183 ++src; 108 ++src;
184 ++dst; 109 ++dst;
185 } 110 }
186 } 111 }
187 112
188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride, 113 static void convolve(const uint8_t *src, ptrdiff_t src_stride,
189 uint8_t *dst, ptrdiff_t dst_stride, 114 uint8_t *dst, ptrdiff_t dst_stride,
190 const int16_t *filter_x, int x_step_q4, 115 const subpel_kernel *const x_filters,
191 const int16_t *filter_y, int y_step_q4, 116 int x0_q4, int x_step_q4,
192 int w, int h, int taps) { 117 const subpel_kernel *const y_filters,
193 /* Fixed size intermediate buffer places limits on parameters. 118 int y0_q4, int y_step_q4,
194 * Maximum intermediate_height is 324, for y_step_q4 == 80, 119 int w, int h) {
195 * h == 64, taps == 8. 120 // Fixed size intermediate buffer places limits on parameters.
196 * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc 121 // Maximum intermediate_height is 324, for y_step_q4 == 80,
197 */ 122 // h == 64, taps == 8.
123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
198 uint8_t temp[64 * 324]; 124 uint8_t temp[64 * 324];
199 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps; 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
200 126
201 assert(w <= 64); 127 assert(w <= 64);
202 assert(h <= 64); 128 assert(h <= 64);
203 assert(taps <= 8);
204 assert(y_step_q4 <= 80); 129 assert(y_step_q4 <= 80);
205 assert(x_step_q4 <= 80); 130 assert(x_step_q4 <= 80);
206 131
207 if (intermediate_height < h) 132 if (intermediate_height < h)
208 intermediate_height = h; 133 intermediate_height = h;
209 134
210 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64, 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
211 filter_x, x_step_q4, filter_y, y_step_q4, w, 136 x_filters, x0_q4, x_step_q4, w, intermediate_height);
212 intermediate_height, taps); 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
213 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x, 138 y_filters, y0_q4, y_step_q4, w, h);
214 x_step_q4, filter_y, y_step_q4, w, h, taps); 139 }
140
141 static const subpel_kernel *get_filter_base(const int16_t *filter) {
142 // NOTE: This assumes that the filter table is 256-byte aligned.
143 // TODO(agrange) Modify to make independent of table alignment.
144 return (const subpel_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
145 }
146
147 static int get_filter_offset(const int16_t *f, const subpel_kernel *base) {
148 return (const subpel_kernel *)(intptr_t)f - base;
215 } 149 }
216 150
217 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
218 uint8_t *dst, ptrdiff_t dst_stride, 152 uint8_t *dst, ptrdiff_t dst_stride,
219 const int16_t *filter_x, int x_step_q4, 153 const int16_t *filter_x, int x_step_q4,
220 const int16_t *filter_y, int y_step_q4, 154 const int16_t *filter_y, int y_step_q4,
221 int w, int h) { 155 int w, int h) {
222 convolve_horiz_c(src, src_stride, dst, dst_stride, 156 const subpel_kernel *const filters_x = get_filter_base(filter_x);
223 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); 157 const int x0_q4 = get_filter_offset(filter_x, filters_x);
158
159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
160 x0_q4, x_step_q4, w, h);
224 } 161 }
225 162
226 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
227 uint8_t *dst, ptrdiff_t dst_stride, 164 uint8_t *dst, ptrdiff_t dst_stride,
228 const int16_t *filter_x, int x_step_q4, 165 const int16_t *filter_x, int x_step_q4,
229 const int16_t *filter_y, int y_step_q4, 166 const int16_t *filter_y, int y_step_q4,
230 int w, int h) { 167 int w, int h) {
231 convolve_avg_horiz_c(src, src_stride, dst, dst_stride, 168 const subpel_kernel *const filters_x = get_filter_base(filter_x);
232 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); 169 const int x0_q4 = get_filter_offset(filter_x, filters_x);
170
171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
172 x0_q4, x_step_q4, w, h);
233 } 173 }
234 174
235 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, 175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
236 uint8_t *dst, ptrdiff_t dst_stride, 176 uint8_t *dst, ptrdiff_t dst_stride,
237 const int16_t *filter_x, int x_step_q4, 177 const int16_t *filter_x, int x_step_q4,
238 const int16_t *filter_y, int y_step_q4, 178 const int16_t *filter_y, int y_step_q4,
239 int w, int h) { 179 int w, int h) {
240 convolve_vert_c(src, src_stride, dst, dst_stride, 180 const subpel_kernel *const filters_y = get_filter_base(filter_y);
241 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); 181 const int y0_q4 = get_filter_offset(filter_y, filters_y);
182 convolve_vert(src, src_stride, dst, dst_stride, filters_y,
183 y0_q4, y_step_q4, w, h);
242 } 184 }
243 185
244 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, 186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
245 uint8_t *dst, ptrdiff_t dst_stride, 187 uint8_t *dst, ptrdiff_t dst_stride,
246 const int16_t *filter_x, int x_step_q4, 188 const int16_t *filter_x, int x_step_q4,
247 const int16_t *filter_y, int y_step_q4, 189 const int16_t *filter_y, int y_step_q4,
248 int w, int h) { 190 int w, int h) {
249 convolve_avg_vert_c(src, src_stride, dst, dst_stride, 191 const subpel_kernel *const filters_y = get_filter_base(filter_y);
250 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); 192 const int y0_q4 = get_filter_offset(filter_y, filters_y);
193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
194 y0_q4, y_step_q4, w, h);
251 } 195 }
252 196
253 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, 197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
254 uint8_t *dst, ptrdiff_t dst_stride, 198 uint8_t *dst, ptrdiff_t dst_stride,
255 const int16_t *filter_x, int x_step_q4, 199 const int16_t *filter_x, int x_step_q4,
256 const int16_t *filter_y, int y_step_q4, 200 const int16_t *filter_y, int y_step_q4,
257 int w, int h) { 201 int w, int h) {
258 convolve_c(src, src_stride, dst, dst_stride, 202 const subpel_kernel *const filters_x = get_filter_base(filter_x);
259 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); 203 const int x0_q4 = get_filter_offset(filter_x, filters_x);
204
205 const subpel_kernel *const filters_y = get_filter_base(filter_y);
206 const int y0_q4 = get_filter_offset(filter_y, filters_y);
207
208 convolve(src, src_stride, dst, dst_stride,
209 filters_x, x0_q4, x_step_q4,
210 filters_y, y0_q4, y_step_q4, w, h);
260 } 211 }
261 212
262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, 213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
263 uint8_t *dst, ptrdiff_t dst_stride, 214 uint8_t *dst, ptrdiff_t dst_stride,
264 const int16_t *filter_x, int x_step_q4, 215 const int16_t *filter_x, int x_step_q4,
265 const int16_t *filter_y, int y_step_q4, 216 const int16_t *filter_y, int y_step_q4,
266 int w, int h) { 217 int w, int h) {
267 /* Fixed size intermediate buffer places limits on parameters. */ 218 /* Fixed size intermediate buffer places limits on parameters. */
268 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); 219 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
269 assert(w <= 64); 220 assert(w <= 64);
270 assert(h <= 64); 221 assert(h <= 64);
271 222
272 vp9_convolve8(src, src_stride, temp, 64, 223 vp9_convolve8_c(src, src_stride, temp, 64,
273 filter_x, x_step_q4, filter_y, y_step_q4, w, h); 224 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
274 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); 225 vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
275 } 226 }
276 227
277 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, 228 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
278 uint8_t *dst, ptrdiff_t dst_stride, 229 uint8_t *dst, ptrdiff_t dst_stride,
279 const int16_t *filter_x, int filter_x_stride, 230 const int16_t *filter_x, int filter_x_stride,
280 const int16_t *filter_y, int filter_y_stride, 231 const int16_t *filter_y, int filter_y_stride,
281 int w, int h) { 232 int w, int h) {
282 int r; 233 int r;
283 234
284 for (r = h; r > 0; --r) { 235 for (r = h; r > 0; --r) {
(...skipping 11 matching lines...) Expand all
296 int x, y; 247 int x, y;
297 248
298 for (y = 0; y < h; ++y) { 249 for (y = 0; y < h; ++y) {
299 for (x = 0; x < w; ++x) 250 for (x = 0; x < w; ++x)
300 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
301 252
302 src += src_stride; 253 src += src_stride;
303 dst += dst_stride; 254 dst += dst_stride;
304 } 255 }
305 } 256 }
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_common_data.c ('k') | source/libvpx/vp9/common/vp9_default_coef_probs.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698