OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 | 12 |
13 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
14 #include "./vp9_rtcd.h" | 14 #include "./vp9_rtcd.h" |
15 #include "vp9/common/vp9_common.h" | 15 #include "vp9/common/vp9_common.h" |
16 #include "vp9/common/vp9_convolve.h" | 16 #include "vp9/common/vp9_convolve.h" |
17 #include "vp9/common/vp9_filter.h" | 17 #include "vp9/common/vp9_filter.h" |
18 #include "vpx/vpx_integer.h" | 18 #include "vpx/vpx_integer.h" |
19 #include "vpx_ports/mem.h" | 19 #include "vpx_ports/mem.h" |
20 | 20 |
21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, | 21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, |
22 uint8_t *dst, ptrdiff_t dst_stride, | 22 uint8_t *dst, ptrdiff_t dst_stride, |
23 const interp_kernel *x_filters, | 23 const InterpKernel *x_filters, |
24 int x0_q4, int x_step_q4, int w, int h) { | 24 int x0_q4, int x_step_q4, int w, int h) { |
25 int x, y; | 25 int x, y; |
26 src -= SUBPEL_TAPS / 2 - 1; | 26 src -= SUBPEL_TAPS / 2 - 1; |
27 for (y = 0; y < h; ++y) { | 27 for (y = 0; y < h; ++y) { |
28 int x_q4 = x0_q4; | 28 int x_q4 = x0_q4; |
29 for (x = 0; x < w; ++x) { | 29 for (x = 0; x < w; ++x) { |
30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
32 int k, sum = 0; | 32 int k, sum = 0; |
33 for (k = 0; k < SUBPEL_TAPS; ++k) | 33 for (k = 0; k < SUBPEL_TAPS; ++k) |
34 sum += src_x[k] * x_filter[k]; | 34 sum += src_x[k] * x_filter[k]; |
35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); | 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
36 x_q4 += x_step_q4; | 36 x_q4 += x_step_q4; |
37 } | 37 } |
38 src += src_stride; | 38 src += src_stride; |
39 dst += dst_stride; | 39 dst += dst_stride; |
40 } | 40 } |
41 } | 41 } |
42 | 42 |
43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, | 43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, |
44 uint8_t *dst, ptrdiff_t dst_stride, | 44 uint8_t *dst, ptrdiff_t dst_stride, |
45 const interp_kernel *x_filters, | 45 const InterpKernel *x_filters, |
46 int x0_q4, int x_step_q4, int w, int h) { | 46 int x0_q4, int x_step_q4, int w, int h) { |
47 int x, y; | 47 int x, y; |
48 src -= SUBPEL_TAPS / 2 - 1; | 48 src -= SUBPEL_TAPS / 2 - 1; |
49 for (y = 0; y < h; ++y) { | 49 for (y = 0; y < h; ++y) { |
50 int x_q4 = x0_q4; | 50 int x_q4 = x0_q4; |
51 for (x = 0; x < w; ++x) { | 51 for (x = 0; x < w; ++x) { |
52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
54 int k, sum = 0; | 54 int k, sum = 0; |
55 for (k = 0; k < SUBPEL_TAPS; ++k) | 55 for (k = 0; k < SUBPEL_TAPS; ++k) |
56 sum += src_x[k] * x_filter[k]; | 56 sum += src_x[k] * x_filter[k]; |
57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + | 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); | 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
59 x_q4 += x_step_q4; | 59 x_q4 += x_step_q4; |
60 } | 60 } |
61 src += src_stride; | 61 src += src_stride; |
62 dst += dst_stride; | 62 dst += dst_stride; |
63 } | 63 } |
64 } | 64 } |
65 | 65 |
66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, | 66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, |
67 uint8_t *dst, ptrdiff_t dst_stride, | 67 uint8_t *dst, ptrdiff_t dst_stride, |
68 const interp_kernel *y_filters, | 68 const InterpKernel *y_filters, |
69 int y0_q4, int y_step_q4, int w, int h) { | 69 int y0_q4, int y_step_q4, int w, int h) { |
70 int x, y; | 70 int x, y; |
71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
72 | 72 |
73 for (x = 0; x < w; ++x) { | 73 for (x = 0; x < w; ++x) { |
74 int y_q4 = y0_q4; | 74 int y_q4 = y0_q4; |
75 for (y = 0; y < h; ++y) { | 75 for (y = 0; y < h; ++y) { |
76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
78 int k, sum = 0; | 78 int k, sum = 0; |
79 for (k = 0; k < SUBPEL_TAPS; ++k) | 79 for (k = 0; k < SUBPEL_TAPS; ++k) |
80 sum += src_y[k * src_stride] * y_filter[k]; | 80 sum += src_y[k * src_stride] * y_filter[k]; |
81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); | 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
82 y_q4 += y_step_q4; | 82 y_q4 += y_step_q4; |
83 } | 83 } |
84 ++src; | 84 ++src; |
85 ++dst; | 85 ++dst; |
86 } | 86 } |
87 } | 87 } |
88 | 88 |
89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, | 89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, |
90 uint8_t *dst, ptrdiff_t dst_stride, | 90 uint8_t *dst, ptrdiff_t dst_stride, |
91 const interp_kernel *y_filters, | 91 const InterpKernel *y_filters, |
92 int y0_q4, int y_step_q4, int w, int h) { | 92 int y0_q4, int y_step_q4, int w, int h) { |
93 int x, y; | 93 int x, y; |
94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
95 | 95 |
96 for (x = 0; x < w; ++x) { | 96 for (x = 0; x < w; ++x) { |
97 int y_q4 = y0_q4; | 97 int y_q4 = y0_q4; |
98 for (y = 0; y < h; ++y) { | 98 for (y = 0; y < h; ++y) { |
99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
101 int k, sum = 0; | 101 int k, sum = 0; |
102 for (k = 0; k < SUBPEL_TAPS; ++k) | 102 for (k = 0; k < SUBPEL_TAPS; ++k) |
103 sum += src_y[k * src_stride] * y_filter[k]; | 103 sum += src_y[k * src_stride] * y_filter[k]; |
104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + | 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); | 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
106 y_q4 += y_step_q4; | 106 y_q4 += y_step_q4; |
107 } | 107 } |
108 ++src; | 108 ++src; |
109 ++dst; | 109 ++dst; |
110 } | 110 } |
111 } | 111 } |
112 | 112 |
113 static void convolve(const uint8_t *src, ptrdiff_t src_stride, | 113 static void convolve(const uint8_t *src, ptrdiff_t src_stride, |
114 uint8_t *dst, ptrdiff_t dst_stride, | 114 uint8_t *dst, ptrdiff_t dst_stride, |
115 const interp_kernel *const x_filters, | 115 const InterpKernel *const x_filters, |
116 int x0_q4, int x_step_q4, | 116 int x0_q4, int x_step_q4, |
117 const interp_kernel *const y_filters, | 117 const InterpKernel *const y_filters, |
118 int y0_q4, int y_step_q4, | 118 int y0_q4, int y_step_q4, |
119 int w, int h) { | 119 int w, int h) { |
120 // Fixed size intermediate buffer places limits on parameters. | 120 // Fixed size intermediate buffer places limits on parameters. |
121 // Maximum intermediate_height is 324, for y_step_q4 == 80, | 121 // Maximum intermediate_height is 324, for y_step_q4 == 80, |
122 // h == 64, taps == 8. | 122 // h == 64, taps == 8. |
123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc | 123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc |
124 uint8_t temp[64 * 324]; | 124 uint8_t temp[64 * 324]; |
125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; |
126 | 126 |
127 assert(w <= 64); | 127 assert(w <= 64); |
128 assert(h <= 64); | 128 assert(h <= 64); |
129 assert(y_step_q4 <= 80); | 129 assert(y_step_q4 <= 80); |
130 assert(x_step_q4 <= 80); | 130 assert(x_step_q4 <= 80); |
131 | 131 |
132 if (intermediate_height < h) | 132 if (intermediate_height < h) |
133 intermediate_height = h; | 133 intermediate_height = h; |
134 | 134 |
135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, | 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, |
136 x_filters, x0_q4, x_step_q4, w, intermediate_height); | 136 x_filters, x0_q4, x_step_q4, w, intermediate_height); |
137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, | 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, |
138 y_filters, y0_q4, y_step_q4, w, h); | 138 y_filters, y0_q4, y_step_q4, w, h); |
139 } | 139 } |
140 | 140 |
141 static const interp_kernel *get_filter_base(const int16_t *filter) { | 141 static const InterpKernel *get_filter_base(const int16_t *filter) { |
142 // NOTE: This assumes that the filter table is 256-byte aligned. | 142 // NOTE: This assumes that the filter table is 256-byte aligned. |
143 // TODO(agrange) Modify to make independent of table alignment. | 143 // TODO(agrange) Modify to make independent of table alignment. |
144 return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); | 144 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); |
145 } | 145 } |
146 | 146 |
147 static int get_filter_offset(const int16_t *f, const interp_kernel *base) { | 147 static int get_filter_offset(const int16_t *f, const InterpKernel *base) { |
148 return (const interp_kernel *)(intptr_t)f - base; | 148 return (int)((const InterpKernel *)(intptr_t)f - base); |
149 } | 149 } |
150 | 150 |
151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
152 uint8_t *dst, ptrdiff_t dst_stride, | 152 uint8_t *dst, ptrdiff_t dst_stride, |
153 const int16_t *filter_x, int x_step_q4, | 153 const int16_t *filter_x, int x_step_q4, |
154 const int16_t *filter_y, int y_step_q4, | 154 const int16_t *filter_y, int y_step_q4, |
155 int w, int h) { | 155 int w, int h) { |
156 const interp_kernel *const filters_x = get_filter_base(filter_x); | 156 const InterpKernel *const filters_x = get_filter_base(filter_x); |
157 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 157 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
158 | 158 |
159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, | 159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, |
160 x0_q4, x_step_q4, w, h); | 160 x0_q4, x_step_q4, w, h); |
161 } | 161 } |
162 | 162 |
163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
164 uint8_t *dst, ptrdiff_t dst_stride, | 164 uint8_t *dst, ptrdiff_t dst_stride, |
165 const int16_t *filter_x, int x_step_q4, | 165 const int16_t *filter_x, int x_step_q4, |
166 const int16_t *filter_y, int y_step_q4, | 166 const int16_t *filter_y, int y_step_q4, |
167 int w, int h) { | 167 int w, int h) { |
168 const interp_kernel *const filters_x = get_filter_base(filter_x); | 168 const InterpKernel *const filters_x = get_filter_base(filter_x); |
169 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 169 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
170 | 170 |
171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, | 171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, |
172 x0_q4, x_step_q4, w, h); | 172 x0_q4, x_step_q4, w, h); |
173 } | 173 } |
174 | 174 |
175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
176 uint8_t *dst, ptrdiff_t dst_stride, | 176 uint8_t *dst, ptrdiff_t dst_stride, |
177 const int16_t *filter_x, int x_step_q4, | 177 const int16_t *filter_x, int x_step_q4, |
178 const int16_t *filter_y, int y_step_q4, | 178 const int16_t *filter_y, int y_step_q4, |
179 int w, int h) { | 179 int w, int h) { |
180 const interp_kernel *const filters_y = get_filter_base(filter_y); | 180 const InterpKernel *const filters_y = get_filter_base(filter_y); |
181 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 181 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, | 182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, |
183 y0_q4, y_step_q4, w, h); | 183 y0_q4, y_step_q4, w, h); |
184 } | 184 } |
185 | 185 |
186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
187 uint8_t *dst, ptrdiff_t dst_stride, | 187 uint8_t *dst, ptrdiff_t dst_stride, |
188 const int16_t *filter_x, int x_step_q4, | 188 const int16_t *filter_x, int x_step_q4, |
189 const int16_t *filter_y, int y_step_q4, | 189 const int16_t *filter_y, int y_step_q4, |
190 int w, int h) { | 190 int w, int h) { |
191 const interp_kernel *const filters_y = get_filter_base(filter_y); | 191 const InterpKernel *const filters_y = get_filter_base(filter_y); |
192 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 192 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, | 193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, |
194 y0_q4, y_step_q4, w, h); | 194 y0_q4, y_step_q4, w, h); |
195 } | 195 } |
196 | 196 |
197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, | 197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
198 uint8_t *dst, ptrdiff_t dst_stride, | 198 uint8_t *dst, ptrdiff_t dst_stride, |
199 const int16_t *filter_x, int x_step_q4, | 199 const int16_t *filter_x, int x_step_q4, |
200 const int16_t *filter_y, int y_step_q4, | 200 const int16_t *filter_y, int y_step_q4, |
201 int w, int h) { | 201 int w, int h) { |
202 const interp_kernel *const filters_x = get_filter_base(filter_x); | 202 const InterpKernel *const filters_x = get_filter_base(filter_x); |
203 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 203 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
204 | 204 |
205 const interp_kernel *const filters_y = get_filter_base(filter_y); | 205 const InterpKernel *const filters_y = get_filter_base(filter_y); |
206 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 206 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
207 | 207 |
208 convolve(src, src_stride, dst, dst_stride, | 208 convolve(src, src_stride, dst, dst_stride, |
209 filters_x, x0_q4, x_step_q4, | 209 filters_x, x0_q4, x_step_q4, |
210 filters_y, y0_q4, y_step_q4, w, h); | 210 filters_y, y0_q4, y_step_q4, w, h); |
211 } | 211 } |
212 | 212 |
213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
214 uint8_t *dst, ptrdiff_t dst_stride, | 214 uint8_t *dst, ptrdiff_t dst_stride, |
215 const int16_t *filter_x, int x_step_q4, | 215 const int16_t *filter_x, int x_step_q4, |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
247 int x, y; | 247 int x, y; |
248 | 248 |
249 for (y = 0; y < h; ++y) { | 249 for (y = 0; y < h; ++y) { |
250 for (x = 0; x < w; ++x) | 250 for (x = 0; x < w; ++x) |
251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
252 | 252 |
253 src += src_stride; | 253 src += src_stride; |
254 dst += dst_stride; | 254 dst += dst_stride; |
255 } | 255 } |
256 } | 256 } |
OLD | NEW |