Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(292)

Side by Side Diff: source/libvpx/vp9/common/vp9_convolve.c

Issue 168343002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/common/vp9_common.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include <assert.h>
12 12
13 #include "./vpx_config.h" 13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h" 14 #include "./vp9_rtcd.h"
15 #include "vp9/common/vp9_common.h" 15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_convolve.h" 16 #include "vp9/common/vp9_convolve.h"
17 #include "vp9/common/vp9_filter.h" 17 #include "vp9/common/vp9_filter.h"
18 #include "vpx/vpx_integer.h" 18 #include "vpx/vpx_integer.h"
19 #include "vpx_ports/mem.h" 19 #include "vpx_ports/mem.h"
20 20
21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, 21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
22 uint8_t *dst, ptrdiff_t dst_stride, 22 uint8_t *dst, ptrdiff_t dst_stride,
23 const interp_kernel *x_filters, 23 const InterpKernel *x_filters,
24 int x0_q4, int x_step_q4, int w, int h) { 24 int x0_q4, int x_step_q4, int w, int h) {
25 int x, y; 25 int x, y;
26 src -= SUBPEL_TAPS / 2 - 1; 26 src -= SUBPEL_TAPS / 2 - 1;
27 for (y = 0; y < h; ++y) { 27 for (y = 0; y < h; ++y) {
28 int x_q4 = x0_q4; 28 int x_q4 = x0_q4;
29 for (x = 0; x < w; ++x) { 29 for (x = 0; x < w; ++x) {
30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
32 int k, sum = 0; 32 int k, sum = 0;
33 for (k = 0; k < SUBPEL_TAPS; ++k) 33 for (k = 0; k < SUBPEL_TAPS; ++k)
34 sum += src_x[k] * x_filter[k]; 34 sum += src_x[k] * x_filter[k];
35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
36 x_q4 += x_step_q4; 36 x_q4 += x_step_q4;
37 } 37 }
38 src += src_stride; 38 src += src_stride;
39 dst += dst_stride; 39 dst += dst_stride;
40 } 40 }
41 } 41 }
42 42
43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, 43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
44 uint8_t *dst, ptrdiff_t dst_stride, 44 uint8_t *dst, ptrdiff_t dst_stride,
45 const interp_kernel *x_filters, 45 const InterpKernel *x_filters,
46 int x0_q4, int x_step_q4, int w, int h) { 46 int x0_q4, int x_step_q4, int w, int h) {
47 int x, y; 47 int x, y;
48 src -= SUBPEL_TAPS / 2 - 1; 48 src -= SUBPEL_TAPS / 2 - 1;
49 for (y = 0; y < h; ++y) { 49 for (y = 0; y < h; ++y) {
50 int x_q4 = x0_q4; 50 int x_q4 = x0_q4;
51 for (x = 0; x < w; ++x) { 51 for (x = 0; x < w; ++x) {
52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
54 int k, sum = 0; 54 int k, sum = 0;
55 for (k = 0; k < SUBPEL_TAPS; ++k) 55 for (k = 0; k < SUBPEL_TAPS; ++k)
56 sum += src_x[k] * x_filter[k]; 56 sum += src_x[k] * x_filter[k];
57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] +
58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
59 x_q4 += x_step_q4; 59 x_q4 += x_step_q4;
60 } 60 }
61 src += src_stride; 61 src += src_stride;
62 dst += dst_stride; 62 dst += dst_stride;
63 } 63 }
64 } 64 }
65 65
66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, 66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
67 uint8_t *dst, ptrdiff_t dst_stride, 67 uint8_t *dst, ptrdiff_t dst_stride,
68 const interp_kernel *y_filters, 68 const InterpKernel *y_filters,
69 int y0_q4, int y_step_q4, int w, int h) { 69 int y0_q4, int y_step_q4, int w, int h) {
70 int x, y; 70 int x, y;
71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
72 72
73 for (x = 0; x < w; ++x) { 73 for (x = 0; x < w; ++x) {
74 int y_q4 = y0_q4; 74 int y_q4 = y0_q4;
75 for (y = 0; y < h; ++y) { 75 for (y = 0; y < h; ++y) {
76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
78 int k, sum = 0; 78 int k, sum = 0;
79 for (k = 0; k < SUBPEL_TAPS; ++k) 79 for (k = 0; k < SUBPEL_TAPS; ++k)
80 sum += src_y[k * src_stride] * y_filter[k]; 80 sum += src_y[k * src_stride] * y_filter[k];
81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
82 y_q4 += y_step_q4; 82 y_q4 += y_step_q4;
83 } 83 }
84 ++src; 84 ++src;
85 ++dst; 85 ++dst;
86 } 86 }
87 } 87 }
88 88
89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, 89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
90 uint8_t *dst, ptrdiff_t dst_stride, 90 uint8_t *dst, ptrdiff_t dst_stride,
91 const interp_kernel *y_filters, 91 const InterpKernel *y_filters,
92 int y0_q4, int y_step_q4, int w, int h) { 92 int y0_q4, int y_step_q4, int w, int h) {
93 int x, y; 93 int x, y;
94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
95 95
96 for (x = 0; x < w; ++x) { 96 for (x = 0; x < w; ++x) {
97 int y_q4 = y0_q4; 97 int y_q4 = y0_q4;
98 for (y = 0; y < h; ++y) { 98 for (y = 0; y < h; ++y) {
99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
101 int k, sum = 0; 101 int k, sum = 0;
102 for (k = 0; k < SUBPEL_TAPS; ++k) 102 for (k = 0; k < SUBPEL_TAPS; ++k)
103 sum += src_y[k * src_stride] * y_filter[k]; 103 sum += src_y[k * src_stride] * y_filter[k];
104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
106 y_q4 += y_step_q4; 106 y_q4 += y_step_q4;
107 } 107 }
108 ++src; 108 ++src;
109 ++dst; 109 ++dst;
110 } 110 }
111 } 111 }
112 112
113 static void convolve(const uint8_t *src, ptrdiff_t src_stride, 113 static void convolve(const uint8_t *src, ptrdiff_t src_stride,
114 uint8_t *dst, ptrdiff_t dst_stride, 114 uint8_t *dst, ptrdiff_t dst_stride,
115 const interp_kernel *const x_filters, 115 const InterpKernel *const x_filters,
116 int x0_q4, int x_step_q4, 116 int x0_q4, int x_step_q4,
117 const interp_kernel *const y_filters, 117 const InterpKernel *const y_filters,
118 int y0_q4, int y_step_q4, 118 int y0_q4, int y_step_q4,
119 int w, int h) { 119 int w, int h) {
120 // Fixed size intermediate buffer places limits on parameters. 120 // Fixed size intermediate buffer places limits on parameters.
121 // Maximum intermediate_height is 324, for y_step_q4 == 80, 121 // Maximum intermediate_height is 324, for y_step_q4 == 80,
122 // h == 64, taps == 8. 122 // h == 64, taps == 8.
123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc 123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
124 uint8_t temp[64 * 324]; 124 uint8_t temp[64 * 324];
125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
126 126
127 assert(w <= 64); 127 assert(w <= 64);
128 assert(h <= 64); 128 assert(h <= 64);
129 assert(y_step_q4 <= 80); 129 assert(y_step_q4 <= 80);
130 assert(x_step_q4 <= 80); 130 assert(x_step_q4 <= 80);
131 131
132 if (intermediate_height < h) 132 if (intermediate_height < h)
133 intermediate_height = h; 133 intermediate_height = h;
134 134
135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
136 x_filters, x0_q4, x_step_q4, w, intermediate_height); 136 x_filters, x0_q4, x_step_q4, w, intermediate_height);
137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
138 y_filters, y0_q4, y_step_q4, w, h); 138 y_filters, y0_q4, y_step_q4, w, h);
139 } 139 }
140 140
141 static const interp_kernel *get_filter_base(const int16_t *filter) { 141 static const InterpKernel *get_filter_base(const int16_t *filter) {
142 // NOTE: This assumes that the filter table is 256-byte aligned. 142 // NOTE: This assumes that the filter table is 256-byte aligned.
143 // TODO(agrange) Modify to make independent of table alignment. 143 // TODO(agrange) Modify to make independent of table alignment.
144 return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); 144 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
145 } 145 }
146 146
147 static int get_filter_offset(const int16_t *f, const interp_kernel *base) { 147 static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
148 return (const interp_kernel *)(intptr_t)f - base; 148 return (int)((const InterpKernel *)(intptr_t)f - base);
149 } 149 }
150 150
151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
152 uint8_t *dst, ptrdiff_t dst_stride, 152 uint8_t *dst, ptrdiff_t dst_stride,
153 const int16_t *filter_x, int x_step_q4, 153 const int16_t *filter_x, int x_step_q4,
154 const int16_t *filter_y, int y_step_q4, 154 const int16_t *filter_y, int y_step_q4,
155 int w, int h) { 155 int w, int h) {
156 const interp_kernel *const filters_x = get_filter_base(filter_x); 156 const InterpKernel *const filters_x = get_filter_base(filter_x);
157 const int x0_q4 = get_filter_offset(filter_x, filters_x); 157 const int x0_q4 = get_filter_offset(filter_x, filters_x);
158 158
159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, 159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
160 x0_q4, x_step_q4, w, h); 160 x0_q4, x_step_q4, w, h);
161 } 161 }
162 162
163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, 163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
164 uint8_t *dst, ptrdiff_t dst_stride, 164 uint8_t *dst, ptrdiff_t dst_stride,
165 const int16_t *filter_x, int x_step_q4, 165 const int16_t *filter_x, int x_step_q4,
166 const int16_t *filter_y, int y_step_q4, 166 const int16_t *filter_y, int y_step_q4,
167 int w, int h) { 167 int w, int h) {
168 const interp_kernel *const filters_x = get_filter_base(filter_x); 168 const InterpKernel *const filters_x = get_filter_base(filter_x);
169 const int x0_q4 = get_filter_offset(filter_x, filters_x); 169 const int x0_q4 = get_filter_offset(filter_x, filters_x);
170 170
171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, 171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
172 x0_q4, x_step_q4, w, h); 172 x0_q4, x_step_q4, w, h);
173 } 173 }
174 174
175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, 175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
176 uint8_t *dst, ptrdiff_t dst_stride, 176 uint8_t *dst, ptrdiff_t dst_stride,
177 const int16_t *filter_x, int x_step_q4, 177 const int16_t *filter_x, int x_step_q4,
178 const int16_t *filter_y, int y_step_q4, 178 const int16_t *filter_y, int y_step_q4,
179 int w, int h) { 179 int w, int h) {
180 const interp_kernel *const filters_y = get_filter_base(filter_y); 180 const InterpKernel *const filters_y = get_filter_base(filter_y);
181 const int y0_q4 = get_filter_offset(filter_y, filters_y); 181 const int y0_q4 = get_filter_offset(filter_y, filters_y);
182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, 182 convolve_vert(src, src_stride, dst, dst_stride, filters_y,
183 y0_q4, y_step_q4, w, h); 183 y0_q4, y_step_q4, w, h);
184 } 184 }
185 185
186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, 186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
187 uint8_t *dst, ptrdiff_t dst_stride, 187 uint8_t *dst, ptrdiff_t dst_stride,
188 const int16_t *filter_x, int x_step_q4, 188 const int16_t *filter_x, int x_step_q4,
189 const int16_t *filter_y, int y_step_q4, 189 const int16_t *filter_y, int y_step_q4,
190 int w, int h) { 190 int w, int h) {
191 const interp_kernel *const filters_y = get_filter_base(filter_y); 191 const InterpKernel *const filters_y = get_filter_base(filter_y);
192 const int y0_q4 = get_filter_offset(filter_y, filters_y); 192 const int y0_q4 = get_filter_offset(filter_y, filters_y);
193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, 193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
194 y0_q4, y_step_q4, w, h); 194 y0_q4, y_step_q4, w, h);
195 } 195 }
196 196
197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, 197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
198 uint8_t *dst, ptrdiff_t dst_stride, 198 uint8_t *dst, ptrdiff_t dst_stride,
199 const int16_t *filter_x, int x_step_q4, 199 const int16_t *filter_x, int x_step_q4,
200 const int16_t *filter_y, int y_step_q4, 200 const int16_t *filter_y, int y_step_q4,
201 int w, int h) { 201 int w, int h) {
202 const interp_kernel *const filters_x = get_filter_base(filter_x); 202 const InterpKernel *const filters_x = get_filter_base(filter_x);
203 const int x0_q4 = get_filter_offset(filter_x, filters_x); 203 const int x0_q4 = get_filter_offset(filter_x, filters_x);
204 204
205 const interp_kernel *const filters_y = get_filter_base(filter_y); 205 const InterpKernel *const filters_y = get_filter_base(filter_y);
206 const int y0_q4 = get_filter_offset(filter_y, filters_y); 206 const int y0_q4 = get_filter_offset(filter_y, filters_y);
207 207
208 convolve(src, src_stride, dst, dst_stride, 208 convolve(src, src_stride, dst, dst_stride,
209 filters_x, x0_q4, x_step_q4, 209 filters_x, x0_q4, x_step_q4,
210 filters_y, y0_q4, y_step_q4, w, h); 210 filters_y, y0_q4, y_step_q4, w, h);
211 } 211 }
212 212
213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, 213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
214 uint8_t *dst, ptrdiff_t dst_stride, 214 uint8_t *dst, ptrdiff_t dst_stride,
215 const int16_t *filter_x, int x_step_q4, 215 const int16_t *filter_x, int x_step_q4,
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
247 int x, y; 247 int x, y;
248 248
249 for (y = 0; y < h; ++y) { 249 for (y = 0; y < h; ++y) {
250 for (x = 0; x < w; ++x) 250 for (x = 0; x < w; ++x)
251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
252 252
253 src += src_stride; 253 src += src_stride;
254 dst += dst_stride; 254 dst += dst_stride;
255 } 255 }
256 } 256 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_common.h ('k') | source/libvpx/vp9/common/vp9_debugmodes.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698