| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 #include "vp9/common/vp9_convolve.h" | 10 #include "vp9/common/vp9_convolve.h" |
| 11 | 11 |
| 12 #include <assert.h> | 12 #include <assert.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_filter.h" |
| 17 #include "vpx/vpx_integer.h" | 18 #include "vpx/vpx_integer.h" |
| 18 #include "vpx_ports/mem.h" | 19 #include "vpx_ports/mem.h" |
| 19 | 20 |
| 20 #define VP9_FILTER_WEIGHT 128 | |
| 21 #define VP9_FILTER_SHIFT 7 | |
| 22 | |
| 23 /* Assume a bank of 16 filters to choose from. There are two implementations | |
| 24 * for filter wrapping behavior, since we want to be able to pick which filter | |
| 25 * to start with. We could either: | |
| 26 * | |
| 27 * 1) make filter_ a pointer to the base of the filter array, and then add an | |
| 28 * additional offset parameter, to choose the starting filter. | |
| 29 * 2) use a pointer to 2 periods worth of filters, so that even if the original | |
| 30 * phase offset is at 15/16, we'll have valid data to read. The filter | |
| 31 * tables become [32][8], and the second half is duplicated. | |
| 32 * 3) fix the alignment of the filter tables, so that we know the 0/16 is | |
| 33 * always 256 byte aligned. | |
| 34 * | |
| 35 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2 | |
| 36 * parameters, and switching between them is trivial, with the | |
| 37 * ALIGN_FILTERS_256 macro, below. | |
| 38 */ | |
| 39 #define ALIGN_FILTERS_256 1 | |
| 40 | |
| 41 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 42 uint8_t *dst, ptrdiff_t dst_stride, | 22 uint8_t *dst, ptrdiff_t dst_stride, |
| 43 const int16_t *filter_x0, int x_step_q4, | 23 const int16_t *filter_x0, int x_step_q4, |
| 44 const int16_t *filter_y, int y_step_q4, | 24 const int16_t *filter_y, int y_step_q4, |
| 45 int w, int h, int taps) { | 25 int w, int h, int taps) { |
| 46 int x, y, k, sum; | 26 int x, y, k; |
| 47 const int16_t *filter_x_base = filter_x0; | |
| 48 | 27 |
| 49 #if ALIGN_FILTERS_256 | 28 /* NOTE: This assumes that the filter table is 256-byte aligned. */ |
| 50 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); | 29 /* TODO(agrange) Modify to make independent of table alignment. */ |
| 51 #endif | 30 const int16_t *const filter_x_base = |
| 31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); |
| 52 | 32 |
| 53 /* Adjust base pointer address for this source line */ | 33 /* Adjust base pointer address for this source line */ |
| 54 src -= taps / 2 - 1; | 34 src -= taps / 2 - 1; |
| 55 | 35 |
| 56 for (y = 0; y < h; ++y) { | 36 for (y = 0; y < h; ++y) { |
| 57 /* Pointer to filter to use */ | |
| 58 const int16_t *filter_x = filter_x0; | |
| 59 | |
| 60 /* Initial phase offset */ | 37 /* Initial phase offset */ |
| 61 int x0_q4 = (filter_x - filter_x_base) / taps; | 38 int x_q4 = (filter_x0 - filter_x_base) / taps; |
| 62 int x_q4 = x0_q4; | |
| 63 | 39 |
| 64 for (x = 0; x < w; ++x) { | 40 for (x = 0; x < w; ++x) { |
| 65 /* Per-pixel src offset */ | 41 /* Per-pixel src offset */ |
| 66 int src_x = (x_q4 - x0_q4) >> 4; | 42 const int src_x = x_q4 >> SUBPEL_BITS; |
| 43 int sum = 0; |
| 67 | 44 |
| 68 for (sum = 0, k = 0; k < taps; ++k) { | 45 /* Pointer to filter to use */ |
| 46 const int16_t *const filter_x = filter_x_base + |
| 47 (x_q4 & SUBPEL_MASK) * taps; |
| 48 |
| 49 for (k = 0; k < taps; ++k) |
| 69 sum += src[src_x + k] * filter_x[k]; | 50 sum += src[src_x + k] * filter_x[k]; |
| 70 } | |
| 71 sum += (VP9_FILTER_WEIGHT >> 1); | |
| 72 dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT); | |
| 73 | 51 |
| 74 /* Adjust source and filter to use for the next pixel */ | 52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
| 53 |
| 54 /* Move to the next source pixel */ |
| 75 x_q4 += x_step_q4; | 55 x_q4 += x_step_q4; |
| 76 filter_x = filter_x_base + (x_q4 & 0xf) * taps; | |
| 77 } | 56 } |
| 78 src += src_stride; | 57 src += src_stride; |
| 79 dst += dst_stride; | 58 dst += dst_stride; |
| 80 } | 59 } |
| 81 } | 60 } |
| 82 | 61 |
| 83 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 84 uint8_t *dst, ptrdiff_t dst_stride, | 63 uint8_t *dst, ptrdiff_t dst_stride, |
| 85 const int16_t *filter_x0, int x_step_q4, | 64 const int16_t *filter_x0, int x_step_q4, |
| 86 const int16_t *filter_y, int y_step_q4, | 65 const int16_t *filter_y, int y_step_q4, |
| 87 int w, int h, int taps) { | 66 int w, int h, int taps) { |
| 88 int x, y, k, sum; | 67 int x, y, k; |
| 89 const int16_t *filter_x_base = filter_x0; | |
| 90 | 68 |
| 91 #if ALIGN_FILTERS_256 | 69 /* NOTE: This assumes that the filter table is 256-byte aligned. */ |
| 92 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); | 70 /* TODO(agrange) Modify to make independent of table alignment. */ |
| 93 #endif | 71 const int16_t *const filter_x_base = |
| 72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff); |
| 94 | 73 |
| 95 /* Adjust base pointer address for this source line */ | 74 /* Adjust base pointer address for this source line */ |
| 96 src -= taps / 2 - 1; | 75 src -= taps / 2 - 1; |
| 97 | 76 |
| 98 for (y = 0; y < h; ++y) { | 77 for (y = 0; y < h; ++y) { |
| 99 /* Pointer to filter to use */ | |
| 100 const int16_t *filter_x = filter_x0; | |
| 101 | |
| 102 /* Initial phase offset */ | 78 /* Initial phase offset */ |
| 103 int x0_q4 = (filter_x - filter_x_base) / taps; | 79 int x_q4 = (filter_x0 - filter_x_base) / taps; |
| 104 int x_q4 = x0_q4; | |
| 105 | 80 |
| 106 for (x = 0; x < w; ++x) { | 81 for (x = 0; x < w; ++x) { |
| 107 /* Per-pixel src offset */ | 82 /* Per-pixel src offset */ |
| 108 int src_x = (x_q4 - x0_q4) >> 4; | 83 const int src_x = x_q4 >> SUBPEL_BITS; |
| 84 int sum = 0; |
| 109 | 85 |
| 110 for (sum = 0, k = 0; k < taps; ++k) { | 86 /* Pointer to filter to use */ |
| 87 const int16_t *const filter_x = filter_x_base + |
| 88 (x_q4 & SUBPEL_MASK) * taps; |
| 89 |
| 90 for (k = 0; k < taps; ++k) |
| 111 sum += src[src_x + k] * filter_x[k]; | 91 sum += src[src_x + k] * filter_x[k]; |
| 112 } | |
| 113 sum += (VP9_FILTER_WEIGHT >> 1); | |
| 114 dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1; | |
| 115 | 92 |
| 116 /* Adjust source and filter to use for the next pixel */ | 93 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
| 94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
| 95 |
| 96 /* Move to the next source pixel */ |
| 117 x_q4 += x_step_q4; | 97 x_q4 += x_step_q4; |
| 118 filter_x = filter_x_base + (x_q4 & 0xf) * taps; | |
| 119 } | 98 } |
| 120 src += src_stride; | 99 src += src_stride; |
| 121 dst += dst_stride; | 100 dst += dst_stride; |
| 122 } | 101 } |
| 123 } | 102 } |
| 124 | 103 |
| 125 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 126 uint8_t *dst, ptrdiff_t dst_stride, | 105 uint8_t *dst, ptrdiff_t dst_stride, |
| 127 const int16_t *filter_x, int x_step_q4, | 106 const int16_t *filter_x, int x_step_q4, |
| 128 const int16_t *filter_y0, int y_step_q4, | 107 const int16_t *filter_y0, int y_step_q4, |
| 129 int w, int h, int taps) { | 108 int w, int h, int taps) { |
| 130 int x, y, k, sum; | 109 int x, y, k; |
| 131 | 110 |
| 132 const int16_t *filter_y_base = filter_y0; | 111 /* NOTE: This assumes that the filter table is 256-byte aligned. */ |
| 133 | 112 /* TODO(agrange) Modify to make independent of table alignment. */ |
| 134 #if ALIGN_FILTERS_256 | 113 const int16_t *const filter_y_base = |
| 135 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); | 114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); |
| 136 #endif | |
| 137 | 115 |
| 138 /* Adjust base pointer address for this source column */ | 116 /* Adjust base pointer address for this source column */ |
| 139 src -= src_stride * (taps / 2 - 1); | 117 src -= src_stride * (taps / 2 - 1); |
| 118 |
| 140 for (x = 0; x < w; ++x) { | 119 for (x = 0; x < w; ++x) { |
| 141 /* Pointer to filter to use */ | |
| 142 const int16_t *filter_y = filter_y0; | |
| 143 | |
| 144 /* Initial phase offset */ | 120 /* Initial phase offset */ |
| 145 int y0_q4 = (filter_y - filter_y_base) / taps; | 121 int y_q4 = (filter_y0 - filter_y_base) / taps; |
| 146 int y_q4 = y0_q4; | |
| 147 | 122 |
| 148 for (y = 0; y < h; ++y) { | 123 for (y = 0; y < h; ++y) { |
| 149 /* Per-pixel src offset */ | 124 /* Per-pixel src offset */ |
| 150 int src_y = (y_q4 - y0_q4) >> 4; | 125 const int src_y = y_q4 >> SUBPEL_BITS; |
| 126 int sum = 0; |
| 151 | 127 |
| 152 for (sum = 0, k = 0; k < taps; ++k) { | 128 /* Pointer to filter to use */ |
| 129 const int16_t *const filter_y = filter_y_base + |
| 130 (y_q4 & SUBPEL_MASK) * taps; |
| 131 |
| 132 for (k = 0; k < taps; ++k) |
| 153 sum += src[(src_y + k) * src_stride] * filter_y[k]; | 133 sum += src[(src_y + k) * src_stride] * filter_y[k]; |
| 154 } | |
| 155 sum += (VP9_FILTER_WEIGHT >> 1); | |
| 156 dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT); | |
| 157 | 134 |
| 158 /* Adjust source and filter to use for the next pixel */ | 135 dst[y * dst_stride] = |
| 136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
| 137 |
| 138 /* Move to the next source pixel */ |
| 159 y_q4 += y_step_q4; | 139 y_q4 += y_step_q4; |
| 160 filter_y = filter_y_base + (y_q4 & 0xf) * taps; | |
| 161 } | 140 } |
| 162 ++src; | 141 ++src; |
| 163 ++dst; | 142 ++dst; |
| 164 } | 143 } |
| 165 } | 144 } |
| 166 | 145 |
| 167 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 168 uint8_t *dst, ptrdiff_t dst_stride, | 147 uint8_t *dst, ptrdiff_t dst_stride, |
| 169 const int16_t *filter_x, int x_step_q4, | 148 const int16_t *filter_x, int x_step_q4, |
| 170 const int16_t *filter_y0, int y_step_q4, | 149 const int16_t *filter_y0, int y_step_q4, |
| 171 int w, int h, int taps) { | 150 int w, int h, int taps) { |
| 172 int x, y, k, sum; | 151 int x, y, k; |
| 173 | 152 |
| 174 const int16_t *filter_y_base = filter_y0; | 153 /* NOTE: This assumes that the filter table is 256-byte aligned. */ |
| 175 | 154 /* TODO(agrange) Modify to make independent of table alignment. */ |
| 176 #if ALIGN_FILTERS_256 | 155 const int16_t *const filter_y_base = |
| 177 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); | 156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff); |
| 178 #endif | |
| 179 | 157 |
| 180 /* Adjust base pointer address for this source column */ | 158 /* Adjust base pointer address for this source column */ |
| 181 src -= src_stride * (taps / 2 - 1); | 159 src -= src_stride * (taps / 2 - 1); |
| 160 |
| 182 for (x = 0; x < w; ++x) { | 161 for (x = 0; x < w; ++x) { |
| 183 /* Pointer to filter to use */ | |
| 184 const int16_t *filter_y = filter_y0; | |
| 185 | |
| 186 /* Initial phase offset */ | 162 /* Initial phase offset */ |
| 187 int y0_q4 = (filter_y - filter_y_base) / taps; | 163 int y_q4 = (filter_y0 - filter_y_base) / taps; |
| 188 int y_q4 = y0_q4; | |
| 189 | 164 |
| 190 for (y = 0; y < h; ++y) { | 165 for (y = 0; y < h; ++y) { |
| 191 /* Per-pixel src offset */ | 166 /* Per-pixel src offset */ |
| 192 int src_y = (y_q4 - y0_q4) >> 4; | 167 const int src_y = y_q4 >> SUBPEL_BITS; |
| 168 int sum = 0; |
| 193 | 169 |
| 194 for (sum = 0, k = 0; k < taps; ++k) { | 170 /* Pointer to filter to use */ |
| 171 const int16_t *const filter_y = filter_y_base + |
| 172 (y_q4 & SUBPEL_MASK) * taps; |
| 173 |
| 174 for (k = 0; k < taps; ++k) |
| 195 sum += src[(src_y + k) * src_stride] * filter_y[k]; | 175 sum += src[(src_y + k) * src_stride] * filter_y[k]; |
| 196 } | |
| 197 sum += (VP9_FILTER_WEIGHT >> 1); | |
| 198 dst[y * dst_stride] = | |
| 199 (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1; | |
| 200 | 176 |
| 201 /* Adjust source and filter to use for the next pixel */ | 177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
| 178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
| 179 |
| 180 /* Move to the next source pixel */ |
| 202 y_q4 += y_step_q4; | 181 y_q4 += y_step_q4; |
| 203 filter_y = filter_y_base + (y_q4 & 0xf) * taps; | |
| 204 } | 182 } |
| 205 ++src; | 183 ++src; |
| 206 ++dst; | 184 ++dst; |
| 207 } | 185 } |
| 208 } | 186 } |
| 209 | 187 |
| 210 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride, | 188 static void convolve_c(const uint8_t *src, ptrdiff_t src_stride, |
| 211 uint8_t *dst, ptrdiff_t dst_stride, | 189 uint8_t *dst, ptrdiff_t dst_stride, |
| 212 const int16_t *filter_x, int x_step_q4, | 190 const int16_t *filter_x, int x_step_q4, |
| 213 const int16_t *filter_y, int y_step_q4, | 191 const int16_t *filter_y, int y_step_q4, |
| 214 int w, int h, int taps) { | 192 int w, int h, int taps) { |
| 215 /* Fixed size intermediate buffer places limits on parameters. | 193 /* Fixed size intermediate buffer places limits on parameters. |
| 216 * Maximum intermediate_height is 135, for y_step_q4 == 32, | 194 * Maximum intermediate_height is 135, for y_step_q4 == 32, |
| 217 * h == 64, taps == 8. | 195 * h == 64, taps == 8. |
| 218 */ | 196 */ |
| 219 uint8_t temp[64 * 135]; | 197 uint8_t temp[64 * 135]; |
| 220 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1; | 198 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1; |
| 221 | 199 |
| 222 assert(w <= 64); | 200 assert(w <= 64); |
| 223 assert(h <= 64); | 201 assert(h <= 64); |
| 224 assert(taps <= 8); | 202 assert(taps <= 8); |
| 225 assert(y_step_q4 <= 32); | 203 assert(y_step_q4 <= 32); |
| 226 assert(x_step_q4 <= 32); | 204 assert(x_step_q4 <= 32); |
| 227 | 205 |
| 228 if (intermediate_height < h) | 206 if (intermediate_height < h) |
| 229 intermediate_height = h; | 207 intermediate_height = h; |
| 230 | 208 |
| 231 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, | 209 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64, |
| 232 temp, 64, | 210 filter_x, x_step_q4, filter_y, y_step_q4, w, |
| 233 filter_x, x_step_q4, filter_y, y_step_q4, | 211 intermediate_height, taps); |
| 234 w, intermediate_height, taps); | 212 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x, |
| 235 convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, | 213 x_step_q4, filter_y, y_step_q4, w, h, taps); |
| 236 filter_x, x_step_q4, filter_y, y_step_q4, | |
| 237 w, h, taps); | |
| 238 } | |
| 239 | |
| 240 static void convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, | |
| 241 uint8_t *dst, ptrdiff_t dst_stride, | |
| 242 const int16_t *filter_x, int x_step_q4, | |
| 243 const int16_t *filter_y, int y_step_q4, | |
| 244 int w, int h, int taps) { | |
| 245 /* Fixed size intermediate buffer places limits on parameters. | |
| 246 * Maximum intermediate_height is 135, for y_step_q4 == 32, | |
| 247 * h == 64, taps == 8. | |
| 248 */ | |
| 249 uint8_t temp[64 * 135]; | |
| 250 int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1; | |
| 251 | |
| 252 assert(w <= 64); | |
| 253 assert(h <= 64); | |
| 254 assert(taps <= 8); | |
| 255 assert(y_step_q4 <= 32); | |
| 256 assert(x_step_q4 <= 32); | |
| 257 | |
| 258 if (intermediate_height < h) | |
| 259 intermediate_height = h; | |
| 260 | |
| 261 convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, | |
| 262 temp, 64, | |
| 263 filter_x, x_step_q4, filter_y, y_step_q4, | |
| 264 w, intermediate_height, taps); | |
| 265 convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, | |
| 266 filter_x, x_step_q4, filter_y, y_step_q4, | |
| 267 w, h, taps); | |
| 268 } | 214 } |
| 269 | 215 |
| 270 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 216 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 271 uint8_t *dst, ptrdiff_t dst_stride, | 217 uint8_t *dst, ptrdiff_t dst_stride, |
| 272 const int16_t *filter_x, int x_step_q4, | 218 const int16_t *filter_x, int x_step_q4, |
| 273 const int16_t *filter_y, int y_step_q4, | 219 const int16_t *filter_y, int y_step_q4, |
| 274 int w, int h) { | 220 int w, int h) { |
| 275 convolve_horiz_c(src, src_stride, dst, dst_stride, | 221 convolve_horiz_c(src, src_stride, dst, dst_stride, |
| 276 filter_x, x_step_q4, filter_y, y_step_q4, | 222 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); |
| 277 w, h, 8); | |
| 278 } | 223 } |
| 279 | 224 |
| 280 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 225 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 281 uint8_t *dst, ptrdiff_t dst_stride, | 226 uint8_t *dst, ptrdiff_t dst_stride, |
| 282 const int16_t *filter_x, int x_step_q4, | 227 const int16_t *filter_x, int x_step_q4, |
| 283 const int16_t *filter_y, int y_step_q4, | 228 const int16_t *filter_y, int y_step_q4, |
| 284 int w, int h) { | 229 int w, int h) { |
| 285 convolve_avg_horiz_c(src, src_stride, dst, dst_stride, | 230 convolve_avg_horiz_c(src, src_stride, dst, dst_stride, |
| 286 filter_x, x_step_q4, filter_y, y_step_q4, | 231 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); |
| 287 w, h, 8); | |
| 288 } | 232 } |
| 289 | 233 |
| 290 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 234 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 291 uint8_t *dst, ptrdiff_t dst_stride, | 235 uint8_t *dst, ptrdiff_t dst_stride, |
| 292 const int16_t *filter_x, int x_step_q4, | 236 const int16_t *filter_x, int x_step_q4, |
| 293 const int16_t *filter_y, int y_step_q4, | 237 const int16_t *filter_y, int y_step_q4, |
| 294 int w, int h) { | 238 int w, int h) { |
| 295 convolve_vert_c(src, src_stride, dst, dst_stride, | 239 convolve_vert_c(src, src_stride, dst, dst_stride, |
| 296 filter_x, x_step_q4, filter_y, y_step_q4, | 240 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); |
| 297 w, h, 8); | |
| 298 } | 241 } |
| 299 | 242 |
| 300 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 243 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 301 uint8_t *dst, ptrdiff_t dst_stride, | 244 uint8_t *dst, ptrdiff_t dst_stride, |
| 302 const int16_t *filter_x, int x_step_q4, | 245 const int16_t *filter_x, int x_step_q4, |
| 303 const int16_t *filter_y, int y_step_q4, | 246 const int16_t *filter_y, int y_step_q4, |
| 304 int w, int h) { | 247 int w, int h) { |
| 305 convolve_avg_vert_c(src, src_stride, dst, dst_stride, | 248 convolve_avg_vert_c(src, src_stride, dst, dst_stride, |
| 306 filter_x, x_step_q4, filter_y, y_step_q4, | 249 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); |
| 307 w, h, 8); | |
| 308 } | 250 } |
| 309 | 251 |
| 310 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, | 252 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
| 311 uint8_t *dst, ptrdiff_t dst_stride, | 253 uint8_t *dst, ptrdiff_t dst_stride, |
| 312 const int16_t *filter_x, int x_step_q4, | 254 const int16_t *filter_x, int x_step_q4, |
| 313 const int16_t *filter_y, int y_step_q4, | 255 const int16_t *filter_y, int y_step_q4, |
| 314 int w, int h) { | 256 int w, int h) { |
| 315 convolve_c(src, src_stride, dst, dst_stride, | 257 convolve_c(src, src_stride, dst, dst_stride, |
| 316 filter_x, x_step_q4, filter_y, y_step_q4, | 258 filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8); |
| 317 w, h, 8); | |
| 318 } | 259 } |
| 319 | 260 |
| 320 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 261 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
| 321 uint8_t *dst, ptrdiff_t dst_stride, | 262 uint8_t *dst, ptrdiff_t dst_stride, |
| 322 const int16_t *filter_x, int x_step_q4, | 263 const int16_t *filter_x, int x_step_q4, |
| 323 const int16_t *filter_y, int y_step_q4, | 264 const int16_t *filter_y, int y_step_q4, |
| 324 int w, int h) { | 265 int w, int h) { |
| 325 /* Fixed size intermediate buffer places limits on parameters. */ | 266 /* Fixed size intermediate buffer places limits on parameters. */ |
| 326 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); | 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64); |
| 327 assert(w <= 64); | 268 assert(w <= 64); |
| 328 assert(h <= 64); | 269 assert(h <= 64); |
| 329 | 270 |
| 330 vp9_convolve8(src, src_stride, | 271 vp9_convolve8(src, src_stride, temp, 64, |
| 331 temp, 64, | 272 filter_x, x_step_q4, filter_y, y_step_q4, w, h); |
| 332 filter_x, x_step_q4, | 273 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); |
| 333 filter_y, y_step_q4, | |
| 334 w, h); | |
| 335 vp9_convolve_avg(temp, 64, | |
| 336 dst, dst_stride, | |
| 337 NULL, 0, /* These unused parameter should be removed! */ | |
| 338 NULL, 0, /* These unused parameter should be removed! */ | |
| 339 w, h); | |
| 340 } | 274 } |
| 341 | 275 |
| 342 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, | 276 void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, |
| 343 uint8_t *dst, ptrdiff_t dst_stride, | 277 uint8_t *dst, ptrdiff_t dst_stride, |
| 344 const int16_t *filter_x, int filter_x_stride, | 278 const int16_t *filter_x, int filter_x_stride, |
| 345 const int16_t *filter_y, int filter_y_stride, | 279 const int16_t *filter_y, int filter_y_stride, |
| 346 int w, int h) { | 280 int w, int h) { |
| 347 int r; | 281 int r; |
| 348 | 282 |
| 349 for (r = h; r > 0; --r) { | 283 for (r = h; r > 0; --r) { |
| 350 memcpy(dst, src, w); | 284 memcpy(dst, src, w); |
| 351 src += src_stride; | 285 src += src_stride; |
| 352 dst += dst_stride; | 286 dst += dst_stride; |
| 353 } | 287 } |
| 354 } | 288 } |
| 355 | 289 |
| 356 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 290 void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
| 357 uint8_t *dst, ptrdiff_t dst_stride, | 291 uint8_t *dst, ptrdiff_t dst_stride, |
| 358 const int16_t *filter_x, int filter_x_stride, | 292 const int16_t *filter_x, int filter_x_stride, |
| 359 const int16_t *filter_y, int filter_y_stride, | 293 const int16_t *filter_y, int filter_y_stride, |
| 360 int w, int h) { | 294 int w, int h) { |
| 361 int x, y; | 295 int x, y; |
| 362 | 296 |
| 363 for (y = 0; y < h; ++y) { | 297 for (y = 0; y < h; ++y) { |
| 364 for (x = 0; x < w; ++x) { | 298 for (x = 0; x < w; ++x) |
| 365 dst[x] = (dst[x] + src[x] + 1) >> 1; | 299 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
| 366 } | 300 |
| 367 src += src_stride; | 301 src += src_stride; |
| 368 dst += dst_stride; | 302 dst += dst_stride; |
| 369 } | 303 } |
| 370 } | 304 } |
| OLD | NEW |