| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 | 12 |
| 13 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
| 14 #include "./vp9_rtcd.h" | 14 #include "./vp9_rtcd.h" |
| 15 #include "vp9/common/vp9_common.h" | 15 #include "vp9/common/vp9_common.h" |
| 16 #include "vp9/common/vp9_convolve.h" | 16 #include "vp9/common/vp9_convolve.h" |
| 17 #include "vp9/common/vp9_filter.h" | 17 #include "vp9/common/vp9_filter.h" |
| 18 #include "vpx/vpx_integer.h" | 18 #include "vpx/vpx_integer.h" |
| 19 #include "vpx_ports/mem.h" | 19 #include "vpx_ports/mem.h" |
| 20 | 20 |
| 21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, | 21 static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, |
| 22 uint8_t *dst, ptrdiff_t dst_stride, | 22 uint8_t *dst, ptrdiff_t dst_stride, |
| 23 const interp_kernel *x_filters, | 23 const InterpKernel *x_filters, |
| 24 int x0_q4, int x_step_q4, int w, int h) { | 24 int x0_q4, int x_step_q4, int w, int h) { |
| 25 int x, y; | 25 int x, y; |
| 26 src -= SUBPEL_TAPS / 2 - 1; | 26 src -= SUBPEL_TAPS / 2 - 1; |
| 27 for (y = 0; y < h; ++y) { | 27 for (y = 0; y < h; ++y) { |
| 28 int x_q4 = x0_q4; | 28 int x_q4 = x0_q4; |
| 29 for (x = 0; x < w; ++x) { | 29 for (x = 0; x < w; ++x) { |
| 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 30 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 31 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 32 int k, sum = 0; | 32 int k, sum = 0; |
| 33 for (k = 0; k < SUBPEL_TAPS; ++k) | 33 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 34 sum += src_x[k] * x_filter[k]; | 34 sum += src_x[k] * x_filter[k]; |
| 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); | 35 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
| 36 x_q4 += x_step_q4; | 36 x_q4 += x_step_q4; |
| 37 } | 37 } |
| 38 src += src_stride; | 38 src += src_stride; |
| 39 dst += dst_stride; | 39 dst += dst_stride; |
| 40 } | 40 } |
| 41 } | 41 } |
| 42 | 42 |
| 43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, | 43 static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, |
| 44 uint8_t *dst, ptrdiff_t dst_stride, | 44 uint8_t *dst, ptrdiff_t dst_stride, |
| 45 const interp_kernel *x_filters, | 45 const InterpKernel *x_filters, |
| 46 int x0_q4, int x_step_q4, int w, int h) { | 46 int x0_q4, int x_step_q4, int w, int h) { |
| 47 int x, y; | 47 int x, y; |
| 48 src -= SUBPEL_TAPS / 2 - 1; | 48 src -= SUBPEL_TAPS / 2 - 1; |
| 49 for (y = 0; y < h; ++y) { | 49 for (y = 0; y < h; ++y) { |
| 50 int x_q4 = x0_q4; | 50 int x_q4 = x0_q4; |
| 51 for (x = 0; x < w; ++x) { | 51 for (x = 0; x < w; ++x) { |
| 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; | 52 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; | 53 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 54 int k, sum = 0; | 54 int k, sum = 0; |
| 55 for (k = 0; k < SUBPEL_TAPS; ++k) | 55 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 56 sum += src_x[k] * x_filter[k]; | 56 sum += src_x[k] * x_filter[k]; |
| 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + | 57 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
| 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); | 58 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
| 59 x_q4 += x_step_q4; | 59 x_q4 += x_step_q4; |
| 60 } | 60 } |
| 61 src += src_stride; | 61 src += src_stride; |
| 62 dst += dst_stride; | 62 dst += dst_stride; |
| 63 } | 63 } |
| 64 } | 64 } |
| 65 | 65 |
| 66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, | 66 static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, |
| 67 uint8_t *dst, ptrdiff_t dst_stride, | 67 uint8_t *dst, ptrdiff_t dst_stride, |
| 68 const interp_kernel *y_filters, | 68 const InterpKernel *y_filters, |
| 69 int y0_q4, int y_step_q4, int w, int h) { | 69 int y0_q4, int y_step_q4, int w, int h) { |
| 70 int x, y; | 70 int x, y; |
| 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 71 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 72 | 72 |
| 73 for (x = 0; x < w; ++x) { | 73 for (x = 0; x < w; ++x) { |
| 74 int y_q4 = y0_q4; | 74 int y_q4 = y0_q4; |
| 75 for (y = 0; y < h; ++y) { | 75 for (y = 0; y < h; ++y) { |
| 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 76 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 77 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 78 int k, sum = 0; | 78 int k, sum = 0; |
| 79 for (k = 0; k < SUBPEL_TAPS; ++k) | 79 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 80 sum += src_y[k * src_stride] * y_filter[k]; | 80 sum += src_y[k * src_stride] * y_filter[k]; |
| 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); | 81 dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
| 82 y_q4 += y_step_q4; | 82 y_q4 += y_step_q4; |
| 83 } | 83 } |
| 84 ++src; | 84 ++src; |
| 85 ++dst; | 85 ++dst; |
| 86 } | 86 } |
| 87 } | 87 } |
| 88 | 88 |
| 89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, | 89 static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, |
| 90 uint8_t *dst, ptrdiff_t dst_stride, | 90 uint8_t *dst, ptrdiff_t dst_stride, |
| 91 const interp_kernel *y_filters, | 91 const InterpKernel *y_filters, |
| 92 int y0_q4, int y_step_q4, int w, int h) { | 92 int y0_q4, int y_step_q4, int w, int h) { |
| 93 int x, y; | 93 int x, y; |
| 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); | 94 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 95 | 95 |
| 96 for (x = 0; x < w; ++x) { | 96 for (x = 0; x < w; ++x) { |
| 97 int y_q4 = y0_q4; | 97 int y_q4 = y0_q4; |
| 98 for (y = 0; y < h; ++y) { | 98 for (y = 0; y < h; ++y) { |
| 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; | 99 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; | 100 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 101 int k, sum = 0; | 101 int k, sum = 0; |
| 102 for (k = 0; k < SUBPEL_TAPS; ++k) | 102 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 103 sum += src_y[k * src_stride] * y_filter[k]; | 103 sum += src_y[k * src_stride] * y_filter[k]; |
| 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + | 104 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
| 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); | 105 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); |
| 106 y_q4 += y_step_q4; | 106 y_q4 += y_step_q4; |
| 107 } | 107 } |
| 108 ++src; | 108 ++src; |
| 109 ++dst; | 109 ++dst; |
| 110 } | 110 } |
| 111 } | 111 } |
| 112 | 112 |
| 113 static void convolve(const uint8_t *src, ptrdiff_t src_stride, | 113 static void convolve(const uint8_t *src, ptrdiff_t src_stride, |
| 114 uint8_t *dst, ptrdiff_t dst_stride, | 114 uint8_t *dst, ptrdiff_t dst_stride, |
| 115 const interp_kernel *const x_filters, | 115 const InterpKernel *const x_filters, |
| 116 int x0_q4, int x_step_q4, | 116 int x0_q4, int x_step_q4, |
| 117 const interp_kernel *const y_filters, | 117 const InterpKernel *const y_filters, |
| 118 int y0_q4, int y_step_q4, | 118 int y0_q4, int y_step_q4, |
| 119 int w, int h) { | 119 int w, int h) { |
| 120 // Fixed size intermediate buffer places limits on parameters. | 120 // Fixed size intermediate buffer places limits on parameters. |
| 121 // Maximum intermediate_height is 324, for y_step_q4 == 80, | 121 // Maximum intermediate_height is 324, for y_step_q4 == 80, |
| 122 // h == 64, taps == 8. | 122 // h == 64, taps == 8. |
| 123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc | 123 // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc |
| 124 uint8_t temp[64 * 324]; | 124 uint8_t temp[64 * 324]; |
| 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; | 125 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; |
| 126 | 126 |
| 127 assert(w <= 64); | 127 assert(w <= 64); |
| 128 assert(h <= 64); | 128 assert(h <= 64); |
| 129 assert(y_step_q4 <= 80); | 129 assert(y_step_q4 <= 80); |
| 130 assert(x_step_q4 <= 80); | 130 assert(x_step_q4 <= 80); |
| 131 | 131 |
| 132 if (intermediate_height < h) | 132 if (intermediate_height < h) |
| 133 intermediate_height = h; | 133 intermediate_height = h; |
| 134 | 134 |
| 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, | 135 convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, |
| 136 x_filters, x0_q4, x_step_q4, w, intermediate_height); | 136 x_filters, x0_q4, x_step_q4, w, intermediate_height); |
| 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, | 137 convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, |
| 138 y_filters, y0_q4, y_step_q4, w, h); | 138 y_filters, y0_q4, y_step_q4, w, h); |
| 139 } | 139 } |
| 140 | 140 |
| 141 static const interp_kernel *get_filter_base(const int16_t *filter) { | 141 static const InterpKernel *get_filter_base(const int16_t *filter) { |
| 142 // NOTE: This assumes that the filter table is 256-byte aligned. | 142 // NOTE: This assumes that the filter table is 256-byte aligned. |
| 143 // TODO(agrange) Modify to make independent of table alignment. | 143 // TODO(agrange) Modify to make independent of table alignment. |
| 144 return (const interp_kernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); | 144 return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); |
| 145 } | 145 } |
| 146 | 146 |
| 147 static int get_filter_offset(const int16_t *f, const interp_kernel *base) { | 147 static int get_filter_offset(const int16_t *f, const InterpKernel *base) { |
| 148 return (const interp_kernel *)(intptr_t)f - base; | 148 return (int)((const InterpKernel *)(intptr_t)f - base); |
| 149 } | 149 } |
| 150 | 150 |
| 151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 151 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 152 uint8_t *dst, ptrdiff_t dst_stride, | 152 uint8_t *dst, ptrdiff_t dst_stride, |
| 153 const int16_t *filter_x, int x_step_q4, | 153 const int16_t *filter_x, int x_step_q4, |
| 154 const int16_t *filter_y, int y_step_q4, | 154 const int16_t *filter_y, int y_step_q4, |
| 155 int w, int h) { | 155 int w, int h) { |
| 156 const interp_kernel *const filters_x = get_filter_base(filter_x); | 156 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 157 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 157 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 158 | 158 |
| 159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, | 159 convolve_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 160 x0_q4, x_step_q4, w, h); | 160 x0_q4, x_step_q4, w, h); |
| 161 } | 161 } |
| 162 | 162 |
| 163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, | 163 void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 164 uint8_t *dst, ptrdiff_t dst_stride, | 164 uint8_t *dst, ptrdiff_t dst_stride, |
| 165 const int16_t *filter_x, int x_step_q4, | 165 const int16_t *filter_x, int x_step_q4, |
| 166 const int16_t *filter_y, int y_step_q4, | 166 const int16_t *filter_y, int y_step_q4, |
| 167 int w, int h) { | 167 int w, int h) { |
| 168 const interp_kernel *const filters_x = get_filter_base(filter_x); | 168 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 169 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 169 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 170 | 170 |
| 171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, | 171 convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 172 x0_q4, x_step_q4, w, h); | 172 x0_q4, x_step_q4, w, h); |
| 173 } | 173 } |
| 174 | 174 |
| 175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 175 void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 176 uint8_t *dst, ptrdiff_t dst_stride, | 176 uint8_t *dst, ptrdiff_t dst_stride, |
| 177 const int16_t *filter_x, int x_step_q4, | 177 const int16_t *filter_x, int x_step_q4, |
| 178 const int16_t *filter_y, int y_step_q4, | 178 const int16_t *filter_y, int y_step_q4, |
| 179 int w, int h) { | 179 int w, int h) { |
| 180 const interp_kernel *const filters_y = get_filter_base(filter_y); | 180 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 181 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 181 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, | 182 convolve_vert(src, src_stride, dst, dst_stride, filters_y, |
| 183 y0_q4, y_step_q4, w, h); | 183 y0_q4, y_step_q4, w, h); |
| 184 } | 184 } |
| 185 | 185 |
| 186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, | 186 void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 187 uint8_t *dst, ptrdiff_t dst_stride, | 187 uint8_t *dst, ptrdiff_t dst_stride, |
| 188 const int16_t *filter_x, int x_step_q4, | 188 const int16_t *filter_x, int x_step_q4, |
| 189 const int16_t *filter_y, int y_step_q4, | 189 const int16_t *filter_y, int y_step_q4, |
| 190 int w, int h) { | 190 int w, int h) { |
| 191 const interp_kernel *const filters_y = get_filter_base(filter_y); | 191 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 192 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 192 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, | 193 convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, |
| 194 y0_q4, y_step_q4, w, h); | 194 y0_q4, y_step_q4, w, h); |
| 195 } | 195 } |
| 196 | 196 |
| 197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, | 197 void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
| 198 uint8_t *dst, ptrdiff_t dst_stride, | 198 uint8_t *dst, ptrdiff_t dst_stride, |
| 199 const int16_t *filter_x, int x_step_q4, | 199 const int16_t *filter_x, int x_step_q4, |
| 200 const int16_t *filter_y, int y_step_q4, | 200 const int16_t *filter_y, int y_step_q4, |
| 201 int w, int h) { | 201 int w, int h) { |
| 202 const interp_kernel *const filters_x = get_filter_base(filter_x); | 202 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 203 const int x0_q4 = get_filter_offset(filter_x, filters_x); | 203 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 204 | 204 |
| 205 const interp_kernel *const filters_y = get_filter_base(filter_y); | 205 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 206 const int y0_q4 = get_filter_offset(filter_y, filters_y); | 206 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 207 | 207 |
| 208 convolve(src, src_stride, dst, dst_stride, | 208 convolve(src, src_stride, dst, dst_stride, |
| 209 filters_x, x0_q4, x_step_q4, | 209 filters_x, x0_q4, x_step_q4, |
| 210 filters_y, y0_q4, y_step_q4, w, h); | 210 filters_y, y0_q4, y_step_q4, w, h); |
| 211 } | 211 } |
| 212 | 212 |
| 213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, | 213 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
| 214 uint8_t *dst, ptrdiff_t dst_stride, | 214 uint8_t *dst, ptrdiff_t dst_stride, |
| 215 const int16_t *filter_x, int x_step_q4, | 215 const int16_t *filter_x, int x_step_q4, |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 int x, y; | 247 int x, y; |
| 248 | 248 |
| 249 for (y = 0; y < h; ++y) { | 249 for (y = 0; y < h; ++y) { |
| 250 for (x = 0; x < w; ++x) | 250 for (x = 0; x < w; ++x) |
| 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 251 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
| 252 | 252 |
| 253 src += src_stride; | 253 src += src_stride; |
| 254 dst += dst_stride; | 254 dst += dst_stride; |
| 255 } | 255 } |
| 256 } | 256 } |
| OLD | NEW |