OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 #ifndef VP9_COMMON_X86_CONVOLVE_H_ |
| 11 #define VP9_COMMON_X86_CONVOLVE_H_ |
| 12 |
| 13 #include <assert.h> |
| 14 |
| 15 #include "./vpx_config.h" |
| 16 #include "vpx/vpx_integer.h" |
| 17 #include "vpx_ports/mem.h" |
| 18 |
/*
 * Signature shared by the 1-D filter kernels that the convolve wrappers
 * in this header dispatch to.
 *
 *   src_ptr / src_pitch     - source samples and the source pitch
 *   output_ptr / out_pitch  - destination samples and the destination pitch
 *   output_height           - number of output rows to produce
 *   filter                  - filter coefficients
 *
 * NOTE(review): pitches are presumably expressed in bytes and the filter is
 * presumably an 8-entry kernel (going by the type name) - confirm against
 * the kernel implementations.
 */
typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
                                uint8_t *output_ptr, ptrdiff_t out_pitch,
                                uint32_t output_height,
                                const int16_t *filter);
| 27 |
/*
 * FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
 *
 * Expands to the definition of vp9_convolve8_<name>_<opt>().
 *
 *   name       - suffix of the generated function and of the C fallback
 *                vp9_convolve8_<name>_c()
 *   step_q4    - which step argument to test (x_step_q4 or y_step_q4)
 *   filter     - which filter argument to use (filter_x or filter_y)
 *   dir        - direction tag pasted into the kernel names
 *   src_start  - expression for the first source sample handed to the
 *                "<dir>8" kernels; it is re-evaluated on every iteration, so
 *                it may (and is expected to) refer to the advancing `src`
 *   avg        - optional tag pasted in front of <opt> in the kernel names
 *   opt        - instruction-set suffix
 *
 * Fast path: taken only when the step is exactly 16 and filter[3] != 128.
 * The block is then processed in column strips of 16, 8 and finally 4,
 * using the "<dir>8" kernels when any of filter[0..2] is non-zero and the
 * "<dir>2" kernels otherwise.
 * NOTE(review): presumably step 16 means "unscaled" and the 8/2 suffixes
 * are the tap counts - confirm against the kernel sources.
 *
 * Fallback: whatever width is left (all of it when the fast path was not
 * taken) is handed to the C implementation, starting at the already
 * advanced src/dst positions.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                  uint8_t *dst, ptrdiff_t dst_stride, \
                                  const int16_t *filter_x, int x_step_q4, \
                                  const int16_t *filter_y, int y_step_q4, \
                                  int w, int h) { \
  if (step_q4 == 16 && filter[3] != 128) { \
    if (filter[0] || filter[1] || filter[2]) { \
      for (; w >= 16; src += 16, dst += 16, w -= 16) { \
        vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
                                                 dst, dst_stride, \
                                                 h, filter); \
      } \
      for (; w >= 8; src += 8, dst += 8, w -= 8) { \
        vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
                                                dst, dst_stride, \
                                                h, filter); \
      } \
      for (; w >= 4; src += 4, dst += 4, w -= 4) { \
        vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
                                                dst, dst_stride, \
                                                h, filter); \
      } \
    } else { \
      for (; w >= 16; src += 16, dst += 16, w -= 16) { \
        vp9_filter_block1d16_##dir##2_##avg##opt(src, src_stride, \
                                                 dst, dst_stride, \
                                                 h, filter); \
      } \
      for (; w >= 8; src += 8, dst += 8, w -= 8) { \
        vp9_filter_block1d8_##dir##2_##avg##opt(src, src_stride, \
                                                dst, dst_stride, \
                                                h, filter); \
      } \
      for (; w >= 4; src += 4, dst += 4, w -= 4) { \
        vp9_filter_block1d4_##dir##2_##avg##opt(src, src_stride, \
                                                dst, dst_stride, \
                                                h, filter); \
      } \
    } \
  } \
  /* Remaining columns (or the whole block) go through the C version. */ \
  if (w) { \
    vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                             filter_x, x_step_q4, filter_y, y_step_q4, \
                             w, h); \
  } \
}
| 111 |
/*
 * FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vp9_convolve8_<avg><opt>(), a two-pass
 * convolve built from vp9_convolve8_horiz_<opt>() followed by
 * vp9_convolve8_<avg>vert_<opt>().
 *
 *   avg  - optional tag pasted in front of "vert_<opt>", "<opt>" and "c"
 *   opt  - instruction-set suffix
 *
 * w and h are asserted to be at most 64; the intermediate buffers below are
 * sized for exactly that bound and use a fixed row stride of 64.
 *
 * When either step differs from 16 the whole call is forwarded to
 * vp9_convolve8_<avg>c().  Otherwise one of two layouts is used:
 *
 *   - if any of filter_{x,y}[0..2] is non-zero or filter_{x,y}[3] == 128:
 *     the first pass starts 3 rows above `src` and produces h + 7 rows
 *     (buffer of 64 * 71); the second pass starts at row 3 of that buffer.
 *   - otherwise: the first pass starts at `src` and produces h + 1 rows
 *     (buffer of 64 * 65); the second pass starts at row 0 of that buffer.
 */
#define FUN_CONV_2D(avg, opt) \
void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                              uint8_t *dst, ptrdiff_t dst_stride, \
                              const int16_t *filter_x, int x_step_q4, \
                              const int16_t *filter_y, int y_step_q4, \
                              int w, int h) { \
  assert(w <= 64); \
  assert(h <= 64); \
  if (x_step_q4 != 16 || y_step_q4 != 16) { \
    vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                           filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
  } else if (filter_x[0] || filter_x[1] || filter_x[2] || \
             filter_x[3] == 128 || \
             filter_y[0] || filter_y[1] || filter_y[2] || \
             filter_y[3] == 128) { \
    DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
    vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 7); \
    vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } else { \
    DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
    vp9_convolve8_horiz_##opt(src, src_stride, \
                              fdata2, 64, \
                              filter_x, x_step_q4, filter_y, y_step_q4, \
                              w, h + 1); \
    vp9_convolve8_##avg##vert_##opt(fdata2, 64, \
                                    dst, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h); \
  } \
}
| 144 |
| 145 #if CONFIG_VP9_HIGHBITDEPTH |
| 146 |
/*
 * Signature shared by the high-bit-depth 1-D filter kernels that the
 * vp9_highbd_convolve8_* wrappers in this header dispatch to.
 *
 *   src_ptr / src_pitch     - 16-bit source samples and the source pitch
 *   output_ptr / out_pitch  - 16-bit destination samples and its pitch
 *   output_height           - number of output rows to produce
 *   filter                  - filter coefficients
 *   bd                      - bit depth, forwarded unchanged by the wrappers
 *
 * NOTE(review): pitches are presumably counted in uint16_t elements rather
 * than bytes - confirm against the kernel implementations.
 */
typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
                                       const ptrdiff_t src_pitch,
                                       uint16_t *output_ptr,
                                       ptrdiff_t out_pitch,
                                       unsigned int output_height,
                                       const int16_t *filter,
                                       int bd);
| 156 |
/*
 * HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
 *
 * Expands to the definition of vp9_highbd_convolve8_<name>_<opt>().
 *
 *   name       - suffix of the generated function and of the C fallback
 *                vp9_highbd_convolve8_<name>_c()
 *   step_q4    - which step argument to test (x_step_q4 or y_step_q4)
 *   filter     - which filter argument to use (filter_x or filter_y)
 *   dir        - direction tag pasted into the kernel names
 *   src_start  - expression for the first source sample handed to the
 *                "<dir>8" kernels; it is re-evaluated on every iteration and
 *                may refer to the local uint16_t pointer `src`
 *   avg        - optional tag pasted in front of <opt> in the kernel names
 *   opt        - instruction-set suffix
 *
 * src8/dst8 are converted with CONVERT_TO_SHORTPTR() before being passed to
 * the kernels; the C fallback receives them in their original form.
 *
 * Fast path: taken only when the step is exactly 16 and filter[3] != 128.
 * The block is processed in column strips of 16, 8 and finally 4, using the
 * "<dir>8" kernels when any of filter[0..2] is non-zero and the "<dir>2"
 * kernels otherwise.
 * NOTE(review): presumably step 16 means "unscaled" and the 8/2 suffixes
 * are the tap counts - confirm against the kernel sources.
 *
 * Fallback: any width left over (all of it when the fast path was not
 * taken) is handed to the C implementation.  The positions reached by the
 * fast path are written back into src8/dst8 first, so that the fallback
 * continues where the kernels stopped instead of restarting at column 0;
 * this matches the behaviour of the 8-bit FUN_CONV_1D wrapper.
 * NOTE(review): the write-back relies on CONVERT_TO_BYTEPTR() being the
 * inverse of CONVERT_TO_SHORTPTR() - confirm in vpx_ports/mem.h.
 */
#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
                                         ptrdiff_t src_stride, \
                                         uint8_t *dst8, \
                                         ptrdiff_t dst_stride, \
                                         const int16_t *filter_x, \
                                         int x_step_q4, \
                                         const int16_t *filter_y, \
                                         int y_step_q4, \
                                         int w, int h, int bd) { \
  if (step_q4 == 16 && filter[3] != 128) { \
    uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
    if (filter[0] || filter[1] || filter[2]) { \
      while (w >= 16) { \
        vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
                                                        h, \
                                                        filter, \
                                                        bd); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
                                                       src_stride, \
                                                       dst, \
                                                       dst_stride, \
                                                       h, \
                                                       filter, \
                                                       bd); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
                                                       src_stride, \
                                                       dst, \
                                                       dst_stride, \
                                                       h, \
                                                       filter, \
                                                       bd); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } else { \
      while (w >= 16) { \
        vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
                                                        src_stride, \
                                                        dst, \
                                                        dst_stride, \
                                                        h, \
                                                        filter, \
                                                        bd); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
                                                       src_stride, \
                                                       dst, \
                                                       dst_stride, \
                                                       h, \
                                                       filter, \
                                                       bd); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
                                                       src_stride, \
                                                       dst, \
                                                       dst_stride, \
                                                       h, \
                                                       filter, \
                                                       bd); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } \
    /* Publish the advanced positions for the fallback below. */ \
    src8 = CONVERT_TO_BYTEPTR(src); \
    dst8 = CONVERT_TO_BYTEPTR(dst); \
  } \
  if (w) { \
    vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h, bd); \
  } \
}
| 252 |
/*
 * HIGH_FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vp9_highbd_convolve8_<avg><opt>(), a
 * two-pass convolve built from vp9_highbd_convolve8_horiz_<opt>() followed
 * by vp9_highbd_convolve8_<avg>vert_<opt>().
 *
 *   avg  - optional tag pasted in front of "vert_<opt>", "<opt>" and "c"
 *   opt  - instruction-set suffix
 *
 * w and h are asserted to be at most 64; the uint16_t intermediate buffers
 * below are sized for exactly that bound and use a fixed row stride of 64.
 * The buffers are handed to the passes through CONVERT_TO_BYTEPTR().
 *
 * When either step differs from 16 the whole call is forwarded to
 * vp9_highbd_convolve8_<avg>c().  Otherwise one of two layouts is used:
 *
 *   - if any of filter_{x,y}[0..2] is non-zero or filter_{x,y}[3] == 128:
 *     the first pass starts 3 rows above `src` and produces h + 7 rows
 *     (buffer of 64 * 71); the second pass starts 3 * 64 (= 192) past the
 *     converted buffer pointer, i.e. at row 3.
 *   - otherwise: the first pass starts at `src` and produces h + 1 rows
 *     (buffer of 64 * 65); the second pass starts at row 0 of that buffer.
 */
#define HIGH_FUN_CONV_2D(avg, opt) \
void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                     uint8_t *dst, ptrdiff_t dst_stride, \
                                     const int16_t *filter_x, int x_step_q4, \
                                     const int16_t *filter_y, int y_step_q4, \
                                     int w, int h, int bd) { \
  assert(w <= 64); \
  assert(h <= 64); \
  if (x_step_q4 != 16 || y_step_q4 != 16) { \
    vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                                  filter_x, x_step_q4, filter_y, y_step_q4, \
                                  w, h, bd); \
  } else if (filter_x[0] || filter_x[1] || filter_x[2] || \
             filter_x[3] == 128 || \
             filter_y[0] || filter_y[1] || filter_y[2] || \
             filter_y[3] == 128) { \
    DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
    vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                                     CONVERT_TO_BYTEPTR(fdata2), 64, \
                                     filter_x, x_step_q4, \
                                     filter_y, y_step_q4, \
                                     w, h + 7, bd); \
    vp9_highbd_convolve8_##avg##vert_##opt( \
        CONVERT_TO_BYTEPTR(fdata2) + 3 * 64, 64, \
        dst, dst_stride, \
        filter_x, x_step_q4, \
        filter_y, y_step_q4, \
        w, h, bd); \
  } else { \
    DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
    vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
                                     CONVERT_TO_BYTEPTR(fdata2), 64, \
                                     filter_x, x_step_q4, \
                                     filter_y, y_step_q4, \
                                     w, h + 1, bd); \
    vp9_highbd_convolve8_##avg##vert_##opt( \
        CONVERT_TO_BYTEPTR(fdata2), 64, \
        dst, dst_stride, \
        filter_x, x_step_q4, \
        filter_y, y_step_q4, \
        w, h, bd); \
  } \
}
| 294 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 295 |
| 296 #endif // VP9_COMMON_X86_CONVOLVE_H_ |
OLD | NEW |