source/libvpx/vp9/common/x86/convolve.h - Issue 1162573005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/x86/convolve.h

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10 #ifndef VP9_COMMON_X86_CONVOLVE_H_

	11 #define VP9_COMMON_X86_CONVOLVE_H_

	12

	13 #include <assert.h>

	14

	15 #include "./vpx_config.h"

	16 #include "vpx/vpx_integer.h"

	17 #include "vpx_ports/mem.h"

	18

	/*
	 * Function type for a one-direction filter kernel.
	 *
	 * The argument order matches the kernel calls made by FUN_CONV_1D:
	 * (source, source stride, destination, destination stride, row count,
	 * filter taps).  Presumably the vp9_filter_block1d* kernels are declared
	 * with this type elsewhere -- TODO confirm against the file that
	 * instantiates the macros.
	 */
	typedef void filter8_1dfunction(const uint8_t *src_ptr,
	                                ptrdiff_t src_pitch,
	                                uint8_t *output_ptr,
	                                ptrdiff_t out_pitch,
	                                uint32_t output_height,
	                                const int16_t *filter);

	27

	/*
	 * FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
	 *
	 * Defines vp9_convolve8_<name>_<opt>(), a one-direction convolve wrapper.
	 *
	 *   name      - suffix of the generated function and of the fallback
	 *               vp9_convolve8_<name>_c
	 *   step_q4   - expression compared against 16 (e.g. x_step_q4)
	 *   filter    - tap array that is inspected and handed to the kernels
	 *               (e.g. filter_x)
	 *   dir       - tag pasted into the kernel names (presumably h or v)
	 *   src_start - source expression given to the "8" kernels; it is expanded
	 *               inside the function body, so it may refer to src and
	 *               src_stride
	 *   avg       - infix pasted in front of opt in the kernel names; may be
	 *               empty
	 *   opt       - suffix of the kernels and of the generated function
	 *               (presumably the instruction set, e.g. sse2)
	 *
	 * When step_q4 is 16 and filter[3] is not 128, the block is walked in
	 * column strips of 16, then 8, then 4 pixels.  If any of filter[0..2] is
	 * non-zero the vp9_filter_block1d{16,8,4}_<dir>8_<avg><opt> kernels are
	 * called with src_start; otherwise the ..._<dir>2_<avg><opt> kernels are
	 * called with src.  (Presumably these are the 8-tap and 2-tap kernels.)
	 * Whatever width is left over -- the full width when the fast path is not
	 * taken -- is passed to vp9_convolve8_<name>_c.
	 */
	#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
	  void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
	                                    uint8_t *dst, ptrdiff_t dst_stride, \
	                                    const int16_t *filter_x, int x_step_q4, \
	                                    const int16_t *filter_y, int y_step_q4, \
	                                    int w, int h) { \
	    if ((step_q4) == 16 && (filter)[3] != 128) { \
	      if ((filter)[0] || (filter)[1] || (filter)[2]) { \
	        /* Outer taps in use: "8" kernels, fed from src_start. */ \
	        while (w >= 16) { \
	          vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
	                                                   src_stride, \
	                                                   dst, \
	                                                   dst_stride, \
	                                                   h, \
	                                                   filter); \
	          src += 16; \
	          dst += 16; \
	          w -= 16; \
	        } \
	        while (w >= 8) { \
	          vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
	                                                  src_stride, \
	                                                  dst, \
	                                                  dst_stride, \
	                                                  h, \
	                                                  filter); \
	          src += 8; \
	          dst += 8; \
	          w -= 8; \
	        } \
	        while (w >= 4) { \
	          vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
	                                                  src_stride, \
	                                                  dst, \
	                                                  dst_stride, \
	                                                  h, \
	                                                  filter); \
	          src += 4; \
	          dst += 4; \
	          w -= 4; \
	        } \
	      } else { \
	        /* filter[0..2] are all zero: "2" kernels, fed from src itself. */ \
	        while (w >= 16) { \
	          vp9_filter_block1d16_##dir##2_##avg##opt(src, \
	                                                   src_stride, \
	                                                   dst, \
	                                                   dst_stride, \
	                                                   h, \
	                                                   filter); \
	          src += 16; \
	          dst += 16; \
	          w -= 16; \
	        } \
	        while (w >= 8) { \
	          vp9_filter_block1d8_##dir##2_##avg##opt(src, \
	                                                  src_stride, \
	                                                  dst, \
	                                                  dst_stride, \
	                                                  h, \
	                                                  filter); \
	          src += 8; \
	          dst += 8; \
	          w -= 8; \
	        } \
	        while (w >= 4) { \
	          vp9_filter_block1d4_##dir##2_##avg##opt(src, \
	                                                  src_stride, \
	                                                  dst, \
	                                                  dst_stride, \
	                                                  h, \
	                                                  filter); \
	          src += 4; \
	          dst += 4; \
	          w -= 4; \
	        } \
	      } \
	    } \
	    /* Remaining columns (or the whole block) go to the C version. */ \
	    if (w) { \
	      vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
	                               filter_x, x_step_q4, filter_y, y_step_q4, \
	                               w, h); \
	    } \
	  }

	111

	/*
	 * FUN_CONV_2D(avg, opt)
	 *
	 * Defines vp9_convolve8_<avg><opt>(), a two-pass (horizontal then vertical)
	 * convolve built on vp9_convolve8_horiz_<opt> and
	 * vp9_convolve8_<avg>vert_<opt>.
	 *
	 *   avg - prefix pasted before "vert_"/"c" and before opt in the generated
	 *         name; may be empty
	 *   opt - suffix of the generated function and of the 1-D functions it
	 *         calls
	 *
	 * w and h are asserted to be at most 64; the intermediate buffers below are
	 * sized for that limit with a row stride of 64.
	 *
	 * With both steps equal to 16:
	 *   - if any of filter_x[0..2] / filter_y[0..2] is non-zero, or either
	 *     filter has [3] == 128, the horizontal pass starts three rows above
	 *     src and writes h + 7 rows into a 64 * 71 buffer; the vertical pass
	 *     then starts at row 3 of that buffer.  (Presumably the extra rows
	 *     are the context an 8-tap vertical filter needs.)
	 *   - otherwise the horizontal pass starts at src and writes h + 1 rows
	 *     into a 64 * 65 buffer, and the vertical pass starts at its first row.
	 * For any other step the call is forwarded to vp9_convolve8_<avg>c.
	 */
	#define FUN_CONV_2D(avg, opt) \
	  void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
	                                uint8_t *dst, ptrdiff_t dst_stride, \
	                                const int16_t *filter_x, int x_step_q4, \
	                                const int16_t *filter_y, int y_step_q4, \
	                                int w, int h) { \
	    assert(w <= 64); \
	    assert(h <= 64); \
	    if (x_step_q4 == 16 && y_step_q4 == 16) { \
	      if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
	          filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
	        /* 64 columns x (64 + 7) rows. */ \
	        DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
	        vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
	                                  filter_x, x_step_q4, filter_y, y_step_q4, \
	                                  w, h + 7); \
	        vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
	                                        filter_x, x_step_q4, filter_y, \
	                                        y_step_q4, w, h); \
	      } else { \
	        /* 64 columns x (64 + 1) rows. */ \
	        DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
	        vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
	                                  filter_x, x_step_q4, filter_y, y_step_q4, \
	                                  w, h + 1); \
	        vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
	                                        filter_x, x_step_q4, filter_y, \
	                                        y_step_q4, w, h); \
	      } \
	    } else { \
	      vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
	                             filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
	    } \
	  }

	144

	145 #if CONFIG_VP9_HIGHBITDEPTH

	146

	/*
	 * Function type for a one-direction high-bitdepth filter kernel.
	 *
	 * The argument order matches the kernel calls made by HIGH_FUN_CONV_1D:
	 * (source, source stride, destination, destination stride, row count,
	 * filter taps, bd).  Presumably the vp9_highbd_filter_block1d* kernels are
	 * declared with this type elsewhere, and bd is the bit depth -- TODO
	 * confirm.
	 *
	 * src_pitch is passed by value, so it carries no top-level const here; that
	 * keeps this declaration in line with filter8_1dfunction and leaves the
	 * function type unchanged.
	 */
	typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
	                                       ptrdiff_t src_pitch,
	                                       uint16_t *output_ptr,
	                                       ptrdiff_t out_pitch,
	                                       unsigned int output_height,
	                                       const int16_t *filter,
	                                       int bd);

	156

	/*
	 * HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
	 *
	 * Defines vp9_highbd_convolve8_<name>_<opt>(), the high-bitdepth
	 * counterpart of the function generated by FUN_CONV_1D.
	 *
	 *   name      - suffix of the generated function and of the fallback
	 *               vp9_highbd_convolve8_<name>_c
	 *   step_q4   - expression compared against 16 (e.g. x_step_q4)
	 *   filter    - tap array that is inspected and handed to the kernels
	 *   dir       - tag pasted into the kernel names (presumably h or v)
	 *   src_start - source expression given to the "8" kernels; it is expanded
	 *               where src is the local uint16_t pointer, so it may refer
	 *               to src and src_stride
	 *   avg       - infix pasted in front of opt in the kernel names; may be
	 *               empty
	 *   opt       - suffix of the kernels and of the generated function
	 *
	 * The generated function takes src8/dst8 as uint8_t pointers.  On the fast
	 * path (step_q4 is 16 and filter[3] is not 128) they are converted with
	 * CONVERT_TO_SHORTPTR and the block is walked in column strips of 16, 8
	 * and 4, using the ..._<dir>8_... kernels when any of filter[0..2] is
	 * non-zero and the ..._<dir>2_... kernels otherwise; bd is forwarded to
	 * every kernel.  Whatever width is left over is passed to
	 * vp9_highbd_convolve8_<name>_c together with the original src8/dst8.
	 * NOTE(review): after a partial fast path src8/dst8 have not been advanced
	 * past the strips already processed -- only the local src/dst have; confirm
	 * that callers never pass a width that leaves a remainder.
	 */
	#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
	  void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
	                                           ptrdiff_t src_stride, \
	                                           uint8_t *dst8, \
	                                           ptrdiff_t dst_stride, \
	                                           const int16_t *filter_x, \
	                                           int x_step_q4, \
	                                           const int16_t *filter_y, \
	                                           int y_step_q4, \
	                                           int w, int h, int bd) { \
	    if ((step_q4) == 16 && (filter)[3] != 128) { \
	      uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
	      uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
	      if ((filter)[0] || (filter)[1] || (filter)[2]) { \
	        /* Outer taps in use: "8" kernels, fed from src_start. */ \
	        while (w >= 16) { \
	          vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
	                                                          src_stride, \
	                                                          dst, \
	                                                          dst_stride, \
	                                                          h, \
	                                                          filter, \
	                                                          bd); \
	          src += 16; \
	          dst += 16; \
	          w -= 16; \
	        } \
	        while (w >= 8) { \
	          vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
	                                                         src_stride, \
	                                                         dst, \
	                                                         dst_stride, \
	                                                         h, \
	                                                         filter, \
	                                                         bd); \
	          src += 8; \
	          dst += 8; \
	          w -= 8; \
	        } \
	        while (w >= 4) { \
	          vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
	                                                         src_stride, \
	                                                         dst, \
	                                                         dst_stride, \
	                                                         h, \
	                                                         filter, \
	                                                         bd); \
	          src += 4; \
	          dst += 4; \
	          w -= 4; \
	        } \
	      } else { \
	        /* filter[0..2] are all zero: "2" kernels, fed from src itself. */ \
	        while (w >= 16) { \
	          vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
	                                                          src_stride, \
	                                                          dst, \
	                                                          dst_stride, \
	                                                          h, \
	                                                          filter, \
	                                                          bd); \
	          src += 16; \
	          dst += 16; \
	          w -= 16; \
	        } \
	        while (w >= 8) { \
	          vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
	                                                         src_stride, \
	                                                         dst, \
	                                                         dst_stride, \
	                                                         h, \
	                                                         filter, \
	                                                         bd); \
	          src += 8; \
	          dst += 8; \
	          w -= 8; \
	        } \
	        while (w >= 4) { \
	          vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
	                                                         src_stride, \
	                                                         dst, \
	                                                         dst_stride, \
	                                                         h, \
	                                                         filter, \
	                                                         bd); \
	          src += 4; \
	          dst += 4; \
	          w -= 4; \
	        } \
	      } \
	    } \
	    /* Remaining columns (or the whole block) go to the C version. */ \
	    if (w) { \
	      vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
	                                      filter_x, x_step_q4, filter_y, y_step_q4, \
	                                      w, h, bd); \
	    } \
	  }

	252

	/*
	 * HIGH_FUN_CONV_2D(avg, opt)
	 *
	 * Defines vp9_highbd_convolve8_<avg><opt>(), a two-pass (horizontal then
	 * vertical) high-bitdepth convolve built on
	 * vp9_highbd_convolve8_horiz_<opt> and vp9_highbd_convolve8_<avg>vert_<opt>.
	 *
	 *   avg - prefix pasted before "vert_"/"c" and before opt in the generated
	 *         name; may be empty
	 *   opt - suffix of the generated function and of the 1-D functions it
	 *         calls
	 *
	 * w and h are asserted to be at most 64; the uint16_t intermediate buffers
	 * are sized for that limit with a row stride of 64 and are handed to the
	 * 1-D functions through CONVERT_TO_BYTEPTR.
	 *
	 * With both steps equal to 16:
	 *   - if any of filter_x[0..2] / filter_y[0..2] is non-zero, or either
	 *     filter has [3] == 128, the horizontal pass starts three rows above
	 *     src and writes h + 7 rows into a 64 * 71 buffer; the vertical pass
	 *     then starts 3 * 64 (= 192) past the converted buffer pointer.
	 *   - otherwise the horizontal pass starts at src and writes h + 1 rows
	 *     into a 64 * 65 buffer, and the vertical pass starts at its beginning.
	 * For any other step the call is forwarded to vp9_highbd_convolve8_<avg>c.
	 */
	#define HIGH_FUN_CONV_2D(avg, opt) \
	  void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
	                                       uint8_t *dst, ptrdiff_t dst_stride, \
	                                       const int16_t *filter_x, int x_step_q4, \
	                                       const int16_t *filter_y, int y_step_q4, \
	                                       int w, int h, int bd) { \
	    assert(w <= 64); \
	    assert(h <= 64); \
	    if (x_step_q4 == 16 && y_step_q4 == 16) { \
	      if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
	          filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
	        /* 64 columns x (64 + 7) rows. */ \
	        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
	        vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
	                                         CONVERT_TO_BYTEPTR(fdata2), 64, \
	                                         filter_x, x_step_q4, \
	                                         filter_y, y_step_q4, \
	                                         w, h + 7, bd); \
	        /* Offset of three rows at stride 64, as in the 8-bit macro. */ \
	        vp9_highbd_convolve8_##avg##vert_##opt( \
	            CONVERT_TO_BYTEPTR(fdata2) + 3 * 64, \
	            64, dst, dst_stride, \
	            filter_x, x_step_q4, \
	            filter_y, y_step_q4, \
	            w, h, bd); \
	      } else { \
	        /* 64 columns x (64 + 1) rows. */ \
	        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
	        vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
	                                         CONVERT_TO_BYTEPTR(fdata2), 64, \
	                                         filter_x, x_step_q4, \
	                                         filter_y, y_step_q4, \
	                                         w, h + 1, bd); \
	        vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
	                                               dst, dst_stride, \
	                                               filter_x, x_step_q4, \
	                                               filter_y, y_step_q4, \
	                                               w, h, bd); \
	      } \
	    } else { \
	      vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
	                                    filter_x, x_step_q4, filter_y, y_step_q4, w, \
	                                    h, bd); \
	    } \
	  }

	294 #endif // CONFIG_VP9_HIGHBITDEPTH

	295

	296 #endif // VP9_COMMON_X86_CONVOLVE_H_

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/common/vp9_systemdependent.h ('k') | source/libvpx/vp9/common/x86/vp9_asm_stubs.c » ('j') | no next file with comments »