OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 #ifndef VPX_DSP_X86_CONVOLVE_H_ | 10 #ifndef VPX_DSP_X86_CONVOLVE_H_ |
(...skipping 13 matching lines...) Expand all Loading... |
24 uint32_t output_height, | 24 uint32_t output_height, |
25 const int16_t *filter | 25 const int16_t *filter |
26 ); | 26 ); |
27 | 27 |
/* FUN_CONV_1D: generates an x86-optimized 1-D (horizontal or vertical)
 * convolution wrapper named vpx_convolve8_<name>_<opt>.
 *
 *   name      - suffix for the generated function name (e.g. horiz, avg_vert)
 *   step_q4   - x_step_q4 or y_step_q4; must be 16 (unscaled convolution)
 *   filter    - filter_x or filter_y, an 8-tap filter in Q4 precision
 *   dir       - kernel direction token, h or v
 *   src_start - first source pixel the kernel reads (src, or src offset by
 *               the filter's leading taps for the vertical pass)
 *   avg       - empty or avg_ to select the averaging kernel variants
 *   opt       - ISA suffix of the underlying kernels (e.g. sse2, ssse3)
 *
 * The wrapper slices the block into 16-, 8- and 4-wide columns and hands
 * each column to the matching vpx_filter_block1d{16,8,4}_<dir>{8,2}_ kernel.
 * The 8-tap kernels run when any outer tap is nonzero; otherwise the cheaper
 * 2-tap (bilinear) kernels run on src directly.
 *
 * NOTE(review): unlike the previous revision there is no runtime fallback to
 * the C path; unscaled input (step_q4 == 16) and filter[3] != 128 are now
 * hard preconditions enforced by assert. Callers must honor them.
 * Assumes w is a multiple of 4 (true for all codec block sizes), since any
 * remainder after the w >= 4 loop is silently left unprocessed.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
  void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                    uint8_t *dst, ptrdiff_t dst_stride, \
                                    const int16_t *filter_x, int x_step_q4, \
                                    const int16_t *filter_y, int y_step_q4, \
                                    int w, int h) { \
    assert(filter[3] != 128); \
    assert(step_q4 == 16); \
    if (filter[0] || filter[1] || filter[2]) { \
      /* At least one outer tap is active: use the full 8-tap kernels. */ \
      while (w >= 16) { \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                 dst_stride, h, filter); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } else { \
      /* Only the two center taps matter: use the 2-tap kernels on src. */ \
      while (w >= 16) { \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \
                                                 dst_stride, h, filter); \
        src += 16; \
        dst += 16; \
        w -= 16; \
      } \
      while (w >= 8) { \
        vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 8; \
        dst += 8; \
        w -= 8; \
      } \
      while (w >= 4) { \
        vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \
                                                dst_stride, h, filter); \
        src += 4; \
        dst += 4; \
        w -= 4; \
      } \
    } \
  }
111 | 106 |
/* FUN_CONV_2D: generates an x86-optimized 2-D convolution wrapper named
 * vpx_convolve8_<avg><opt>, built as a horizontal pass into an aligned
 * intermediate buffer followed by a vertical pass into dst.
 *
 *   avg - empty or avg_ to select the averaging vertical pass
 *   opt - ISA suffix of the 1-D wrappers this composes (e.g. sse2, ssse3)
 *
 * When either filter has an active outer tap, the horizontal pass filters
 * h + 7 rows starting 3 rows above src so the 8-tap vertical pass has its
 * context rows; otherwise (2-tap case) only h + 1 rows are needed and no
 * offset is applied. The intermediate buffer is 64 pixels wide, hence the
 * w <= 64 / h <= 64 preconditions.
 *
 * NOTE(review): unlike the previous revision there is no runtime fallback to
 * vpx_convolve8_<avg>c; unscaled input (both step_q4 == 16) and
 * filter_*[3] != 128 are now hard preconditions enforced by assert.
 */
#define FUN_CONV_2D(avg, opt) \
  void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                uint8_t *dst, ptrdiff_t dst_stride, \
                                const int16_t *filter_x, int x_step_q4, \
                                const int16_t *filter_y, int y_step_q4, \
                                int w, int h) { \
    assert(filter_x[3] != 128); \
    assert(filter_y[3] != 128); \
    assert(w <= 64); \
    assert(h <= 64); \
    assert(x_step_q4 == 16); \
    assert(y_step_q4 == 16); \
    if (filter_x[0] || filter_x[1] || filter_x[2] || \
        filter_y[0] || filter_y[1] || filter_y[2]) { \
      /* 8-tap: filter 3 extra rows above and 4 below for vertical context. */ \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
                                filter_x, x_step_q4, filter_y, y_step_q4, \
                                w, h + 7); \
      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
                                      filter_x, x_step_q4, filter_y, \
                                      y_step_q4, w, h); \
    } else { \
      /* 2-tap: only one extra row of context is required. */ \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
                                filter_x, x_step_q4, filter_y, y_step_q4, \
                                w, h + 1); \
      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
                                      filter_x, x_step_q4, filter_y, \
                                      y_step_q4, w, h); \
    } \
  }
144 | 138 |
145 #if CONFIG_VP9_HIGHBITDEPTH | 139 #if CONFIG_VP9_HIGHBITDEPTH |
146 | 140 |
147 typedef void highbd_filter8_1dfunction ( | 141 typedef void highbd_filter8_1dfunction ( |
148 const uint16_t *src_ptr, | 142 const uint16_t *src_ptr, |
149 const ptrdiff_t src_pitch, | 143 const ptrdiff_t src_pitch, |
150 uint16_t *output_ptr, | 144 uint16_t *output_ptr, |
151 ptrdiff_t out_pitch, | 145 ptrdiff_t out_pitch, |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
287 } \ | 281 } \ |
288 } else { \ | 282 } else { \ |
289 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ | 283 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ |
290 filter_x, x_step_q4, filter_y, y_step_q4, w, \ | 284 filter_x, x_step_q4, filter_y, y_step_q4, w, \ |
291 h, bd); \ | 285 h, bd); \ |
292 } \ | 286 } \ |
293 } | 287 } |
294 #endif // CONFIG_VP9_HIGHBITDEPTH | 288 #endif // CONFIG_VP9_HIGHBITDEPTH |
295 | 289 |
296 #endif // VPX_DSP_X86_CONVOLVE_H_ | 290 #endif // VPX_DSP_X86_CONVOLVE_H_ |
OLD | NEW |