patched-ffmpeg-mt/libavcodec/ppc/h264_altivec.c - Issue 789004: ffmpeg roll of source to mar 9 version...

Side by Side Diff: patched-ffmpeg-mt/libavcodec/ppc/h264_altivec.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« patched-ffmpeg-mt/libavcodec/mpeg4video_es_bsf.c ('K') | « patched-ffmpeg-mt/libavcodec/ppc/gmc_altivec.c ('k') | patched-ffmpeg-mt/libavcodec/ppc/h264_template_altivec.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>	2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>

3 *	3 *

4 * This file is part of FFmpeg.	4 * This file is part of FFmpeg.

5 *	5 *

6 * FFmpeg is free software; you can redistribute it and/or	6 * FFmpeg is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Lesser General Public	7 * modify it under the terms of the GNU Lesser General Public

8 * License as published by the Free Software Foundation; either	8 * License as published by the Free Software Foundation; either

9 * version 2.1 of the License, or (at your option) any later version.	9 * version 2.1 of the License, or (at your option) any later version.

10 *	10 *

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
72 #undef PREFIX_h264_qpel16_v_lowpass_num	72 #undef PREFIX_h264_qpel16_v_lowpass_num

73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec	73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec

74 #undef PREFIX_h264_qpel16_hv_lowpass_num	74 #undef PREFIX_h264_qpel16_hv_lowpass_num

75	75

76 #define H264_MC(OPNAME, SIZE, CODETYPE) \	76 #define H264_MC(OPNAME, SIZE, CODETYPE) \

77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\	77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\

78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\	78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\

79 }\	79 }\

80 \	80 \

81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \	81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \

82 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\	82 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\

83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\	83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\

84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\	84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\

85 }\	85 }\

86 \	86 \

87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\	88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\

89 }\	89 }\

90 \	90 \

91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

92 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\	92 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\

93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\	93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\

94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\	94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\

95 }\	95 }\

96 \	96 \

97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

98 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\	98 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\

99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\	99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\

100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\	100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\

101 }\	101 }\

102 \	102 \

103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\	104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\

105 }\	105 }\

106 \	106 \

107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

108 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\	108 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\

109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\	109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\

110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\	110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\

111 }\	111 }\

112 \	112 \

113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

114 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	114 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

115 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	115 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\	116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\

117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\	117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\

118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\	118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\

119 }\	119 }\

120 \	120 \

121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

122 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	122 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

123 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	123 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\	124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\

125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\	125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\

126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\	126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\

127 }\	127 }\

128 \	128 \

129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

130 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	130 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

131 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	131 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\	132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\

133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\	133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\

134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\	134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\

135 }\	135 }\

136 \	136 \

137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

138 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	138 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

139 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	139 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\	140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\

141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\	141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\

142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\	142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\

143 }\	143 }\

144 \	144 \

145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

146 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\	146 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\

147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\	147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\

148 }\	148 }\

149 \	149 \

150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

151 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	151 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

152 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\	152 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\

153 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\	153 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\

154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\	154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\

155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\	155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\

156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\	156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\

157 }\	157 }\

158 \	158 \

159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

160 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\	160 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\

161 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\	161 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\

162 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\	162 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\

163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\	163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\

164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\	164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\

165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\	165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\

166 }\	166 }\

167 \	167 \

168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

169 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	169 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

170 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\	170 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\

171 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\	171 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\

172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\	172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\

173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\	173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\

174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\	174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\

175 }\	175 }\

176 \	176 \

177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\	177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\

178 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\	178 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\

179 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\	179 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\

180 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\	180 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\

181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\	181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\

182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\	182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\

183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\	183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\

184 }\	184 }\

185	185

186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,	186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,

187 const uint8_t * src2, int dst_stride,	187 const uint8_t * src2, int dst_stride,

188 int src_stride1, int h)	188 int src_stride1, int h)

189 {	189 {

190 int i;	190 int i;

(...skipping 233 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\	424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\

425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \	425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \

426 lv = vec_sel( lv, bodyv, edgelv ); \	426 lv = vec_sel( lv, bodyv, edgelv ); \

427 vec_st( lv, 7, dest ); \	427 vec_st( lv, 7, dest ); \

428 hv = vec_ld( 0, dest ); \	428 hv = vec_ld( 0, dest ); \

429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \	429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \

430 hv = vec_sel( hv, bodyv, edgehv ); \	430 hv = vec_sel( hv, bodyv, edgehv ); \

431 vec_st( hv, 0, dest ); \	431 vec_st( hv, 0, dest ); \

432 }	432 }

433	433

434 void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {	434 static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {

435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;	435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;

436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;	436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;

437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;	437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

438	438

439 vec_u8 perm_ldv = vec_lvsl(0, dst);	439 vec_u8 perm_ldv = vec_lvsl(0, dst);

440 vec_u8 perm_stv = vec_lvsr(8, dst);	440 vec_u8 perm_stv = vec_lvsr(8, dst);

441	441

442 const vec_u16 onev = vec_splat_u16(1);	442 const vec_u16 onev = vec_splat_u16(1);

443 const vec_u16 twov = vec_splat_u16(2);	443 const vec_u16 twov = vec_splat_u16(2);

444 const vec_u16 sixv = vec_splat_u16(6);	444 const vec_u16 sixv = vec_splat_u16(6);

(...skipping 28 matching lines...) Expand all Loading...
473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);	473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);

474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);	474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);

475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);	475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);

476 }	476 }

477	477

478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)	478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)

479 {	479 {

480 vec_s16 dc16;	480 vec_s16 dc16;

481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;	481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;

482 LOAD_ZERO;	482 LOAD_ZERO;

483 DECLARE_ALIGNED_16(int, dc);	483 DECLARE_ALIGNED(16, int, dc);

484 int i;	484 int i;

485	485

486 dc = (block[0] + 32) >> 6;	486 dc = (block[0] + 32) >> 6;

487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);	487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);

488	488

489 if (size == 4)	489 if (size == 4)

490 dc16 = vec_sld(dc16, zero_s16v, 8);	490 dc16 = vec_sld(dc16, zero_s16v, 8);

491 dcplus = vec_packsu(dc16, zero_s16v);	491 dcplus = vec_packsu(dc16, zero_s16v);

492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);	492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);

493	493

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
583 \	583 \

584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \	584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \

585 r1 = vec_mergel(r4, r6); /*all set 1*/ \	585 r1 = vec_mergel(r4, r6); /*all set 1*/ \

586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \	586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \

587 r3 = vec_mergel(r5, r7); /*all set 3*/ \	587 r3 = vec_mergel(r5, r7); /*all set 3*/ \

588 }	588 }

589	589

590 static inline void write16x4(uint8_t *dst, int dst_stride,	590 static inline void write16x4(uint8_t *dst, int dst_stride,

591 register vec_u8 r0, register vec_u8 r1,	591 register vec_u8 r0, register vec_u8 r1,

592 register vec_u8 r2, register vec_u8 r3) {	592 register vec_u8 r2, register vec_u8 r3) {

593 DECLARE_ALIGNED_16(unsigned char, result)[64];	593 DECLARE_ALIGNED(16, unsigned char, result)[64];

594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;	594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;

595 int int_dst_stride = dst_stride/4;	595 int int_dst_stride = dst_stride/4;

596	596

597 vec_st(r0, 0, result);	597 vec_st(r0, 0, result);

598 vec_st(r1, 16, result);	598 vec_st(r1, 16, result);

599 vec_st(r2, 32, result);	599 vec_st(r2, 32, result);

600 vec_st(r3, 48, result);	600 vec_st(r3, 48, result);

601 /* FIXME: there has to be a better way!!!! */	601 /* FIXME: there has to be a better way!!!! */

602 *dst_int = *src_int;	602 *dst_int = *src_int;

603 *(dst_int+ int_dst_stride) = *(src_int + 1);	603 *(dst_int+ int_dst_stride) = *(src_int + 1);

(...skipping 159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
763 delta = vec_subs(stage2, vec160); /* d */ \	763 delta = vec_subs(stage2, vec160); /* d */ \

764 deltaneg = vec_min(tc0masked, deltaneg); \	764 deltaneg = vec_min(tc0masked, deltaneg); \

765 delta = vec_min(tc0masked, delta); \	765 delta = vec_min(tc0masked, delta); \

766 p0 = vec_subs(p0, deltaneg); \	766 p0 = vec_subs(p0, deltaneg); \

767 q0 = vec_subs(q0, delta); \	767 q0 = vec_subs(q0, delta); \

768 p0 = vec_adds(p0, delta); \	768 p0 = vec_adds(p0, delta); \

769 q0 = vec_adds(q0, deltaneg); \	769 q0 = vec_adds(q0, deltaneg); \

770 }	770 }

771	771

772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \	772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \

773 DECLARE_ALIGNED_16(unsigned char, temp)[16]; \	773 DECLARE_ALIGNED(16, unsigned char, temp)[16]; \

774 register vec_u8 alphavec; \	774 register vec_u8 alphavec; \

775 register vec_u8 betavec; \	775 register vec_u8 betavec; \

776 register vec_u8 mask; \	776 register vec_u8 mask; \

777 register vec_u8 p1mask; \	777 register vec_u8 p1mask; \

778 register vec_u8 q1mask; \	778 register vec_u8 q1mask; \

779 register vector signed char tc0vec; \	779 register vector signed char tc0vec; \

780 register vec_u8 finaltc0; \	780 register vec_u8 finaltc0; \

781 register vec_u8 tc0masked; \	781 register vec_u8 tc0masked; \

782 register vec_u8 newp1; \	782 register vec_u8 newp1; \

783 register vec_u8 newq1; \	783 register vec_u8 newq1; \

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
843 write16x4(pix-2, stride, line1, line2, line3, line4);	843 write16x4(pix-2, stride, line1, line2, line3, line4);

844 }	844 }

845	845

846 static av_always_inline	846 static av_always_inline

847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)	847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)

848 {	848 {

849 int y, aligned;	849 int y, aligned;

850 vec_u8 vblock;	850 vec_u8 vblock;

851 vec_s16 vtemp, vweight, voffset, v0, v1;	851 vec_s16 vtemp, vweight, voffset, v0, v1;

852 vec_u16 vlog2_denom;	852 vec_u16 vlog2_denom;

853 DECLARE_ALIGNED_16(int32_t, temp)[4];	853 DECLARE_ALIGNED(16, int32_t, temp)[4];

854 LOAD_ZERO;	854 LOAD_ZERO;

855	855

856 offset <<= log2_denom;	856 offset <<= log2_denom;

857 if(log2_denom) offset += 1<<(log2_denom-1);	857 if(log2_denom) offset += 1<<(log2_denom-1);

858 temp[0] = log2_denom;	858 temp[0] = log2_denom;

859 temp[1] = weight;	859 temp[1] = weight;

860 temp[2] = offset;	860 temp[2] = offset;

861	861

862 vtemp = (vec_s16)vec_ld(0, temp);	862 vtemp = (vec_s16)vec_ld(0, temp);

863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);	863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);

(...skipping 25 matching lines...) Expand all Loading...
889 }	889 }

890	890

891 static av_always_inline	891 static av_always_inline

892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,	892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,

893 int weightd, int weights, int offset, int w, int h)	893 int weightd, int weights, int offset, int w, int h)

894 {	894 {

895 int y, dst_aligned, src_aligned;	895 int y, dst_aligned, src_aligned;

896 vec_u8 vsrc, vdst;	896 vec_u8 vsrc, vdst;

897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;	897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;

898 vec_u16 vlog2_denom;	898 vec_u16 vlog2_denom;

899 DECLARE_ALIGNED_16(int32_t, temp)[4];	899 DECLARE_ALIGNED(16, int32_t, temp)[4];

900 LOAD_ZERO;	900 LOAD_ZERO;

901	901

902 offset = ((offset + 1) | 1) << log2_denom;	902 offset = ((offset + 1) | 1) << log2_denom;

903 temp[0] = log2_denom+1;	903 temp[0] = log2_denom+1;

904 temp[1] = weights;	904 temp[1] = weights;

905 temp[2] = weightd;	905 temp[2] = weightd;

906 temp[3] = offset;	906 temp[3] = offset;

907	907

908 vtemp = (vec_s16)vec_ld(0, temp);	908 vtemp = (vec_s16)vec_ld(0, temp);

909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);	909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1);

(...skipping 102 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;	1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;

1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;	1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;

1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;	1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;

1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;	1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;

1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;	1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;

1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;	1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;

1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;	1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;

1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;	1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;

1020 }	1020 }

1021 }	1021 }

OLD	NEW