/*
 * Copyright (c) 2008 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * SSSE3 optimized version of (put|avg)_h264_chroma_mc8.
 * H264_CHROMA_MC8_TMPL must be defined to the desired function name
 * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
 * AVG_OP must be defined to empty for put and the identity for avg
 */
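
/* Example instantiation (a sketch: the function and helper names below are
 * hypothetical, chosen for illustration). The including file must also
 * provide assert.h, the ff_pw_* constants, and the x86_reg type:
 *
 *   #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
 *   #define H264_CHROMA_MC8_MV0  put_pixels8_mmx
 *   #define AVG_OP(X)
 *   #include "h264_chroma_template_ssse3.c"   // hypothetical file name
 */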
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
{
    if(y==0 && x==0) {
        /* no filter needed */
        H264_CHROMA_MC8_MV0(dst, src, stride, h);
        return;
    }

    assert(x<8 && y<8 && x>=0 && y>=0);

    if(y==0 || x==0)
    {
        /* 1 dimensional filter only */
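        /* 255*(x+y)+8 == ((x+y)<<8) | (8-(x+y)): this packs the two filter
         * taps (8-(x+y), x+y) into adjacent bytes of one word, ready for
         * pmaddubsw; xmm6 holds the rounding constant added before the >>3. */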
        __asm__ volatile(
            "movd %0, %%xmm7 \n\t"
            "movq %1, %%xmm6 \n\t"
            "pshuflw $0, %%xmm7, %%xmm7 \n\t"
            "movlhps %%xmm6, %%xmm6 \n\t"
            "movlhps %%xmm7, %%xmm7 \n\t"
            :: "r"(255*(x+y)+8), "m"(*(rnd?&ff_pw_4.a:&ff_pw_3))
        );

        if(x) {
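            /* horizontal filter: interleave each pixel with its right
             * neighbour and apply the taps with pmaddubsw, two rows of
             * eight pixels per loop iteration */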
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq 1(%1), %%xmm1 \n\t"
                "movq (%1,%3), %%xmm2 \n\t"
                "movq 1(%1,%3), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
                AVG_OP("movq (%0), %%xmm4 \n\t")
                AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
                AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        } else {
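            /* vertical filter: interleave each row with the row below it
             * and apply the taps with pmaddubsw, two output rows per
             * loop iteration */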
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq (%1,%3), %%xmm1 \n\t"
                "movdqa %%xmm1, %%xmm2 \n\t"
                "movq (%1,%3,2), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
                AVG_OP("movq (%0), %%xmm4 \n\t")
                AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
                AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        }
        return;
    }

    /* general case, bilinear */
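    /* (x*255+8) == (x<<8) | (8-x); multiplying it by (8-y) and by y yields
     * the byte-packed bilinear tap pairs ((8-x)*(8-y), x*(8-y)) in xmm7 and
     * ((8-x)*y, x*y) in xmm6. xmm5 is the rounding constant for the >>6. */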
    __asm__ volatile(
        "movd %0, %%xmm7 \n\t"
        "movd %1, %%xmm6 \n\t"
        "movdqa %2, %%xmm5 \n\t"
        "pshuflw $0, %%xmm7, %%xmm7 \n\t"
        "pshuflw $0, %%xmm6, %%xmm6 \n\t"
        "movlhps %%xmm7, %%xmm7 \n\t"
        "movlhps %%xmm6, %%xmm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
    );

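    /* software-pipelined main loop: xmm0 carries the interleaved row pair
     * from the previous iteration, so each source row is loaded and
     * unpacked only once */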
    __asm__ volatile(
        "movq (%1), %%xmm0 \n\t"
        "movq 1(%1), %%xmm1 \n\t"
        "punpcklbw %%xmm1, %%xmm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movq (%1), %%xmm1 \n\t"
        "movq 1(%1), %%xmm2 \n\t"
        "movq (%1,%3), %%xmm3 \n\t"
        "movq 1(%1,%3), %%xmm4 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "punpcklbw %%xmm2, %%xmm1 \n\t"
        "punpcklbw %%xmm4, %%xmm3 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm3, %%xmm4 \n\t"
        "pmaddubsw %%xmm7, %%xmm0 \n\t"
        "pmaddubsw %%xmm6, %%xmm1 \n\t"
        "pmaddubsw %%xmm7, %%xmm2 \n\t"
        "pmaddubsw %%xmm6, %%xmm3 \n\t"
        "paddw %%xmm5, %%xmm0 \n\t"
        "paddw %%xmm5, %%xmm2 \n\t"
        "paddw %%xmm0, %%xmm1 \n\t"
        "paddw %%xmm2, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm0 \n\t"
        "psrlw $6, %%xmm1 \n\t"
        "psrlw $6, %%xmm3 \n\t"
        AVG_OP("movq (%0), %%xmm2 \n\t")
        AVG_OP("movhps (%0,%3), %%xmm2 \n\t")
        "packuswb %%xmm3, %%xmm1 \n\t"
        AVG_OP("pavgb %%xmm2, %%xmm1 \n\t")
        "movq %%xmm1, (%0) \n\t"
        "movhps %%xmm1, (%0,%3) \n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}

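/**
 * SSSE3 optimized version of (put|avg)_h264_chroma_mc4, analogous to the
 * MC8 template above. H264_CHROMA_MC4_TMPL must be defined to the desired
 * function name; AVG_OP is used as above. There is no rnd parameter: the
 * rounding constant is always ff_pw_32.
 */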
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    __asm__ volatile(
        "movd %0, %%mm7 \n\t"
        "movd %1, %%mm6 \n\t"
        "movq %2, %%mm5 \n\t"
        "pshufw $0, %%mm7, %%mm7 \n\t"
        "pshufw $0, %%mm6, %%mm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
    );

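    /* same software-pipelined bilinear scheme as the 8-pixel version,
     * but on 4-pixel rows in MMX registers */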
    __asm__ volatile(
        "movd (%1), %%mm0 \n\t"
        "punpcklbw 1(%1), %%mm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movd (%1), %%mm1 \n\t"
        "movd (%1,%3), %%mm3 \n\t"
        "punpcklbw 1(%1), %%mm1 \n\t"
        "punpcklbw 1(%1,%3), %%mm3 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "movq %%mm1, %%mm2 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pmaddubsw %%mm7, %%mm0 \n\t"
        "pmaddubsw %%mm6, %%mm1 \n\t"
        "pmaddubsw %%mm7, %%mm2 \n\t"
        "pmaddubsw %%mm6, %%mm3 \n\t"
        "paddw %%mm5, %%mm0 \n\t"
        "paddw %%mm5, %%mm2 \n\t"
        "paddw %%mm0, %%mm1 \n\t"
        "paddw %%mm2, %%mm3 \n\t"
        "movq %%mm4, %%mm0 \n\t"
        "psrlw $6, %%mm1 \n\t"
        "psrlw $6, %%mm3 \n\t"
        "packuswb %%mm1, %%mm1 \n\t"
        "packuswb %%mm3, %%mm3 \n\t"
        AVG_OP("pavgb (%0), %%mm1 \n\t")
        AVG_OP("pavgb (%0,%3), %%mm3 \n\t")
        "movd %%mm1, (%0) \n\t"
        "movd %%mm3, (%0,%3) \n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}
