source/libvpx/vp8/encoder/x86/ssim_opt.asm - Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga).

Side by Side Diff: source/libvpx/vp8/encoder/x86/ssim_opt.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: '' Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

11 %include "vpx_ports/x86_abi_support.asm"	11 %include "vpx_ports/x86_abi_support.asm"

12	12

13 ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr	13 ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr

14 %macro TABULATE_SSIM 0	14 %macro TABULATE_SSIM 0

15 paddusw xmm15, xmm3 ; sum_s	15 paddusw xmm15, xmm3 ; sum_s

16 paddusw xmm14, xmm4 ; sum_r	16 paddusw xmm14, xmm4 ; sum_r

17 movdqa xmm1, xmm3	17 movdqa xmm1, xmm3

18 pmaddwd xmm1, xmm1	18 pmaddwd xmm1, xmm1

19 paddq xmm13, xmm1 ; sum_sq_s	19 paddd xmm13, xmm1 ; sum_sq_s

20 movdqa xmm2, xmm4	20 movdqa xmm2, xmm4

21 pmaddwd xmm2, xmm2	21 pmaddwd xmm2, xmm2

22 paddq xmm12, xmm2 ; sum_sq_r	22 paddd xmm12, xmm2 ; sum_sq_r

23 pmaddwd xmm3, xmm4	23 pmaddwd xmm3, xmm4

24 paddq xmm11, xmm3 ; sum_sxr	24 paddd xmm11, xmm3 ; sum_sxr

25 %endmacro	25 %endmacro

26	26

27 ; Sum across the register %1 starting with q words	27 ; Sum across the register %1 starting with q words

28 %macro SUM_ACROSS_Q 1	28 %macro SUM_ACROSS_Q 1

29 movdqa xmm2,%1	29 movdqa xmm2,%1

30 punpckldq %1,xmm0	30 punpckldq %1,xmm0

31 punpckhdq xmm2,xmm0	31 punpckhdq xmm2,xmm0

32 paddq %1,xmm2	32 paddq %1,xmm2

33 movdqa xmm2,%1	33 movdqa xmm2,%1

34 punpcklqdq %1,xmm0	34 punpcklqdq %1,xmm0

(...skipping 24 matching lines...) Expand all Loading...
59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2	59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2

60 ; without too much hassle, and can probably do better estimates with psadbw	60 ; without too much hassle, and can probably do better estimates with psadbw

61 ; or pavgb. At this point this is just meant to be a first pass for calculating	61 ; or pavgb. At this point this is just meant to be a first pass for calculating

62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion	62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion

63 ; in mode selection code.	63 ; in mode selection code.

64 global sym(vp8_ssim_parms_16x16_sse3)	64 global sym(vp8_ssim_parms_16x16_sse3)

65 sym(vp8_ssim_parms_16x16_sse3):	65 sym(vp8_ssim_parms_16x16_sse3):

66 push rbp	66 push rbp

67 mov rbp, rsp	67 mov rbp, rsp

68 SHADOW_ARGS_TO_STACK 9	68 SHADOW_ARGS_TO_STACK 9

	69 SAVE_XMM 15

69 push rsi	70 push rsi

70 push rdi	71 push rdi

71 ; end prolog	72 ; end prolog

72	73

73 mov rsi, arg(0) ;s	74 mov rsi, arg(0) ;s

74 mov rcx, arg(1) ;sp	75 mov rcx, arg(1) ;sp

75 mov rdi, arg(2) ;r	76 mov rdi, arg(2) ;r

76 mov rax, arg(3) ;rp	77 mov rax, arg(3) ;rp

77	78

78 pxor xmm0, xmm0	79 pxor xmm0, xmm0

(...skipping 29 matching lines...) Expand all Loading...
108 dec rdx ; counter	109 dec rdx ; counter

109 jnz NextRow	110 jnz NextRow

110	111

111 SUM_ACROSS_W xmm15	112 SUM_ACROSS_W xmm15

112 SUM_ACROSS_W xmm14	113 SUM_ACROSS_W xmm14

113 SUM_ACROSS_Q xmm13	114 SUM_ACROSS_Q xmm13

114 SUM_ACROSS_Q xmm12	115 SUM_ACROSS_Q xmm12

115 SUM_ACROSS_Q xmm11	116 SUM_ACROSS_Q xmm11

116	117

117 mov rdi,arg(4)	118 mov rdi,arg(4)

118 movq [rdi], xmm15;	119 movd [rdi], xmm15;

119 mov rdi,arg(5)	120 mov rdi,arg(5)

120 movq [rdi], xmm14;	121 movd [rdi], xmm14;

121 mov rdi,arg(6)	122 mov rdi,arg(6)

122 movq [rdi], xmm13;	123 movd [rdi], xmm13;

123 mov rdi,arg(7)	124 mov rdi,arg(7)

124 movq [rdi], xmm12;	125 movd [rdi], xmm12;

125 mov rdi,arg(8)	126 mov rdi,arg(8)

126 movq [rdi], xmm11;	127 movd [rdi], xmm11;

127	128

128 ; begin epilog	129 ; begin epilog

129 pop rdi	130 pop rdi

130 pop rsi	131 pop rsi

	132 RESTORE_XMM

131 UNSHADOW_ARGS	133 UNSHADOW_ARGS

132 pop rbp	134 pop rbp

133 ret	135 ret

134	136

135 ;void ssim_parms_sse3(	137 ;void ssim_parms_sse3(

136 ; unsigned char *s,	138 ; unsigned char *s,

137 ; int sp,	139 ; int sp,

138 ; unsigned char *r,	140 ; unsigned char *r,

139 ; int rp,	141 ; int rp,

140 ; unsigned long *sum_s,	142 ; unsigned long *sum_s,

141 ; unsigned long *sum_r,	143 ; unsigned long *sum_r,

142 ; unsigned long *sum_sq_s,	144 ; unsigned long *sum_sq_s,

143 ; unsigned long *sum_sq_r,	145 ; unsigned long *sum_sq_r,

144 ; unsigned long *sum_sxr);	146 ; unsigned long *sum_sxr);

145 ;	147 ;

146 ; TODO: Use parm passing through structure, probably don't need the pxors	148 ; TODO: Use parm passing through structure, probably don't need the pxors

147 ; ( calling app will initialize to 0 ) could easily fit everything in sse2	149 ; ( calling app will initialize to 0 ) could easily fit everything in sse2

148 ; without too much hassle, and can probably do better estimates with psadbw	150 ; without too much hassle, and can probably do better estimates with psadbw

149 ; or pavgb. At this point this is just meant to be a first pass for calculating	151 ; or pavgb. At this point this is just meant to be a first pass for calculating

150 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion	152 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion

151 ; in mode selection code.	153 ; in mode selection code.

152 global sym(vp8_ssim_parms_8x8_sse3)	154 global sym(vp8_ssim_parms_8x8_sse3)

153 sym(vp8_ssim_parms_8x8_sse3):	155 sym(vp8_ssim_parms_8x8_sse3):

154 push rbp	156 push rbp

155 mov rbp, rsp	157 mov rbp, rsp

156 SHADOW_ARGS_TO_STACK 9	158 SHADOW_ARGS_TO_STACK 9

	159 SAVE_XMM 15

157 push rsi	160 push rsi

158 push rdi	161 push rdi

159 ; end prolog	162 ; end prolog

160	163

161 mov rsi, arg(0) ;s	164 mov rsi, arg(0) ;s

162 mov rcx, arg(1) ;sp	165 mov rcx, arg(1) ;sp

163 mov rdi, arg(2) ;r	166 mov rdi, arg(2) ;r

164 mov rax, arg(3) ;rp	167 mov rax, arg(3) ;rp

165	168

166 pxor xmm0, xmm0	169 pxor xmm0, xmm0

167 pxor xmm15,xmm15 ;sum_s	170 pxor xmm15,xmm15 ;sum_s

168 pxor xmm14,xmm14 ;sum_r	171 pxor xmm14,xmm14 ;sum_r

169 pxor xmm13,xmm13 ;sum_sq_s	172 pxor xmm13,xmm13 ;sum_sq_s

170 pxor xmm12,xmm12 ;sum_sq_r	173 pxor xmm12,xmm12 ;sum_sq_r

171 pxor xmm11,xmm11 ;sum_sxr	174 pxor xmm11,xmm11 ;sum_sxr

172	175

173 mov rdx, 8 ;row counter	176 mov rdx, 8 ;row counter

174 NextRow2:	177 NextRow2:

175	178

176 ;grab source and reference pixels	179 ;grab source and reference pixels

177 movq xmm5, [rsi]	180 movq xmm3, [rsi]

178 movq xmm6, [rdi]	181 movq xmm4, [rdi]

179

180 movdqa xmm3, xmm5

181 movdqa xmm4, xmm6

182 punpcklbw xmm3, xmm0 ; low_s	182 punpcklbw xmm3, xmm0 ; low_s

183 punpcklbw xmm4, xmm0 ; low_r	183 punpcklbw xmm4, xmm0 ; low_r

184	184

185 TABULATE_SSIM	185 TABULATE_SSIM

186	186

187 add rsi, rcx ; next s row	187 add rsi, rcx ; next s row

188 add rdi, rax ; next r row	188 add rdi, rax ; next r row

189	189

190 dec rdx ; counter	190 dec rdx ; counter

191 jnz NextRow2	191 jnz NextRow2

192	192

193 SUM_ACROSS_W xmm15	193 SUM_ACROSS_W xmm15

194 SUM_ACROSS_W xmm14	194 SUM_ACROSS_W xmm14

195 SUM_ACROSS_Q xmm13	195 SUM_ACROSS_Q xmm13

196 SUM_ACROSS_Q xmm12	196 SUM_ACROSS_Q xmm12

197 SUM_ACROSS_Q xmm11	197 SUM_ACROSS_Q xmm11

198	198

199 mov rdi,arg(4)	199 mov rdi,arg(4)

200 movq [rdi], xmm15;	200 movd [rdi], xmm15;

201 mov rdi,arg(5)	201 mov rdi,arg(5)

202 movq [rdi], xmm14;	202 movd [rdi], xmm14;

203 mov rdi,arg(6)	203 mov rdi,arg(6)

204 movq [rdi], xmm13;	204 movd [rdi], xmm13;

205 mov rdi,arg(7)	205 mov rdi,arg(7)

206 movq [rdi], xmm12;	206 movd [rdi], xmm12;

207 mov rdi,arg(8)	207 mov rdi,arg(8)

208 movq [rdi], xmm11;	208 movd [rdi], xmm11;

209	209

210 ; begin epilog	210 ; begin epilog

211 pop rdi	211 pop rdi

212 pop rsi	212 pop rsi

	213 RESTORE_XMM

213 UNSHADOW_ARGS	214 UNSHADOW_ARGS

214 pop rbp	215 pop rbp

215 ret	216 ret

OLD	NEW

« no previous file with comments | « source/libvpx/vp8/encoder/x86/sad_ssse3.asm ('k') | source/libvpx/vp8/encoder/x86/subtract_sse2.asm » ('j') | no next file with comments »