Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: source/libvpx/vpx_dsp/x86/sad_ssse3.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/x86/sad_sse4.asm ('k') | source/libvpx/vpx_mem/include/vpx_mem_intrnl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
139 139
140 PROCESS_16X2X3_OFFSET 1, %1 140 PROCESS_16X2X3_OFFSET 1, %1
141 PROCESS_16X2X3_OFFSET 0, %1 141 PROCESS_16X2X3_OFFSET 0, %1
142 PROCESS_16X2X3_OFFSET 0, %1 142 PROCESS_16X2X3_OFFSET 0, %1
143 PROCESS_16X2X3_OFFSET 0, %1 143 PROCESS_16X2X3_OFFSET 0, %1
144 144
145 jmp %2_store_off 145 jmp %2_store_off
146 146
147 %endmacro 147 %endmacro
148 148
149 ;void int vp8_sad16x16x3_ssse3( 149 ;void int vpx_sad16x16x3_ssse3(
150 ; unsigned char *src_ptr, 150 ; unsigned char *src_ptr,
151 ; int src_stride, 151 ; int src_stride,
152 ; unsigned char *ref_ptr, 152 ; unsigned char *ref_ptr,
153 ; int ref_stride, 153 ; int ref_stride,
154 ; int *results) 154 ; int *results)
155 global sym(vp8_sad16x16x3_ssse3) PRIVATE 155 global sym(vpx_sad16x16x3_ssse3) PRIVATE
156 sym(vp8_sad16x16x3_ssse3): 156 sym(vpx_sad16x16x3_ssse3):
157 push rbp 157 push rbp
158 mov rbp, rsp 158 mov rbp, rsp
159 SHADOW_ARGS_TO_STACK 5 159 SHADOW_ARGS_TO_STACK 5
160 SAVE_XMM 7 160 SAVE_XMM 7
161 push rsi 161 push rsi
162 push rdi 162 push rdi
163 push rcx 163 push rcx
164 ; end prolog 164 ; end prolog
165 165
166 mov rsi, arg(0) ;src_ptr 166 mov rsi, arg(0) ;src_ptr
167 mov rdi, arg(2) ;ref_ptr 167 mov rdi, arg(2) ;ref_ptr
168 168
169 mov rdx, 0xf 169 mov rdx, 0xf
170 and rdx, rdi 170 and rdx, rdi
171 171
172 jmp .vp8_sad16x16x3_ssse3_skiptable 172 jmp .vpx_sad16x16x3_ssse3_skiptable
173 .vp8_sad16x16x3_ssse3_jumptable: 173 .vpx_sad16x16x3_ssse3_jumptable:
174 dd .vp8_sad16x16x3_ssse3_aligned_by_0 - .vp8_sad16x16x3_ssse3_do_jump 174 dd .vpx_sad16x16x3_ssse3_aligned_by_0 - .vpx_sad16x16x3_ssse3_do_jump
175 dd .vp8_sad16x16x3_ssse3_aligned_by_1 - .vp8_sad16x16x3_ssse3_do_jump 175 dd .vpx_sad16x16x3_ssse3_aligned_by_1 - .vpx_sad16x16x3_ssse3_do_jump
176 dd .vp8_sad16x16x3_ssse3_aligned_by_2 - .vp8_sad16x16x3_ssse3_do_jump 176 dd .vpx_sad16x16x3_ssse3_aligned_by_2 - .vpx_sad16x16x3_ssse3_do_jump
177 dd .vp8_sad16x16x3_ssse3_aligned_by_3 - .vp8_sad16x16x3_ssse3_do_jump 177 dd .vpx_sad16x16x3_ssse3_aligned_by_3 - .vpx_sad16x16x3_ssse3_do_jump
178 dd .vp8_sad16x16x3_ssse3_aligned_by_4 - .vp8_sad16x16x3_ssse3_do_jump 178 dd .vpx_sad16x16x3_ssse3_aligned_by_4 - .vpx_sad16x16x3_ssse3_do_jump
179 dd .vp8_sad16x16x3_ssse3_aligned_by_5 - .vp8_sad16x16x3_ssse3_do_jump 179 dd .vpx_sad16x16x3_ssse3_aligned_by_5 - .vpx_sad16x16x3_ssse3_do_jump
180 dd .vp8_sad16x16x3_ssse3_aligned_by_6 - .vp8_sad16x16x3_ssse3_do_jump 180 dd .vpx_sad16x16x3_ssse3_aligned_by_6 - .vpx_sad16x16x3_ssse3_do_jump
181 dd .vp8_sad16x16x3_ssse3_aligned_by_7 - .vp8_sad16x16x3_ssse3_do_jump 181 dd .vpx_sad16x16x3_ssse3_aligned_by_7 - .vpx_sad16x16x3_ssse3_do_jump
182 dd .vp8_sad16x16x3_ssse3_aligned_by_8 - .vp8_sad16x16x3_ssse3_do_jump 182 dd .vpx_sad16x16x3_ssse3_aligned_by_8 - .vpx_sad16x16x3_ssse3_do_jump
183 dd .vp8_sad16x16x3_ssse3_aligned_by_9 - .vp8_sad16x16x3_ssse3_do_jump 183 dd .vpx_sad16x16x3_ssse3_aligned_by_9 - .vpx_sad16x16x3_ssse3_do_jump
184 dd .vp8_sad16x16x3_ssse3_aligned_by_10 - .vp8_sad16x16x3_ssse3_do_jump 184 dd .vpx_sad16x16x3_ssse3_aligned_by_10 - .vpx_sad16x16x3_ssse3_do_jump
185 dd .vp8_sad16x16x3_ssse3_aligned_by_11 - .vp8_sad16x16x3_ssse3_do_jump 185 dd .vpx_sad16x16x3_ssse3_aligned_by_11 - .vpx_sad16x16x3_ssse3_do_jump
186 dd .vp8_sad16x16x3_ssse3_aligned_by_12 - .vp8_sad16x16x3_ssse3_do_jump 186 dd .vpx_sad16x16x3_ssse3_aligned_by_12 - .vpx_sad16x16x3_ssse3_do_jump
187 dd .vp8_sad16x16x3_ssse3_aligned_by_13 - .vp8_sad16x16x3_ssse3_do_jump 187 dd .vpx_sad16x16x3_ssse3_aligned_by_13 - .vpx_sad16x16x3_ssse3_do_jump
188 dd .vp8_sad16x16x3_ssse3_aligned_by_14 - .vp8_sad16x16x3_ssse3_do_jump 188 dd .vpx_sad16x16x3_ssse3_aligned_by_14 - .vpx_sad16x16x3_ssse3_do_jump
189 dd .vp8_sad16x16x3_ssse3_aligned_by_15 - .vp8_sad16x16x3_ssse3_do_jump 189 dd .vpx_sad16x16x3_ssse3_aligned_by_15 - .vpx_sad16x16x3_ssse3_do_jump
190 .vp8_sad16x16x3_ssse3_skiptable: 190 .vpx_sad16x16x3_ssse3_skiptable:
191 191
192 call .vp8_sad16x16x3_ssse3_do_jump 192 call .vpx_sad16x16x3_ssse3_do_jump
193 .vp8_sad16x16x3_ssse3_do_jump: 193 .vpx_sad16x16x3_ssse3_do_jump:
194 pop rcx ; get the address of do_jump 194 pop rcx ; get the address of do_jump
195 mov rax, .vp8_sad16x16x3_ssse3_jumptable - .vp8_sad16x16x3_ssse3_do_jump 195 mov rax, .vpx_sad16x16x3_ssse3_jumptable - .vpx_sad16x16x3_ssse3_do_jump
196 add rax, rcx ; get the absolute address of vp8_sad16x16x3_ssse3_jumptable 196 add rax, rcx ; get the absolute address of vpx_sad16x16x3_ssse3_jumptable
197 197
198 movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable 198 movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
199 add rcx, rax 199 add rcx, rax
200 200
201 movsxd rax, dword ptr arg(1) ;src_stride 201 movsxd rax, dword ptr arg(1) ;src_stride
202 movsxd rdx, dword ptr arg(3) ;ref_stride 202 movsxd rdx, dword ptr arg(3) ;ref_stride
203 203
204 jmp rcx 204 jmp rcx
205 205
206 PROCESS_16X16X3_OFFSET 0, .vp8_sad16x16x3_ssse3 206 PROCESS_16X16X3_OFFSET 0, .vpx_sad16x16x3_ssse3
207 PROCESS_16X16X3_OFFSET 1, .vp8_sad16x16x3_ssse3 207 PROCESS_16X16X3_OFFSET 1, .vpx_sad16x16x3_ssse3
208 PROCESS_16X16X3_OFFSET 2, .vp8_sad16x16x3_ssse3 208 PROCESS_16X16X3_OFFSET 2, .vpx_sad16x16x3_ssse3
209 PROCESS_16X16X3_OFFSET 3, .vp8_sad16x16x3_ssse3 209 PROCESS_16X16X3_OFFSET 3, .vpx_sad16x16x3_ssse3
210 PROCESS_16X16X3_OFFSET 4, .vp8_sad16x16x3_ssse3 210 PROCESS_16X16X3_OFFSET 4, .vpx_sad16x16x3_ssse3
211 PROCESS_16X16X3_OFFSET 5, .vp8_sad16x16x3_ssse3 211 PROCESS_16X16X3_OFFSET 5, .vpx_sad16x16x3_ssse3
212 PROCESS_16X16X3_OFFSET 6, .vp8_sad16x16x3_ssse3 212 PROCESS_16X16X3_OFFSET 6, .vpx_sad16x16x3_ssse3
213 PROCESS_16X16X3_OFFSET 7, .vp8_sad16x16x3_ssse3 213 PROCESS_16X16X3_OFFSET 7, .vpx_sad16x16x3_ssse3
214 PROCESS_16X16X3_OFFSET 8, .vp8_sad16x16x3_ssse3 214 PROCESS_16X16X3_OFFSET 8, .vpx_sad16x16x3_ssse3
215 PROCESS_16X16X3_OFFSET 9, .vp8_sad16x16x3_ssse3 215 PROCESS_16X16X3_OFFSET 9, .vpx_sad16x16x3_ssse3
216 PROCESS_16X16X3_OFFSET 10, .vp8_sad16x16x3_ssse3 216 PROCESS_16X16X3_OFFSET 10, .vpx_sad16x16x3_ssse3
217 PROCESS_16X16X3_OFFSET 11, .vp8_sad16x16x3_ssse3 217 PROCESS_16X16X3_OFFSET 11, .vpx_sad16x16x3_ssse3
218 PROCESS_16X16X3_OFFSET 12, .vp8_sad16x16x3_ssse3 218 PROCESS_16X16X3_OFFSET 12, .vpx_sad16x16x3_ssse3
219 PROCESS_16X16X3_OFFSET 13, .vp8_sad16x16x3_ssse3 219 PROCESS_16X16X3_OFFSET 13, .vpx_sad16x16x3_ssse3
220 PROCESS_16X16X3_OFFSET 14, .vp8_sad16x16x3_ssse3 220 PROCESS_16X16X3_OFFSET 14, .vpx_sad16x16x3_ssse3
221 221
222 .vp8_sad16x16x3_ssse3_aligned_by_15: 222 .vpx_sad16x16x3_ssse3_aligned_by_15:
223 PROCESS_16X2X3 1 223 PROCESS_16X2X3 1
224 PROCESS_16X2X3 0 224 PROCESS_16X2X3 0
225 PROCESS_16X2X3 0 225 PROCESS_16X2X3 0
226 PROCESS_16X2X3 0 226 PROCESS_16X2X3 0
227 PROCESS_16X2X3 0 227 PROCESS_16X2X3 0
228 PROCESS_16X2X3 0 228 PROCESS_16X2X3 0
229 PROCESS_16X2X3 0 229 PROCESS_16X2X3 0
230 PROCESS_16X2X3 0 230 PROCESS_16X2X3 0
231 231
232 .vp8_sad16x16x3_ssse3_store_off: 232 .vpx_sad16x16x3_ssse3_store_off:
233 mov rdi, arg(4) ;Results 233 mov rdi, arg(4) ;Results
234 234
235 movq xmm0, xmm5 235 movq xmm0, xmm5
236 psrldq xmm5, 8 236 psrldq xmm5, 8
237 237
238 paddw xmm0, xmm5 238 paddw xmm0, xmm5
239 movd [rdi], xmm0 239 movd [rdi], xmm0
240 ;- 240 ;-
241 movq xmm0, xmm6 241 movq xmm0, xmm6
242 psrldq xmm6, 8 242 psrldq xmm6, 8
243 243
244 paddw xmm0, xmm6 244 paddw xmm0, xmm6
245 movd [rdi+4], xmm0 245 movd [rdi+4], xmm0
246 ;- 246 ;-
247 movq xmm0, xmm7 247 movq xmm0, xmm7
248 psrldq xmm7, 8 248 psrldq xmm7, 8
249 249
250 paddw xmm0, xmm7 250 paddw xmm0, xmm7
251 movd [rdi+8], xmm0 251 movd [rdi+8], xmm0
252 252
253 ; begin epilog 253 ; begin epilog
254 pop rcx 254 pop rcx
255 pop rdi 255 pop rdi
256 pop rsi 256 pop rsi
257 RESTORE_XMM 257 RESTORE_XMM
258 UNSHADOW_ARGS 258 UNSHADOW_ARGS
259 pop rbp 259 pop rbp
260 ret 260 ret
261 261
262 ;void int vp8_sad16x8x3_ssse3( 262 ;void int vpx_sad16x8x3_ssse3(
263 ; unsigned char *src_ptr, 263 ; unsigned char *src_ptr,
264 ; int src_stride, 264 ; int src_stride,
265 ; unsigned char *ref_ptr, 265 ; unsigned char *ref_ptr,
266 ; int ref_stride, 266 ; int ref_stride,
267 ; int *results) 267 ; int *results)
268 global sym(vp8_sad16x8x3_ssse3) PRIVATE 268 global sym(vpx_sad16x8x3_ssse3) PRIVATE
269 sym(vp8_sad16x8x3_ssse3): 269 sym(vpx_sad16x8x3_ssse3):
270 push rbp 270 push rbp
271 mov rbp, rsp 271 mov rbp, rsp
272 SHADOW_ARGS_TO_STACK 5 272 SHADOW_ARGS_TO_STACK 5
273 SAVE_XMM 7 273 SAVE_XMM 7
274 push rsi 274 push rsi
275 push rdi 275 push rdi
276 push rcx 276 push rcx
277 ; end prolog 277 ; end prolog
278 278
279 mov rsi, arg(0) ;src_ptr 279 mov rsi, arg(0) ;src_ptr
280 mov rdi, arg(2) ;ref_ptr 280 mov rdi, arg(2) ;ref_ptr
281 281
282 mov rdx, 0xf 282 mov rdx, 0xf
283 and rdx, rdi 283 and rdx, rdi
284 284
285 jmp .vp8_sad16x8x3_ssse3_skiptable 285 jmp .vpx_sad16x8x3_ssse3_skiptable
286 .vp8_sad16x8x3_ssse3_jumptable: 286 .vpx_sad16x8x3_ssse3_jumptable:
287 dd .vp8_sad16x8x3_ssse3_aligned_by_0 - .vp8_sad16x8x3_ssse3_do_jump 287 dd .vpx_sad16x8x3_ssse3_aligned_by_0 - .vpx_sad16x8x3_ssse3_do_jump
288 dd .vp8_sad16x8x3_ssse3_aligned_by_1 - .vp8_sad16x8x3_ssse3_do_jump 288 dd .vpx_sad16x8x3_ssse3_aligned_by_1 - .vpx_sad16x8x3_ssse3_do_jump
289 dd .vp8_sad16x8x3_ssse3_aligned_by_2 - .vp8_sad16x8x3_ssse3_do_jump 289 dd .vpx_sad16x8x3_ssse3_aligned_by_2 - .vpx_sad16x8x3_ssse3_do_jump
290 dd .vp8_sad16x8x3_ssse3_aligned_by_3 - .vp8_sad16x8x3_ssse3_do_jump 290 dd .vpx_sad16x8x3_ssse3_aligned_by_3 - .vpx_sad16x8x3_ssse3_do_jump
291 dd .vp8_sad16x8x3_ssse3_aligned_by_4 - .vp8_sad16x8x3_ssse3_do_jump 291 dd .vpx_sad16x8x3_ssse3_aligned_by_4 - .vpx_sad16x8x3_ssse3_do_jump
292 dd .vp8_sad16x8x3_ssse3_aligned_by_5 - .vp8_sad16x8x3_ssse3_do_jump 292 dd .vpx_sad16x8x3_ssse3_aligned_by_5 - .vpx_sad16x8x3_ssse3_do_jump
293 dd .vp8_sad16x8x3_ssse3_aligned_by_6 - .vp8_sad16x8x3_ssse3_do_jump 293 dd .vpx_sad16x8x3_ssse3_aligned_by_6 - .vpx_sad16x8x3_ssse3_do_jump
294 dd .vp8_sad16x8x3_ssse3_aligned_by_7 - .vp8_sad16x8x3_ssse3_do_jump 294 dd .vpx_sad16x8x3_ssse3_aligned_by_7 - .vpx_sad16x8x3_ssse3_do_jump
295 dd .vp8_sad16x8x3_ssse3_aligned_by_8 - .vp8_sad16x8x3_ssse3_do_jump 295 dd .vpx_sad16x8x3_ssse3_aligned_by_8 - .vpx_sad16x8x3_ssse3_do_jump
296 dd .vp8_sad16x8x3_ssse3_aligned_by_9 - .vp8_sad16x8x3_ssse3_do_jump 296 dd .vpx_sad16x8x3_ssse3_aligned_by_9 - .vpx_sad16x8x3_ssse3_do_jump
297 dd .vp8_sad16x8x3_ssse3_aligned_by_10 - .vp8_sad16x8x3_ssse3_do_jump 297 dd .vpx_sad16x8x3_ssse3_aligned_by_10 - .vpx_sad16x8x3_ssse3_do_jump
298 dd .vp8_sad16x8x3_ssse3_aligned_by_11 - .vp8_sad16x8x3_ssse3_do_jump 298 dd .vpx_sad16x8x3_ssse3_aligned_by_11 - .vpx_sad16x8x3_ssse3_do_jump
299 dd .vp8_sad16x8x3_ssse3_aligned_by_12 - .vp8_sad16x8x3_ssse3_do_jump 299 dd .vpx_sad16x8x3_ssse3_aligned_by_12 - .vpx_sad16x8x3_ssse3_do_jump
300 dd .vp8_sad16x8x3_ssse3_aligned_by_13 - .vp8_sad16x8x3_ssse3_do_jump 300 dd .vpx_sad16x8x3_ssse3_aligned_by_13 - .vpx_sad16x8x3_ssse3_do_jump
301 dd .vp8_sad16x8x3_ssse3_aligned_by_14 - .vp8_sad16x8x3_ssse3_do_jump 301 dd .vpx_sad16x8x3_ssse3_aligned_by_14 - .vpx_sad16x8x3_ssse3_do_jump
302 dd .vp8_sad16x8x3_ssse3_aligned_by_15 - .vp8_sad16x8x3_ssse3_do_jump 302 dd .vpx_sad16x8x3_ssse3_aligned_by_15 - .vpx_sad16x8x3_ssse3_do_jump
303 .vp8_sad16x8x3_ssse3_skiptable: 303 .vpx_sad16x8x3_ssse3_skiptable:
304 304
305 call .vp8_sad16x8x3_ssse3_do_jump 305 call .vpx_sad16x8x3_ssse3_do_jump
306 .vp8_sad16x8x3_ssse3_do_jump: 306 .vpx_sad16x8x3_ssse3_do_jump:
307 pop rcx ; get the address of do_jump 307 pop rcx ; get the address of do_jump
308 mov rax, .vp8_sad16x8x3_ssse3_jumptable - .vp8_sad16x8x3_ssse3_do_jump 308 mov rax, .vpx_sad16x8x3_ssse3_jumptable - .vpx_sad16x8x3_ssse3_do_jump
309 add rax, rcx ; get the absolute address of vp8_sad16x8x3_ssse3_jumptable 309 add rax, rcx ; get the absolute address of vpx_sad16x8x3_ssse3_jumptable
310 310
311 movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable 311 movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable
312 add rcx, rax 312 add rcx, rax
313 313
314 movsxd rax, dword ptr arg(1) ;src_stride 314 movsxd rax, dword ptr arg(1) ;src_stride
315 movsxd rdx, dword ptr arg(3) ;ref_stride 315 movsxd rdx, dword ptr arg(3) ;ref_stride
316 316
317 jmp rcx 317 jmp rcx
318 318
319 PROCESS_16X8X3_OFFSET 0, .vp8_sad16x8x3_ssse3 319 PROCESS_16X8X3_OFFSET 0, .vpx_sad16x8x3_ssse3
320 PROCESS_16X8X3_OFFSET 1, .vp8_sad16x8x3_ssse3 320 PROCESS_16X8X3_OFFSET 1, .vpx_sad16x8x3_ssse3
321 PROCESS_16X8X3_OFFSET 2, .vp8_sad16x8x3_ssse3 321 PROCESS_16X8X3_OFFSET 2, .vpx_sad16x8x3_ssse3
322 PROCESS_16X8X3_OFFSET 3, .vp8_sad16x8x3_ssse3 322 PROCESS_16X8X3_OFFSET 3, .vpx_sad16x8x3_ssse3
323 PROCESS_16X8X3_OFFSET 4, .vp8_sad16x8x3_ssse3 323 PROCESS_16X8X3_OFFSET 4, .vpx_sad16x8x3_ssse3
324 PROCESS_16X8X3_OFFSET 5, .vp8_sad16x8x3_ssse3 324 PROCESS_16X8X3_OFFSET 5, .vpx_sad16x8x3_ssse3
325 PROCESS_16X8X3_OFFSET 6, .vp8_sad16x8x3_ssse3 325 PROCESS_16X8X3_OFFSET 6, .vpx_sad16x8x3_ssse3
326 PROCESS_16X8X3_OFFSET 7, .vp8_sad16x8x3_ssse3 326 PROCESS_16X8X3_OFFSET 7, .vpx_sad16x8x3_ssse3
327 PROCESS_16X8X3_OFFSET 8, .vp8_sad16x8x3_ssse3 327 PROCESS_16X8X3_OFFSET 8, .vpx_sad16x8x3_ssse3
328 PROCESS_16X8X3_OFFSET 9, .vp8_sad16x8x3_ssse3 328 PROCESS_16X8X3_OFFSET 9, .vpx_sad16x8x3_ssse3
329 PROCESS_16X8X3_OFFSET 10, .vp8_sad16x8x3_ssse3 329 PROCESS_16X8X3_OFFSET 10, .vpx_sad16x8x3_ssse3
330 PROCESS_16X8X3_OFFSET 11, .vp8_sad16x8x3_ssse3 330 PROCESS_16X8X3_OFFSET 11, .vpx_sad16x8x3_ssse3
331 PROCESS_16X8X3_OFFSET 12, .vp8_sad16x8x3_ssse3 331 PROCESS_16X8X3_OFFSET 12, .vpx_sad16x8x3_ssse3
332 PROCESS_16X8X3_OFFSET 13, .vp8_sad16x8x3_ssse3 332 PROCESS_16X8X3_OFFSET 13, .vpx_sad16x8x3_ssse3
333 PROCESS_16X8X3_OFFSET 14, .vp8_sad16x8x3_ssse3 333 PROCESS_16X8X3_OFFSET 14, .vpx_sad16x8x3_ssse3
334 334
335 .vp8_sad16x8x3_ssse3_aligned_by_15: 335 .vpx_sad16x8x3_ssse3_aligned_by_15:
336 336
337 PROCESS_16X2X3 1 337 PROCESS_16X2X3 1
338 PROCESS_16X2X3 0 338 PROCESS_16X2X3 0
339 PROCESS_16X2X3 0 339 PROCESS_16X2X3 0
340 PROCESS_16X2X3 0 340 PROCESS_16X2X3 0
341 341
342 .vp8_sad16x8x3_ssse3_store_off: 342 .vpx_sad16x8x3_ssse3_store_off:
343 mov rdi, arg(4) ;Results 343 mov rdi, arg(4) ;Results
344 344
345 movq xmm0, xmm5 345 movq xmm0, xmm5
346 psrldq xmm5, 8 346 psrldq xmm5, 8
347 347
348 paddw xmm0, xmm5 348 paddw xmm0, xmm5
349 movd [rdi], xmm0 349 movd [rdi], xmm0
350 ;- 350 ;-
351 movq xmm0, xmm6 351 movq xmm0, xmm6
352 psrldq xmm6, 8 352 psrldq xmm6, 8
353 353
354 paddw xmm0, xmm6 354 paddw xmm0, xmm6
355 movd [rdi+4], xmm0 355 movd [rdi+4], xmm0
356 ;- 356 ;-
357 movq xmm0, xmm7 357 movq xmm0, xmm7
358 psrldq xmm7, 8 358 psrldq xmm7, 8
359 359
360 paddw xmm0, xmm7 360 paddw xmm0, xmm7
361 movd [rdi+8], xmm0 361 movd [rdi+8], xmm0
362 362
363 ; begin epilog 363 ; begin epilog
364 pop rcx 364 pop rcx
365 pop rdi 365 pop rdi
366 pop rsi 366 pop rsi
367 RESTORE_XMM 367 RESTORE_XMM
368 UNSHADOW_ARGS 368 UNSHADOW_ARGS
369 pop rbp 369 pop rbp
370 ret 370 ret
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/x86/sad_sse4.asm ('k') | source/libvpx/vpx_mem/include/vpx_mem_intrnl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698