Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(403)

Side by Side Diff: source/libvpx/vpx_dsp/x86/inv_txfm_sse2.asm

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 ;
2 ; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10 %include "third_party/x86inc/x86inc.asm"
11
12 SECTION .text
13
; REORDER_INPUTS: relabel the working registers m1-m3 so that the four values,
; which arrive in the order a, c, d, b, are named in the order a, b, c, d that
; TRANSFORM_COLS documents as its input layout. m0 (a) is left alone.
; NOTE(review): SWAP is defined in x86inc.asm, not in this file; presumably it
; only permutes the assembler's register-name mapping and emits no instructions
; -- confirm against x86inc.
14 %macro REORDER_INPUTS 0
15 ; a c d b to a b c d  (contents of m0..m3 before -> after)
16 SWAP 1, 3, 2
17 %endmacro
18
; TRANSFORM_COLS: one 1-D pass of the transform over the values a, b, c, d
; held in m0..m3, operating on 16-bit lanes.
; Only the low four words of each register are treated as data: the widening
; step below uses punpcklwd, which reads the low halves only.
; Scratch: m4 and m5 are overwritten, and the trailing SWAPs change which
; register the names m1/m2/m4/m5 refer to.
19 %macro TRANSFORM_COLS 0
20 ; input:
21 ; m0 a
22 ; m1 b
23 ; m2 c
24 ; m3 d
25 paddw m0, m2 ; a += c
26 psubw m3, m1 ; d -= b
27
28 ; wide subtract: e = (a - d) >> 1, done in 32-bit lanes so a - d cannot wrap at 16 bits
29 punpcklwd m4, m0 ; words of a go to the upper half of each dword; lower halves are stale m4 data
30 punpcklwd m5, m3 ; same for d; the stale lower halves are shifted out below
31 psrad m4, 16 ; a sign-extended to 32 bits
32 psrad m5, 16 ; d sign-extended to 32 bits
33 psubd m4, m5 ; a - d
34 psrad m4, 1 ; (a - d) >> 1, arithmetic shift
35 packssdw m4, m4 ; e  (narrowed back to words with signed saturation)
36
37 psubw m5, m4, m1 ; b  (new b = e - b; NOTE(review): 3-operand form relies on x86inc emulation for SSE2 -- confirm)
38 psubw m4, m2 ; c  (new c = e - c)
39 psubw m0, m5 ; a -= new b
40 paddw m3, m4 ; d += new c
41 ; m0 a
42 SWAP 1, 5 ; m1 b
43 SWAP 2, 4 ; m2 c
44 ; m3 d
45 %endmacro
46
; TRANSPOSE_4X4: transpose a 4x4 matrix of 16-bit values.
; Input:  row i in the low four words of m<i>, for i = 0..3.
; Output: column i in the low four words of m<i>; the upper halves of the
;         registers hold leftover data and should not be relied upon.
47 %macro TRANSPOSE_4X4 0
48 punpcklwd m0, m2 ; interleave rows 0 and 2
49 punpcklwd m1, m3 ; interleave rows 1 and 3
50 mova m2, m0 ; keep a copy: the next unpack overwrites m0
51 punpcklwd m0, m1 ; m0 = column 0 (low half), column 1 (high half)
52 punpckhwd m2, m1 ; m2 = column 2 (low half), column 3 (high half)
53 pshufd m1, m0, 0x0e ; 0x0e moves the upper qword down: m1 low half = column 1
54 pshufd m3, m2, 0x0e ; m3 low half = column 3
55 %endmacro
56
57 ; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3
; Input:  m0 = rows 0 and 1 (eight words), m1 = rows 2 and 3.
; Output: column i in the low four words of m<i>, for i = 0..3; the upper
;         halves hold leftover data.
58 %macro TRANSPOSE_4X4_WIDE 0
59 mova m3, m0 ; keep a copy: the next unpack overwrites m0
60 punpcklwd m0, m1 ; interleave row 0 (low half of m0) with row 2 (low half of m1)
61 punpckhwd m3, m1 ; interleave row 1 (high half of m0) with row 3 (high half of m1)
62 mova m2, m0 ; keep a copy for the high-half unpack
63 punpcklwd m0, m3 ; m0 = column 0 (low half), column 1 (high half)
64 punpckhwd m2, m3 ; m2 = column 2 (low half), column 3 (high half)
65 pshufd m1, m0, 0x0e ; 0x0e moves the upper qword down: m1 low half = column 1
66 pshufd m3, m2, 0x0e ; m3 low half = column 3
67 %endmacro
68
; ADD_STORE_4P_2X: add two rows of four 16-bit values to the 8-bit pixels at
; [outputq] and [outputq + strideq], clamp to 0..255 and write the 4 bytes of
; each row back.
;   %1, %2  register numbers holding the values to add for row 0 / row 1
;           (low four words); overwritten with the packed result
;   %3, %4  scratch register numbers
;   %5      register number that must already be all zeros
; The destination rows are loaded with movd (4 bytes), matching the 4-byte
; stores below, so nothing is read beyond the 4-pixel-wide block.
69 %macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero
70 movd m%3, [outputq] ; 4 destination pixels, row 0
71 movd m%4, [outputq + strideq] ; 4 destination pixels, row 1
72 punpcklbw m%3, m%5 ; zero-extend bytes to words
73 punpcklbw m%4, m%5
74 paddw m%1, m%3 ; pixel + value, in 16 bits
75 paddw m%2, m%4
76 packuswb m%1, m%5 ; narrow to bytes with unsigned saturation
77 packuswb m%2, m%5
78 movd [outputq], m%1 ; store 4 pixels, row 0
79 movd [outputq + strideq], m%2 ; store 4 pixels, row 1
80 %endmacro
81
; iwht4x4_16_add: read a 4x4 block of 16-bit values from `input`, run two
; TRANSFORM_COLS passes over it with a transpose before each, and add the
; result to the 4x4 block of 8-bit pixels at `output` (row pitch `stride`),
; storing the clamped sum back.
;   input   32 bytes, loaded with mova (NOTE(review): presumably an aligned
;           load under INIT_XMM, so input would need 16-byte alignment -- confirm)
;   output  first row of the destination; rows are strideq bytes apart
;   stride  NOTE(review): used as the full-width register strideq with no
;           extension; if the C prototype passes a 32-bit int, confirm the
;           upper bits are defined on 64-bit targets
; Uses m0-m6. NOTE(review): the "3, 3, 7" after the name are presumably the
; x86inc argument / GPR / XMM register counts -- confirm against x86inc.
82 INIT_XMM sse2
83 cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
84 mova m0, [inputq + 0] ; first 8 words (rows 0-1, per TRANSPOSE_4X4_WIDE's comment)
85 mova m1, [inputq + 16] ; next 8 words (rows 2-3)
86
87 psraw m0, 2 ; arithmetic shift right by 2 on every input word
88 psraw m1, 2
89
90 TRANSPOSE_4X4_WIDE ; two packed registers -> one column per register (m0..m3)
91 REORDER_INPUTS ; a c d b -> a b c d
92 TRANSFORM_COLS ; first 1-D pass
93 TRANSPOSE_4X4 ; transpose the intermediate result
94 REORDER_INPUTS
95 TRANSFORM_COLS ; second 1-D pass
96
97 pxor m4, m4 ; m4 = 0, the zero operand for ADD_STORE_4P_2X
98 ADD_STORE_4P_2X 0, 1, 5, 6, 4 ; destination rows 0 and 1
99 lea outputq, [outputq + 2 * strideq] ; advance two rows
100 ADD_STORE_4P_2X 2, 3, 5, 6, 4 ; destination rows 2 and 3
101
102 RET
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/x86/intrapred_ssse3.asm ('k') | source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698