Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(335)

Side by Side Diff: source/libvpx/vp8/common/ppc/idctllm_altivec.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 .globl short_idct4x4llm_ppc
13 ;# load_c: load the 16-byte constant at LABEL into vector register V. R0 and R1 are GPR scratch and are both overwritten.
14 .macro load_c V, LABEL, OFF, R0, R1
15 lis \R0, \LABEL@ha ;# R0 = high-adjusted upper 16 bits of LABEL's address
16 la \R1, \LABEL@l(\R0) ;# R1 = R0 + low 16 bits = full address of LABEL
17 lvx \V, \OFF, \R1 ;# V = vector at (OFF|0) + R1; OFF sits in lvx's rA operand slot and every call visible here passes 0
18 .endm
19 ;# short_idct4x4llm_ppc: two-pass 4x4 inverse DCT on 16-bit coefficients using AltiVec (v0-v12, r6, r9-r12 are used).
20 ;# r3 short *input  (16 shorts, fetched with two lvx loads; lvx ignores the low 4 address bits)
21 ;# r4 short *output (four rows of 4 shorts, written with word stores)
22 ;# r5 int pitch     (added to r4 between output rows, i.e. used as a byte stride)
23 .align 2
24 short_idct4x4llm_ppc:
25 mfspr r11, 256 ;# get old VRSAVE
26 oris r12, r11, 0xfff8 ;# set the 13 high bits: marks v0-v12 as in use
27 mtspr 256, r12 ;# set VRSAVE
28 ;# constants: v8 = sinpi8sqrt2, v9 = cospi8sqrt2minus1, v10/v11 = vperm selectors, v12 = shift count 16 (r9/r10 are scratch)
29 load_c v8, sinpi8sqrt2, 0, r9, r10
30 load_c v9, cospi8sqrt2minus1, 0, r9, r10
31 load_c v10, hi_hi, 0, r9, r10
32 load_c v11, lo_lo, 0, r9, r10
33 load_c v12, shift_16, 0, r9, r10
34 ;# load the whole 4x4 block: v0 = input shorts 0-7, v1 = input shorts 8-15
35 li r10, 16
36 lvx v0, 0, r3 ;# input ip[0], ip[ 4]
37 lvx v1, r10, r3 ;# input ip[8], ip[12]
38
39 ;# first pass: 32-bit lane i works on ip[i], ip[4+i], ip[8+i], ip[12+i]
40 vupkhsh v2, v0 ;# v2 = ip[0..3] sign-extended to words
41 vupkhsh v3, v1 ;# v3 = ip[8..11] sign-extended to words
42 vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8]
43 vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8]
44
45 vupklsh v0, v0 ;# v0 = ip[4..7] sign-extended to words
46 vmulosh v4, v0, v8 ;# signed multiply of each word's low halfword; 35468 is read as 35468-65536
47 vsraw v4, v4, v12 ;# >> 16
48 vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2)  (adding ip[4] back cancels the -65536)
49
50 vupklsh v1, v1 ;# v1 = ip[12..15] sign-extended to words
51 vmulosh v5, v1, v9
52 vsraw v5, v5, v12 ;# (ip[12] * cospi8sqrt2minus1) >> 16
53 vaddsws v5, v5, v1 ;# ip[12] * cos(pi/8) * sqrt(2)
54
55 vsubsws v4, v4, v5 ;# c1 = ip[4]*sin term - ip[12]*cos term
56
57 vmulosh v3, v1, v8
58 vsraw v3, v3, v12
59 vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2)
60
61 vmulosh v5, v0, v9
62 vsraw v5, v5, v12 ;# (ip[ 4] * cospi8sqrt2minus1) >> 16
63 vaddsws v5, v5, v0 ;# ip[ 4] * cos(pi/8) * sqrt(2)
64
65 vaddsws v3, v3, v5 ;# d1 = ip[12]*sin term + ip[4]*cos term
66
67 vaddsws v0, v6, v3 ;# a1 + d1
68 vsubsws v3, v6, v3 ;# a1 - d1
69
70 vaddsws v1, v7, v4 ;# b1 + c1
71 vsubsws v2, v7, v4 ;# b1 - c1
72
73 ;# transpose input (4x4 words; in the lane comments a,b,c,d = v0,v1,v2,v3 and the digit is the word index)
74 vmrghw v4, v0, v1 ;# a0 b0 a1 b1
75 vmrghw v5, v2, v3 ;# c0 d0 c1 d1
76
77 vmrglw v6, v0, v1 ;# a2 b2 a3 b3
78 vmrglw v7, v2, v3 ;# c2 d2 c3 d3
79
80 vperm v0, v4, v5, v10 ;# a0 b0 c0 d0
81 vperm v1, v4, v5, v11 ;# a1 b1 c1 d1
82
83 vperm v2, v6, v7, v10 ;# a2 b2 c2 d2
84 vperm v3, v6, v7, v11 ;# a3 b3 c3 d3
85
86 ;# second pass: same butterfly on the transposed block; below, ip[k] means transposed vector vk (k = 0..3)
87 vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[2]
88 vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[2]
89
90 vmulosh v4, v1, v8
91 vsraw v4, v4, v12
92 vaddsws v4, v4, v1 ;# ip[1] * sin(pi/8) * sqrt(2)
93
94 vmulosh v5, v3, v9
95 vsraw v5, v5, v12 ;# (ip[3] * cospi8sqrt2minus1) >> 16
96 vaddsws v5, v5, v3 ;# ip[3] * cos(pi/8) * sqrt(2)
97
98 vsubsws v4, v4, v5 ;# c1
99
100 vmulosh v2, v3, v8 ;# v2 is free to reuse: its old value was consumed by a1/b1 above
101 vsraw v2, v2, v12
102 vaddsws v2, v2, v3 ;# ip[3] * sin(pi/8) * sqrt(2)
103
104 vmulosh v5, v1, v9
105 vsraw v5, v5, v12 ;# (ip[1] * cospi8sqrt2minus1) >> 16
106 vaddsws v5, v5, v1 ;# ip[1] * cos(pi/8) * sqrt(2)
107
108 vaddsws v3, v2, v5 ;# d1
109
110 vaddsws v0, v6, v3 ;# a1 + d1
111 vsubsws v3, v6, v3 ;# a1 - d1
112
113 vaddsws v1, v7, v4 ;# b1 + c1
114 vsubsws v2, v7, v4 ;# b1 - c1
115 ;# final rounding: pack to shorts with saturation, then (x + 4) >> 3
116 vspltish v6, 4 ;# rounding term 4 in every halfword
117 vspltish v7, 3 ;# shift count 3 in every halfword
118
119 vpkswss v0, v0, v1 ;# v0 = (a1+d1 | b1+c1) as saturated shorts
120 vpkswss v1, v2, v3 ;# v1 = (b1-c1 | a1-d1) as saturated shorts
121
122 vaddshs v0, v0, v6 ;# + 4 (saturating)
123 vaddshs v1, v1, v6 ;# + 4 (saturating)
124
125 vsrah v0, v0, v7 ;# >> 3 (arithmetic)
126 vsrah v1, v1, v7 ;# >> 3 (arithmetic)
127
128 ;# transpose output (halfwords; a,b = high/low half of v0, c,d = high/low half of v1)
129 vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3
130 vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3
131
132 vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1
133 vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3
134
135 stwu r1,-416(r1) ;# create space on the stack
136 ;# NOTE(review): the stvx stores below write 16 bytes at 0(r1), over the back-chain word stwu just stored - confirm nothing walks the stack here
137 stvx v0, 0, r1 ;# spill output rows 0 and 1 (8 shorts) to the stack
138 lwz r6, 0(r1)
139 stw r6, 0(r4) ;# row 0, shorts 0-1
140 lwz r6, 4(r1)
141 stw r6, 4(r4) ;# row 0, shorts 2-3
142
143 add r4, r4, r5 ;# advance output pointer by pitch
144
145 lwz r6, 8(r1)
146 stw r6, 0(r4) ;# row 1, shorts 0-1
147 lwz r6, 12(r1)
148 stw r6, 4(r4) ;# row 1, shorts 2-3
149
150 add r4, r4, r5 ;# advance output pointer by pitch
151
152 stvx v1, 0, r1 ;# spill output rows 2 and 3 to the same stack slot
153 lwz r6, 0(r1)
154 stw r6, 0(r4) ;# row 2, shorts 0-1
155 lwz r6, 4(r1)
156 stw r6, 4(r4) ;# row 2, shorts 2-3
157
158 add r4, r4, r5 ;# advance output pointer by pitch
159
160 lwz r6, 8(r1)
161 stw r6, 0(r4) ;# row 3, shorts 0-1
162 lwz r6, 12(r1)
163 stw r6, 4(r4) ;# row 3, shorts 2-3
164
165 addi r1, r1, 416 ;# recover stack
166
167 mtspr 256, r11 ;# reset old VRSAVE
168
169 blr
170 ;# constant vectors fetched by load_c/lvx. NOTE(review): lvx ignores the low 4 address bits, so each table needs 16-byte alignment; .align 4 is presumably 2^4 here - confirm for the target assembler
171 .align 4
172 sinpi8sqrt2: ;# loaded into v8: 35468 in all eight halfwords; vmulosh reads it as signed and the code adds the operand back after the >> 16
173 .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468
174
175 .align 4
176 cospi8sqrt2minus1: ;# loaded into v9: 20091 in all eight halfwords; the code adds the operand back after the >> 16 (the "minus1" in the name)
177 .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091
178
179 .align 4
180 shift_16: ;# loaded into v12: per-word shift count 16 used by vsraw
181 .long 16, 16, 16, 16
182
183 .align 4
184 hi_hi: ;# loaded into v10: vperm selector - bytes 0-7 of the first source, then bytes 0-7 of the second source (indices 16-23)
185 .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
186
187 .align 4
188 lo_lo: ;# loaded into v11: vperm selector - bytes 8-15 of the first source, then bytes 8-15 of the second source (indices 24-31)
189 .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/ppc/filter_bilinear_altivec.asm ('k') | source/libvpx/vp8/common/ppc/loopfilter_altivec.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698