Chromium Code Reviews

Side by Side Diff: source/libvpx/vp9/encoder/ppc/vp9_sad_altivec.asm

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_sad16x16_ppc
    .globl vp8_sad16x8_ppc
    .globl vp8_sad8x16_ppc
    .globl vp8_sad8x8_ppc
    .globl vp8_sad4x4_ppc

.macro load_aligned_16 V R O
    lvsl    v3, 0, \R               ;# permute mask for alignment

    lvx     v1, 0, \R
    lvx     v2, \O, \R

    vperm   \V, v1, v2, v3
.endm
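
The load_aligned_16 macro is the classic AltiVec unaligned-load idiom: lvx silently rounds the effective address down to a 16-byte boundary, so the macro loads the two aligned quadwords straddling the target address and uses lvsl to build a permute mask that vperm applies to splice out the 16 misaligned bytes. A minimal scalar C model of the same idea (the helper name is illustrative, not from libvpx):

#include <stdint.h>
#include <string.h>

/* Conceptual C model of lvsl/lvx/vperm: load the two aligned 16-byte
 * blocks that straddle p, then copy out the 16 bytes starting at the
 * misaligned offset. The hardware does the splice with one permute. */
static void load_aligned_16_model(uint8_t out[16], const uint8_t *p) {
    uintptr_t addr  = (uintptr_t)p;
    uintptr_t base  = addr & ~(uintptr_t)15; /* lvx rounds the EA down   */
    unsigned  shift = (unsigned)(addr & 15); /* lvsl encodes this shift  */
    uint8_t   both[32];

    memcpy(both,      (const uint8_t *)base,      16); /* first lvx  */
    memcpy(both + 16, (const uint8_t *)base + 16, 16); /* second lvx */
    memcpy(out, both + shift, 16);                     /* vperm      */
}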

.macro prologue
    mfspr   r11, 256                ;# get old VRSAVE
    oris    r12, r11, 0xffc0
    mtspr   256, r12                ;# set VRSAVE

    stwu    r1, -32(r1)             ;# create space on the stack

    li      r10, 16                 ;# load offset and loop counter

    vspltisw v8, 0                  ;# zero out total to start
.endm

.macro epilogue
    addi    r1, r1, 32              ;# recover stack

    mtspr   256, r11                ;# reset old VRSAVE
.endm

.macro SAD_16
    ;# v6 = abs (v4 - v5)
    vsububs v6, v4, v5
    vsububs v7, v5, v4
    vor     v6, v6, v7

    ;# v8 += abs (v4 - v5)
    vsum4ubs v8, v6, v8
.endm
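
SAD_16 computes a 16-byte absolute difference without a native absolute-difference instruction: vsububs saturates negative results to zero, so OR-ing the two subtraction orders yields |a - b| per byte, and vsum4ubs then adds the bytes into four 32-bit partial sums accumulated in v8. A scalar C reference for one such step (sketch only, not libvpx code):

#include <stdint.h>

/* Scalar reference for one SAD_16 step: accumulate |src[i] - ref[i]|
 * over 16 bytes. The vector code gets |a - b| as
 * (a -sat b) | (b -sat a), since unsigned saturating subtraction
 * clamps the "wrong" order to zero. */
static uint32_t sad16_step(const uint8_t *src, const uint8_t *ref,
                           uint32_t total) {
    for (int i = 0; i < 16; i++) {
        uint8_t d1 = (src[i] > ref[i]) ? (uint8_t)(src[i] - ref[i]) : 0;
        uint8_t d2 = (ref[i] > src[i]) ? (uint8_t)(ref[i] - src[i]) : 0;
        total += (uint32_t)(d1 | d2); /* at most one of d1, d2 is nonzero */
    }
    return total;
}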

.macro sad_16_loop loop_label
    lvsl    v3, 0, r5               ;# only needs to be done once per block

    ;# preload a line of data before getting into the loop
    lvx     v4, 0, r3
    lvx     v1, 0, r5
    lvx     v2, r10, r5

    add     r5, r5, r6
    add     r3, r3, r4

    vperm   v5, v1, v2, v3

    .align 4
\loop_label:
    ;# compute difference on first row
    vsububs v6, v4, v5
    vsububs v7, v5, v4

    ;# load up next set of data
    lvx     v9, 0, r3
    lvx     v1, 0, r5
    lvx     v2, r10, r5

    ;# perform abs() of difference
    vor     v6, v6, v7
    add     r3, r3, r4

    ;# add to the running tally
    vsum4ubs v8, v6, v8

    ;# now onto the next line
    vperm   v5, v1, v2, v3
    add     r5, r5, r6
    lvx     v4, 0, r3

    ;# compute difference on second row
    vsububs v6, v9, v5
    lvx     v1, 0, r5
    vsububs v7, v5, v9
    lvx     v2, r10, r5
    vor     v6, v6, v7
    add     r3, r3, r4
    vsum4ubs v8, v6, v8
    vperm   v5, v1, v2, v3
    add     r5, r5, r6

    bdnz    \loop_label

    vspltisw v7, 0

    vsumsws v8, v8, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)
.endm
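
sad_16_loop is software-pipelined: each iteration finishes the SAD for the rows loaded on the previous trip while already issuing the loads for the next, and it covers two rows per pass, which is why the callers below load the count register with height/2. After the loop, vsumsws folds the four word-lane partials into a single sum in the last word element, which is stored to the stack and reloaded into r3 as the return value. Functionally the whole macro reduces to this C loop (a sketch, not libvpx's reference code):

#include <stdint.h>

/* What sad_16_loop computes for a block 16 pixels wide and height
 * rows tall (height is 16 or 8 for the two callers below). */
static uint32_t sad16xh_model(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              int height) {
    uint32_t total = 0;
    for (int r = 0; r < height; r++) {
        for (int c = 0; c < 16; c++) {
            int d = src[c] - ref[c];
            total += (uint32_t)(d < 0 ? -d : d);
        }
        src += src_stride;
        ref += ref_stride;
    }
    return total;
}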

.macro sad_8_loop loop_label
    .align 4
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v6, r3, r10
    load_aligned_16 v7, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    vmrghb  v4, v4, v6
    vmrghb  v5, v5, v7

    SAD_16

    bdnz    \loop_label

    vspltisw v7, 0

    vsumsws v8, v8, v7

    stvx    v8, 0, r1
    lwz     r3, 12(r1)
.endm
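
sad_8_loop handles the 8-pixel-wide cases by packing two rows into one vector: vmrghb interleaves the first eight bytes of consecutive rows, and because the source and reference pairs are interleaved identically, SAD_16 still matches each byte with its counterpart. Each iteration therefore covers two rows, so the callers load the count register with height/2. A C sketch of the packing step (names are illustrative):

#include <stdint.h>

/* Model of vmrghb v4, v4, v6: interleave the first 8 bytes of two rows
 * into one 16-byte lane. Applying the same interleave to src and ref
 * keeps bytes paired, so a 16-wide SAD of the packed vectors equals
 * the sum of the two 8-wide row SADs. */
static void merge_two_rows(uint8_t out[16],
                           const uint8_t row0[8], const uint8_t row1[8]) {
    for (int i = 0; i < 8; i++) {
        out[2 * i]     = row0[i];   /* byte from the first row  */
        out[2 * i + 1] = row1[i];   /* byte from the second row */
    }
}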

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int src_stride
;# r5 unsigned char *ref_ptr
;# r6 int ref_stride
;#
;# r3 return value
vp8_sad16x16_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    sad_16_loop sad16x16_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int src_stride
;# r5 unsigned char *ref_ptr
;# r6 int ref_stride
;#
;# r3 return value
vp8_sad16x8_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    sad_16_loop sad16x8_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int src_stride
;# r5 unsigned char *ref_ptr
;# r6 int ref_stride
;#
;# r3 return value
vp8_sad8x16_ppc:

    prologue

    li      r9, 8
    mtctr   r9

    sad_8_loop sad8x16_loop

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int src_stride
;# r5 unsigned char *ref_ptr
;# r6 int ref_stride
;#
;# r3 return value
vp8_sad8x8_ppc:

    prologue

    li      r9, 4
    mtctr   r9

    sad_8_loop sad8x8_loop

    epilogue

    blr

.macro transfer_4x4 I P
    lwz     r0, 0(\I)
    add     \I, \I, \P

    lwz     r7, 0(\I)
    add     \I, \I, \P

    lwz     r8, 0(\I)
    add     \I, \I, \P

    lwz     r9, 0(\I)

    stw     r0, 0(r1)
    stw     r7, 4(r1)
    stw     r8, 8(r1)
    stw     r9, 12(r1)
.endm
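
transfer_4x4 sidesteps vector loads entirely for the 4x4 case: four 32-bit scalar loads walk down the block one stride at a time and store the rows contiguously into the 16-byte stack slot, so a single aligned lvx can then pull the whole block into a vector register. In C terms (sketch only):

#include <stdint.h>
#include <string.h>

/* Model of transfer_4x4: gather the four 4-byte rows of a 4x4 block
 * into a contiguous 16-byte buffer, ready for one aligned vector load. */
static void transfer_4x4_model(uint8_t out[16],
                               const uint8_t *p, int stride) {
    for (int r = 0; r < 4; r++) {
        memcpy(out + 4 * r, p, 4);  /* lwz + stw pair in the asm */
        p += stride;
    }
}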

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int src_stride
;# r5 unsigned char *ref_ptr
;# r6 int ref_stride
;#
;# r3 return value
vp8_sad4x4_ppc:

    prologue

    transfer_4x4 r3, r4
    lvx     v4, 0, r1

    transfer_4x4 r5, r6
    lvx     v5, 0, r1

    vspltisw v8, 0                  ;# zero out total to start

    ;# v6 = abs (v4 - v5)
    vsububs v6, v4, v5
    vsububs v7, v5, v4
    vor     v6, v6, v7

    ;# v8 += abs (v4 - v5)
    vsum4ubs v7, v6, v8
    vsumsws v7, v7, v8

    stvx    v7, 0, r1
    lwz     r3, 12(r1)

    epilogue

    blr
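
Per the register comments above (arguments in r3, r4, r5, r6 and the result returned in r3), these entry points correspond to C declarations along the following lines. This is an assumption derived from the comments, not copied from a libvpx header; some libvpx SAD variants take an extra max_sad argument, so check the actual prototypes before wiring these up.

/* Assumed C-level view of the AltiVec entry points (sketch). */
unsigned int vp8_sad16x16_ppc(unsigned char *src_ptr, int src_stride,
                              unsigned char *ref_ptr, int ref_stride);
unsigned int vp8_sad16x8_ppc(unsigned char *src_ptr, int src_stride,
                             unsigned char *ref_ptr, int ref_stride);
unsigned int vp8_sad8x16_ppc(unsigned char *src_ptr, int src_stride,
                             unsigned char *ref_ptr, int ref_stride);
unsigned int vp8_sad8x8_ppc(unsigned char *src_ptr, int src_stride,
                            unsigned char *ref_ptr, int ref_stride);
unsigned int vp8_sad4x4_ppc(unsigned char *src_ptr, int src_stride,
                            unsigned char *ref_ptr, int ref_stride);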