Chromium Code Reviews

Side by Side Diff: source/libvpx/vp8/common/ppc/variance_altivec.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR | Created 5 years, 7 months ago
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 .globl vp8_get8x8var_ppc
13 .globl vp8_get16x16var_ppc
14 .globl vp8_mse16x16_ppc
15 .globl vp8_variance16x16_ppc
16 .globl vp8_variance16x8_ppc
17 .globl vp8_variance8x16_ppc
18 .globl vp8_variance8x8_ppc
19 .globl vp8_variance4x4_ppc
20
21 .macro load_aligned_16 V R O
22 lvsl v3, 0, \R ;# permute control vector for alignment
23
24 lvx v1, 0, \R
25 lvx v2, \O, \R
26
27 vperm \V, v1, v2, v3
28 .endm
29
30 .macro prologue
31 mfspr r11, 256 ;# get old VRSAVE
32 oris r12, r11, 0xffc0
33 mtspr 256, r12 ;# set VRSAVE
34
35 stwu r1, -32(r1) ;# create space on the stack
36
37 li r10, 16 ;# load offset and loop counter
38
39 vspltisw v7, 0 ;# zero for merging
40 vspltisw v8, 0 ;# zero out total to start
41 vspltisw v9, 0 ;# zero out total for dif^2
42 .endm
43
44 .macro epilogue
45 addi r1, r1, 32 ;# recover stack
46
47 mtspr 256, r11 ;# reset old VRSAVE
48 .endm
49
50 .macro compute_sum_sse
51 ;# Compute the sum first. Unpack to halfwords so a signed
52 ;# subtract can be used; only a halfword signed subtract
53 ;# is available. Do the high half, then the low half.
54 vmrghb v2, v7, v4
55 vmrghb v3, v7, v5
56 vsubshs v2, v2, v3
57 vsum4shs v8, v2, v8
58
59 vmrglb v2, v7, v4
60 vmrglb v3, v7, v5
61 vsubshs v2, v2, v3
62 vsum4shs v8, v2, v8
63
64 ;# Now compute sse.
65 vsububs v2, v4, v5
66 vsububs v3, v5, v4
67 vor v2, v2, v3
68
69 vmsumubm v9, v2, v2, v9
70 .endm
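
For reference, a scalar C sketch of what one compute_sum_sse invocation accumulates for the 16 byte pairs held in v4 (source) and v5 (reference): v8 carries the running sum of signed differences and v9 the running sum of squared differences. The function name and signature below are illustrative only, not part of the reviewed file.

/* Illustrative scalar equivalent of one compute_sum_sse step. */
static void compute_sum_sse_scalar(const unsigned char *src,  /* v4 */
                                   const unsigned char *ref,  /* v5 */
                                   int *sum,                  /* v8 */
                                   unsigned int *sse) {       /* v9 */
    int i;
    for (i = 0; i < 16; i++) {
        const int diff = src[i] - ref[i];    /* signed per-byte difference */
        *sum += diff;                        /* vmrghb/vmrglb + vsubshs + vsum4shs path */
        *sse += (unsigned int)(diff * diff); /* vsububs/vor + vmsumubm path */
    }
}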
71
72 .macro variance_16 DS loop_label store_sum
73 \loop_label:
74 ;# only one of the inputs should need to be aligned.
75 load_aligned_16 v4, r3, r10
76 load_aligned_16 v5, r5, r10
77
78 ;# move onto the next line
79 add r3, r3, r4
80 add r5, r5, r6
81
82 compute_sum_sse
83
84 bdnz \loop_label
85
86 vsumsws v8, v8, v7
87 vsumsws v9, v9, v7
88
89 stvx v8, 0, r1
90 lwz r3, 12(r1)
91
92 stvx v9, 0, r1
93 lwz r4, 12(r1)
94
95 .if \store_sum
96 stw r3, 0(r8) ;# sum
97 .endif
98 stw r4, 0(r7) ;# sse
99
100 mullw r3, r3, r3 ;# sum*sum
101 srlwi r3, r3, \DS ;# (sum*sum) >> DS
102 subf r3, r3, r4 ;# sse - ((sum*sum) >> DS)
103 .endm
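
The value variance_16 (and variance_8 below) leaves in r3 is the usual block variance, sse - ((sum*sum) >> DS), where DS is taken to be log2 of the number of pixels in the block. A scalar C sketch under that assumption, with a hypothetical helper name:

/* Hypothetical scalar reference for the r3 result, assuming ds == log2(w*h). */
static unsigned int block_variance_scalar(const unsigned char *src, int src_stride,
                                          const unsigned char *ref, int ref_stride,
                                          int w, int h, int ds,
                                          unsigned int *sse_out, int *sum_out) {
    int sum = 0;
    unsigned int sse = 0;
    int r, c;
    for (r = 0; r < h; r++) {
        for (c = 0; c < w; c++) {
            const int diff = src[c] - ref[c];
            sum += diff;
            sse += (unsigned int)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
    }
    if (sum_out) *sum_out = sum;    /* .if \store_sum path */
    *sse_out = sse;                 /* stw r4, 0(r7) */
    return sse - ((unsigned int)(sum * sum) >> ds);
}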
104
105 .macro variance_8 DS loop_label store_sum
106 \loop_label:
107 ;# only one of the inputs should need to be aligned.
108 load_aligned_16 v4, r3, r10
109 load_aligned_16 v5, r5, r10
110
111 ;# move onto the next line
112 add r3, r3, r4
113 add r5, r5, r6
114
115 ;# only one of the inputs should need to be aligned.
116 load_aligned_16 v6, r3, r10
117 load_aligned_16 v0, r5, r10
118
119 ;# move onto the next line
120 add r3, r3, r4
121 add r5, r5, r6
122
123 vmrghb v4, v4, v6
124 vmrghb v5, v5, v0
125
126 compute_sum_sse
127
128 bdnz \loop_label
129
130 vsumsws v8, v8, v7
131 vsumsws v9, v9, v7
132
133 stvx v8, 0, r1
134 lwz r3, 12(r1)
135
136 stvx v9, 0, r1
137 lwz r4, 12(r1)
138
139 .if \store_sum
140 stw r3, 0(r8) ;# sum
141 .endif
142 stw r4, 0(r7) ;# sse
143
144 mullw r3, r3, r3 ;# sum*sum
145 srlwi r3, r3, \DS ;# (sum*sum) >> DS
146 subf r3, r3, r4 ;# sse - ((sum*sum) >> DS)
147 .endm
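
variance_8 handles blocks that are only 8 pixels wide by merging two consecutive rows into one 16-byte vector with vmrghb, so the same compute_sum_sse body covers two rows per loop iteration. A scalar sketch of that pairing (illustrative name; big-endian byte order assumed, as on PPC):

/* Sketch of the vmrghb pairing: bytes of two 8-pixel rows are interleaved.
 * Source and reference are interleaved identically, so the byte-wise
 * differences still line up. */
static void merge_rows_scalar(const unsigned char row0[8],
                              const unsigned char row1[8],
                              unsigned char merged[16]) {
    int i;
    for (i = 0; i < 8; i++) {
        merged[2 * i]     = row0[i];  /* high half of v4/v5 */
        merged[2 * i + 1] = row1[i];  /* high half of v6/v0 */
    }
}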
148
149 .align 2
150 ;# r3 unsigned char *src_ptr
151 ;# r4 int source_stride
152 ;# r5 unsigned char *ref_ptr
153 ;# r6 int recon_stride
154 ;# r7 unsigned int *SSE
155 ;# r8 int *Sum
156 ;#
157 ;# r3 return value
158 vp8_get8x8var_ppc:
159
160 prologue
161
162 li r9, 4
163 mtctr r9
164
165 variance_8 6, get8x8var_loop, 1
166
167 epilogue
168
169 blr
170
171 .align 2
172 ;# r3 unsigned char *src_ptr
173 ;# r4 int source_stride
174 ;# r5 unsigned char *ref_ptr
175 ;# r6 int recon_stride
176 ;# r7 unsigned int *SSE
177 ;# r8 int *Sum
178 ;#
179 ;# r3 return value
180 vp8_get16x16var_ppc:
181
182 prologue
183
184 mtctr r10
185
186 variance_16 8, get16x16var_loop, 1
187
188 epilogue
189
190 blr
191
192 .align 2
193 ;# r3 unsigned char *src_ptr
194 ;# r4 int source_stride
195 ;# r5 unsigned char *ref_ptr
196 ;# r6 int recon_stride
197 ;# r7 unsigned int *sse
198 ;#
199 ;# r3 return value
200 vp8_mse16x16_ppc:
201 prologue
202
203 mtctr r10
204
205 mse16x16_loop:
206 ;# only one of the inputs should need to be aligned.
207 load_aligned_16 v4, r3, r10
208 load_aligned_16 v5, r5, r10
209
210 ;# move onto the next line
211 add r3, r3, r4
212 add r5, r5, r6
213
214 ;# Now compute sse.
215 vsububs v2, v4, v5
216 vsububs v3, v5, v4
217 vor v2, v2, v3
218
219 vmsumubm v9, v2, v2, v9
220
221 bdnz mse16x16_loop
222
223 vsumsws v9, v9, v7
224
225 stvx v9, 0, r1
226 lwz r3, 12(r1)
227
228 stvx v9, 0, r1
229 lwz r3, 12(r1)
230
231 stw r3, 0(r7) ;# sse
232
233 epilogue
234
235 blr
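
vp8_mse16x16_ppc keeps only the squared-error accumulator (v9); there is no sum or (sum*sum) correction, so the value stored through r7 and the r3 return value are the same quantity. An illustrative scalar equivalent:

/* Illustrative scalar equivalent of vp8_mse16x16_ppc. */
static unsigned int mse16x16_scalar(const unsigned char *src, int src_stride,
                                    const unsigned char *ref, int ref_stride,
                                    unsigned int *sse) {
    unsigned int total = 0;
    int r, c;
    for (r = 0; r < 16; r++) {
        for (c = 0; c < 16; c++) {
            const int diff = src[c] - ref[c];
            total += (unsigned int)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse = total;
    return total;  /* same value is returned in r3 */
}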
236
237 .align 2
238 ;# r3 unsigned char *src_ptr
239 ;# r4 int source_stride
240 ;# r5 unsigned char *ref_ptr
241 ;# r6 int recon_stride
242 ;# r7 unsigned int *sse
243 ;#
244 ;# r3 return value
245 vp8_variance16x16_ppc:
246
247 prologue
248
249 mtctr r10
250
251 variance_16 8, variance16x16_loop, 0
252
253 epilogue
254
255 blr
256
257 .align 2
258 ;# r3 unsigned char *src_ptr
259 ;# r4 int source_stride
260 ;# r5 unsigned char *ref_ptr
261 ;# r6 int recon_stride
262 ;# r7 unsigned int *sse
263 ;#
264 ;# r3 return value
265 vp8_variance16x8_ppc:
266
267 prologue
268
269 li r9, 8
270 mtctr r9
271
272 variance_16 7, variance16x8_loop, 0
273
274 epilogue
275
276 blr
277
278 .align 2
279 ;# r3 unsigned char *src_ptr
280 ;# r4 int source_stride
281 ;# r5 unsigned char *ref_ptr
282 ;# r6 int recon_stride
283 ;# r7 unsigned int *sse
284 ;#
285 ;# r3 return value
286 vp8_variance8x16_ppc:
287
288 prologue
289
290 li r9, 8
291 mtctr r9
292
293 variance_8 7, variance8x16_loop, 0
294
295 epilogue
296
297 blr
298
299 .align 2
300 ;# r3 unsigned char *src_ptr
301 ;# r4 int source_stride
302 ;# r5 unsigned char *ref_ptr
303 ;# r6 int recon_stride
304 ;# r7 unsigned int *sse
305 ;#
306 ;# r3 return value
307 vp8_variance8x8_ppc:
308
309 prologue
310
311 li r9, 4
312 mtctr r9
313
314 variance_8 6, variance8x8_loop, 0
315
316 epilogue
317
318 blr
319
320 .macro transfer_4x4 I P
321 lwz r0, 0(\I)
322 add \I, \I, \P
323
324 lwz r10,0(\I)
325 add \I, \I, \P
326
327 lwz r8, 0(\I)
328 add \I, \I, \P
329
330 lwz r9, 0(\I)
331
332 stw r0, 0(r1)
333 stw r10, 4(r1)
334 stw r8, 8(r1)
335 stw r9, 12(r1)
336 .endm
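
transfer_4x4 gathers four 4-byte rows into 16 contiguous bytes on the stack so that a single lvx in vp8_variance4x4_ppc can load the whole 4x4 block. A scalar C sketch of that copy, with an illustrative name:

/* Illustrative scalar equivalent of the transfer_4x4 macro. */
static void transfer_4x4_scalar(const unsigned char *src, int stride,
                                unsigned char dst[16]) {
    int r, c;
    for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++)
            dst[4 * r + c] = src[r * stride + c];  /* one lwz/stw pair per row */
    }
}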
337
338 .align 2
339 ;# r3 unsigned char *src_ptr
340 ;# r4 int source_stride
341 ;# r5 unsigned char *ref_ptr
342 ;# r6 int recon_stride
343 ;# r7 unsigned int *sse
344 ;#
345 ;# r3 return value
346 vp8_variance4x4_ppc:
347
348 prologue
349
350 transfer_4x4 r3, r4
351 lvx v4, 0, r1
352
353 transfer_4x4 r5, r6
354 lvx v5, 0, r1
355
356 compute_sum_sse
357
358 vsumsws v8, v8, v7
359 vsumsws v9, v9, v7
360
361 stvx v8, 0, r1
362 lwz r3, 12(r1)
363
364 stvx v9, 0, r1
365 lwz r4, 12(r1)
366
367 stw r4, 0(r7) ;# sse
368
369 mullw r3, r3, r3 ;# sum*sum
370 srlwi r3, r3, 4 ;# (sum*sum) >> 4
371 subf r3, r3, r4 ;# sse - ((sum*sum) >> 4)
372
373 epilogue
374
375 blr