Index: source/libvpx/vp9/common/ppc/vp9_recon_altivec.asm |
=================================================================== |
--- source/libvpx/vp9/common/ppc/vp9_recon_altivec.asm (revision 0) |
+++ source/libvpx/vp9/common/ppc/vp9_recon_altivec.asm (revision 0) |
@@ -0,0 +1,175 @@ |
+; |
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
+; |
+; Use of this source code is governed by a BSD-style license |
+; that can be found in the LICENSE file in the root of the source |
+; tree. An additional intellectual property rights grant can be found |
+; in the file PATENTS. All contributing project authors may |
+; be found in the AUTHORS file in the root of the source tree. |
+; |
+ |
+ |
+ .globl recon4b_ppc |
+ .globl recon2b_ppc |
+ .globl recon_b_ppc |
+ |
+.macro row_of16 Diff Pred Dst Stride   ;# one row: dst[0..15] = clamp_u8(pred + diff) |
+    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15 (bytes) |
+    addi    \Pred, \Pred, 16        ;# next pred row (rows packed 16 bytes apart) |
+    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero-extend via v0 = 0) |
+    lvx     v3,  0, \Diff           ;# v3 = d0..d7 (16-bit residuals) |
+    vaddshs v2, v2, v3              ;# v2 = r0..r7 (saturating short add) |
+    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15 |
+    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (r8 = 16, second half of row) |
+    addi    \Diff, \Diff, 32        ;# next diff row (16 shorts = 32 bytes) |
+    vaddshs v3, v3, v1              ;# v3 = r8..r15 |
+    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15, clamped to 0..255 |
+    stvx    v2,  0, \Dst            ;# store full row to dst |
+    add     \Dst, \Dst, \Stride    ;# next dst row |
+.endm |
+ |
+    .text |
+    .align 2 |
+;# r3 = short *diff_ptr,        16 residuals per row, rows 32 bytes apart |
+;# r4 = unsigned char *pred_ptr, 16 pels per row, rows packed 16 bytes apart |
+;# r5 = unsigned char *dst_ptr,  output pels |
+;# r6 = int stride              bytes between dst rows |
+recon4b_ppc:                        ;# dst[4][16] = clamp_u8(pred + diff) |
+    mfspr   r0, 256                     ;# get old VRSAVE |
+    stw     r0, -8(r1)                  ;# save old VRSAVE to stack |
+    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use |
+    mtspr   256,r0                      ;# set VRSAVE |
+ |
+    vxor    v0, v0, v0                  ;# v0 = 0, zero half for byte->short merge |
+    li      r8, 16                      ;# r8 = byte offset of diffs d8..d15 |
+ |
+    row_of16 r3, r4, r5, r6 |
+    row_of16 r3, r4, r5, r6 |
+    row_of16 r3, r4, r5, r6 |
+    row_of16 r3, r4, r5, r6 |
+ |
+    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack |
+    mtspr   256, r12                    ;# reset old VRSAVE |
+ |
+    blr |
+ |
+.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels ;# two 8-pel rows: dst = clamp_u8(pred + diff) |
+    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15 (two 8-pel rows) |
+    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 |
+    lvx     v3,  0, \Diff           ;# v3 = d0..d7 |
+    vaddshs v2, v2, v3              ;# v2 = r0..r7 |
+    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15 |
+    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (comment fixed: load target is v3) |
+    vaddshs v3, v3, v1              ;# v3 = r8..r15 |
+    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15 (comment fixed: pack dest is v2) |
+    stvx    v2,  0, r10             ;# 2 rows to scratch buf, copied out 4 pels at a time |
+    lwz     r0, 0(r10) |
+.if \write_first_four_pels |
+    stw     r0, 0(\Dst)             ;# first pair: dst already points at row 0 |
+    .else |
+    stwux   r0, \Dst, \Stride       ;# later pairs: step dst down one row first |
+.endif |
+    lwz     r0, 4(r10) |
+    stw     r0, 4(\Dst) |
+    lwz     r0, 8(r10) |
+    stwux   r0, \Dst, \Stride           ;# advance dst to next row |
+    lwz     r0, 12(r10) |
+    stw     r0, 4(\Dst) |
+.endm |
+ |
+    .align 2 |
+;# r3 = short *diff_ptr,        8 residuals per row, rows 16 bytes apart |
+;# r4 = unsigned char *pred_ptr, 8 pels per row, rows packed 8 bytes apart |
+;# r5 = unsigned char *dst_ptr,  output pels |
+;# r6 = int stride              bytes between dst rows |
+ |
+recon2b_ppc:                        ;# dst[4][8] = clamp_u8(pred + diff) |
+    mfspr   r0, 256                     ;# get old VRSAVE |
+    stw     r0, -8(r1)                  ;# save old VRSAVE to stack |
+    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use |
+    mtspr   256,r0                      ;# set VRSAVE |
+ |
+    vxor    v0, v0, v0                  ;# v0 = 0, zero half for byte->short merge |
+    li      r8, 16                      ;# r8 = byte offset of second row of diffs |
+ |
+    la      r10, -48(r1)                ;# r10 = 16-byte scratch buf below stack ptr |
+ |
+    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0-1 (dst starts at row 0) |
+ |
+    addi    r4, r4, 16;                 ;# next pred |
+    addi    r3, r3, 32;                 ;# next diff |
+ |
+    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2-3 |
+ |
+    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack |
+    mtspr   256, r12                    ;# reset old VRSAVE |
+ |
+    blr |
+ |
+.macro get_two_diff_rows               ;# gather two 4-pel diff rows (32 bytes apart) into v3 |
+    stw     r0, 0(r10)                  ;# caller preloaded r0 = d0..d1 of this row |
+    lwz     r0, 4(r3)                   ;# d2..d3 of this row |
+    stw     r0, 4(r10) |
+    lwzu    r0, 32(r3)                  ;# advance r3 to next row; d0..d1 of that row |
+    stw     r0, 8(r10) |
+    lwz     r0, 4(r3)                   ;# d2..d3 of next row |
+    stw     r0, 12(r10) |
+    lvx     v3, 0, r10                  ;# v3 = 8 diffs = two contiguous rows of 4 |
+.endm |
+ |
+    .align 2 |
+;# r3 = short *diff_ptr,        4 residuals per row, rows 32 bytes apart |
+;# r4 = unsigned char *pred_ptr, 4 pels per row, rows 16 bytes apart |
+;# r5 = unsigned char *dst_ptr,  output pels, 4 per row |
+;# r6 = int stride              bytes between dst rows |
+recon_b_ppc:                        ;# dst[4][4] = clamp_u8(pred + diff) |
+    mfspr   r0, 256                     ;# get old VRSAVE |
+    stw     r0, -8(r1)                  ;# save old VRSAVE to stack |
+    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use |
+    mtspr   256,r0                      ;# set VRSAVE |
+ |
+    vxor    v0, v0, v0                  ;# v0 = 0, zero half for byte->short merge |
+ |
+    la      r10, -48(r1)                ;# r10 = 16-byte scratch buf below stack ptr |
+ |
+    lwz     r0, 0(r4)                   ;# gather 4 pels from each pred row |
+    stw     r0, 0(r10)                  ;# (rows 16 bytes apart) into contiguous buf |
+    lwz     r0, 16(r4) |
+    stw     r0, 4(r10) |
+    lwz     r0, 32(r4) |
+    stw     r0, 8(r10) |
+    lwz     r0, 48(r4) |
+    stw     r0, 12(r10) |
+ |
+    lvx     v1, 0, r10;                 ;# v1 = pred = p0..p15 (4 rows of 4) |
+ |
+    lwz     r0, 0(r3)                   ;# preload d0..d1 of row 0 for macro |
+ |
+    get_two_diff_rows                   ;# v3 = d0..d7 (diff rows 0-1) |
+ |
+    vmrghb  v2, v0, v1;                 ;# v2 = 16-bit p0..p7 |
+    vaddshs v2, v2, v3;                 ;# v2 = r0..r7 |
+ |
+    lwzu    r0, 32(r3)                  ;# advance r3 to row 2; preload d8..d9 |
+ |
+    get_two_diff_rows                   ;# v3 = d8..d15 (diff rows 2-3) |
+ |
+    vmrglb  v1, v0, v1;                 ;# v1 = 16-bit p8..p15 |
+    vaddshs v3, v3, v1;                 ;# v3 = r8..r15 |
+ |
+    vpkshus v2, v2, v3;                 ;# v2 = 8-bit r0..r15, clamped to 0..255 |
+    stvx    v2, 0, r10;                 ;# 16 pels to dst from buf |
+ |
+    lwz     r0, 0(r10)                  ;# copy out one 4-pel row at a time |
+    stw     r0, 0(r5)                   ;# row 0 |
+    lwz     r0, 4(r10) |
+    stwux   r0, r5, r6                  ;# row 1 (r5 += stride) |
+    lwz     r0, 8(r10) |
+    stwux   r0, r5, r6                  ;# row 2 |
+    lwz     r0, 12(r10) |
+    stwx    r0, r5, r6                  ;# row 3 (indexed store, no pointer update) |
+ |
+    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack |
+    mtspr   256, r12                    ;# reset old VRSAVE |
+ |
+    blr |