OLD | NEW |
(Empty) | |
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
| 10 |
| 11 |
;# Entry points made visible to the linker; all three are defined in this file.
    .globl  recon4b_ppc
    .globl  recon2b_ppc
    .globl  recon_b_ppc
| 15 |
;# row_of16 Diff, Pred, Dst, Stride
;#
;# Reconstructs one row of 16 pixels:
;#     Dst[i] = saturate_to_u8(Pred[i] + Diff[i]),  i = 0..15
;#
;#   \Diff    GPR addressing 16 signed 16-bit residuals (32 bytes).
;#            Left advanced by 32.
;#   \Pred    GPR addressing 16 unsigned 8-bit predictor pixels.
;#            Left advanced by 16.
;#   \Dst     GPR addressing the output row.  Left advanced by \Stride.
;#   \Stride  GPR holding the byte distance between output rows.
;#
;# The invoking code must have set v0 = 0 (used to zero-extend the
;# predictor bytes) and r8 = 16 (offset of the second diff vector).
;# Overwrites v1, v2 and v3.
;# lvx/stvx ignore the low four address bits, so all three pointers are
;# presumably 16-byte aligned -- TODO confirm at the call sites.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1, 0, \Pred            ;# v1 = 16 predictor bytes
    addi    \Pred, \Pred, 16        ;# step pred to the following row
    lvx     v3, 0, \Diff            ;# v3 = residuals 0..7
    vmrghb  v2, v0, v1              ;# v2 = pred 0..7 widened to halfwords
    vaddshs v2, v2, v3              ;# v2 = pred + diff, pixels 0..7
    lvx     v3, r8, \Diff           ;# v3 = residuals 8..15
    addi    \Diff, \Diff, 32        ;# step diff to the following row
    vmrglb  v1, v0, v1              ;# v1 = pred 8..15 widened to halfwords
    vaddshs v3, v3, v1              ;# v3 = pred + diff, pixels 8..15
    vpkshus v2, v2, v3              ;# clamp to 0..255 and pack 16 bytes
    stvx    v2, 0, \Dst             ;# write the finished row
    add     \Dst, \Dst, \Stride     ;# step dst to the following row
.endm
| 30 |
;# ---------------------------------------------------------------------
;# recon4b_ppc
;#
;# Reconstructs 4 rows of 16 pixels by invoking row_of16 four times.
;#
;# In:   r3 = short *diff_ptr
;#       r4 = unsigned char *pred_ptr
;#       r5 = unsigned char *dst_ptr
;#       r6 = int stride
;# Out:  nothing is returned in a register by this code.
;# Uses: r0, r8, r12, v0-v3.  r3, r4 and r5 are left advanced past the
;#       four rows (by 128, 64 and 4*stride bytes respectively).
;#
;# NOTE(review): the old VRSAVE value is kept at -8(r1) without a stack
;# frame being allocated; this relies on the target ABI allowing a leaf
;# routine to use memory below r1 -- confirm for the intended platform.
;# ---------------------------------------------------------------------
    .text
    .align 2
recon4b_ppc:
    mfspr   r0, 256                 ;# r0 = caller's VRSAVE (SPR 256)
    stw     r0, -8(r1)              ;# keep a copy below the stack pointer
    oris    r0, r0, 0xf000          ;# set the top four bits: v0..v3 in use
    mtspr   256, r0                 ;# publish the updated VRSAVE

    li      r8, 16                  ;# row_of16: offset of 2nd diff vector
    vxor    v0, v0, v0              ;# row_of16: v0 must be all zeros

    row_of16 r3, r4, r5, r6         ;# row 0
    row_of16 r3, r4, r5, r6         ;# row 1
    row_of16 r3, r4, r5, r6         ;# row 2
    row_of16 r3, r4, r5, r6         ;# row 3

    lwz     r12, -8(r1)             ;# fetch the caller's VRSAVE again
    mtspr   256, r12                ;# and put it back

    blr
| 55 |
;# two_rows_of8 Diff, Pred, Dst, Stride, write_first_four_pels
;#
;# Reconstructs 16 pixels in one vector pass and writes them out as two
;# rows of 8.  The 16 predictor bytes at \Pred and the 16 halfword
;# residuals at \Diff are treated as two consecutive 8-pixel rows.
;# Each output pixel is saturate_to_u8(pred + diff).
;#
;#   \Diff    GPR addressing 16 signed 16-bit residuals.  Not modified.
;#   \Pred    GPR addressing 16 unsigned 8-bit predictors.  Not modified.
;#   \Dst     GPR addressing the output; see write_first_four_pels.
;#   \Stride  GPR holding the byte distance between output rows.
;#   \write_first_four_pels
;#            non-zero: the first row goes to the current \Dst.
;#            zero:     \Dst is advanced by \Stride first, then written.
;#            In both cases \Dst is advanced by \Stride once more for the
;#            second row and is left pointing at that second row.
;#
;# The invoking code must have set v0 = 0, r8 = 16 and r10 = address of a
;# 16-byte scratch buffer (stvx ignores the low four address bits, so the
;# buffer is presumably 16-byte aligned -- TODO confirm).
;# Overwrites r0, v1, v2, v3 and the scratch buffer.  The result reaches
;# \Dst through 32-bit stores, so \Dst itself needs no vector alignment.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1, 0, \Pred            ;# v1 = 16 predictor bytes
    lvx     v3, 0, \Diff            ;# v3 = residuals 0..7
    vmrghb  v2, v0, v1              ;# v2 = pred 0..7 widened to halfwords
    vaddshs v2, v2, v3              ;# v2 = pred + diff, pixels 0..7
    lvx     v3, r8, \Diff           ;# v3 = residuals 8..15
    vmrglb  v1, v0, v1              ;# v1 = pred 8..15 widened to halfwords
    vaddshs v3, v3, v1              ;# v3 = pred + diff, pixels 8..15
    vpkshus v2, v2, v3              ;# v2 = 16 clamped 8-bit results
    stvx    v2, 0, r10              ;# park both rows in the scratch buffer

    lwz     r0, 0(r10)              ;# first row, pixels 0..3
.if \write_first_four_pels
    stw     r0, 0(\Dst)             ;# write in place, \Dst unchanged
.else
    stwux   r0, \Dst, \Stride       ;# move \Dst down one row, then write
.endif
    lwz     r0, 4(r10)              ;# first row, pixels 4..7
    stw     r0, 4(\Dst)

    lwz     r0, 8(r10)              ;# second row, pixels 0..3
    stwux   r0, \Dst, \Stride       ;# move \Dst down one row, then write
    lwz     r0, 12(r10)             ;# second row, pixels 4..7
    stw     r0, 4(\Dst)
.endm
| 79 |
;# ---------------------------------------------------------------------
;# recon2b_ppc
;#
;# Reconstructs 4 rows of 8 pixels using two passes of two_rows_of8.
;#
;# In:   r3 = short *diff_ptr
;#       r4 = unsigned char *pred_ptr
;#       r5 = unsigned char *dst_ptr
;#       r6 = int stride
;# Out:  nothing is returned in a register by this code.
;# Uses: r0, r8, r10, r12, v0-v3.  r3 and r4 are left advanced by 32 and
;#       16 bytes; r5 is left pointing at the fourth output row.
;#
;# NOTE(review): both the saved VRSAVE (-8(r1)) and the 16-byte scratch
;# buffer (-48(r1)) live below the stack pointer with no frame allocated;
;# this relies on the target ABI allowing a leaf routine to use that
;# memory -- confirm for the intended platform.
;# ---------------------------------------------------------------------
    .align 2
recon2b_ppc:
    mfspr   r0, 256                 ;# r0 = caller's VRSAVE (SPR 256)
    stw     r0, -8(r1)              ;# keep a copy below the stack pointer
    oris    r0, r0, 0xf000          ;# set the top four bits: v0..v3 in use
    mtspr   256, r0                 ;# publish the updated VRSAVE

    li      r8, 16                  ;# two_rows_of8: offset of 2nd diff vector
    vxor    v0, v0, v0              ;# two_rows_of8: v0 must be all zeros
    addi    r10, r1, -48            ;# r10 = scratch buffer for the macro

    two_rows_of8 r3, r4, r5, r6, 1  ;# rows 0-1, starting at dst itself

    addi    r3, r3, 32              ;# diff: skip the 16 halfwords just used
    addi    r4, r4, 16              ;# pred: skip the 16 bytes just used

    two_rows_of8 r3, r4, r5, r6, 0  ;# rows 2-3, dst stepped before writing

    lwz     r12, -8(r1)             ;# fetch the caller's VRSAVE again
    mtspr   256, r12                ;# and put it back

    blr
| 108 |
;# get_two_diff_rows  (no arguments; operates on fixed registers)
;#
;# Packs 8 bytes from the diff row at r3 and 8 bytes from the diff row
;# 32 bytes further on into the 16-byte buffer at r10, then loads that
;# buffer into v3.
;#
;# On entry: r0  = the word at 0(r3), already loaded by the invoking code
;#           r3  = address of the first of the two diff rows
;#           r10 = address of the 16-byte buffer
;# On exit:  v3  = contents of the buffer
;#           r3  = entry value + 32 (addresses the second row)
;#           r0  = last word copied
;# The previous contents of the buffer are overwritten.
.macro get_two_diff_rows
    stw     r0, 0(r10)              ;# row A, bytes 0..3 (preloaded in r0)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)              ;# row A, bytes 4..7
    lwzu    r0, 32(r3)              ;# r3 += 32, then fetch row B bytes 0..3
    stw     r0, 8(r10)
    lwz     r0, 4(r3)
    stw     r0, 12(r10)             ;# row B, bytes 4..7
    lvx     v3, 0, r10              ;# v3 = the 16 gathered bytes
.endm
| 119 |
;# ---------------------------------------------------------------------
;# recon_b_ppc
;#
;# Reconstructs 4 rows of 4 pixels.  Four predictor bytes are read from
;# each of four rows spaced 16 bytes apart, four halfword residuals from
;# each of four rows spaced 32 bytes apart, and each output pixel is
;# saturate_to_u8(pred + diff).  Results are written 4 bytes per row,
;# rows r6 bytes apart.
;#
;# In:   r3 = short *diff_ptr
;#       r4 = unsigned char *pred_ptr
;#       r5 = unsigned char *dst_ptr
;#       r6 = int stride
;# Out:  nothing is returned in a register by this code.
;# Uses: r0, r10, r12, v0-v3.  r3 is left advanced by 96 bytes and r5 is
;#       left pointing at the third output row.
;#
;# NOTE(review): both the saved VRSAVE (-8(r1)) and the 16-byte scratch
;# buffer (-48(r1)) live below the stack pointer with no frame allocated;
;# this relies on the target ABI allowing a leaf routine to use that
;# memory -- confirm for the intended platform.
;# ---------------------------------------------------------------------
    .align 2
recon_b_ppc:
    mfspr   r0, 256                 ;# r0 = caller's VRSAVE (SPR 256)
    stw     r0, -8(r1)              ;# keep a copy below the stack pointer
    oris    r0, r0, 0xf000          ;# set the top four bits: v0..v3 in use
    mtspr   256, r0                 ;# publish the updated VRSAVE

    addi    r10, r1, -48            ;# r10 = 16-byte scratch buffer
    vxor    v0, v0, v0              ;# zeros for widening bytes to halfwords

    ;# Gather 4 predictor bytes from each of the 4 rows into the buffer.
    lwz     r0, 0(r4)
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    ;# Must happen before the buffer is reused for the residuals below.
    lvx     v1, 0, r10              ;# v1 = 16 predictor bytes
    vmrghb  v2, v0, v1              ;# v2 = pred 0..7 widened to halfwords

    lwz     r0, 0(r3)               ;# preload first diff word for the macro
    get_two_diff_rows               ;# v3 = residuals for rows 0 and 1
    vaddshs v2, v2, v3              ;# v2 = pred + diff, rows 0 and 1

    vmrglb  v1, v0, v1              ;# v1 = pred 8..15 widened to halfwords

    lwzu    r0, 32(r3)              ;# r3 -> row 2; preload its first word
    get_two_diff_rows               ;# v3 = residuals for rows 2 and 3
    vaddshs v3, v3, v1              ;# v3 = pred + diff, rows 2 and 3

    vpkshus v2, v2, v3              ;# v2 = 16 clamped 8-bit results
    stvx    v2, 0, r10              ;# park them in the scratch buffer

    ;# Scatter one word (4 pixels) to each destination row.
    lwz     r0, 0(r10)
    stw     r0, 0(r5)               ;# row 0
    lwz     r0, 4(r10)
    stwux   r0, r5, r6              ;# row 1, r5 += stride
    lwz     r0, 8(r10)
    stwux   r0, r5, r6              ;# row 2, r5 += stride
    lwz     r0, 12(r10)
    stwx    r0, r5, r6              ;# row 3, r5 left unchanged

    lwz     r12, -8(r1)             ;# fetch the caller's VRSAVE again
    mtspr   256, r12                ;# and put it back

    blr
OLD | NEW |