;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_subtract_mbuv_ppc
    .globl vp8_subtract_mby_ppc

;# r3 short *diff
;# r4 unsigned char *usrc
;# r5 unsigned char *vsrc
;# r6 unsigned char *pred
;# r7 int stride
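;#
;# For reference, a rough scalar C equivalent (a sketch inferred from the
;# code below, not taken from this file; the name subtract_mbuv_ref is
;# hypothetical). diff is laid out as 256 Y shorts, then 64 U shorts, then
;# 64 V shorts; pred as a 256-byte Y predictor followed by the 8x8 U and V
;# predictors:
;#
;#   void subtract_mbuv_ref(short *diff, unsigned char *usrc,
;#                          unsigned char *vsrc, unsigned char *pred,
;#                          int stride)
;#   {
;#       short *udiff = diff + 256;
;#       short *vdiff = diff + 320;
;#       unsigned char *upred = pred + 256;
;#       unsigned char *vpred = pred + 320;
;#       int r, c;
;#
;#       for (r = 0; r < 8; r++)
;#           for (c = 0; c < 8; c++)
;#           {
;#               udiff[r * 8 + c] = usrc[r * stride + c] - upred[r * 8 + c];
;#               vdiff[r * 8 + c] = vsrc[r * stride + c] - vpred[r * 8 + c];
;#           }
;#   }
;#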
vp8_subtract_mbuv_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfc00    ;# mark v0..v5 as in use
    mtspr   256, r12            ;# set VRSAVE

    li      r9, 256
    add     r3, r3, r9          ;# diff += 2*256 bytes: skip the
    add     r3, r3, r9          ;#   256 Y diffs (shorts) to reach the U/V area
    add     r6, r6, r9          ;# pred += 256: skip the 16x16 Y predictor

    li      r10, 16             ;# byte offset of the second store
    li      r9, 4
    mtctr   r9                  ;# 4 iterations, two 8-pixel U rows each

    vspltisw v0, 0              ;# zero vector for byte->short unpacking

mbu_loop:
    lvsl    v5, 0, r4           ;# permute vector for src alignment
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r6           ;# pred (two 8-byte rows in one vector)

    add     r4, r4, r7
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5      ;# align the 8 src bytes to the top of v1

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r4           ;# permute vector for src alignment
    lvx     v1, 0, r4           ;# src

    add     r4, r4, r7

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short (second row)

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbu_loop

    mtctr   r9                  ;# 4 more iterations for the V block

mbv_loop:
    lvsl    v5, 0, r5           ;# permute vector for src alignment
    lvx     v1, 0, r5           ;# src
    lvx     v2, 0, r6           ;# pred (two 8-byte rows in one vector)

    add     r5, r5, r7
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5      ;# align the 8 src bytes to the top of v1

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r5           ;# permute vector for src alignment
    lvx     v1, 0, r5           ;# src

    add     r5, r5, r7

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short (second row)

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbv_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# r3 short *diff
;# r4 unsigned char *src
;# r5 unsigned char *pred
;# r6 int stride
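;#
;# Rough scalar C equivalent (a sketch inferred from the code below; the
;# name subtract_mby_ref is hypothetical). Note the AltiVec version loads
;# src with plain lvx, so it effectively assumes 16-byte-aligned src rows:
;#
;#   void subtract_mby_ref(short *diff, unsigned char *src,
;#                         unsigned char *pred, int stride)
;#   {
;#       int r, c;
;#
;#       for (r = 0; r < 16; r++)
;#           for (c = 0; c < 16; c++)
;#               diff[r * 16 + c] = src[r * stride + c] - pred[r * 16 + c];
;#   }
;#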
vp8_subtract_mby_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf800    ;# mark v0..v4 as in use
    mtspr   256, r12            ;# set VRSAVE

    li      r10, 16             ;# byte offset of the second store
    mtctr   r10                 ;# 16 iterations, one 16-pixel row each

    vspltisw v0, 0              ;# zero vector for byte->short unpacking

mby_loop:
    lvx     v1, 0, r4           ;# src (lvx requires 16-byte aligned src)
    lvx     v2, 0, r5           ;# pred

    add     r4, r4, r6
    addi    r5, r5, 16

    vmrghb  v3, v0, v1          ;# unpack high src to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vmrglb  v3, v0, v1          ;# unpack low src to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mby_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr