source/libvpx/vp8/encoder/ppc/fdct_altivec.asm - Issue 1124333011: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp8/encoder/ppc/fdct_altivec.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: only update to last night's LKGR Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp8/encoder/ppc/fdct_altivec.asm

diff --git a/source/libvpx/vp8/encoder/ppc/fdct_altivec.asm b/source/libvpx/vp8/encoder/ppc/fdct_altivec.asm

deleted file mode 100644

index 935d0cb097743755da5a427aef6db9efb16888f4..0000000000000000000000000000000000000000

--- a/source/libvpx/vp8/encoder/ppc/fdct_altivec.asm

+++ /dev/null

@@ -1,205 +0,0 @@

-; Use of this source code is governed by a BSD-style license

-; that can be found in the LICENSE file in the root of the source

-; tree. An additional intellectual property rights grant can be found

-; in the file PATENTS. All contributing project authors may

-; be found in the AUTHORS file in the root of the source tree.

- .globl vp8_short_fdct4x4_ppc

- .globl vp8_short_fdct8x4_ppc

-;# load_c V, LABEL, OFF, R0, R1

-;# Loads the 16-byte vector at LABEL + OFF into vector register V.

-;# OFF is the lvx index operand: 0, or a GPR holding a byte offset.

-;# R0 and R1 are scratch GPRs; both are overwritten and R1 is left

-;# holding the address of LABEL. R0 must not be r0, because la (addi)

-;# reads r0 in the base position as the constant zero.

-;# lvx ignores the low four bits of the address, so LABEL + OFF has to be

-;# 16-byte aligned for the intended data to be loaded.

-.macro load_c V, LABEL, OFF, R0, R1

- lis \R0, \LABEL@ha ;# upper 16 bits of the label address (adjusted for the signed low half)

- la \R1, \LABEL@l(\R0) ;# add the low 16 bits: second scratch GPR = label address

- lvx \V, \OFF, \R1 ;# vector load from label address + offset

-.endm

-;# Forward and inverse DCTs are nearly identical; only differences are

-;# in normalization (fwd is twice unitary, inv is half unitary)

-;# and that they are of course transposes of each other.

-;#

-;# The following three accomplish most of implementation and

-;# are used only by ppc_idct.c and ppc_fdct.c.

-;# prologue: common entry sequence for the transform routines in this file.

-;# On exit:

-;#   r11      = caller's VRSAVE (must stay untouched until epilogue)

-;#   r12      = scratch (new VRSAVE value)

-;#   r1       = lowered by 32 bytes; callers use the frame as row scratch space

-;#   r6       = 16

-;#   r9       = address of round_tab (callers index it with r6)

-;#   r10      = scratch (holds only the upper half of round_tab's address)

-;#   v0..v3   = the four 16-byte rows of dct_tab

-;#   v4, v5   = the two permute vectors of ppc_dctperm_tab

-;#   v6       = first vector of round_tab

-;# v7 (the shift count used by the transform macros) is NOT set here.

-.macro prologue

- mfspr r11, 256 ;# get old VRSAVE

- oris r12, r11, 0xfffc ;# set the 14 most-significant VRSAVE bits (v0-v13 by AltiVec convention)

- mtspr 256, r12 ;# set VRSAVE

- stwu r1,-32(r1) ;# create space on the stack

- li r6, 16 ;# r6 = byte offset of the second vector of a pair

- load_c v0, dct_tab, 0, r9, r10 ;# v0 = dct_tab row 0, r10 = address of dct_tab

- lvx v1, r6, r10 ;# v1 = dct_tab row 1

- addi r10, r10, 32 ;# step r10 to dct_tab row 2

- lvx v2, 0, r10 ;# v2 = dct_tab row 2

- lvx v3, r6, r10 ;# v3 = dct_tab row 3

- load_c v4, ppc_dctperm_tab, 0, r9, r10 ;# v4 = first permute vector

- load_c v5, ppc_dctperm_tab, r6, r9, r10 ;# v5 = second permute vector

- load_c v6, round_tab, 0, r10, r9 ;# v6 = first round_tab vector; scratch order swapped so r9 keeps the table address

-.endm

-;# epilogue: undoes prologue. Pops the 32-byte frame by adding 32 to r1

-;# (the back-chain word is not read) and restores VRSAVE from r11, so r11

-;# must still hold the value saved by prologue.

-.macro epilogue

- addi r1, r1, 32 ;# recover stack

- mtspr 256, r11 ;# reset old VRSAVE

-.endm

-;# Do horiz xf on two rows of coeffs v8 = a0 a1 a2 a3 b0 b1 b2 b3.

-;# a/A are the even rows 0,2 b/B are the odd rows 1,3

-;# For fwd transform, indices are horizontal positions, then frequencies.

-;# For inverse transform, frequencies then positions.

-;# The two resulting A0..A3 B0..B3 are later combined

-;# and vertically transformed.

-;#

-;# Register contract:

-;#   in:   v8 = the two input rows, v0..v3 = coefficient rows,

-;#         v4, v5 = permute vectors, v6 = rounding term added to every sum,

-;#         v7 = right-shift count applied to every sum

-;#   out:  Dst = the eight 16-bit results, interleaved as shown below

-;#   clobbers: v9, v10, v11

-.macro two_rows_horiz Dst

- vperm v9, v8, v8, v4 ;# v9 = a2 a3 a0 a1 b2 b3 b0 b1

- vmsumshm v10, v0, v8, v6 ;# per word: sum of the two halfword products of v0 and v8, plus v6

- vmsumshm v10, v1, v9, v10 ;# accumulate the products of v1 with the swapped row data

- vsraw v10, v10, v7 ;# v10 = A0 A1 B0 B1

- vmsumshm v11, v2, v8, v6 ;# same two-step sum with coefficient rows v2 and v3

- vmsumshm v11, v3, v9, v11

- vsraw v11, v11, v7 ;# v11 = A2 A3 B2 B3

- vpkuwum v10, v10, v11 ;# v10 = A0 A1 B0 B1 A2 A3 B2 B3

- vperm \Dst, v10, v10, v5 ;# Dest = A0 B0 A1 B1 A2 B2 A3 B3

-.endm

-;# Vertical xf on two rows. DCT values in comments are for inverse transform;

-;# forward transform uses transpose.

-;#

-;# Register contract:

-;#   in:   v12, v13 = outputs of the two horizontal passes,

-;#         Ceven, Codd = coefficient vectors (only words 0 and 1 are used),

-;#         v6 = rounding term, v7 = right-shift count

-;#   out:  v8 = two output rows packed as eight 16-bit values

-;#   clobbers: v9, v10

-.macro two_rows_vert Ceven, Codd

- vspltw v8, \Ceven, 0 ;# v8 = c00 c10 or c02 c12 four times

- vspltw v9, \Codd, 0 ;# v9 = c20 c30 or c22 c32 ""

- vmsumshm v8, v8, v12, v6 ;# halfword-pair products with v12, plus rounding term

- vmsumshm v8, v9, v13, v8 ;# accumulate halfword-pair products with v13

- vsraw v10, v8, v7 ;# v10 = first result row as 32-bit words, shifted down

- vspltw v8, \Codd, 1 ;# v8 = c01 c11 or c03 c13

- vspltw v9, \Ceven, 1 ;# v9 = c21 c31 or c23 c33

- vmsumshm v8, v8, v12, v6 ;# same two-step sum for the second result row

- vmsumshm v8, v9, v13, v8

- vsraw v8, v8, v7 ;# v8 = second result row as 32-bit words, shifted down

- vpkuwum v8, v10, v8 ;# v8 = rows 0,1 or 2,3

-.endm

-;# two_rows_h Dest: gather two 8-byte input rows into the scratch buffer at

-;# r8, load them into v8 and run two_rows_horiz into Dest.

-;#   in:   r0 = first word of the first row (the caller loads it just before

-;#              invoking the macro), r3 = address of that row,

-;#         r5 = byte offset from one row to the next,

-;#         r8 = 16-byte scratch buffer (read back with lvx, so it has to be

-;#              16-byte aligned)

-;#   out:  Dest = horizontally transformed rows; r3 advanced by r5, so it

-;#         points at the second of the two rows

-;#   clobbers: r0, v8 and everything two_rows_horiz clobbers

-.macro two_rows_h Dest

- stw r0, 0(r8) ;# first row, word 0 (already in r0)

- lwz r0, 4(r3)

- stw r0, 4(r8) ;# first row, word 1

- lwzux r0, r3,r5 ;# r3 += r5, then load word 0 of the second row

- stw r0, 8(r8)

- lwz r0, 4(r3)

- stw r0, 12(r8) ;# second row, word 1

- lvx v8, 0,r8 ;# v8 = both rows, 16 bytes

- two_rows_horiz \Dest

-.endm

- .align 2

-;# vp8_short_fdct4x4_ppc(short *input, short *output, int pitch)

-;# Forward transform of one 4x4 block: two horizontal passes (rows 0,1 and

-;# rows 2,3) followed by two vertical passes, each storing 16 bytes.

-;# r3 short *input (advanced by pitch as rows are read)

-;# r4 short *output (written with stvx at byte offsets 0 and 16)

-;# r5 int pitch (added to r3 as a byte offset between rows)

-;# Clobbers r0, r3, r6, r8-r12 and v0-v13; VRSAVE and r1 are restored.

-vp8_short_fdct4x4_ppc:

- prologue

- vspltisw v7, 14 ;# == 14, fits in 5 signed bits

-;# The row scratch buffer sits at 16(r1), not 0(r1): the word at 0(r1) is

-;# the back chain stored by stwu in prologue and must stay intact for

-;# anything that walks the stack. 16(r1) is still inside the 32-byte frame

-;# and keeps the 16-byte alignment that the lvx in two_rows_h relies on.

- addi r8, r1, 16

- lwz r0, 0(r3)

- two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13

- lwzux r0, r3, r5

- two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33

- lvx v6, r6, r9 ;# v6 = Vround

- vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter

- two_rows_vert v0, v1

- stvx v8, 0, r4

- two_rows_vert v2, v3

- stvx v8, r6, r4

- epilogue

- blr

- .align 2

-;# vp8_short_fdct8x4_ppc(short *input, short *output, int pitch)

-;# Forward transform of two side-by-side 4x4 blocks. The second block

-;# starts 8 bytes after input and its results go 32 bytes after output.

-;# r3 short *input (advanced by pitch as rows are read)

-;# r4 short *output (written with stvx; advanced by 32 for the second block)

-;# r5 int pitch (added to r3 as a byte offset between rows)

-;# Clobbers r0, r3, r4, r6, r8-r12 and v0-v13; VRSAVE and r1 are restored.

-vp8_short_fdct8x4_ppc:

- prologue

- vspltisw v7, 14 ;# == 14, fits in 5 signed bits

-;# The row scratch buffer sits at 16(r1), not 0(r1): the word at 0(r1) is

-;# the back chain stored by stwu in prologue and must stay intact for

-;# anything that walks the stack. 16(r1) is still inside the 32-byte frame

-;# and keeps the 16-byte alignment that the lvx in two_rows_h relies on.

- addi r8, r1, 16

- addi r10, r3, 0 ;# keep the original input pointer; two_rows_h advances r3

- lwz r0, 0(r3)

- two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13

- lwzux r0, r3, r5

- two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33

- lvx v6, r6, r9 ;# v6 = Vround

- vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter

- two_rows_vert v0, v1

- stvx v8, 0, r4

- two_rows_vert v2, v3

- stvx v8, r6, r4

- ;# Next block

- addi r3, r10, 8 ;# second block: 8 bytes past the original input pointer

- addi r4, r4, 32 ;# second block's output follows the first 32 bytes

- lvx v6, 0, r9 ;# v6 = Hround

- vspltisw v7, 14 ;# == 14, fits in 5 signed bits

- addi r8, r1, 16 ;# same scratch buffer as above, clear of the back chain

- lwz r0, 0(r3)

- two_rows_h v12 ;# v12 = H00 H10 H01 H11 H02 H12 H03 H13

- lwzux r0, r3, r5

- two_rows_h v13 ;# v13 = H20 H30 H21 H31 H22 H32 H23 H33

- lvx v6, r6, r9 ;# v6 = Vround

- vspltisw v7, -16 ;# == 16 == -16, only low 5 bits matter

- two_rows_vert v0, v1

- stvx v8, 0, r4

- two_rows_vert v2, v3

- stvx v8, r6, r4

- epilogue

- blr

-;# Constant tables read with lvx. Each table is preceded by .align 4

-;# (presumably 2^4 = 16-byte alignment under power-of-two .align semantics,

-;# which is what lvx needs - confirm for the target assembler).

- .data

- .align 4

-;# Byte-index vectors for vperm.

-;# Vector 0 swaps the two 32-bit words inside each 8-byte half.

-;# Vector 1 interleaves the 16-bit elements within each 8-byte half

-;# (element order 0 2 1 3 | 4 6 5 7).

-ppc_dctperm_tab:

- .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11

- .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15

- .align 4

-;# Four 16-byte coefficient rows of signed 16-bit values; each row repeats

-;# the same four values twice. 23170, 30274 and 12540 are approximately

-;# 2^15 times cos(pi/4), cos(pi/8) and sin(pi/8).

-dct_tab:

- .short 23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274

- .short 23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540

- .short 23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540

- .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274

- .align 4

-;# Rounding terms added before the arithmetic right shift: vector 0 is half

-;# of 2^14 (used with a shift of 14), vector 1 is half of 2^16 (used with a

-;# shift of 16).

-round_tab:

- .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1))

- .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1))

« no previous file with comments | « source/libvpx/vp8/encoder/ppc/encodemb_altivec.asm ('k') | source/libvpx/vp8/encoder/ppc/rdopt_altivec.asm » ('j') | no next file with comments »