Index: openssl/crypto/modes/asm/ghash-alpha.pl
diff --git a/openssl/crypto/modes/asm/ghash-alpha.pl b/openssl/crypto/modes/asm/ghash-alpha.pl
deleted file mode 100644
index 6358b2750fabf54c0c96b8103fbd2057ae651ad0..0000000000000000000000000000000000000000
--- a/openssl/crypto/modes/asm/ghash-alpha.pl
+++ /dev/null
@@ -1,451 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# March 2010
-#
-# The module implements "4-bit" GCM GHASH function and underlying
-# single multiplication operation in GF(2^128). "4-bit" means that it
-# uses 256 bytes per-key table [+128 bytes shared table]. Even though
-# loops are aggressively modulo-scheduled in respect to references to
-# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
-# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
-# scheduling "glitch," because uprofile(1) indicates uniform sample
-# distribution, as if all instruction bundles execute in 1.5 cycles.
-# Meaning that it could have been even faster, yet 12 cycles is ~60%
-# better than gcc-generated code and ~80% better than code generated
-# by the vendor compiler.
-
-$cnt="v0";	# $0
-$t0="t0";
-$t1="t1";
-$t2="t2";
-$Thi0="t3";	# $4
-$Tlo0="t4";
-$Thi1="t5";
-$Tlo1="t6";
-$rem="t7";	# $8
-#################
-$Xi="a0";	# $16, input argument block
-$Htbl="a1";
-$inp="a2";
-$len="a3";
-$nlo="a4";	# $20
-$nhi="a5";
-$Zhi="t8";
-$Zlo="t9";
-$Xhi="t10";	# $24
-$Xlo="t11";
-$remp="t12";
-$rem_4bit="AT";	# $28
-
-{ my $N;
-  sub loop() {
-
-	$N++;
-$code.=<<___;
-.align	4
-	extbl	$Xlo,7,$nlo
-	and	$nlo,0xf0,$nhi
-	sll	$nlo,4,$nlo
-	and	$nlo,0xf0,$nlo
-
-	addq	$nlo,$Htbl,$nlo
-	ldq	$Zlo,8($nlo)
-	addq	$nhi,$Htbl,$nhi
-	ldq	$Zhi,0($nlo)
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	lda	$cnt,6(zero)
-	extbl	$Xlo,6,$nlo
-
-	ldq	$Tlo1,8($nhi)
-	s8addq	$remp,$rem_4bit,$remp
-	ldq	$Thi1,0($nhi)
-	srl	$Zlo,4,$Zlo
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$t0,$Zlo,$Zlo
-	and	$nlo,0xf0,$nhi
-
-	xor	$Tlo1,$Zlo,$Zlo
-	sll	$nlo,4,$nlo
-	xor	$Thi1,$Zhi,$Zhi
-	and	$nlo,0xf0,$nlo
-
-	addq	$nlo,$Htbl,$nlo
-	ldq	$Tlo0,8($nlo)
-	addq	$nhi,$Htbl,$nhi
-	ldq	$Thi0,0($nlo)
-
-.Looplo$N:
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	subq	$cnt,1,$cnt
-	srl	$Zlo,4,$Zlo
-
-	ldq	$Tlo1,8($nhi)
-	xor	$rem,$Zhi,$Zhi
-	ldq	$Thi1,0($nhi)
-	s8addq	$remp,$rem_4bit,$remp
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$t0,$Zlo,$Zlo
-	extbl	$Xlo,$cnt,$nlo
-
-	and	$nlo,0xf0,$nhi
-	xor	$Thi0,$Zhi,$Zhi
-	xor	$Tlo0,$Zlo,$Zlo
-	sll	$nlo,4,$nlo
-
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	and	$nlo,0xf0,$nlo
-	srl	$Zlo,4,$Zlo
-
-	s8addq	$remp,$rem_4bit,$remp
-	xor	$rem,$Zhi,$Zhi
-	addq	$nlo,$Htbl,$nlo
-	addq	$nhi,$Htbl,$nhi
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	ldq	$Tlo0,8($nlo)
-	xor	$t0,$Zlo,$Zlo
-
-	xor	$Tlo1,$Zlo,$Zlo
-	xor	$Thi1,$Zhi,$Zhi
-	ldq	$Thi0,0($nlo)
-	bne	$cnt,.Looplo$N
-
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	lda	$cnt,7(zero)
-	srl	$Zlo,4,$Zlo
-
-	ldq	$Tlo1,8($nhi)
-	xor	$rem,$Zhi,$Zhi
-	ldq	$Thi1,0($nhi)
-	s8addq	$remp,$rem_4bit,$remp
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$t0,$Zlo,$Zlo
-	extbl	$Xhi,$cnt,$nlo
-
-	and	$nlo,0xf0,$nhi
-	xor	$Thi0,$Zhi,$Zhi
-	xor	$Tlo0,$Zlo,$Zlo
-	sll	$nlo,4,$nlo
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	and	$nlo,0xf0,$nlo
-	srl	$Zlo,4,$Zlo
-
-	s8addq	$remp,$rem_4bit,$remp
-	xor	$rem,$Zhi,$Zhi
-	addq	$nlo,$Htbl,$nlo
-	addq	$nhi,$Htbl,$nhi
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	ldq	$Tlo0,8($nlo)
-	xor	$t0,$Zlo,$Zlo
-
-	xor	$Tlo1,$Zlo,$Zlo
-	xor	$Thi1,$Zhi,$Zhi
-	ldq	$Thi0,0($nlo)
-	unop
-
-
-.Loophi$N:
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	subq	$cnt,1,$cnt
-	srl	$Zlo,4,$Zlo
-
-	ldq	$Tlo1,8($nhi)
-	xor	$rem,$Zhi,$Zhi
-	ldq	$Thi1,0($nhi)
-	s8addq	$remp,$rem_4bit,$remp
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$t0,$Zlo,$Zlo
-	extbl	$Xhi,$cnt,$nlo
-
-	and	$nlo,0xf0,$nhi
-	xor	$Thi0,$Zhi,$Zhi
-	xor	$Tlo0,$Zlo,$Zlo
-	sll	$nlo,4,$nlo
-
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	and	$nlo,0xf0,$nlo
-	srl	$Zlo,4,$Zlo
-
-	s8addq	$remp,$rem_4bit,$remp
-	xor	$rem,$Zhi,$Zhi
-	addq	$nlo,$Htbl,$nlo
-	addq	$nhi,$Htbl,$nhi
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	ldq	$Tlo0,8($nlo)
-	xor	$t0,$Zlo,$Zlo
-
-	xor	$Tlo1,$Zlo,$Zlo
-	xor	$Thi1,$Zhi,$Zhi
-	ldq	$Thi0,0($nlo)
-	bne	$cnt,.Loophi$N
-
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	srl	$Zlo,4,$Zlo
-
-	ldq	$Tlo1,8($nhi)
-	xor	$rem,$Zhi,$Zhi
-	ldq	$Thi1,0($nhi)
-	s8addq	$remp,$rem_4bit,$remp
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$t0,$Zlo,$Zlo
-
-	xor	$Tlo0,$Zlo,$Zlo
-	xor	$Thi0,$Zhi,$Zhi
-
-	and	$Zlo,0x0f,$remp
-	sll	$Zhi,60,$t0
-	srl	$Zlo,4,$Zlo
-
-	s8addq	$remp,$rem_4bit,$remp
-	xor	$rem,$Zhi,$Zhi
-
-	ldq	$rem,0($remp)
-	srl	$Zhi,4,$Zhi
-	xor	$Tlo1,$Zlo,$Zlo
-	xor	$Thi1,$Zhi,$Zhi
-	xor	$t0,$Zlo,$Zlo
-	xor	$rem,$Zhi,$Zhi
-___
-}}
-
-$code=<<___;
-#ifdef __linux__
-#include <asm/regdef.h>
-#else
-#include <asm.h>
-#include <regdef.h>
-#endif
-
-.text
-
-.set	noat
-.set	noreorder
-.globl	gcm_gmult_4bit
-.align	4
-.ent	gcm_gmult_4bit
-gcm_gmult_4bit:
-	.frame	sp,0,ra
-	.prologue 0
-
-	ldq	$Xlo,8($Xi)
-	ldq	$Xhi,0($Xi)
-
-	br	$rem_4bit,.Lpic1
-.Lpic1:	lda	$rem_4bit,rem_4bit-.Lpic1($rem_4bit)
-___
-
-	&loop();
-
-$code.=<<___;
-	srl	$Zlo,24,$t0	# byte swap
-	srl	$Zlo,8,$t1
-
-	sll	$Zlo,8,$t2
-	sll	$Zlo,24,$Zlo
-	zapnot	$t0,0x11,$t0
-	zapnot	$t1,0x22,$t1
-
-	zapnot	$Zlo,0x88,$Zlo
-	or	$t0,$t1,$t0
-	zapnot	$t2,0x44,$t2
-
-	or	$Zlo,$t0,$Zlo
-	srl	$Zhi,24,$t0
-	srl	$Zhi,8,$t1
-
-	or	$Zlo,$t2,$Zlo
-	sll	$Zhi,8,$t2
-	sll	$Zhi,24,$Zhi
-
-	srl	$Zlo,32,$Xlo
-	sll	$Zlo,32,$Zlo
-
-	zapnot	$t0,0x11,$t0
-	zapnot	$t1,0x22,$t1
-	or	$Zlo,$Xlo,$Xlo
-
-	zapnot	$Zhi,0x88,$Zhi
-	or	$t0,$t1,$t0
-	zapnot	$t2,0x44,$t2
-
-	or	$Zhi,$t0,$Zhi
-	or	$Zhi,$t2,$Zhi
-
-	srl	$Zhi,32,$Xhi
-	sll	$Zhi,32,$Zhi
-
-	or	$Zhi,$Xhi,$Xhi
-	stq	$Xlo,8($Xi)
-	stq	$Xhi,0($Xi)
-
-	ret	(ra)
-.end	gcm_gmult_4bit
-___
-
-$inhi="s0";
-$inlo="s1";
-
-$code.=<<___;
-.globl	gcm_ghash_4bit
-.align	4
-.ent	gcm_ghash_4bit
-gcm_ghash_4bit:
-	lda	sp,-32(sp)
-	stq	ra,0(sp)
-	stq	s0,8(sp)
-	stq	s1,16(sp)
-	.mask	0x04000600,-32
-	.frame	sp,32,ra
-	.prologue 0
-
-	ldq_u	$inhi,0($inp)
-	ldq_u	$Thi0,7($inp)
-	ldq_u	$inlo,8($inp)
-	ldq_u	$Tlo0,15($inp)
-	ldq	$Xhi,0($Xi)
-	ldq	$Xlo,8($Xi)
-
-	br	$rem_4bit,.Lpic2
-.Lpic2:	lda	$rem_4bit,rem_4bit-.Lpic2($rem_4bit)
-
-.Louter:
-	extql	$inhi,$inp,$inhi
-	extqh	$Thi0,$inp,$Thi0
-	or	$inhi,$Thi0,$inhi
-	lda	$inp,16($inp)
-
-	extql	$inlo,$inp,$inlo
-	extqh	$Tlo0,$inp,$Tlo0
-	or	$inlo,$Tlo0,$inlo
-	subq	$len,16,$len
-
-	xor	$Xlo,$inlo,$Xlo
-	xor	$Xhi,$inhi,$Xhi
-___
-
-	&loop();
-
-$code.=<<___;
-	srl	$Zlo,24,$t0	# byte swap
-	srl	$Zlo,8,$t1
-
-	sll	$Zlo,8,$t2
-	sll	$Zlo,24,$Zlo
-	zapnot	$t0,0x11,$t0
-	zapnot	$t1,0x22,$t1
-
-	zapnot	$Zlo,0x88,$Zlo
-	or	$t0,$t1,$t0
-	zapnot	$t2,0x44,$t2
-
-	or	$Zlo,$t0,$Zlo
-	srl	$Zhi,24,$t0
-	srl	$Zhi,8,$t1
-
-	or	$Zlo,$t2,$Zlo
-	sll	$Zhi,8,$t2
-	sll	$Zhi,24,$Zhi
-
-	srl	$Zlo,32,$Xlo
-	sll	$Zlo,32,$Zlo
-	beq	$len,.Ldone
-
-	zapnot	$t0,0x11,$t0
-	zapnot	$t1,0x22,$t1
-	or	$Zlo,$Xlo,$Xlo
-	ldq_u	$inhi,0($inp)
-
-	zapnot	$Zhi,0x88,$Zhi
-	or	$t0,$t1,$t0
-	zapnot	$t2,0x44,$t2
-	ldq_u	$Thi0,7($inp)
-
-	or	$Zhi,$t0,$Zhi
-	or	$Zhi,$t2,$Zhi
-	ldq_u	$inlo,8($inp)
-	ldq_u	$Tlo0,15($inp)
-
-	srl	$Zhi,32,$Xhi
-	sll	$Zhi,32,$Zhi
-
-	or	$Zhi,$Xhi,$Xhi
-	br	zero,.Louter
-
-.Ldone:
-	zapnot	$t0,0x11,$t0
-	zapnot	$t1,0x22,$t1
-	or	$Zlo,$Xlo,$Xlo
-
-	zapnot	$Zhi,0x88,$Zhi
-	or	$t0,$t1,$t0
-	zapnot	$t2,0x44,$t2
-
-	or	$Zhi,$t0,$Zhi
-	or	$Zhi,$t2,$Zhi
-
-	srl	$Zhi,32,$Xhi
-	sll	$Zhi,32,$Zhi
-
-	or	$Zhi,$Xhi,$Xhi
-
-	stq	$Xlo,8($Xi)
-	stq	$Xhi,0($Xi)
-
-	.set	noreorder
-	/*ldq	ra,0(sp)*/
-	ldq	s0,8(sp)
-	ldq	s1,16(sp)
-	lda	sp,32(sp)
-	ret	(ra)
-.end	gcm_ghash_4bit
-
-.align	4
-rem_4bit:
-	.quad	0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48
-	.quad	0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48
-	.quad	0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48
-	.quad	0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48
-.ascii	"GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
-.align	4
-
-___
-$output=shift and open STDOUT,">$output";
-print $code;
-close STDOUT;
-
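
The file removed above was the Alpha-specific assembly backend for the two GHASH entry points, gcm_gmult_4bit and gcm_ghash_4bit. For reference, here is a minimal C sketch of the 4-bit table-driven multiplication these routines implement, modeled on OpenSSL's portable path in crypto/modes/gcm128.c; the bswap64 helper is an illustrative local function, and a little-endian host (as the Alpha is) is assumed:

#include <stdint.h>

/* One table entry: the hi/lo halves of i*H for a 4-bit index i.
 * 16 entries of 16 bytes make up the 256-byte per-key table the
 * deleted module's header comment refers to. */
typedef struct { uint64_t hi, lo; } u128;

/* Reduction constants, pre-shifted into the top 16 bits -- the
 * 128-byte shared table the Alpha code stores as rem_4bit
 * (.quad 0x0000<<48, 0x1C20<<48, ...). */
static const uint64_t rem_4bit[16] = {
    0x0000ULL << 48, 0x1C20ULL << 48, 0x3840ULL << 48, 0x2460ULL << 48,
    0x7080ULL << 48, 0x6CA0ULL << 48, 0x48C0ULL << 48, 0x54E0ULL << 48,
    0xE100ULL << 48, 0xFD20ULL << 48, 0xD940ULL << 48, 0xC560ULL << 48,
    0x9180ULL << 48, 0x8DA0ULL << 48, 0xA9C0ULL << 48, 0xB5E0ULL << 48
};

static uint64_t bswap64(uint64_t x)     /* illustrative helper */
{
    x = (x >> 32) | (x << 32);
    x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x & 0x0000FFFF0000FFFFULL) << 16);
    return ((x & 0xFF00FF00FF00FF00ULL) >> 8) | ((x & 0x00FF00FF00FF00FFULL) << 8);
}

/* Xi = Xi * H in GF(2^128), consuming one 4-bit nibble per step. */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16])
{
    const uint8_t *xi = (const uint8_t *)Xi;
    u128 Z;
    uint64_t rem;
    int cnt = 15;
    unsigned nlo = xi[15], nhi = nlo >> 4;

    nlo &= 0xf;
    Z = Htable[nlo];                    /* lowest nibble seeds Z */

    for (;;) {
        /* shift Z right 4 bits, folding the dropped bits back in */
        rem  = Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4) ^ rem_4bit[rem];
        Z.hi ^= Htable[nhi].hi;         /* high nibble of this byte */
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo  = xi[cnt];
        nhi  = nlo >> 4;
        nlo &= 0xf;

        rem  = Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4) ^ rem_4bit[rem];
        Z.hi ^= Htable[nlo].hi;         /* low nibble of next byte */
        Z.lo ^= Htable[nlo].lo;
    }

    /* store back big-endian, as the Alpha code's zapnot byte-swap
     * sequence does before the final stq of $Xlo/$Xhi */
    Xi[0] = bswap64(Z.hi);
    Xi[1] = bswap64(Z.lo);
}

The deleted .Looplo/.Loophi pairs are this inner loop modulo-scheduled over the low and high halves of Xi, and gcm_ghash_4bit is the same multiplication with 16 input bytes XORed into Xi first, as the .Louter code does with its ldq_u/extql/extqh unaligned loads.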