| Index: openssl/crypto/modes/asm/ghash-parisc.pl
|
| diff --git a/openssl/crypto/modes/asm/ghash-parisc.pl b/openssl/crypto/modes/asm/ghash-parisc.pl
|
| deleted file mode 100644
|
| index 8c7454ee93407c615dc6be6931007b77e0308080..0000000000000000000000000000000000000000
|
| --- a/openssl/crypto/modes/asm/ghash-parisc.pl
|
| +++ /dev/null
|
| @@ -1,730 +0,0 @@
|
| -#!/usr/bin/env perl
|
| -#
|
| -# ====================================================================
|
| -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
| -# project. The module is, however, dual licensed under OpenSSL and
|
| -# CRYPTOGAMS licenses depending on where you obtain it. For further
|
| -# details see http://www.openssl.org/~appro/cryptogams/.
|
| -# ====================================================================
|
| -#
|
| -# April 2010
|
| -#
|
| -# The module implements "4-bit" GCM GHASH function and underlying
|
| -# single multiplication operation in GF(2^128). "4-bit" means that it
|
| -# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
|
| -# it processes one byte in 19.6 cycles, which is more than twice as
|
| -# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
|
| -# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
|
| -# processed byte. This is ~2.2x faster than 64-bit code generated by
|
| -# vendor compiler (which used to be very hard to beat:-).
|
| -#
|
| -# Special thanks to polarhome.com for providing HP-UX account.
|
| -
|
| -$flavour = shift;
|
| -$output = shift;
|
| -open STDOUT,">$output";
|
| -
|
| -if ($flavour =~ /64/) {
|
| - $LEVEL ="2.0W";
|
| - $SIZE_T =8;
|
| - $FRAME_MARKER =80;
|
| - $SAVED_RP =16;
|
| - $PUSH ="std";
|
| - $PUSHMA ="std,ma";
|
| - $POP ="ldd";
|
| - $POPMB ="ldd,mb";
|
| - $NREGS =6;
|
| -} else {
|
| - $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
|
| - $SIZE_T =4;
|
| - $FRAME_MARKER =48;
|
| - $SAVED_RP =20;
|
| - $PUSH ="stw";
|
| - $PUSHMA ="stwm";
|
| - $POP ="ldw";
|
| - $POPMB ="ldwm";
|
| - $NREGS =11;
|
| -}
|
| -
|
| -$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
|
| - # [+ argument transfer]
|
| -
|
| -################# volatile registers
|
| -$Xi="%r26"; # argument block
|
| -$Htbl="%r25";
|
| -$inp="%r24";
|
| -$len="%r23";
|
| -$Hhh=$Htbl; # variables
|
| -$Hll="%r22";
|
| -$Zhh="%r21";
|
| -$Zll="%r20";
|
| -$cnt="%r19";
|
| -$rem_4bit="%r28";
|
| -$rem="%r29";
|
| -$mask0xf0="%r31";
|
| -
|
| -################# preserved registers
|
| -$Thh="%r1";
|
| -$Tll="%r2";
|
| -$nlo="%r3";
|
| -$nhi="%r4";
|
| -$byte="%r5";
|
| -if ($SIZE_T==4) {
|
| - $Zhl="%r6";
|
| - $Zlh="%r7";
|
| - $Hhl="%r8";
|
| - $Hlh="%r9";
|
| - $Thl="%r10";
|
| - $Tlh="%r11";
|
| -}
|
| -$rem2="%r6"; # used in PA-RISC 2.0 code
|
| -
|
| -$code.=<<___;
|
| - .LEVEL $LEVEL
|
| - .SPACE \$TEXT\$
|
| - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
|
| -
|
| - .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
|
| - .ALIGN 64
|
| -gcm_gmult_4bit
|
| - .PROC
|
| - .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
|
| - .ENTRY
|
| - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
|
| - $PUSHMA %r3,$FRAME(%sp)
|
| - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
|
| - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
|
| - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
|
| - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
|
| - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
|
| - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
|
| - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
|
| -___
|
| -$code.=<<___;
|
| - blr %r0,$rem_4bit
|
| - ldi 3,$rem
|
| -L\$pic_gmult
|
| - andcm $rem_4bit,$rem,$rem_4bit
|
| - addl $inp,$len,$len
|
| - ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
|
| - ldi 0xf0,$mask0xf0
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - ldi 31,$rem
|
| - mtctl $rem,%cr11
|
| - extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
|
| - b L\$parisc1_gmult
|
| - nop
|
| -___
|
| -
|
| -$code.=<<___;
|
| - ldb 15($Xi),$nlo
|
| - ldo 8($Htbl),$Hll
|
| -
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| -
|
| - ldd $nlo($Hll),$Zll
|
| - ldd $nlo($Hhh),$Zhh
|
| -
|
| - depd,z $Zll,60,4,$rem
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldb 14($Xi),$nlo
|
| -
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| - b L\$oop_gmult_pa2
|
| - ldi 13,$cnt
|
| -
|
| - .ALIGN 8
|
| -L\$oop_gmult_pa2
|
| - xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
|
| - depd,z $Zll,60,4,$rem
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nlo($Hll),$Tll
|
| - ldd $nlo($Hhh),$Thh
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - depd,z $Zll,60,4,$rem
|
| - ldbx $cnt($Xi),$nlo
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| -
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| - ldd $rem($rem_4bit),$rem
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - addib,uv -1,$cnt,L\$oop_gmult_pa2
|
| - xor $Thh,$Zhh,$Zhh
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - depd,z $Zll,60,4,$rem
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nlo($Hll),$Tll
|
| - ldd $nlo($Hhh),$Thh
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - depd,z $Zll,60,4,$rem
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - std $Zll,8($Xi)
|
| - std $Zhh,0($Xi)
|
| -___
|
| -
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - b L\$done_gmult
|
| - nop
|
| -
|
| -L\$parisc1_gmult
|
| - ldb 15($Xi),$nlo
|
| - ldo 12($Htbl),$Hll
|
| - ldo 8($Htbl),$Hlh
|
| - ldo 4($Htbl),$Hhl
|
| -
|
| - and $mask0xf0,$nlo,$nhi
|
| - zdep $nlo,27,4,$nlo
|
| -
|
| - ldwx $nlo($Hll),$Zll
|
| - ldwx $nlo($Hlh),$Zlh
|
| - ldwx $nlo($Hhl),$Zhl
|
| - ldwx $nlo($Hhh),$Zhh
|
| - zdep $Zll,28,4,$rem
|
| - ldb 14($Xi),$nlo
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - extru $Zhh,27,28,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - xor $rem,$Zhh,$Zhh
|
| - and $mask0xf0,$nlo,$nhi
|
| - zdep $nlo,27,4,$nlo
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nlo($Hll),$Tll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nlo($Hlh),$Tlh
|
| - xor $Thl,$Zhl,$Zhl
|
| - b L\$oop_gmult_pa1
|
| - ldi 13,$cnt
|
| -
|
| - .ALIGN 8
|
| -L\$oop_gmult_pa1
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $nlo($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nlo($Hhh),$Thh
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - ldbx $cnt($Xi),$nlo
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $Thl,$Zhl,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - xor $rem,$Zhh,$Zhh
|
| - zdep $Zll,28,4,$rem
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - and $mask0xf0,$nlo,$nhi
|
| - extru $Zhh,27,28,$Zhh
|
| - zdep $nlo,27,4,$nlo
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nlo($Hll),$Tll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nlo($Hlh),$Tlh
|
| - xor $rem,$Zhh,$Zhh
|
| - addib,uv -1,$cnt,L\$oop_gmult_pa1
|
| - xor $Thl,$Zhl,$Zhl
|
| -
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $nlo($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nlo($Hhh),$Thh
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $rem,$Zhh,$Zhh
|
| - xor $Thl,$Zhl,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - xor $rem,$Zhh,$Zhh
|
| - stw $Zll,12($Xi)
|
| - xor $Thl,$Zhl,$Zhl
|
| - stw $Zlh,8($Xi)
|
| - xor $Thh,$Zhh,$Zhh
|
| - stw $Zhl,4($Xi)
|
| - stw $Zhh,0($Xi)
|
| -___
|
| -$code.=<<___;
|
| -L\$done_gmult
|
| - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
|
| - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
|
| - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
|
| - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
|
| - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
|
| - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
|
| - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
|
| - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
|
| -___
|
| -$code.=<<___;
|
| - bv (%r2)
|
| - .EXIT
|
| - $POPMB -$FRAME(%sp),%r3
|
| - .PROCEND
|
| -
|
| - .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
|
| - .ALIGN 64
|
| -gcm_ghash_4bit
|
| - .PROC
|
| - .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
|
| - .ENTRY
|
| - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
|
| - $PUSHMA %r3,$FRAME(%sp)
|
| - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
|
| - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
|
| - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
|
| - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
|
| - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
|
| - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
|
| - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
|
| -___
|
| -$code.=<<___;
|
| - blr %r0,$rem_4bit
|
| - ldi 3,$rem
|
| -L\$pic_ghash
|
| - andcm $rem_4bit,$rem,$rem_4bit
|
| - addl $inp,$len,$len
|
| - ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
|
| - ldi 0xf0,$mask0xf0
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - ldi 31,$rem
|
| - mtctl $rem,%cr11
|
| - extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
|
| - b L\$parisc1_ghash
|
| - nop
|
| -___
|
| -
|
| -$code.=<<___;
|
| - ldb 15($Xi),$nlo
|
| - ldo 8($Htbl),$Hll
|
| -
|
| -L\$outer_ghash_pa2
|
| - ldb 15($inp),$nhi
|
| - xor $nhi,$nlo,$nlo
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| -
|
| - ldd $nlo($Hll),$Zll
|
| - ldd $nlo($Hhh),$Zhh
|
| -
|
| - depd,z $Zll,60,4,$rem
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldb 14($Xi),$nlo
|
| - ldb 14($inp),$byte
|
| -
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| - xor $byte,$nlo,$nlo
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| - b L\$oop_ghash_pa2
|
| - ldi 13,$cnt
|
| -
|
| - .ALIGN 8
|
| -L\$oop_ghash_pa2
|
| - xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
|
| - depd,z $Zll,60,4,$rem2
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nlo($Hll),$Tll
|
| - ldd $nlo($Hhh),$Thh
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldbx $cnt($Xi),$nlo
|
| - ldbx $cnt($inp),$byte
|
| -
|
| - depd,z $Zll,60,4,$rem
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - ldd $rem2($rem_4bit),$rem2
|
| -
|
| - xor $rem2,$Zhh,$Zhh
|
| - xor $byte,$nlo,$nlo
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| -
|
| - and $mask0xf0,$nlo,$nhi
|
| - depd,z $nlo,59,4,$nlo
|
| -
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - xor $Tll,$Zll,$Zll
|
| -
|
| - ldd $rem($rem_4bit),$rem
|
| - addib,uv -1,$cnt,L\$oop_ghash_pa2
|
| - xor $Thh,$Zhh,$Zhh
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - depd,z $Zll,60,4,$rem2
|
| -
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - ldd $nlo($Hll),$Tll
|
| - ldd $nlo($Hhh),$Thh
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| -
|
| - depd,z $Zll,60,4,$rem
|
| - shrpd $Zhh,$Zll,4,$Zll
|
| - ldd $rem2($rem_4bit),$rem2
|
| -
|
| - xor $rem2,$Zhh,$Zhh
|
| - ldd $nhi($Hll),$Tll
|
| - ldd $nhi($Hhh),$Thh
|
| -
|
| - extrd,u $Zhh,59,60,$Zhh
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldd $rem($rem_4bit),$rem
|
| -
|
| - xor $rem,$Zhh,$Zhh
|
| - std $Zll,8($Xi)
|
| - ldo 16($inp),$inp
|
| - std $Zhh,0($Xi)
|
| - cmpb,*<> $inp,$len,L\$outer_ghash_pa2
|
| - copy $Zll,$nlo
|
| -___
|
| -
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - b L\$done_ghash
|
| - nop
|
| -
|
| -L\$parisc1_ghash
|
| - ldb 15($Xi),$nlo
|
| - ldo 12($Htbl),$Hll
|
| - ldo 8($Htbl),$Hlh
|
| - ldo 4($Htbl),$Hhl
|
| -
|
| -L\$outer_ghash_pa1
|
| - ldb 15($inp),$byte
|
| - xor $byte,$nlo,$nlo
|
| - and $mask0xf0,$nlo,$nhi
|
| - zdep $nlo,27,4,$nlo
|
| -
|
| - ldwx $nlo($Hll),$Zll
|
| - ldwx $nlo($Hlh),$Zlh
|
| - ldwx $nlo($Hhl),$Zhl
|
| - ldwx $nlo($Hhh),$Zhh
|
| - zdep $Zll,28,4,$rem
|
| - ldb 14($Xi),$nlo
|
| - ldb 14($inp),$byte
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - extru $Zhh,27,28,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - xor $byte,$nlo,$nlo
|
| - xor $rem,$Zhh,$Zhh
|
| - and $mask0xf0,$nlo,$nhi
|
| - zdep $nlo,27,4,$nlo
|
| -
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nlo($Hll),$Tll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nlo($Hlh),$Tlh
|
| - xor $Thl,$Zhl,$Zhl
|
| - b L\$oop_ghash_pa1
|
| - ldi 13,$cnt
|
| -
|
| - .ALIGN 8
|
| -L\$oop_ghash_pa1
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $nlo($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nlo($Hhh),$Thh
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - ldbx $cnt($Xi),$nlo
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - ldbx $cnt($inp),$byte
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $Thl,$Zhl,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - xor $rem,$Zhh,$Zhh
|
| - zdep $Zll,28,4,$rem
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - xor $byte,$nlo,$nlo
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - and $mask0xf0,$nlo,$nhi
|
| - extru $Zhh,27,28,$Zhh
|
| - zdep $nlo,27,4,$nlo
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nlo($Hll),$Tll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nlo($Hlh),$Tlh
|
| - xor $rem,$Zhh,$Zhh
|
| - addib,uv -1,$cnt,L\$oop_ghash_pa1
|
| - xor $Thl,$Zhl,$Zhl
|
| -
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $nlo($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - ldwx $nlo($Hhh),$Thh
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - xor $Tll,$Zll,$Zll
|
| - ldwx $nhi($Hll),$Tll
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - xor $Tlh,$Zlh,$Zlh
|
| - ldwx $nhi($Hlh),$Tlh
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $rem,$Zhh,$Zhh
|
| - xor $Thl,$Zhl,$Zhl
|
| - ldwx $nhi($Hhl),$Thl
|
| - xor $Thh,$Zhh,$Zhh
|
| - ldwx $nhi($Hhh),$Thh
|
| - zdep $Zll,28,4,$rem
|
| - ldwx $rem($rem_4bit),$rem
|
| - shrpw $Zlh,$Zll,4,$Zll
|
| - shrpw $Zhl,$Zlh,4,$Zlh
|
| - shrpw $Zhh,$Zhl,4,$Zhl
|
| - extru $Zhh,27,28,$Zhh
|
| - xor $Tll,$Zll,$Zll
|
| - xor $Tlh,$Zlh,$Zlh
|
| - xor $rem,$Zhh,$Zhh
|
| - stw $Zll,12($Xi)
|
| - xor $Thl,$Zhl,$Zhl
|
| - stw $Zlh,8($Xi)
|
| - xor $Thh,$Zhh,$Zhh
|
| - stw $Zhl,4($Xi)
|
| - ldo 16($inp),$inp
|
| - stw $Zhh,0($Xi)
|
| - comb,<> $inp,$len,L\$outer_ghash_pa1
|
| - copy $Zll,$nlo
|
| -___
|
| -$code.=<<___;
|
| -L\$done_ghash
|
| - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
|
| - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
|
| - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
|
| - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
|
| -___
|
| -$code.=<<___ if ($SIZE_T==4);
|
| - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
|
| - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
|
| - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
|
| - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
|
| - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
|
| -___
|
| -$code.=<<___;
|
| - bv (%r2)
|
| - .EXIT
|
| - $POPMB -$FRAME(%sp),%r3
|
| - .PROCEND
|
| -
|
| - .ALIGN 64
|
| -L\$rem_4bit
|
| - .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
|
| - .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
|
| - .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
|
| - .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
|
| - .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
|
| - .ALIGN 64
|
| -___
|
| -
|
| -# Explicitly encode PA-RISC 2.0 instructions used in this module, so
|
| -# that it can be compiled with .LEVEL 1.0. It should be noted that I
|
| -# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
|
| -# directive...
|
| -
|
| -my $ldd = sub {
|
| - my ($mod,$args) = @_;
|
| - my $orig = "ldd$mod\t$args";
|
| -
|
| - if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
|
| - { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
|
| - { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
|
| - $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
|
| - $opcode|=(1<<5) if ($mod =~ /^,m/);
|
| - $opcode|=(1<<13) if ($mod =~ /^,mb/);
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - else { "\t".$orig; }
|
| -};
|
| -
|
| -my $std = sub {
|
| - my ($mod,$args) = @_;
|
| - my $orig = "std$mod\t$args";
|
| -
|
| - if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
|
| - { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - else { "\t".$orig; }
|
| -};
|
| -
|
| -my $extrd = sub {
|
| - my ($mod,$args) = @_;
|
| - my $orig = "extrd$mod\t$args";
|
| -
|
| - # I only have ",u" completer, it's implicitly encoded...
|
| - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
|
| - { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
|
| - my $len=32-$3;
|
| - $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
|
| - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
|
| - { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
|
| - my $len=32-$2;
|
| - $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
|
| - $opcode |= (1<<13) if ($mod =~ /,\**=/);
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - else { "\t".$orig; }
|
| -};
|
| -
|
| -my $shrpd = sub {
|
| - my ($mod,$args) = @_;
|
| - my $orig = "shrpd$mod\t$args";
|
| -
|
| - if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
|
| - { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
|
| - my $cpos=63-$3;
|
| - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
|
| - { sprintf "\t.WORD\t0x%08x\t; %s",
|
| - (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
|
| - }
|
| - else { "\t".$orig; }
|
| -};
|
| -
|
| -my $depd = sub {
|
| - my ($mod,$args) = @_;
|
| - my $orig = "depd$mod\t$args";
|
| -
|
| - # I only have ",z" completer, it's impicitly encoded...
|
| - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
|
| - { my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
|
| - my $cpos=63-$2;
|
| - my $len=32-$3;
|
| - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
|
| - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
|
| - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
|
| - }
|
| - else { "\t".$orig; }
|
| -};
|
| -
|
| -sub assemble {
|
| - my ($mnemonic,$mod,$args)=@_;
|
| - my $opcode = eval("\$$mnemonic");
|
| -
|
| - ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
|
| -}
|
| -
|
| -foreach (split("\n",$code)) {
|
| - s/\`([^\`]*)\`/eval $1/ge;
|
| - if ($SIZE_T==4) {
|
| - s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
|
| - s/cmpb,\*/comb,/;
|
| - s/,\*/,/;
|
| - }
|
| - print $_,"\n";
|
| -}
|
| -
|
| -close STDOUT;
|
|
|