openssl/crypto/modes/asm/ghash-s390x.pl - Issue 2072073002: Delete bundled copy of OpenSSL and replace with README.

Unified Diff: openssl/crypto/modes/asm/ghash-s390x.pl

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master

Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: openssl/crypto/modes/asm/ghash-s390x.pl

diff --git a/openssl/crypto/modes/asm/ghash-s390x.pl b/openssl/crypto/modes/asm/ghash-s390x.pl

deleted file mode 100644

index 6a40d5d89c0cd25ca6e884746f58b25ccf726bba..0000000000000000000000000000000000000000

--- a/openssl/crypto/modes/asm/ghash-s390x.pl

+++ /dev/null

@@ -1,262 +0,0 @@

-#!/usr/bin/env perl

-# ====================================================================

-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL

-# project. The module is, however, dual licensed under OpenSSL and

-# CRYPTOGAMS licenses depending on where you obtain it. For further

-# details see http://www.openssl.org/~appro/cryptogams/.

-# ====================================================================

-# September 2010.

-# The module implements "4-bit" GCM GHASH function and underlying

-# single multiplication operation in GF(2^128). "4-bit" means that it

-# uses 256 bytes per-key table [+128 bytes shared table]. Performance

-# was measured to be ~18 cycles per processed byte on z10, which is

-# almost 40% better than gcc-generated code. It should be noted that

-# 18 cycles is a worse result than expected: the loop is scheduled for 12

-# and the result should be close to 12. In the absence of instruction-

-# level profiling data it's impossible to tell why...

-# November 2010.

-# Adapt for -m31 build. If kernel supports what's called "highgprs"

-# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit

-# instructions and achieve "64-bit" performance even in 31-bit legacy

-# application context. The feature is not specific to any particular

-# processor, as long as it's "z-CPU". The latter implies that the code

-# remains z/Architecture specific. On z990 it was measured to perform

-# 2.8x better than 32-bit code generated by gcc 4.3.

-# March 2011.

-# Support for hardware KIMD-GHASH is verified to produce correct

-# result and therefore is engaged. On z196 it was measured to process

-# 8KB buffer ~7x faster than the software implementation. It's not as

-# impressive for smaller buffer sizes and for the smallest 16-byte buffer

-# it's actually almost 2 times slower. Which is the reason why

-# KIMD-GHASH is not used in gcm_gmult_4bit.

-$flavour = shift;

-if ($flavour =~ /3[12]/) {

- $SIZE_T=4;

- $g="";

-} else {

- $SIZE_T=8;

- $g="g";

-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

-open STDOUT,">$output";

-$softonly=0;

-$Zhi="%r0";

-$Zlo="%r1";

-$Xi="%r2"; # argument block

-$Htbl="%r3";

-$inp="%r4";

-$len="%r5";

-$rem0="%r6"; # variables

-$rem1="%r7";

-$nlo="%r8";

-$nhi="%r9";

-$xi="%r10";

-$cnt="%r11";

-$tmp="%r12";

-$x78="%r13";

-$rem_4bit="%r14";

-$sp="%r15";

-$code.=<<___;

-.text

-.globl gcm_gmult_4bit

-.align 32

-gcm_gmult_4bit:

-___

-$code.=<<___ if(!$softonly && 0); # hardware is slow for single block...

- larl %r1,OPENSSL_s390xcap_P

- lg %r0,0(%r1)

- tmhl %r0,0x4000 # check for message-security-assist

- jz .Lsoft_gmult

- lghi %r0,0

- la %r1,16($sp)

- .long 0xb93e0004 # kimd %r0,%r4

- lg %r1,24($sp)

- tmhh %r1,0x4000 # check for function 65

- jz .Lsoft_gmult

- stg %r0,16($sp) # arrange 16 bytes of zero input

- stg %r0,24($sp)

- lghi %r0,65 # function 65

- la %r1,0($Xi) # H lies right after Xi in gcm128_context

- la $inp,16($sp)

- lghi $len,16

- .long 0xb93e0004 # kimd %r0,$inp

- brc 1,.-4 # pay attention to "partial completion"

- br %r14

-.align 32

-.Lsoft_gmult:

-___

-$code.=<<___;

- stm${g} %r6,%r14,6*$SIZE_T($sp)

- aghi $Xi,-1

- lghi $len,1

- lghi $x78,`0xf<<3`

- larl $rem_4bit,rem_4bit

- lg $Zlo,8+1($Xi) # Xi

- j .Lgmult_shortcut

-.type gcm_gmult_4bit,\@function

-.size gcm_gmult_4bit,(.-gcm_gmult_4bit)

-.globl gcm_ghash_4bit

-.align 32

-gcm_ghash_4bit:

-___

-$code.=<<___ if(!$softonly);

- larl %r1,OPENSSL_s390xcap_P

- lg %r0,0(%r1)

- tmhl %r0,0x4000 # check for message-security-assist

- jz .Lsoft_ghash

- lghi %r0,0

- la %r1,16($sp)

- .long 0xb93e0004 # kimd %r0,%r4

- lg %r1,24($sp)

- tmhh %r1,0x4000 # check for function 65

- jz .Lsoft_ghash

- lghi %r0,65 # function 65

- la %r1,0($Xi) # H lies right after Xi in gcm128_context

- .long 0xb93e0004 # kimd %r0,$inp

- brc 1,.-4 # pay attention to "partial completion"

- br %r14

-.align 32

-.Lsoft_ghash:

-___

-$code.=<<___ if ($flavour =~ /3[12]/);

- llgfr $len,$len

-___

-$code.=<<___;

- stm${g} %r6,%r14,6*$SIZE_T($sp)

- aghi $Xi,-1

- srlg $len,$len,4

- lghi $x78,`0xf<<3`

- larl $rem_4bit,rem_4bit

- lg $Zlo,8+1($Xi) # Xi

- lg $Zhi,0+1($Xi)

- lghi $tmp,0

-.Louter:

- xg $Zhi,0($inp) # Xi ^= inp

- xg $Zlo,8($inp)

- xgr $Zhi,$tmp

- stg $Zlo,8+1($Xi)

- stg $Zhi,0+1($Xi)

-.Lgmult_shortcut:

- lghi $tmp,0xf0

- sllg $nlo,$Zlo,4

- srlg $xi,$Zlo,8 # extract second byte

- ngr $nlo,$tmp

- lgr $nhi,$Zlo

- lghi $cnt,14

- ngr $nhi,$tmp

- lg $Zlo,8($nlo,$Htbl)

- lg $Zhi,0($nlo,$Htbl)

- sllg $nlo,$xi,4

- sllg $rem0,$Zlo,3

- ngr $nlo,$tmp

- ngr $rem0,$x78

- ngr $xi,$tmp

- sllg $tmp,$Zhi,60

- srlg $Zlo,$Zlo,4

- srlg $Zhi,$Zhi,4

- xg $Zlo,8($nhi,$Htbl)

- xg $Zhi,0($nhi,$Htbl)

- lgr $nhi,$xi

- sllg $rem1,$Zlo,3

- xgr $Zlo,$tmp

- ngr $rem1,$x78

- j .Lghash_inner

-.align 16

-.Lghash_inner:

- srlg $Zlo,$Zlo,4

- sllg $tmp,$Zhi,60

- xg $Zlo,8($nlo,$Htbl)

- srlg $Zhi,$Zhi,4

- llgc $xi,0($cnt,$Xi)

- xg $Zhi,0($nlo,$Htbl)

- sllg $nlo,$xi,4

- xg $Zhi,0($rem0,$rem_4bit)

- nill $nlo,0xf0

- sllg $rem0,$Zlo,3

- xgr $Zlo,$tmp

- ngr $rem0,$x78

- nill $xi,0xf0

- sllg $tmp,$Zhi,60

- srlg $Zlo,$Zlo,4

- srlg $Zhi,$Zhi,4

- xg $Zlo,8($nhi,$Htbl)

- xg $Zhi,0($nhi,$Htbl)

- lgr $nhi,$xi

- xg $Zhi,0($rem1,$rem_4bit)

- sllg $rem1,$Zlo,3

- xgr $Zlo,$tmp

- ngr $rem1,$x78

- brct $cnt,.Lghash_inner

- sllg $tmp,$Zhi,60

- srlg $Zlo,$Zlo,4

- srlg $Zhi,$Zhi,4

- xg $Zlo,8($nlo,$Htbl)

- xg $Zhi,0($nlo,$Htbl)

- sllg $xi,$Zlo,3

- xg $Zhi,0($rem0,$rem_4bit)

- xgr $Zlo,$tmp

- ngr $xi,$x78

- sllg $tmp,$Zhi,60

- srlg $Zlo,$Zlo,4

- srlg $Zhi,$Zhi,4

- xg $Zlo,8($nhi,$Htbl)

- xg $Zhi,0($nhi,$Htbl)

- xgr $Zlo,$tmp

- xg $Zhi,0($rem1,$rem_4bit)

- lg $tmp,0($xi,$rem_4bit)

- la $inp,16($inp)

- sllg $tmp,$tmp,4 # correct last rem_4bit[rem]

- brctg $len,.Louter

- xgr $Zhi,$tmp

- stg $Zlo,8+1($Xi)

- stg $Zhi,0+1($Xi)

- lm${g} %r6,%r14,6*$SIZE_T($sp)

- br %r14

-.type gcm_ghash_4bit,\@function

-.size gcm_ghash_4bit,(.-gcm_ghash_4bit)

-.align 64

-rem_4bit:

- .long `0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0

- .long `0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0

- .long `0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0

- .long `0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0

-.type rem_4bit,\@object

-.size rem_4bit,(.-rem_4bit)

-.string "GHASH for s390x, CRYPTOGAMS by <appro\@openssl.org>"

-___

-$code =~ s/\`([^\`]*)\`/eval $1/gem;

-print $code;

-close STDOUT;

« no previous file with comments | « openssl/crypto/modes/asm/ghash-parisc.pl ('k') | openssl/crypto/modes/asm/ghash-sparcv9.pl » ('j') | no next file with comments »