openssl/crypto/bn/asm/s390x-gf2m.pl - Issue 2072073002: Delete bundled copy of OpenSSL and replace with README.

Side by Side Diff: openssl/crypto/bn/asm/s390x-gf2m.pl

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master

Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 #!/usr/bin/env perl

2 #

3 # ====================================================================

4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL

5 # project. The module is, however, dual licensed under OpenSSL and

6 # CRYPTOGAMS licenses depending on where you obtain it. For further

7 # details see http://www.openssl.org/~appro/cryptogams/.

8 # ====================================================================

9 #

10 # May 2011

11 #

12 # The module implements bn_GF2m_mul_2x2 polynomial multiplication used

13 # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for

14 # the time being... gcc 4.3 appeared to generate poor code, therefore

15 # the effort. And indeed, the module delivers 55%-90%(*) improvement

16 # on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit

17 # key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.

18 # This is for 64-bit build. In 32-bit "highgprs" case improvement is

19 # even higher, for example on z990 it was measured 80%-150%. ECDSA

20 # sign is modest 9%-12% faster. Keep in mind that these coefficients

21 # are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is

22 # burnt in it...

23 #

24 # (*) gcc 4.1 was observed to deliver better results than gcc 4.3,

25 # so that improvement coefficients can vary from one specific

26 # setup to another.

27

28 $flavour = shift;

29

30 if ($flavour =~ /3[12]/) {

31 $SIZE_T=4;

32 $g="";

33 } else {

34 $SIZE_T=8;

35 $g="g";

36 }

37

38 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

39 open STDOUT,">$output";

40

41 $stdframe=16*$SIZE_T+4*8;

42

43 $rp="%r2";

44 $a1="%r3";

45 $a0="%r4";

46 $b1="%r5";

47 $b0="%r6";

48

49 $ra="%r14";

50 $sp="%r15";

51

52 @T=("%r0","%r1");

53 @i=("%r12","%r13");

54

55 ($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));

56 ($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;

57

58 $code.=<<___;

59 .text

60

61 .type _mul_1x1,\@function

62 .align 16

63 _mul_1x1:

64 lgr $a1,$a

65 sllg $a2,$a,1

66 sllg $a4,$a,2

67 sllg $a8,$a,3

68

69 srag $lo,$a1,63 # broadcast 63rd bit

70 nihh $a1,0x1fff

71 srag @i[0],$a2,63 # broadcast 62nd bit

72 nihh $a2,0x3fff

73 srag @i[1],$a4,63 # broadcast 61st bit

74 nihh $a4,0x7fff

75 ngr $lo,$b

76 ngr @i[0],$b

77 ngr @i[1],$b

78

79 lghi @T[0],0

80 lgr $a12,$a1

81 stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0

82 xgr $a12,$a2

83 stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1

84 lgr $a48,$a4

85 stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2

86 xgr $a48,$a8

87 stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2

88 xgr $a1,$a4

89

90 stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4

91 xgr $a2,$a4

92 stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4

93 xgr $a12,$a4

94 stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4

95 xgr $a1,$a48

96 stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4

97 xgr $a2,$a48

98

99 stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8

100 xgr $a12,$a48

101 stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8

102 xgr $a1,$a4

103 stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8

104 xgr $a2,$a4

105 stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8

106

107 xgr $a12,$a4

108 stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8

109 srlg $hi,$lo,1

110 stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8

111 sllg $lo,$lo,63

112 stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8

113 srlg @T[0],@i[0],2

114 stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8

115

116 lghi $mask,`0xf<<3`

117 sllg $a1,@i[0],62

118 sllg @i[0],$b,3

119 srlg @T[1],@i[1],3

120 ngr @i[0],$mask

121 sllg $a2,@i[1],61

122 srlg @i[1],$b,4-3

123 xgr $hi,@T[0]

124 ngr @i[1],$mask

125 xgr $lo,$a1

126 xgr $hi,@T[1]

127 xgr $lo,$a2

128

129 xg $lo,$stdframe(@i[0],$sp)

130 srlg @i[0],$b,8-3

131 ngr @i[0],$mask

132 ___

133 for($n=1;$n<14;$n++) {

134 $code.=<<___;

135 lg @T[1],$stdframe(@i[1],$sp)

136 srlg @i[1],$b,`($n+2)*4`-3

137 sllg @T[0],@T[1],`$n*4`

138 ngr @i[1],$mask

139 srlg @T[1],@T[1],`64-$n*4`

140 xgr $lo,@T[0]

141 xgr $hi,@T[1]

142 ___

143 push(@i,shift(@i)); push(@T,shift(@T));

144 }

145 $code.=<<___;

146 lg @T[1],$stdframe(@i[1],$sp)

147 sllg @T[0],@T[1],`$n*4`

148 srlg @T[1],@T[1],`64-$n*4`

149 xgr $lo,@T[0]

150 xgr $hi,@T[1]

151

152 lg @T[0],$stdframe(@i[0],$sp)

153 sllg @T[1],@T[0],`($n+1)*4`

154 srlg @T[0],@T[0],`64-($n+1)*4`

155 xgr $lo,@T[1]

156 xgr $hi,@T[0]

157

158 br $ra

159 .size _mul_1x1,.-_mul_1x1

160

161 .globl bn_GF2m_mul_2x2

162 .type bn_GF2m_mul_2x2,\@function

163 .align 16

164 bn_GF2m_mul_2x2:

165 stm${g} %r3,%r15,3*$SIZE_T($sp)

166

167 lghi %r1,-$stdframe-128

168 la %r0,0($sp)

169 la $sp,0(%r1,$sp) # alloca

170 st${g} %r0,0($sp) # back chain

171 ___

172 if ($SIZE_T==8) {

173 my @r=map("%r$_",(6..9));

174 $code.=<<___;

175 bras $ra,_mul_1x1 # a1·b1

176 stmg $lo,$hi,16($rp)

177

178 lg $a,`$stdframe+128+4*$SIZE_T`($sp)

179 lg $b,`$stdframe+128+6*$SIZE_T`($sp)

180 bras $ra,_mul_1x1 # a0·b0

181 stmg $lo,$hi,0($rp)

182

183 lg $a,`$stdframe+128+3*$SIZE_T`($sp)

184 lg $b,`$stdframe+128+5*$SIZE_T`($sp)

185 xg $a,`$stdframe+128+4*$SIZE_T`($sp)

186 xg $b,`$stdframe+128+6*$SIZE_T`($sp)

187 bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)

188 lmg @r[0],@r[3],0($rp)

189

190 xgr $lo,$hi

191 xgr $hi,@r[1]

192 xgr $lo,@r[0]

193 xgr $hi,@r[2]

194 xgr $lo,@r[3]

195 xgr $hi,@r[3]

196 xgr $lo,$hi

197 stg $hi,16($rp)

198 stg $lo,8($rp)

199 ___

200 } else {

201 $code.=<<___;

202 sllg %r3,%r3,32

203 sllg %r5,%r5,32

204 or %r3,%r4

205 or %r5,%r6

206 bras $ra,_mul_1x1

207 rllg $lo,$lo,32

208 rllg $hi,$hi,32

209 stmg $lo,$hi,0($rp)

210 ___

211 }

212 $code.=<<___;

213 lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)

214 br $ra

215 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2

216 .string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"

217 ___

218

219 $code =~ s/\`([^\`]*)\`/eval($1)/gem;

220 print $code;

221 close STDOUT;

OLD	NEW

« no previous file with comments | « openssl/crypto/bn/asm/s390x.S ('k') | openssl/crypto/bn/asm/s390x-mont.pl » ('j') | no next file with comments »