#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256 performance improvement over compiler-generated code varies
# from 40% for Sun C [32-bit build] to 70% for gcc [3.3, 64-bit
# build]. Just like in the SHA1 module I aim to ensure scalability on
# UltraSPARC T1 by packing X[16] into 8 64-bit registers.

# SHA512 on pre-T1 UltraSPARC.
#
# Performance is >75% better than 64-bit code generated by Sun C and
# over 2x better than 32-bit code. X[16] resides on the stack, but
# access to it is scheduled for L2 latency and staged through the 32
# least significant bits of %l0-%l7. The latter is done to achieve
# 32-/64-bit ABI duality. Nevertheless it's ~40% faster than SHA256,
# which is pretty good [the optimal coefficient is 50%].
#
# SHA512 on UltraSPARC T1.
#
# It's not any faster than 64-bit code generated by Sun C 5.8. This is
# because the 64-bit code generator has the advantage of using 64-bit
# loads(*) to access X[16], which I consciously traded for 32-/64-bit
# ABI duality [as per above]. But it surpasses 32-bit Sun C generated
# code by 60%, not to mention that it doesn't suffer from severe decay
# when running 4 times as many threads as there are physical cores,
# and that it leaves gcc [3.4] behind by over a 4x factor! Compared to
# SHA256, single-thread performance is only 10% better, but overall
# throughput at the maximum number of threads for a given CPU exceeds
# that of SHA256 by 30% [again, the optimal coefficient is 50%].
#
# (*) Unlike on pre-T1 UltraSPARC, loads on T1 are executed strictly
#     in-order, i.e. a load instruction has to complete before the
#     next instruction in the given thread is executed, even if the
#     latter does not depend on the load result! This means that on T1
#     two 32-bit loads are always slower than one 64-bit load. Once
#     again this is unlike pre-T1 UltraSPARC, where, if scheduled
#     appropriately, 2x 32-bit loads can be as fast as 1x 64-bit ones.

$bits=32;
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64) { $bias=2047; $frame=192; }
else           { $bias=0;    $frame=112; }
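# Note: the 64-bit SPARC V9 ABI biases the stack/frame pointers by
# 2047 bytes, hence $bias. $frame is the fixed part of the stack frame
# reserved by the "save" instruction in the prologue below; the X[16]
# scratch area ($locals, SHA512 only) is allocated on top of it.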

$output=shift;
open STDOUT,">$output";

if ($output =~ /512/) {
	$label="512";
	$SZ=8;
	$LD="ldx";	# load from memory
	$ST="stx";	# store to memory
	$SLL="sllx";	# shift left logical
	$SRL="srlx";	# shift right logical
	@Sigma0=(28,34,39);
	@Sigma1=(14,18,41);
	@sigma0=( 7, 1, 8);	# right shift first
	@sigma1=( 6,19,61);	# right shift first
	$lastK=0x817;
	$rounds=80;
	$align=4;

	$locals=16*$SZ;	# X[16]

	$A="%o0";
	$B="%o1";
	$C="%o2";
	$D="%o3";
	$E="%o4";
	$F="%o5";
	$G="%g1";
	$H="%o7";
	@V=($A,$B,$C,$D,$E,$F,$G,$H);
} else {
	$label="256";
	$SZ=4;
	$LD="ld";	# load from memory
	$ST="st";	# store to memory
	$SLL="sll";	# shift left logical
	$SRL="srl";	# shift right logical
	@Sigma0=( 2,13,22);
	@Sigma1=( 6,11,25);
	@sigma0=( 3, 7,18);	# right shift first
	@sigma1=(10,17,19);	# right shift first
	$lastK=0x8f2;
	$rounds=64;
	$align=8;

	$locals=0;		# X[16] is register resident
	@X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7");

	$A="%l0";
	$B="%l1";
	$C="%l2";
	$D="%l3";
	$E="%l4";
	$F="%l5";
	$G="%l6";
	$H="%l7";
	@V=($A,$B,$C,$D,$E,$F,$G,$H);
}
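# One script, two hashes: the if/else above selects everything that
# differs between SHA256 and SHA512. @Sigma0/@Sigma1 hold the rotate
# counts of the "big" Sigma functions, @sigma0/@sigma1 hold
# (shift,rotate,rotate) of the "small" ones, plain right shift listed
# first. SPARC has no rotate instruction, so every rotation below is
# synthesized from a right-shift and a complementary left-shift XOR'ed
# together (the bit ranges are disjoint, so XOR equals OR). $lastK is
# the low 12 bits of the final round constant and is used to detect
# the end of the round loop.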
$T1="%g2";
$tmp0="%g3";
$tmp1="%g4";
$tmp2="%g5";

$ctx="%i0";
$inp="%i1";
$len="%i2";
$Ktbl="%i3";
$tmp31="%i4";
$tmp32="%i5";

########### SHA256
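# In the SHA256 case the 16 message words live packed two per 64-bit
# register: X[2*k] occupies the upper 32 bits and X[2*k+1] the lower
# 32 bits of @X[k]. $Xload below fetches the whole 64-byte block with
# eight ldx instructions on round 0 (re-aligning it with shifts if the
# input pointer was not 8-byte aligned) and then, on every round,
# selects the half of @X[$i/2] that holds the current word and
# pre-adds $h into $T1.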
$Xload = sub {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;

    if ($i==0) {
$code.=<<___;
	ldx	[$inp+0],@X[0]
	ldx	[$inp+16],@X[2]
	ldx	[$inp+32],@X[4]
	ldx	[$inp+48],@X[6]
	ldx	[$inp+8],@X[1]
	ldx	[$inp+24],@X[3]
	subcc	%g0,$tmp31,$tmp32 ! should be 64-$tmp31, but -$tmp31 works too
	ldx	[$inp+40],@X[5]
	bz,pt	%icc,.Laligned
	ldx	[$inp+56],@X[7]

	sllx	@X[0],$tmp31,@X[0]
	ldx	[$inp+64],$T1
___
for($j=0;$j<7;$j++)
{   $code.=<<___;
	srlx	@X[$j+1],$tmp32,$tmp1
	sllx	@X[$j+1],$tmp31,@X[$j+1]
	or	$tmp1,@X[$j],@X[$j]
___
}
$code.=<<___;
	srlx	$T1,$tmp32,$T1
	or	$T1,@X[7],@X[7]
.Laligned:
___
    }

    if ($i&1) {
	$code.="\tadd @X[$i/2],$h,$T1\n";
    } else {
	$code.="\tsrlx @X[$i/2],32,$T1\n\tadd $h,$T1,$T1\n";
    }
} if ($SZ==4);

########### SHA512
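# In the SHA512 case the message schedule X[16] lives on the stack (at
# %sp+$bias+$frame) and, to keep the code 32-/64-bit ABI neutral,
# every 64-bit value is staged through two 32-bit halves in %l0-%l7.
# $Xload below glues @pair[0]:@pair[1] into a 64-bit word with
# sllx/srlx/or (also folding in the input misalignment held in
# $tmp31/$tmp32), pre-adds $h into $T1, stores the word to the stack
# slot X[$i], and prefetches the 32-bit halves needed by the rounds
# that follow.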
$Xload = sub {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1),"%l".eval((($i+1)*2)%8));

$code.=<<___ if ($i==0);
	ld	[$inp+0],%l0
	ld	[$inp+4],%l1
	ld	[$inp+8],%l2
	ld	[$inp+12],%l3
	ld	[$inp+16],%l4
	ld	[$inp+20],%l5
	ld	[$inp+24],%l6
	ld	[$inp+28],%l7
___
$code.=<<___ if ($i<15);
	sllx	@pair[1],$tmp31,$tmp2	! Xload($i)
	add	$tmp31,32,$tmp0
	sllx	@pair[0],$tmp0,$tmp1
	`"ld [$inp+".eval(32+0+$i*8)."],@pair[0]" if ($i<12)`
	srlx	@pair[2],$tmp32,@pair[1]
	or	$tmp1,$tmp2,$tmp2
	or	@pair[1],$tmp2,$tmp2
	`"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)`
	add	$h,$tmp2,$T1
	$ST	$tmp2,[%sp+`$bias+$frame+$i*$SZ`]
___
$code.=<<___ if ($i==12);
	brnz,a	$tmp31,.+8
	ld	[$inp+128],%l0
___
$code.=<<___ if ($i==15);
	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
	sllx	@pair[1],$tmp31,$tmp2	! Xload($i)
	add	$tmp31,32,$tmp0
	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
	sllx	@pair[0],$tmp0,$tmp1
	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
	srlx	@pair[2],$tmp32,@pair[1]
	or	$tmp1,$tmp2,$tmp2
	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
	or	@pair[1],$tmp2,$tmp2
	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
	add	$h,$tmp2,$T1
	$ST	$tmp2,[%sp+`$bias+$frame+$i*$SZ`]
	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
___
} if ($SZ==8);

########### common
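# BODY_00_15 emits one round of the common compression function:
#
#	T1  = h + Sigma1(e) + Ch(e,f,g) + K[i] + X[i]
#	T2  = Sigma0(a) + Maj(a,b,c)
#	d  += T1
#	h   = T1 + T2
#
# (the caller rotates @V instead of shuffling a..h). X[i]+h is already
# in $T1 courtesy of $Xload/$BODY_16_XX. Ch is computed as ((f^g)&e)^g
# and Maj as ((a|b)&c)|(a&b); each rotation in Sigma0/Sigma1 is a
# $SRL/$SLL pair XOR'ed into the accumulator.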
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;

    if ($i<16) {
	&$Xload(@_);
    } else {
	$code.="\tadd $h,$T1,$T1\n";
    }

$code.=<<___;
	$SRL	$e,@Sigma1[0],$h	!! $i
	xor	$f,$g,$tmp2
	$SLL	$e,`$SZ*8-@Sigma1[2]`,$tmp1
	and	$e,$tmp2,$tmp2
	$SRL	$e,@Sigma1[1],$tmp0
	xor	$tmp1,$h,$h
	$SLL	$e,`$SZ*8-@Sigma1[1]`,$tmp1
	xor	$tmp0,$h,$h
	$SRL	$e,@Sigma1[2],$tmp0
	xor	$tmp1,$h,$h
	$SLL	$e,`$SZ*8-@Sigma1[0]`,$tmp1
	xor	$tmp0,$h,$h
	xor	$g,$tmp2,$tmp2		! Ch(e,f,g)
	xor	$tmp1,$h,$tmp0		! Sigma1(e)

	$SRL	$a,@Sigma0[0],$h
	add	$tmp2,$T1,$T1
	$LD	[$Ktbl+`$i*$SZ`],$tmp2	! K[$i]
	$SLL	$a,`$SZ*8-@Sigma0[2]`,$tmp1
	add	$tmp0,$T1,$T1
	$SRL	$a,@Sigma0[1],$tmp0
	xor	$tmp1,$h,$h
	$SLL	$a,`$SZ*8-@Sigma0[1]`,$tmp1
	xor	$tmp0,$h,$h
	$SRL	$a,@Sigma0[2],$tmp0
	xor	$tmp1,$h,$h
	$SLL	$a,`$SZ*8-@Sigma0[0]`,$tmp1
	xor	$tmp0,$h,$h
	xor	$tmp1,$h,$h		! Sigma0(a)

	or	$a,$b,$tmp0
	and	$a,$b,$tmp1
	and	$c,$tmp0,$tmp0
	or	$tmp0,$tmp1,$tmp1	! Maj(a,b,c)
	add	$tmp2,$T1,$T1		! +=K[$i]
	add	$tmp1,$h,$h

	add	$T1,$d,$d
	add	$T1,$h,$h
___
}

########### SHA256
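# $BODY_16_XX (SHA256 flavour) performs the in-place message schedule
# update
#
#   X[i%16] += sigma1(X[(i+14)%16]) + X[(i+9)%16] + sigma0(X[(i+1)%16])
#
# before falling through to BODY_00_15. Because two 32-bit words share
# one 64-bit register, the half that is needed is pulled down with
# srlx when necessary and the updated word is spliced back into its
# half of @X[($i/2)%8].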
$BODY_16_XX = sub {
my $i=@_[0];
my $xi;

    if ($i&1) {
	$xi=$tmp32;
	$code.="\tsrlx @X[(($i+1)/2)%8],32,$xi\n";
    } else {
	$xi=@X[(($i+1)/2)%8];
    }
$code.=<<___;
	srl	$xi,@sigma0[0],$T1		!! Xupdate($i)
	sll	$xi,`32-@sigma0[2]`,$tmp1
	srl	$xi,@sigma0[1],$tmp0
	xor	$tmp1,$T1,$T1
	sll	$tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
	xor	$tmp0,$T1,$T1
	srl	$xi,@sigma0[2],$tmp0
	xor	$tmp1,$T1,$T1
___
    if ($i&1) {
	$xi=@X[(($i+14)/2)%8];
    } else {
	$xi=$tmp32;
	$code.="\tsrlx @X[(($i+14)/2)%8],32,$xi\n";
    }
$code.=<<___;
	srl	$xi,@sigma1[0],$tmp2
	xor	$tmp0,$T1,$T1			! T1=sigma0(X[i+1])
	sll	$xi,`32-@sigma1[2]`,$tmp1
	srl	$xi,@sigma1[1],$tmp0
	xor	$tmp1,$tmp2,$tmp2
	sll	$tmp1,`@sigma1[2]-@sigma1[1]`,$tmp1
	xor	$tmp0,$tmp2,$tmp2
	srl	$xi,@sigma1[2],$tmp0
	xor	$tmp1,$tmp2,$tmp2
___
    if ($i&1) {
	$xi=@X[($i/2)%8];
	$code.=<<___;
	srlx	@X[(($i+9)/2)%8],32,$tmp1	! X[i+9]
	xor	$tmp0,$tmp2,$tmp2		! sigma1(X[i+14])
	srl	@X[($i/2)%8],0,$tmp0
	add	$tmp2,$tmp1,$tmp1
	add	$xi,$T1,$T1			! +=X[i]
	xor	$tmp0,@X[($i/2)%8],@X[($i/2)%8]
	add	$tmp1,$T1,$T1

	srl	$T1,0,$T1
	or	$T1,@X[($i/2)%8],@X[($i/2)%8]
___
    } else {
	$xi=@X[(($i+9)/2)%8];
	$code.=<<___;
	srlx	@X[($i/2)%8],32,$tmp1		! X[i]
	xor	$tmp0,$tmp2,$tmp2		! sigma1(X[i+14])
	add	$xi,$T1,$T1			! +=X[i+9]
	add	$tmp2,$tmp1,$tmp1
	srl	@X[($i/2)%8],0,@X[($i/2)%8]
	add	$tmp1,$T1,$T1

	sllx	$T1,32,$tmp0
	or	$tmp0,@X[($i/2)%8],@X[($i/2)%8]
___
    }
	&BODY_00_15(@_);
} if ($SZ==4);

########### SHA512
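# $BODY_16_XX (SHA512 flavour) performs the same schedule update on
# 64-bit words: the halves staged in %l0-%l7 are glued together with
# sllx/or, sigma0(X[i+1]) and sigma1(X[i+14]) are computed with
# srlx/sllx/xor, X[i+9] and X[i] are added in, the result is stored
# back to the stack slot X[i%16], and the halves needed by the next
# round are reloaded from the stack before falling through to
# BODY_00_15.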
$BODY_16_XX = sub {
my $i=@_[0];
my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1));

$code.=<<___;
	sllx	%l2,32,$tmp0		!! Xupdate($i)
	or	%l3,$tmp0,$tmp0

	srlx	$tmp0,@sigma0[0],$T1
	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
	sllx	$tmp0,`64-@sigma0[2]`,$tmp1
	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
	srlx	$tmp0,@sigma0[1],$tmp0
	xor	$tmp1,$T1,$T1
	sllx	$tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
	xor	$tmp0,$T1,$T1
	srlx	$tmp0,`@sigma0[2]-@sigma0[1]`,$tmp0
	xor	$tmp1,$T1,$T1
	sllx	%l6,32,$tmp2
	xor	$tmp0,$T1,$T1		! sigma0(X[$i+1])
	or	%l7,$tmp2,$tmp2

	srlx	$tmp2,@sigma1[0],$tmp1
	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
	sllx	$tmp2,`64-@sigma1[2]`,$tmp0
	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
	srlx	$tmp2,@sigma1[1],$tmp2
	xor	$tmp0,$tmp1,$tmp1
	sllx	$tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
	xor	$tmp2,$tmp1,$tmp1
	srlx	$tmp2,`@sigma1[2]-@sigma1[1]`,$tmp2
	xor	$tmp0,$tmp1,$tmp1
	sllx	%l4,32,$tmp0
	xor	$tmp2,$tmp1,$tmp1	! sigma1(X[$i+14])
	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
	or	%l5,$tmp0,$tmp0
	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5

	sllx	%l0,32,$tmp2
	add	$tmp1,$T1,$T1
	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
	or	%l1,$tmp2,$tmp2
	add	$tmp0,$T1,$T1		! +=X[$i+9]
	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
	add	$tmp2,$T1,$T1		! +=X[$i]
	$ST	$T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
___
	&BODY_00_15(@_);
} if ($SZ==8);

$code.=<<___ if ($bits==64);
.register	%g2,#scratch
.register	%g3,#scratch
___
$code.=<<___;
.section	".text",#alloc,#execinstr

.align	64
K${label}:
.type	K${label},#object
___
if ($SZ==4) {
$code.=<<___;
	.long	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.long	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.long	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.long	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.long	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.long	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.long	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.long	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.long	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.long	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.long	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.long	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.long	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.long	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.long	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.long	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
___
} else {
$code.=<<___;
	.long	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
	.long	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
	.long	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
	.long	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
	.long	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
	.long	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
	.long	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
	.long	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
	.long	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
	.long	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
	.long	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
	.long	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
	.long	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
	.long	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
	.long	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
	.long	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
	.long	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
	.long	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
	.long	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
	.long	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
	.long	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
	.long	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
	.long	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
	.long	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
	.long	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
	.long	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
	.long	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
	.long	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
	.long	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
	.long	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
	.long	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
	.long	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
	.long	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
	.long	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
	.long	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
	.long	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
	.long	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
	.long	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
	.long	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
	.long	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
___
}
$code.=<<___;
.size	K${label},.-K${label}
.globl	sha${label}_block_data_order
sha${label}_block_data_order:
	save	%sp,`-$frame-$locals`,%sp
	and	$inp,`$align-1`,$tmp31
	sllx	$len,`log(16*$SZ)/log(2)`,$len
	andn	$inp,`$align-1`,$inp
	sll	$tmp31,3,$tmp31
	add	$inp,$len,$len
___
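# Prologue notes: $len arrives as a block count and is converted to a
# byte count with the sllx by log2(16*$SZ), then turned into an end
# pointer. The input pointer is rounded down to an $align boundary;
# $tmp31 receives the misalignment in bits (and $tmp32, where used,
# the complementary shift), which $Xload uses to re-align the data in
# registers, so unaligned input never triggers alignment traps.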
$code.=<<___ if ($SZ==8); # SHA512
	mov	32,$tmp32
	sub	$tmp32,$tmp31,$tmp32
___
$code.=<<___;
.Lpic:	call	.+8
	add	%o7,K${label}-.Lpic,$Ktbl

	$LD	[$ctx+`0*$SZ`],$A
	$LD	[$ctx+`1*$SZ`],$B
	$LD	[$ctx+`2*$SZ`],$C
	$LD	[$ctx+`3*$SZ`],$D
	$LD	[$ctx+`4*$SZ`],$E
	$LD	[$ctx+`5*$SZ`],$F
	$LD	[$ctx+`6*$SZ`],$G
	$LD	[$ctx+`7*$SZ`],$H

.Lloop:
___
for ($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".L16_xx:\n";
for (;$i<32;$i++)	{ &$BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	and	$tmp2,0xfff,$tmp2
	cmp	$tmp2,$lastK
	bne	.L16_xx
	add	$Ktbl,`16*$SZ`,$Ktbl	! Ktbl+=16

___
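# Only 16+16 rounds are unrolled; the "bne .L16_xx" above re-enters
# the second group until all $rounds rounds are done. Instead of
# keeping a counter, the exit test masks the round constant last
# fetched into $tmp2 down to 12 bits and compares it with $lastK, the
# low 12 bits of the final K[] entry (0x8f2 for SHA256's 0xc67178f2,
# 0x817 for SHA512's 0x...4a475817); the delay-slot add meanwhile
# advances $Ktbl past the 16 constants just consumed.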
$code.=<<___ if ($SZ==4); # SHA256
	$LD	[$ctx+`0*$SZ`],@X[0]
	$LD	[$ctx+`1*$SZ`],@X[1]
	$LD	[$ctx+`2*$SZ`],@X[2]
	$LD	[$ctx+`3*$SZ`],@X[3]
	$LD	[$ctx+`4*$SZ`],@X[4]
	$LD	[$ctx+`5*$SZ`],@X[5]
	$LD	[$ctx+`6*$SZ`],@X[6]
	$LD	[$ctx+`7*$SZ`],@X[7]

	add	$A,@X[0],$A
	$ST	$A,[$ctx+`0*$SZ`]
	add	$B,@X[1],$B
	$ST	$B,[$ctx+`1*$SZ`]
	add	$C,@X[2],$C
	$ST	$C,[$ctx+`2*$SZ`]
	add	$D,@X[3],$D
	$ST	$D,[$ctx+`3*$SZ`]
	add	$E,@X[4],$E
	$ST	$E,[$ctx+`4*$SZ`]
	add	$F,@X[5],$F
	$ST	$F,[$ctx+`5*$SZ`]
	add	$G,@X[6],$G
	$ST	$G,[$ctx+`6*$SZ`]
	add	$H,@X[7],$H
	$ST	$H,[$ctx+`7*$SZ`]
___
$code.=<<___ if ($SZ==8); # SHA512
	ld	[$ctx+`0*$SZ+0`],%l0
	ld	[$ctx+`0*$SZ+4`],%l1
	ld	[$ctx+`1*$SZ+0`],%l2
	ld	[$ctx+`1*$SZ+4`],%l3
	ld	[$ctx+`2*$SZ+0`],%l4
	ld	[$ctx+`2*$SZ+4`],%l5
	ld	[$ctx+`3*$SZ+0`],%l6

	sllx	%l0,32,$tmp0
	ld	[$ctx+`3*$SZ+4`],%l7
	sllx	%l2,32,$tmp1
	or	%l1,$tmp0,$tmp0
	or	%l3,$tmp1,$tmp1
	add	$tmp0,$A,$A
	add	$tmp1,$B,$B
	$ST	$A,[$ctx+`0*$SZ`]
	sllx	%l4,32,$tmp2
	$ST	$B,[$ctx+`1*$SZ`]
	sllx	%l6,32,$T1
	or	%l5,$tmp2,$tmp2
	or	%l7,$T1,$T1
	add	$tmp2,$C,$C
	$ST	$C,[$ctx+`2*$SZ`]
	add	$T1,$D,$D
	$ST	$D,[$ctx+`3*$SZ`]

	ld	[$ctx+`4*$SZ+0`],%l0
	ld	[$ctx+`4*$SZ+4`],%l1
	ld	[$ctx+`5*$SZ+0`],%l2
	ld	[$ctx+`5*$SZ+4`],%l3
	ld	[$ctx+`6*$SZ+0`],%l4
	ld	[$ctx+`6*$SZ+4`],%l5
	ld	[$ctx+`7*$SZ+0`],%l6

	sllx	%l0,32,$tmp0
	ld	[$ctx+`7*$SZ+4`],%l7
	sllx	%l2,32,$tmp1
	or	%l1,$tmp0,$tmp0
	or	%l3,$tmp1,$tmp1
	add	$tmp0,$E,$E
	add	$tmp1,$F,$F
	$ST	$E,[$ctx+`4*$SZ`]
	sllx	%l4,32,$tmp2
	$ST	$F,[$ctx+`5*$SZ`]
	sllx	%l6,32,$T1
	or	%l5,$tmp2,$tmp2
	or	%l7,$T1,$T1
	add	$tmp2,$G,$G
	$ST	$G,[$ctx+`6*$SZ`]
	add	$T1,$H,$H
	$ST	$H,[$ctx+`7*$SZ`]
___
$code.=<<___;
	add	$inp,`16*$SZ`,$inp		! advance inp
	cmp	$inp,$len
	bne	`$bits==64?"%xcc":"%icc"`,.Lloop
	sub	$Ktbl,`($rounds-16)*$SZ`,$Ktbl	! rewind Ktbl

	ret
	restore
.type	sha${label}_block_data_order,#function
.size	sha${label}_block_data_order,(.-sha${label}_block_data_order)
.asciz	"SHA${label} block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;