blob: 4b17848287a12e598594fec86a88fab7c6953ac2 [file] [log] [blame]
Alexandre Savard1b09e312012-08-07 20:33:29 -04001#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for s390x.
11
12# April 2007.
13#
14# Performance is >30% better than gcc 3.3 generated code. But the real
15# twist is that SHA1 hardware support is detected and utilized. In
16# which case performance can reach further >4.5x for larger chunks.
17
18# January 2009.
19#
20# Optimize Xupdate for amount of memory references and reschedule
21# instructions to favour dual-issue z10 pipeline. On z10 hardware is
22# "only" ~2.3x faster than software.
23
Alexandre Savard1b09e312012-08-07 20:33:29 -040024$kimdfunc=1; # magic function code for kimd instruction
25
Alexandre Savard75410672012-08-08 09:50:01 -040026$output=shift;
Alexandre Savard1b09e312012-08-07 20:33:29 -040027open STDOUT,">$output";
28
29$K_00_39="%r0"; $K=$K_00_39;
30$K_40_79="%r1";
31$ctx="%r2"; $prefetch="%r2";
32$inp="%r3";
33$len="%r4";
34
35$A="%r5";
36$B="%r6";
37$C="%r7";
38$D="%r8";
39$E="%r9"; @V=($A,$B,$C,$D,$E);
40$t0="%r10";
41$t1="%r11";
42@X=("%r12","%r13","%r14");
43$sp="%r15";
44
Alexandre Savard75410672012-08-08 09:50:01 -040045$frame=160+16*4;
Alexandre Savard1b09e312012-08-07 20:33:29 -040046
47sub Xupdate {
48my $i=shift;
49
50$code.=<<___ if ($i==15);
Alexandre Savard75410672012-08-08 09:50:01 -040051 lg $prefetch,160($sp) ### Xupdate(16) warm-up
Alexandre Savard1b09e312012-08-07 20:33:29 -040052 lr $X[0],$X[2]
53___
54return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
55$code.=<<___ if ($i<16);
56 lg $X[0],`$i*4`($inp) ### Xload($i)
57 rllg $X[1],$X[0],32
58___
59$code.=<<___ if ($i>=16);
60 xgr $X[0],$prefetch ### Xupdate($i)
Alexandre Savard75410672012-08-08 09:50:01 -040061 lg $prefetch,`160+4*(($i+2)%16)`($sp)
62 xg $X[0],`160+4*(($i+8)%16)`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -040063 xgr $X[0],$prefetch
64 rll $X[0],$X[0],1
65 rllg $X[1],$X[0],32
66 rll $X[1],$X[1],1
67 rllg $X[0],$X[1],32
68 lr $X[2],$X[1] # feedback
69___
70$code.=<<___ if ($i<=70);
Alexandre Savard75410672012-08-08 09:50:01 -040071 stg $X[0],`160+4*($i%16)`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -040072___
73unshift(@X,pop(@X));
74}
75
76sub BODY_00_19 {
77my ($i,$a,$b,$c,$d,$e)=@_;
78my $xi=$X[1];
79
80 &Xupdate($i);
81$code.=<<___;
82 alr $e,$K ### $i
83 rll $t1,$a,5
84 lr $t0,$d
85 xr $t0,$c
86 alr $e,$t1
87 nr $t0,$b
88 alr $e,$xi
89 xr $t0,$d
90 rll $b,$b,30
91 alr $e,$t0
92___
93}
94
95sub BODY_20_39 {
96my ($i,$a,$b,$c,$d,$e)=@_;
97my $xi=$X[1];
98
99 &Xupdate($i);
100$code.=<<___;
101 alr $e,$K ### $i
102 rll $t1,$a,5
103 lr $t0,$b
104 alr $e,$t1
105 xr $t0,$c
106 alr $e,$xi
107 xr $t0,$d
108 rll $b,$b,30
109 alr $e,$t0
110___
111}
112
113sub BODY_40_59 {
114my ($i,$a,$b,$c,$d,$e)=@_;
115my $xi=$X[1];
116
117 &Xupdate($i);
118$code.=<<___;
119 alr $e,$K ### $i
120 rll $t1,$a,5
121 lr $t0,$b
122 alr $e,$t1
123 or $t0,$c
124 lr $t1,$b
125 nr $t0,$d
126 nr $t1,$c
127 alr $e,$xi
128 or $t0,$t1
129 rll $b,$b,30
130 alr $e,$t0
131___
132}
133
134$code.=<<___;
135.text
136.align 64
137.type Ktable,\@object
138Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
139 .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
140.size Ktable,.-Ktable
141.globl sha1_block_data_order
142.type sha1_block_data_order,\@function
143sha1_block_data_order:
144___
145$code.=<<___ if ($kimdfunc);
146 larl %r1,OPENSSL_s390xcap_P
147 lg %r0,0(%r1)
148 tmhl %r0,0x4000 # check for message-security assist
149 jz .Lsoftware
150 lghi %r0,0
Alexandre Savard75410672012-08-08 09:50:01 -0400151 la %r1,16($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400152 .long 0xb93e0002 # kimd %r0,%r2
Alexandre Savard75410672012-08-08 09:50:01 -0400153 lg %r0,16($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400154 tmhh %r0,`0x8000>>$kimdfunc`
155 jz .Lsoftware
156 lghi %r0,$kimdfunc
157 lgr %r1,$ctx
158 lgr %r2,$inp
159 sllg %r3,$len,6
160 .long 0xb93e0002 # kimd %r0,%r2
161 brc 1,.-4 # pay attention to "partial completion"
162 br %r14
163.align 16
164.Lsoftware:
165___
166$code.=<<___;
167 lghi %r1,-$frame
Alexandre Savard75410672012-08-08 09:50:01 -0400168 stg $ctx,16($sp)
169 stmg %r6,%r15,48($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400170 lgr %r0,$sp
171 la $sp,0(%r1,$sp)
Alexandre Savard75410672012-08-08 09:50:01 -0400172 stg %r0,0($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400173
174 larl $t0,Ktable
175 llgf $A,0($ctx)
176 llgf $B,4($ctx)
177 llgf $C,8($ctx)
178 llgf $D,12($ctx)
179 llgf $E,16($ctx)
180
181 lg $K_00_39,0($t0)
182 lg $K_40_79,8($t0)
183
184.Lloop:
185 rllg $K_00_39,$K_00_39,32
186___
187for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
188$code.=<<___;
189 rllg $K_00_39,$K_00_39,32
190___
191for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
192$code.=<<___; $K=$K_40_79;
193 rllg $K_40_79,$K_40_79,32
194___
195for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
196$code.=<<___;
197 rllg $K_40_79,$K_40_79,32
198___
199for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
200$code.=<<___;
201
Alexandre Savard75410672012-08-08 09:50:01 -0400202 lg $ctx,`$frame+16`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400203 la $inp,64($inp)
204 al $A,0($ctx)
205 al $B,4($ctx)
206 al $C,8($ctx)
207 al $D,12($ctx)
208 al $E,16($ctx)
209 st $A,0($ctx)
210 st $B,4($ctx)
211 st $C,8($ctx)
212 st $D,12($ctx)
213 st $E,16($ctx)
Alexandre Savard75410672012-08-08 09:50:01 -0400214 brct $len,.Lloop
Alexandre Savard1b09e312012-08-07 20:33:29 -0400215
Alexandre Savard75410672012-08-08 09:50:01 -0400216 lmg %r6,%r15,`$frame+48`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400217 br %r14
218.size sha1_block_data_order,.-sha1_block_data_order
219.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
Alexandre Savard75410672012-08-08 09:50:01 -0400220.comm OPENSSL_s390xcap_P,8,8
Alexandre Savard1b09e312012-08-07 20:33:29 -0400221___
222
223$code =~ s/\`([^\`]*)\`/eval $1/gem;
224
225print $code;
226close STDOUT;