blob: 96681fa05ec915847036ce1461c8fb0906ad1e59 [file] [log] [blame]
Alexandre Savard1b09e312012-08-07 20:33:29 -04001#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# February 2009
11#
12# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
13# "cluster" Address Generation Interlocks, so that one pipeline stall
14# resolves several dependencies.
15
Alexandre Savard1b09e312012-08-07 20:33:29 -040016$rp="%r14";
17$sp="%r15";
18$code=<<___;
19.text
20
21___
22
23# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
24{
25$acc="%r0";
26$cnt="%r1";
27$key="%r2";
28$len="%r3";
29$inp="%r4";
30$out="%r5";
31
32@XX=("%r6","%r7");
33@TX=("%r8","%r9");
34$YY="%r10";
35$TY="%r11";
36
37$code.=<<___;
38.globl RC4
39.type RC4,\@function
40.align 64
41RC4:
Alexandre Savard75410672012-08-08 09:50:01 -040042 stmg %r6,%r11,48($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -040043 llgc $XX[0],0($key)
44 llgc $YY,1($key)
45 la $XX[0],1($XX[0])
46 nill $XX[0],0xff
47 srlg $cnt,$len,3
48 ltgr $cnt,$cnt
49 llgc $TX[0],2($XX[0],$key)
50 jz .Lshort
51 j .Loop8
52
53.align 64
54.Loop8:
55___
56for ($i=0;$i<8;$i++) {
57$code.=<<___;
58 la $YY,0($YY,$TX[0]) # $i
59 nill $YY,255
60 la $XX[1],1($XX[0])
61 nill $XX[1],255
62___
63$code.=<<___ if ($i==1);
64 llgc $acc,2($TY,$key)
65___
66$code.=<<___ if ($i>1);
67 sllg $acc,$acc,8
68 ic $acc,2($TY,$key)
69___
70$code.=<<___;
71 llgc $TY,2($YY,$key)
72 stc $TX[0],2($YY,$key)
73 llgc $TX[1],2($XX[1],$key)
74 stc $TY,2($XX[0],$key)
75 cr $XX[1],$YY
76 jne .Lcmov$i
77 la $TX[1],0($TX[0])
78.Lcmov$i:
79 la $TY,0($TY,$TX[0])
80 nill $TY,255
81___
82push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
83}
84
85$code.=<<___;
86 lg $TX[1],0($inp)
87 sllg $acc,$acc,8
88 la $inp,8($inp)
89 ic $acc,2($TY,$key)
90 xgr $acc,$TX[1]
91 stg $acc,0($out)
92 la $out,8($out)
Alexandre Savard75410672012-08-08 09:50:01 -040093 brct $cnt,.Loop8
Alexandre Savard1b09e312012-08-07 20:33:29 -040094
95.Lshort:
96 lghi $acc,7
97 ngr $len,$acc
98 jz .Lexit
99 j .Loop1
100
101.align 16
102.Loop1:
103 la $YY,0($YY,$TX[0])
104 nill $YY,255
105 llgc $TY,2($YY,$key)
106 stc $TX[0],2($YY,$key)
107 stc $TY,2($XX[0],$key)
108 ar $TY,$TX[0]
109 ahi $XX[0],1
110 nill $TY,255
111 nill $XX[0],255
112 llgc $acc,0($inp)
113 la $inp,1($inp)
114 llgc $TY,2($TY,$key)
115 llgc $TX[0],2($XX[0],$key)
116 xr $acc,$TY
117 stc $acc,0($out)
118 la $out,1($out)
119 brct $len,.Loop1
120
121.Lexit:
122 ahi $XX[0],-1
123 stc $XX[0],0($key)
124 stc $YY,1($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400125 lmg %r6,%r11,48($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400126 br $rp
127.size RC4,.-RC4
128.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
129
130___
131}
132
133# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
134{
135$cnt="%r0";
136$idx="%r1";
137$key="%r2";
138$len="%r3";
139$inp="%r4";
140$acc="%r5";
141$dat="%r6";
142$ikey="%r7";
143$iinp="%r8";
144
145$code.=<<___;
Alexandre Savard75410672012-08-08 09:50:01 -0400146.globl RC4_set_key
147.type RC4_set_key,\@function
Alexandre Savard1b09e312012-08-07 20:33:29 -0400148.align 64
Alexandre Savard75410672012-08-08 09:50:01 -0400149RC4_set_key:
150 stmg %r6,%r8,48($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400151 lhi $cnt,256
152 la $idx,0(%r0)
153 sth $idx,0($key)
154.align 4
155.L1stloop:
156 stc $idx,2($idx,$key)
157 la $idx,1($idx)
158 brct $cnt,.L1stloop
159
160 lghi $ikey,-256
161 lr $cnt,$len
162 la $iinp,0(%r0)
163 la $idx,0(%r0)
164.align 16
165.L2ndloop:
166 llgc $acc,2+256($ikey,$key)
167 llgc $dat,0($iinp,$inp)
168 la $idx,0($idx,$acc)
169 la $ikey,1($ikey)
170 la $idx,0($idx,$dat)
171 nill $idx,255
172 la $iinp,1($iinp)
173 tml $ikey,255
174 llgc $dat,2($idx,$key)
175 stc $dat,2+256-1($ikey,$key)
176 stc $acc,2($idx,$key)
177 jz .Ldone
178 brct $cnt,.L2ndloop
179 lr $cnt,$len
180 la $iinp,0(%r0)
181 j .L2ndloop
182.Ldone:
Alexandre Savard75410672012-08-08 09:50:01 -0400183 lmg %r6,%r8,48($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400184 br $rp
Alexandre Savard75410672012-08-08 09:50:01 -0400185.size RC4_set_key,.-RC4_set_key
Alexandre Savard1b09e312012-08-07 20:33:29 -0400186
187___
188}
189
190# const char *RC4_options()
191$code.=<<___;
192.globl RC4_options
193.type RC4_options,\@function
194.align 16
195RC4_options:
196 larl %r2,.Loptions
197 br %r14
198.size RC4_options,.-RC4_options
199.section .rodata
200.Loptions:
201.align 8
202.string "rc4(8x,char)"
203___
204
205print $code;