#!/usr/bin/env perl
2
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
9
# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise this is a straightforward
# implementation with the X vector kept in the register bank. The module
# is big-endian [which is not a big deal, as there are no little-endian
# targets left around].
#
# (*) Does this mean that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned loads?
17
#                       -m64    -m32
# ----------------------------------
# PPC970,gcc-4.0.0      +76%    +59%
# Power6,xlc-7          +68%    +33%
22
# First command-line argument names the target flavour. Only the "64" or
# "32" substring matters here: it selects the pointer size, the offset at
# which the link register is saved, and the pointer-sized compare/store/load
# mnemonics that are interpolated into the assembly templates below.
$flavour = shift;

if ($flavour =~ /64/) {
    ($SIZE_T, $UCMP, $STU, $POP, $PUSH) = (8, "cmpld", "stdu", "ld", "std");
    $LRSAVE = 2*$SIZE_T;
}
elsif ($flavour =~ /32/) {
    ($SIZE_T, $UCMP, $STU, $POP, $PUSH) = (4, "cmplw", "stwu", "lwz", "stw");
    $LRSAVE = $SIZE_T;
}
else {
    # Neither word size recognised: refuse to generate anything.
    die "nonsense $flavour";
}
40
# Locate the ppc-xlate.pl translator: first next to this script, then in
# ../../perlasm/ relative to it. $dir is the directory part of $0 (with its
# trailing separator), or the empty string when $0 has no directory part.
# Taking $1 only after a successful match avoids picking up a capture left
# over from some earlier, unrelated regex.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator. The next
# command-line argument (if any) is appended to the translator's command
# line after the flavour.
#
# The call is parenthesised and checked with low-precedence `or`: with the
# previous `... . shift || die ...` form the `||` applied to the (always
# true) command string, so the die could never fire and a failed open was
# silently ignored.
open(STDOUT, "| $^X $xlate $flavour " . shift)
    or die "can't call $xlate: $!";
47
# Stack frame size used by the generated function, and the offset from the
# stack pointer of the scratch area inside that frame (the generated code
# copies one 64-byte input block there when it has to re-align it).
$FRAME  = 24*$SIZE_T+64;
$LOCALS = 6*$SIZE_T;

# Fixed register roles used by the assembly templates.
($K, $sp, $toc) = ("r0", "r1", "r2");       # round constant, stack pointer, TOC
($ctx, $inp, $num) = ("r3", "r4", "r5");    # the three function arguments
($t0, $t1) = ("r15", "r6");                 # scratch registers

# Working variables A..E plus one spare (T); their roles rotate every round.
($A, $B, $C, $D, $E, $T) = map("r$_", 7 .. 12);
@V = ($A, $B, $C, $D, $E, $T);

# Sixteen registers holding the message-schedule window X[0..15].
@X = map("r$_", 16 .. 31);
70
# Append the assembly for one round of the first group (rounds 0..19) to
# $code.
#
# Arguments: the round number followed by the six registers currently
# playing the roles a, b, c, d, e and f (f receives the new "a" value).
#
# Rounds 0..14 load the next input word straight from ($inp); round 0 also
# loads word 0 itself. From round 15 on the next schedule word is computed
# in place as rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
# The round function is (b & c) | (d & ~b), assembled in $t0/$t1.
sub BODY_00_19 {
    my ($i, $ra, $rb, $rc, $rd, $re, $rf) = @_;
    my $nxt = $i + 1;

    # Schedule registers touched by this round, all taken modulo 16.
    my ($x_cur, $x_nxt, $x_n2, $x_n8, $x_n13) =
        map { $X[$_ % 16] } ($i, $nxt, $nxt + 2, $nxt + 8, $nxt + 13);

    if ($i == 0) {
        $code .= <<___;
 lwz $x_cur,`$i*4`($inp)
___
    }

    if ($i < 15) {
        $code .= <<___;
 lwz $x_nxt,`$nxt*4`($inp)
 add $rf,$K,$re
 rotlwi $re,$ra,5
 add $rf,$rf,$x_cur
 and $t0,$rc,$rb
 add $rf,$rf,$re
 andc $t1,$rd,$rb
 rotlwi $rb,$rb,30
 or $t0,$t0,$t1
 add $rf,$rf,$t0
___
    }
    else {
        $code .= <<___;
 add $rf,$K,$re
 rotlwi $re,$ra,5
 xor $x_nxt,$x_nxt,$x_n2
 add $rf,$rf,$x_cur
 and $t0,$rc,$rb
 xor $x_nxt,$x_nxt,$x_n8
 add $rf,$rf,$re
 andc $t1,$rd,$rb
 rotlwi $rb,$rb,30
 or $t0,$t0,$t1
 xor $x_nxt,$x_nxt,$x_n13
 add $rf,$rf,$t0
 rotlwi $x_nxt,$x_nxt,1
___
    }
}
105
# Append the assembly for one round that uses the parity function
# b ^ c ^ d to $code. The driver code uses this for rounds 20..39 and again
# for rounds 60..79.
#
# Arguments: the round number followed by the six registers currently
# playing the roles a, b, c, d, e and f (f receives the new "a" value).
#
# Every round before 79 also advances the message schedule in place:
# X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
# Round 79 has no further schedule word to compute, so those slots are
# used to reload the five context words from ($ctx) into r16..r20.
sub BODY_20_39 {
    my ($i, $ra, $rb, $rc, $rd, $re, $rf) = @_;
    my $nxt = $i + 1;

    # Schedule registers touched by this round, all taken modulo 16.
    my ($x_cur, $x_nxt, $x_n2, $x_n8, $x_n13) =
        map { $X[$_ % 16] } ($i, $nxt, $nxt + 2, $nxt + 8, $nxt + 13);

    if ($i < 79) {
        $code .= <<___;
 add $rf,$K,$re
 rotlwi $re,$ra,5
 xor $x_nxt,$x_nxt,$x_n2
 add $rf,$rf,$x_cur
 xor $t0,$rb,$rc
 xor $x_nxt,$x_nxt,$x_n8
 add $rf,$rf,$re
 rotlwi $rb,$rb,30
 xor $t0,$t0,$rd
 xor $x_nxt,$x_nxt,$x_n13
 add $rf,$rf,$t0
 rotlwi $x_nxt,$x_nxt,1
___
    }
    elsif ($i == 79) {
        $code .= <<___;
 add $rf,$K,$re
 rotlwi $re,$ra,5
 lwz r16,0($ctx)
 add $rf,$rf,$x_cur
 xor $t0,$rb,$rc
 lwz r17,4($ctx)
 add $rf,$rf,$re
 rotlwi $rb,$rb,30
 lwz r18,8($ctx)
 xor $t0,$t0,$rd
 lwz r19,12($ctx)
 add $rf,$rf,$t0
 lwz r20,16($ctx)
___
    }
}
139
# Append the assembly for one round of the third group (rounds 40..59) to
# $code.
#
# Arguments: the round number followed by the six registers currently
# playing the roles a, b, c, d, e and f (f receives the new "a" value).
#
# The round function is the majority function, computed as
# (b & c) | ((b | c) & d) in $t0/$t1. Interleaved with it, the next
# schedule word is updated in place:
# X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
sub BODY_40_59 {
    my ($i, $ra, $rb, $rc, $rd, $re, $rf) = @_;
    my $nxt = $i + 1;

    # Schedule registers touched by this round, all taken modulo 16.
    my $x_cur = $X[$i % 16];
    my $x_nxt = $X[$nxt % 16];
    my $x_n2  = $X[($nxt + 2) % 16];
    my $x_n8  = $X[($nxt + 8) % 16];
    my $x_n13 = $X[($nxt + 13) % 16];

    # One entry per emitted instruction, in emission order.
    my @insn = (
        "add $rf,$K,$re",
        "rotlwi $re,$ra,5",
        "xor $x_nxt,$x_nxt,$x_n2",
        "add $rf,$rf,$x_cur",
        "and $t0,$rb,$rc",
        "xor $x_nxt,$x_nxt,$x_n8",
        "add $rf,$rf,$re",
        "or $t1,$rb,$rc",
        "rotlwi $rb,$rb,30",
        "xor $x_nxt,$x_nxt,$x_n13",
        "and $t1,$t1,$rd",
        "or $t0,$t0,$t1",
        "rotlwi $x_nxt,$x_nxt,1",
        "add $rf,$rf,$t0",
    );

    foreach my $line (@insn) {
        $code .= " $line\n";
    }
}
160
# Assembly for the public entry point .sha1_block_data_order.
#
# Outline of the emitted code:
#   - allocate a frame of $FRAME bytes, save r15..r31 and the link register;
#   - load the five context words from ($ctx) into $A..$E;
#   - if the low two bits of $inp are clear, put $num in CTR and call
#     Lsha1_block_private once for the whole input;
#   - otherwise follow the page-boundary strategy described in the assembly
#     comment below: blocks that lie before the next 4096-byte boundary are
#     hashed in place, and a block that would cross it is first copied byte
#     by byte to the scratch area at $LOCALS($sp) and hashed from there;
#   - restore the saved registers, release the frame and return.
#
# Register rN (N = 15..31) is saved at offset $FRAME-$SIZE_T*(32-N), so the
# save and restore sequences are generated rather than written out by hand.
my ($save_gprs, $restore_gprs) = ("", "");
foreach my $n (15 .. 31) {
    my $slot = 32 - $n;
    $save_gprs    .= " $PUSH r$n,`$FRAME-$SIZE_T*$slot`($sp)\n";
    $restore_gprs .= " $POP r$n,`$FRAME-$SIZE_T*$slot`($sp)\n";
}

# Prologue up to the register saves.
$code = <<___;
.machine "any"
.text

.globl .sha1_block_data_order
.align 4
.sha1_block_data_order:
 $STU $sp,-$FRAME($sp)
 mflr r0
___
$code .= $save_gprs;

# Context load, alignment dispatch and the unaligned-input path.
$code .= <<___;
 $PUSH r0,`$FRAME+$LRSAVE`($sp)
 lwz $A,0($ctx)
 lwz $B,4($ctx)
 lwz $C,8($ctx)
 lwz $D,12($ctx)
 lwz $E,16($ctx)
 andi. r0,$inp,3
 bne Lunaligned
Laligned:
 mtctr $num
 bl Lsha1_block_private
 b Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align 4
Lunaligned:
 subfic $t1,$inp,4096
 andi. $t1,$t1,4095 ; distance to closest page boundary
 srwi. $t1,$t1,6 ; t1/=64
 beq Lcross_page
 $UCMP $num,$t1
 ble- Laligned ; didn't cross the page boundary
 mtctr $t1
 subfc $num,$t1,$num
 bl Lsha1_block_private
Lcross_page:
 li $t1,16
 mtctr $t1
 addi r20,$sp,$LOCALS ; spot within the frame
Lmemcpy:
 lbz r16,0($inp)
 lbz r17,1($inp)
 lbz r18,2($inp)
 lbz r19,3($inp)
 addi $inp,$inp,4
 stb r16,0(r20)
 stb r17,1(r20)
 stb r18,2(r20)
 stb r19,3(r20)
 addi r20,r20,4
 bdnz Lmemcpy

 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
 li $t1,1
 addi $inp,$sp,$LOCALS
 mtctr $t1
 bl Lsha1_block_private
 $POP $inp,`$FRAME-$SIZE_T*18`($sp)
 addic. $num,$num,-1
 bne- Lunaligned

Ldone:
 $POP r0,`$FRAME+$LRSAVE`($sp)
___
$code .= $restore_gprs;

# Epilogue and the trailing data words.
$code .= <<___;
 mtlr r0
 addi $sp,$sp,$FRAME
 blr
 .long 0
 .byte 0,12,4,1,0x80,18,3,0
 .long 0
___
269
# Emit the private block function Lsha1_block_private. It uses a tailored
# calling interface: on entry the SHA_CTX words are already loaded into
# $A..$E and the count register holds the number of 64-byte chunks to
# digest.
$code .= <<___;
.align 4
Lsha1_block_private:
___

# The 80 rounds are emitted in four groups of 20. Each group first loads its
# 32-bit round constant into $K (upper half with lis, lower half with ori)
# and then emits its rounds with the listed generator.
my @round_groups = (
    # [ K upper half, K lower half, first round NOT in group, generator ]
    [ "0x5a82", "0x7999", 20, \&BODY_00_19 ],    # K_00_19
    [ "0x6ed9", "0xeba1", 40, \&BODY_20_39 ],    # K_20_39
    [ "0x8f1b", "0xbcdc", 60, \&BODY_40_59 ],    # K_40_59
    [ "0xca62", "0xc1d6", 80, \&BODY_20_39 ],    # K_60_79
);

$i = 0;
foreach my $group (@round_groups) {
    my ($k_hi, $k_lo, $stop, $emit_round) = @$group;

    $code .= " lis $K,$k_hi\n";
    $code .= " ori $K,$K,$k_lo\n";

    while ($i < $stop) {
        $emit_round->($i, @V);
        # Rotate the register roles: the register that just received the
        # new "a" value (last in @V) moves to the front for the next round.
        unshift(@V, pop(@V));
        $i++;
    }
}

# r16..r20 were reloaded with the old context words by round 79. Add the
# working variables to them, store the result back to ($ctx), and copy it
# into $A..$E for the next chunk. Then advance $inp by one 64-byte chunk
# and loop while the count register is non-zero.
$code .= <<___;
 add r16,r16,$E
 add r17,r17,$T
 add r18,r18,$A
 add r19,r19,$B
 add r20,r20,$C
 stw r16,0($ctx)
 mr $A,r16
 stw r17,4($ctx)
 mr $B,r17
 stw r18,8($ctx)
 mr $C,r18
 stw r19,12($ctx)
 mr $D,r19
 stw r20,16($ctx)
 mr $E,r20
 addi $inp,$inp,`16*4`
 bdnz- Lsha1_block_private
 blr
 .long 0
 .byte 0,12,0x14,0,0,0,0,0
___
# Identification string embedded in the generated object.
$code.=<<___;
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `...` span in the accumulated assembly with the value of the
# Perl expression it contains (frame offsets and similar arithmetic).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

# Check the close as well: buffered write errors surface here, and when
# STDOUT is a pipe to the translator this is where a non-zero exit of that
# child is reported. In that case $! is 0 and the status is in $?.
close STDOUT
    or die "error closing STDOUT: " . ($! ? $! : "child exited with status $?");
326close STDOUT;