@ SHA-1 block transform for ARMv4, from the OpenSSL/CRYPTOGAMS project.
@ arm_arch.h supplies __ARM_ARCH__ used by the alignment/byte-order guards below.
#include "arm_arch.h"

.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
@ ------------------------------------------------------------------------
@ sha1_block_data_order(state, data, num)
@
@ In:    r0 = pointer to five 32-bit SHA-1 chaining values (A,B,C,D,E)
@        r1 = pointer to input data (unaligned input handled on ARMv7+,
@             byte-assembled on older cores)
@        r2 = number of 64-byte blocks (r2 is turned into an end pointer:
@             r1 + r2*64)
@ Regs:  r3-r7 = working A..E; r8 = round constant K; r9 = X[i] word;
@        r10-r12 = scratch; r14 = top of the on-stack X[] window.
@ Stack: 80 words of message schedule, filled downward via str r9,[r14,#-4]!
@        and freed in one shot at .L_done.
@ Note:  B,C,D,E are kept pre-rotated by 2 bits (see the mov ...,ror#30 at
@        .Lloop and the ror#2 adjustments in every round) so each round's
@        ROTL30 comes for free.
@ ------------------------------------------------------------------------
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:				@ top of per-64-byte-block loop
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
.L_00_15:			@ rounds 0..14, five unrolled rounds per pass
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
	@ round 15 (input word) followed by rounds 16..19 (first scheduled
	@ words), unrolled straight-line below
	sub	sp,sp,#5*4
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

	ldr	r8,.LK_20_39		@ [+15+16*4]
	sub	sp,sp,#20*4
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:		@ shared body: rounds 20..39 (carry clear, K_20_39)
				@ and rounds 60..79 (carry set, K_60_79)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
.L_40_59:			@ rounds 40..59, majority function F=(B&C)|(D&(B|C))
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2		@ F_xx_xx
	and	r11,r5,r6			@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	and	r11,r4,r5			@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	and	r11,r3,r4			@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	and	r11,r7,r3			@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	and	r11,r6,r7			@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:			@ all 80 rounds finished for this block
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2		@ undo the pre-rotation of C,D,E
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
#endif
.align	2
@ SHA-1 round constants (FIPS 180-4), one per 20-round phase
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2