Alexandre Lision | f26d3e5 | 2014-04-14 16:22:31 -0400 | [diff] [blame^] | 1 | .file "crypto/bn/asm/x86-gf2m.s" |
| 2 | .text |
# ---------------------------------------------------------------------
# _mul_1x1_mmx: 32x32 -> 64-bit carry-less multiply (partial products
# are combined with XOR instead of ADD), using MMX for the accumulator.
#   In:    %eax = a, %ebx = b
#   Out:   %mm0 = 64-bit product
#   Stack: 36 bytes reserved; the first 32 hold an 8-entry dword table
#   Clobb: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm0-%mm5, flags
#          (nothing is saved here; the caller preserves what it needs)
# Local (non-exported) helper: no .globl is emitted for it.
# NOTE(review): integer and MMX instructions are interleaved, presumably
# for scheduling; comments describe the net effect of each phase.
# ---------------------------------------------------------------------
| 3 | .type _mul_1x1_mmx,@function |
| 4 | .align 16 |
| 5 | _mul_1x1_mmx: |
# Phase 1 (down to the store at 28(%esp)): build tab[0..7] at (%esp).
#   a1 = a & 0x3fffffff, a2 = (a<<1) & 0x7fffffff, a4 = a<<2
#   tab[i] = XOR of a1 (if i bit 0), a2 (if i bit 1), a4 (if i bit 2)
# In parallel, %mm5 / %mm4 become all-ones dword masks when bit 31 /
# bit 30 of a is set, are ANDed with b (%mm3) and shifted left by
# 31 / 30. These cover the two top bits of a that the table drops.
| 6 | subl $36,%esp |
| 7 | movl %eax,%ecx |
| 8 | leal (%eax,%eax,1),%edx |
| 9 | andl $1073741823,%ecx |
| 10 | leal (%edx,%edx,1),%ebp |
| 11 | movl $0,(%esp) |
| 12 | andl $2147483647,%edx |
| 13 | movd %eax,%mm2 |
| 14 | movd %ebx,%mm3 |
| 15 | movl %ecx,4(%esp) |
| 16 | xorl %edx,%ecx |
| 17 | pxor %mm5,%mm5 |
| 18 | pxor %mm4,%mm4 |
| 19 | movl %edx,8(%esp) |
| 20 | xorl %ebp,%edx |
| 21 | movl %ecx,12(%esp) |
# 0 > a (signed dword compare) is true exactly when bit 31 of a is set.
| 22 | pcmpgtd %mm2,%mm5 |
# Doubling a moves its bit 30 into the sign position for the next compare.
| 23 | paddd %mm2,%mm2 |
| 24 | xorl %edx,%ecx |
| 25 | movl %ebp,16(%esp) |
| 26 | xorl %edx,%ebp |
| 27 | pand %mm3,%mm5 |
| 28 | pcmpgtd %mm2,%mm4 |
| 29 | movl %ecx,20(%esp) |
| 30 | xorl %ecx,%ebp |
| 31 | psllq $31,%mm5 |
| 32 | pand %mm3,%mm4 |
| 33 | movl %edx,24(%esp) |
# Phase 2: walk b in 3-bit windows. %ebp keeps the constant mask 7 from
# here on; %esi and %edi alternate as the table index so the next index
# is extracted while the previous entry is still being loaded/shifted.
| 34 | movl $7,%esi |
| 35 | movl %ebp,28(%esp) |
| 36 | movl %esi,%ebp |
| 37 | andl %ebx,%esi |
| 38 | shrl $3,%ebx |
| 39 | movl %ebp,%edi |
| 40 | psllq $30,%mm4 |
| 41 | andl %ebx,%edi |
| 42 | shrl $3,%ebx |
# Window 0 initialises the accumulator %mm0 (movd zero-extends to 64 bits).
| 43 | movd (%esp,%esi,4),%mm0 |
| 44 | movl %ebp,%esi |
| 45 | andl %ebx,%esi |
| 46 | shrl $3,%ebx |
# Window k (k = 1..10): tab[(b >> 3k) & 7] << 3k is XORed into %mm0.
| 47 | movd (%esp,%edi,4),%mm2 |
| 48 | movl %ebp,%edi |
| 49 | psllq $3,%mm2 |
| 50 | andl %ebx,%edi |
| 51 | shrl $3,%ebx |
| 52 | pxor %mm2,%mm0 |
| 53 | movd (%esp,%esi,4),%mm1 |
| 54 | movl %ebp,%esi |
| 55 | psllq $6,%mm1 |
| 56 | andl %ebx,%esi |
| 57 | shrl $3,%ebx |
| 58 | pxor %mm1,%mm0 |
| 59 | movd (%esp,%edi,4),%mm2 |
| 60 | movl %ebp,%edi |
| 61 | psllq $9,%mm2 |
| 62 | andl %ebx,%edi |
| 63 | shrl $3,%ebx |
| 64 | pxor %mm2,%mm0 |
| 65 | movd (%esp,%esi,4),%mm1 |
| 66 | movl %ebp,%esi |
| 67 | psllq $12,%mm1 |
| 68 | andl %ebx,%esi |
| 69 | shrl $3,%ebx |
| 70 | pxor %mm1,%mm0 |
| 71 | movd (%esp,%edi,4),%mm2 |
| 72 | movl %ebp,%edi |
| 73 | psllq $15,%mm2 |
| 74 | andl %ebx,%edi |
| 75 | shrl $3,%ebx |
| 76 | pxor %mm2,%mm0 |
| 77 | movd (%esp,%esi,4),%mm1 |
| 78 | movl %ebp,%esi |
| 79 | psllq $18,%mm1 |
| 80 | andl %ebx,%esi |
| 81 | shrl $3,%ebx |
| 82 | pxor %mm1,%mm0 |
| 83 | movd (%esp,%edi,4),%mm2 |
| 84 | movl %ebp,%edi |
| 85 | psllq $21,%mm2 |
| 86 | andl %ebx,%edi |
| 87 | shrl $3,%ebx |
| 88 | pxor %mm2,%mm0 |
| 89 | movd (%esp,%esi,4),%mm1 |
| 90 | movl %ebp,%esi |
| 91 | psllq $24,%mm1 |
| 92 | andl %ebx,%esi |
| 93 | shrl $3,%ebx |
| 94 | pxor %mm1,%mm0 |
# Phase 3: last two windows (shifts 27 and 30; the final window only has
# the two remaining bits of b) and the bit-30 / bit-31 corrections
# prepared in %mm4 / %mm5.
| 95 | movd (%esp,%edi,4),%mm2 |
| 96 | pxor %mm4,%mm0 |
| 97 | psllq $27,%mm2 |
| 98 | pxor %mm2,%mm0 |
| 99 | movd (%esp,%esi,4),%mm1 |
| 100 | pxor %mm5,%mm0 |
| 101 | psllq $30,%mm1 |
# The table is no longer read: the remaining pxor only uses MMX registers.
| 102 | addl $36,%esp |
| 103 | pxor %mm1,%mm0 |
| 104 | ret |
| 105 | .size _mul_1x1_mmx,.-_mul_1x1_mmx |
# ---------------------------------------------------------------------
# _mul_1x1_ialu: 32x32 -> 64-bit carry-less multiply (partial products
# are combined with XOR instead of ADD), integer registers only.
#   In:    %eax = a, %ebx = b
#   Out:   %edx:%eax = 64-bit product (%edx high dword, %eax low dword)
#   Stack: 36 bytes reserved; the first 32 hold an 8-entry dword table
#   Clobb: %ebx, %ecx, %ebp, %esi, %edi, flags
#          (nothing is saved here; the caller preserves what it needs)
# Local (non-exported) helper: no .globl is emitted for it.
# ---------------------------------------------------------------------
| 106 | .type _mul_1x1_ialu,@function |
| 107 | .align 16 |
| 108 | _mul_1x1_ialu: |
# Phase 1 (down to the store at 28(%esp)): build tab[0..7] at (%esp).
#   a1 = a & 0x3fffffff, a2 = (a<<1) & 0x7fffffff, a4 = a<<2
#   tab[i] = XOR of a1 (if i bit 0), a2 (if i bit 1), a4 (if i bit 2)
# Interleaved with it: %eax / %edi are turned into all-ones masks when
# bit 31 / bit 30 of a is set (arithmetic shift right by 31 of a and of
# 2*a) and ANDed with b, to cover the two top bits the table drops.
| 109 | subl $36,%esp |
| 110 | movl %eax,%ecx |
| 111 | leal (%eax,%eax,1),%edx |
| 112 | leal (,%eax,4),%ebp |
| 113 | andl $1073741823,%ecx |
| 114 | leal (%eax,%eax,1),%edi |
| 115 | sarl $31,%eax |
| 116 | movl $0,(%esp) |
| 117 | andl $2147483647,%edx |
| 118 | movl %ecx,4(%esp) |
| 119 | xorl %edx,%ecx |
| 120 | movl %edx,8(%esp) |
| 121 | xorl %ebp,%edx |
| 122 | movl %ecx,12(%esp) |
| 123 | xorl %edx,%ecx |
| 124 | movl %ebp,16(%esp) |
| 125 | xorl %edx,%ebp |
| 126 | movl %ecx,20(%esp) |
| 127 | xorl %ecx,%ebp |
| 128 | sarl $31,%edi |
| 129 | andl %ebx,%eax |
| 130 | movl %edx,24(%esp) |
| 131 | andl %ebx,%edi |
| 132 | movl %ebp,28(%esp) |
# Seed the accumulator with the top-bit corrections, split across the
# 64-bit result: low dword gets (x << 31) ^ (y << 30), high dword gets
# (x >> 1) ^ (y >> 2), where x / y are b masked by bit 31 / bit 30 of a.
| 133 | movl %eax,%edx |
| 134 | shll $31,%eax |
| 135 | movl %edi,%ecx |
| 136 | shrl $1,%edx |
# Phase 2: walk b in 3-bit windows. The mask 7 is reloaded each time
# because %ebp is used as scratch here; %esi and %edi alternate as index.
| 137 | movl $7,%esi |
| 138 | shll $30,%edi |
| 139 | andl %ebx,%esi |
| 140 | shrl $2,%ecx |
| 141 | xorl %edi,%eax |
| 142 | shrl $3,%ebx |
| 143 | movl $7,%edi |
| 144 | andl %ebx,%edi |
| 145 | shrl $3,%ebx |
| 146 | xorl %ecx,%edx |
# Window 0 needs no shift and contributes to the low dword only.
| 147 | xorl (%esp,%esi,4),%eax |
| 148 | movl $7,%esi |
| 149 | andl %ebx,%esi |
| 150 | shrl $3,%ebx |
# Window k (k = 1..10): t = tab[(b >> 3k) & 7];
#   %eax ^= t << 3k        (low dword of the shifted entry)
#   %edx ^= t >> (32 - 3k) (bits that spill into the high dword)
| 151 | movl (%esp,%edi,4),%ebp |
| 152 | movl $7,%edi |
| 153 | movl %ebp,%ecx |
| 154 | shll $3,%ebp |
| 155 | andl %ebx,%edi |
| 156 | shrl $29,%ecx |
| 157 | xorl %ebp,%eax |
| 158 | shrl $3,%ebx |
| 159 | xorl %ecx,%edx |
| 160 | movl (%esp,%esi,4),%ecx |
| 161 | movl $7,%esi |
| 162 | movl %ecx,%ebp |
| 163 | shll $6,%ecx |
| 164 | andl %ebx,%esi |
| 165 | shrl $26,%ebp |
| 166 | xorl %ecx,%eax |
| 167 | shrl $3,%ebx |
| 168 | xorl %ebp,%edx |
| 169 | movl (%esp,%edi,4),%ebp |
| 170 | movl $7,%edi |
| 171 | movl %ebp,%ecx |
| 172 | shll $9,%ebp |
| 173 | andl %ebx,%edi |
| 174 | shrl $23,%ecx |
| 175 | xorl %ebp,%eax |
| 176 | shrl $3,%ebx |
| 177 | xorl %ecx,%edx |
| 178 | movl (%esp,%esi,4),%ecx |
| 179 | movl $7,%esi |
| 180 | movl %ecx,%ebp |
| 181 | shll $12,%ecx |
| 182 | andl %ebx,%esi |
| 183 | shrl $20,%ebp |
| 184 | xorl %ecx,%eax |
| 185 | shrl $3,%ebx |
| 186 | xorl %ebp,%edx |
| 187 | movl (%esp,%edi,4),%ebp |
| 188 | movl $7,%edi |
| 189 | movl %ebp,%ecx |
| 190 | shll $15,%ebp |
| 191 | andl %ebx,%edi |
| 192 | shrl $17,%ecx |
| 193 | xorl %ebp,%eax |
| 194 | shrl $3,%ebx |
| 195 | xorl %ecx,%edx |
| 196 | movl (%esp,%esi,4),%ecx |
| 197 | movl $7,%esi |
| 198 | movl %ecx,%ebp |
| 199 | shll $18,%ecx |
| 200 | andl %ebx,%esi |
| 201 | shrl $14,%ebp |
| 202 | xorl %ecx,%eax |
| 203 | shrl $3,%ebx |
| 204 | xorl %ebp,%edx |
| 205 | movl (%esp,%edi,4),%ebp |
| 206 | movl $7,%edi |
| 207 | movl %ebp,%ecx |
| 208 | shll $21,%ebp |
| 209 | andl %ebx,%edi |
| 210 | shrl $11,%ecx |
| 211 | xorl %ebp,%eax |
| 212 | shrl $3,%ebx |
| 213 | xorl %ecx,%edx |
| 214 | movl (%esp,%esi,4),%ecx |
| 215 | movl $7,%esi |
| 216 | movl %ecx,%ebp |
| 217 | shll $24,%ecx |
| 218 | andl %ebx,%esi |
| 219 | shrl $8,%ebp |
| 220 | xorl %ecx,%eax |
| 221 | shrl $3,%ebx |
| 222 | xorl %ebp,%edx |
# Phase 3: last two windows (shifts 27 and 30). Both table entries are
# loaded before the index registers are reused as scratch.
| 223 | movl (%esp,%edi,4),%ebp |
| 224 | movl %ebp,%ecx |
| 225 | shll $27,%ebp |
| 226 | movl (%esp,%esi,4),%edi |
| 227 | shrl $5,%ecx |
| 228 | movl %edi,%esi |
| 229 | xorl %ebp,%eax |
| 230 | shll $30,%edi |
| 231 | xorl %ecx,%edx |
| 232 | shrl $2,%esi |
| 233 | xorl %edi,%eax |
| 234 | xorl %esi,%edx |
| 235 | addl $36,%esp |
| 236 | ret |
| 237 | .size _mul_1x1_ialu,.-_mul_1x1_ialu |
# ---------------------------------------------------------------------
# bn_GF2m_mul_2x2: 64x64 -> 128-bit carry-less multiply, assembled from
# three 32x32 products (Karatsuba: high product, low product, and the
# product of the XORed halves).
#   Stack arguments at entry (32-bit, passed on the stack):
#      4(%esp) arg1  pointer to the result; four dwords are written
#      8(%esp) arg2  \ multiplied together as the HIGH 32x32 product
#     16(%esp) arg4  /
#     12(%esp) arg3  \ multiplied together as the LOW 32x32 product
#     20(%esp) arg5  /
#   NOTE(review): presumably the C prototype is (r, a1, a0, b1, b0) --
#   confirm against the declaration used by the C callers.
#   Preserves %ebx, %esi, %edi, %ebp (pushed/popped on both paths).
#   Clobbers  %eax, %ecx, %edx, flags; the MMX path also overwrites
#             %mm0-%mm7 and executes emms before returning.
# ---------------------------------------------------------------------
| 238 | .globl bn_GF2m_mul_2x2 |
| 239 | .type bn_GF2m_mul_2x2,@function |
| 240 | .align 16 |
| 241 | bn_GF2m_mul_2x2: |
| 242 | .L_bn_GF2m_mul_2x2_begin: |
# Position-independent lookup of OPENSSL_ia32cap_P: call/pop yields the
# current address, which is rebased to the GOT; the variable's address
# is then read from its GOT slot.
| 243 | call .L000PIC_me_up |
| 244 | .L000PIC_me_up: |
| 245 | popl %edx |
| 246 | leal _GLOBAL_OFFSET_TABLE_+[.-.L000PIC_me_up](%edx),%edx |
| 247 | movl OPENSSL_ia32cap_P@GOT(%edx),%edx |
# %eax = first capability dword, %edx = second (the second one is not
# consulted anywhere in this routine).
| 248 | movl (%edx),%eax |
| 249 | movl 4(%edx),%edx |
# 8388608 = 1<<23. Bit clear -> integer-only path.
# NOTE(review): presumably this is the CPUID MMX feature bit -- confirm
# against the OPENSSL_ia32cap documentation.
| 250 | testl $8388608,%eax |
| 251 | jz .L001ialu |
# ---- MMX path. After the four pushes the arguments sit at
# 20(%esp)=arg1, 24=arg2, 28=arg3, 32=arg4, 36=arg5.
| 252 | pushl %ebp |
| 253 | pushl %ebx |
| 254 | pushl %esi |
| 255 | pushl %edi |
# %mm7 = arg2 * arg4 (high product)
| 256 | movl 24(%esp),%eax |
| 257 | movl 32(%esp),%ebx |
| 258 | call _mul_1x1_mmx |
| 259 | movq %mm0,%mm7 |
# %mm6 = arg3 * arg5 (low product)
| 260 | movl 28(%esp),%eax |
| 261 | movl 36(%esp),%ebx |
| 262 | call _mul_1x1_mmx |
| 263 | movq %mm0,%mm6 |
# %mm0 = (arg2 ^ arg3) * (arg4 ^ arg5)
| 264 | movl 24(%esp),%eax |
| 265 | movl 32(%esp),%ebx |
| 266 | xorl 28(%esp),%eax |
| 267 | xorl 36(%esp),%ebx |
| 268 | call _mul_1x1_mmx |
# mid = %mm0 ^ high ^ low. Result low qword = (mid << 32) ^ low,
# result high qword = (mid >> 32) ^ high. Register restores are
# interleaved with the combine; %eax is loaded with arg1 before the
# pops change the %esp-relative offsets.
| 269 | pxor %mm7,%mm0 |
| 270 | movl 20(%esp),%eax |
| 271 | pxor %mm6,%mm0 |
| 272 | movq %mm0,%mm2 |
| 273 | psllq $32,%mm0 |
| 274 | popl %edi |
| 275 | psrlq $32,%mm2 |
| 276 | popl %esi |
| 277 | pxor %mm6,%mm0 |
| 278 | popl %ebx |
| 279 | pxor %mm7,%mm2 |
| 280 | movq %mm0,(%eax) |
| 281 | popl %ebp |
| 282 | movq %mm2,8(%eax) |
# Leave MMX state so the caller can use x87 floating point again.
| 283 | emms |
| 284 | ret |
| 285 | .align 16 |
# ---- Integer path. After four pushes plus 20 bytes of locals the
# arguments sit at 40(%esp)=arg1, 44=arg2, 48=arg3, 52=arg4, 56=arg5.
# Locals: 0/4(%esp) = low product (lo/hi dword),
#         8/12(%esp) = high product (lo/hi dword); 16(%esp) is unused.
| 286 | .L001ialu: |
| 287 | pushl %ebp |
| 288 | pushl %ebx |
| 289 | pushl %esi |
| 290 | pushl %edi |
| 291 | subl $20,%esp |
# high product = arg2 * arg4
| 292 | movl 44(%esp),%eax |
| 293 | movl 52(%esp),%ebx |
| 294 | call _mul_1x1_ialu |
| 295 | movl %eax,8(%esp) |
| 296 | movl %edx,12(%esp) |
# low product = arg3 * arg5
| 297 | movl 48(%esp),%eax |
| 298 | movl 56(%esp),%ebx |
| 299 | call _mul_1x1_ialu |
| 300 | movl %eax,(%esp) |
| 301 | movl %edx,4(%esp) |
# %edx:%eax = (arg2 ^ arg3) * (arg4 ^ arg5)
| 302 | movl 44(%esp),%eax |
| 303 | movl 52(%esp),%ebx |
| 304 | xorl 48(%esp),%eax |
| 305 | xorl 56(%esp),%ebx |
| 306 | call _mul_1x1_ialu |
# Combine. With L = low product, H = high product, M = %edx:%eax:
#   r[0] = L.lo
#   r[1] = L.hi ^ M.lo ^ L.lo ^ H.lo
#   r[2] = H.lo ^ M.hi ^ L.hi ^ H.hi
#   r[3] = H.hi
# (r[] are the dwords at 0, 4, 8, 12 from the arg1 pointer in %ebp.)
| 307 | movl 40(%esp),%ebp |
| 308 | movl (%esp),%ebx |
| 309 | movl 4(%esp),%ecx |
| 310 | movl 8(%esp),%edi |
| 311 | movl 12(%esp),%esi |
| 312 | xorl %edx,%eax |
| 313 | xorl %ecx,%edx |
| 314 | xorl %ebx,%eax |
| 315 | movl %ebx,(%ebp) |
| 316 | xorl %edi,%edx |
| 317 | movl %esi,12(%ebp) |
| 318 | xorl %esi,%eax |
| 319 | addl $20,%esp |
| 320 | xorl %esi,%edx |
| 321 | popl %edi |
| 322 | xorl %edx,%eax |
| 323 | popl %esi |
| 324 | movl %edx,8(%ebp) |
| 325 | popl %ebx |
# %ebp still holds the result pointer for this last store; it is only
# restored by the pop that follows.
| 326 | movl %eax,4(%ebp) |
| 327 | popl %ebp |
| 328 | ret |
| 329 | .size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin |
# NUL-terminated ASCII banner embedded after the code; the bytes decode to
# "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
# No label is attached, so nothing in this file references it.
| 330 | .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 |
| 331 | .byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 |
| 332 | .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 |
| 333 | .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 |
| 334 | .byte 62,0 |
# Common symbol for the capability words read by bn_GF2m_mul_2x2:
# 8 bytes, 4-byte alignment. As a common symbol it merges with any other
# definition of the same name at link time.
| 335 | .comm OPENSSL_ia32cap_P,8,4 |