#!/usr/bin/env perl
#
# Generator for a small set of x86_64 assembler helper routines.
#
# Usage: <this script> [flavour] [output]
#
# The assembler text printed by this script is not written out directly:
# STDOUT is re-opened as a pipe into perlasm/x86_64-xlate.pl (looked up
# relative to the directory this script lives in), which is handed the
# flavour and output arguments.

use strict;
use warnings;

# Positional arguments.  A first argument that contains a dot is treated as
# the output file name, in which case no flavour was given.
my $flavour = shift;
my $output  = shift;
if (defined $flavour && $flavour =~ /\./) {
    $output  = $flavour;
    $flavour = undef;
}

# Win64 register conventions apply for nasm/masm/mingw64 flavours, or when
# the output file name ends in .asm.
my $win64 = 0;
if (   (defined $flavour && $flavour =~ /[nm]asm|mingw64/)
    || (defined $output  && $output  =~ /\.asm$/))
{
    $win64 = 1;
}

# Directory prefix of this script, including the trailing separator.  Falls
# back to the empty string (i.e. the current directory) when $0 carries no
# directory part, instead of relying on whatever $1 happened to hold.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : '';

# Pipe everything printed from here on through the translator.  Undefined or
# empty arguments are simply left out of the command line.
my $xlate     = "${dir}perlasm/x86_64-xlate.pl";
my $xlate_cmd = join ' ', $^X, $xlate,
    grep { defined($_) && length($_) } ($flavour, $output);
open STDOUT, "| $xlate_cmd"
    or die "can't call $xlate: $!";

# First two integer argument registers for the selected calling convention.
my ($arg1, $arg2) = $win64 ? ('%rcx', '%rdx') : ('%rdi', '%rsi');
# The routines below are emitted one heredoc at a time; together they produce
# the same text, in the same order, as a single heredoc would.  $arg1 and
# $arg2 are interpolated, so literal '$' and '@' in the assembler text are
# backslash-escaped.

# Startup hook: declares OPENSSL_cpuid_setup as external, places a call to it
# in the .init section, then switches to .text for the routines that follow.
print <<"___";
.extern		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.text

___

# OPENSSL_atomic_add: loads the 32-bit value at (arg1), then loops computing
# value+arg2 and attempting a lock-prefixed cmpxchg back to (arg1) until the
# exchange succeeds.  The new value is returned in %eax and sign-extended
# into %rax.  The lock prefix and cltq/cdqe are emitted as raw bytes.
print <<"___";
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc: executes rdtsc and merges the %edx:%eax pair into a single
# 64-bit result in %rax.
print <<"___";
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid: returns (%ecx << 32) | %edx from cpuid leaf 1 in %rax,
# with a few bits of %edx adjusted first.  %rbx is parked in %r8 on entry and
# restored before returning.
#   - Leaf 0 yields the maximum standard leaf (%r11d) and the vendor string,
#     which is compared against "GenuineIntel" (%r9d == 0 on match) and
#     "AuthenticAMD" (%r10d == 0 on match).
#   - AMD path: when extended leaf 0x80000008 is available, the core count is
#     taken from its %cl plus one; bit 28 of %edx is then cleared unless the
#     logical processor count from leaf 1 exceeds that core count.  Without
#     that extended leaf the code falls through to the shared path below.
#   - Shared path (.Lintel, also taken by vendors that are neither): when
#     leaf 4 is available, bits 14..25 of its %eax are kept in %r10d,
#     otherwise %r10d is -1.  For Intel only, bit 20 of %edx is set, and
#     bit 30 is set unless the family ID is 15.  Bit 28 is cleared when
#     %r10d is 0 or when the logical processor count from leaf 1 is not
#     above 1.
print <<"___";
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000008,%eax
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Ldone
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Ldone

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x00100000,%edx	# use reserved 20th bit to engage RC4_CHAR
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	je	.Lnotintel
	or	\$0x40000000,%edx	# use reserved bit to skip unrolled loop
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Ldone

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Ldone
	and	\$0xefffffff,%edx	# ~(1<<28)
.Ldone:
	shl	\$32,%rcx
	mov	%edx,%eax
	mov	%r8,%rbx
	or	%rcx,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse: stores zero over arg2 bytes starting at arg1.
#   - Fewer than 15 bytes: byte-at-a-time loop (.Little); a length of 0
#     returns immediately.
#   - 15 bytes or more (.Lot): single bytes are stored until the pointer is
#     8-byte aligned, then 8 bytes at a time while at least 8 remain
#     (.Laligned), and any leftover tail goes through the byte loop.
print <<"___";
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu, emitted when not targeting Win64: zeroes %xmm0-%xmm15 and
# the general-purpose registers %rcx, %rdx, %rsi, %rdi and %r8-%r11, then
# returns the address 8 bytes above the stack pointer in %rax.
# The heredoc is non-interpolating, so '@' is written without a backslash.
unless ($win64) {
    print <<'___';
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# OPENSSL_wipe_cpu, emitted when targeting Win64: zeroes %xmm0-%xmm5 and the
# general-purpose registers %rcx, %rdx and %r8-%r11, then returns the address
# 8 bytes above the stack pointer in %rax.
# The heredoc is non-interpolating, so '@' is written without a backslash.
if ($win64) {
    print <<'___';
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}

# STDOUT is a pipe into the translator script at this point.  Closing it
# flushes buffered output and waits for the translator to finish; close()
# returns false if the flush failed or the translator exited with a non-zero
# status (in which case $! is 0 and $? holds the wait status).  Report that
# instead of exiting successfully with truncated or missing output.
close STDOUT
    or die "error closing pipe to x86_64-xlate.pl: "
         . ($! ? "$!" : "wait status $?") . "\n";