#!/usr/local/bin/perl
#
# Generator script for the x86 "comba" big-number routines.  It emits four
# functions through the x86asm.pl helper layer: bn_mul_comba8, bn_mul_comba4,
# bn_sqr_comba8 and bn_sqr_comba4.
#
# $ARGV[0] and $0 are handed straight to asm_init(); their interpretation is
# up to x86asm.pl (presumably the output flavour and the script name --
# confirm against x86asm.pl).

# Directory part of the script path, including the trailing separator, so
# that x86asm.pl is searched for next to this script and in ../../perlasm.
# When $0 has no directory component the prefix is the empty string, rather
# than whatever a previous match happened to leave in $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# mul_add_c - emit one multiply-accumulate step of the comba multiply.
#
# The emitted code multiplies eax by edx (the caller must already have loaded
# them with a[$ai] and b[$bi]) and adds the product edx:eax into the
# three-register accumulator $c2:$c1:$c0.
#
# Positional arguments:
#   $ap, $bp       registers holding the a[] and b[] base pointers
#   $ai, $bi       indices of the words being multiplied (only used in the
#                  emitted comment)
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0  - more products follow for this result word:
#                       preload eax/edx with a[$na] / b[$nb]
#                   1  - last product of this result word: store $c0 to
#                       r[$i], then preload eax/edx for the next word
#                   >1 - last product of all: store $c0 to r[$i] and leave
#                       the r pointer in eax
#   $i             index of the result word being accumulated
#   $na, $nb       indices of the next a[] / b[] words to preload
#
# The loads are interleaved with the add/adc chain; mov does not modify the
# flags, so the carry chain is unaffected.
sub mul_add_c {
    my($ap,$ai,$bp,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    # eax and edx are always pre-loaded by the previous step (or by the
    # caller's prologue for the very first product).
    &mul("edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0;    # load next a
    &mov("eax",&wparam(0)) if $pos > 0;                 # load r[]
    &adc($c1,"edx");
    # edx is free once the high half has been added, so the next b word is
    # fetched here both mid-word and at the end of a non-final word.
    &mov("edx",&DWP(($nb)*4,$bp,"",0)) if $pos == 0 || $pos == 1;   # load next b
    &adc($c2,0);
    # $pos > 1 means this is the last product of the whole routine, so no
    # further operand is loaded after the store.
    &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;        # save r[]
    &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 1;    # load next a
}

# sqr_add_c - emit one accumulate step of the comba square for a product
# that is added once (the caller uses it for the diagonal term a[i]*a[i]).
#
# The emitted code multiplies eax by itself when $ai == $bi, otherwise eax by
# edx, and adds the product edx:eax into the accumulator $c2:$c1:$c0.
#
# Positional arguments:
#   $r             register holding the result pointer
#   $ap            register holding the a[] base pointer
#   $ai, $bi       indices of the words being multiplied
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0  - more products follow for this result word:
#                       preload eax with a[$na]
#                   1  - last product of this result word: store $c0 to
#                       r[$i], preload eax with a[$na] and, when the next
#                       product is not a square ($na != $nb), edx with a[$nb]
#                   >1 - last product of all: store $c0 to r[$i] only
#   $i             index of the result word being accumulated
#   $na, $nb       indices of the next two a[] words to preload
sub sqr_add_c {
    my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # eax (and edx, for an off-diagonal product) are always pre-loaded by
    # the previous step.
    &mul($ai == $bi ? "eax" : "edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0;    # load next a
    &adc($c1,"edx");
    # A square term only needs eax, so edx is fetched only when the next
    # product uses two different words.
    &mov("edx",&DWP(($nb)*4,$ap,"",0)) if ($pos == 1) && ($na != $nb);
    &adc($c2,0);
    # $pos > 1 means this is the last product of the whole routine.
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;           # save r[]
    &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 1;    # load next a
}

# sqr_add_c2 - emit one accumulate step of the comba square for a product
# that is added twice (the caller uses it for off-diagonal terms a[i]*a[j]).
#
# The emitted code multiplies eax by edx (or eax by itself when $ai == $bi),
# doubles the product edx:eax in place, and adds the doubled value into the
# accumulator $c2:$c1:$c0.  The bit shifted out by the doubling and the carry
# out of the addition are each folded into $c2 with their own "adc $c2,0".
#
# Positional arguments:
#   $r             register holding the result pointer
#   $ap            register holding the a[] base pointer
#   $ai, $bi       indices of the words being multiplied
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0  - more products follow for this result word
#                   1  - last product of this result word: also store $c0
#                       to r[$i]
#                   >1 - last product of all: store $c0 to r[$i] and do not
#                       preload anything
#                  For $pos 0 and 1, eax is preloaded with a[$na]; edx is
#                  preloaded with a[$nb] whenever $pos <= 1 and the next
#                  product is not a square ($na != $nb).
#   $i             index of the result word being accumulated
#   $na, $nb       indices of the next two a[] words to preload
sub sqr_add_c2 {
    my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # eax and edx are always pre-loaded by the previous step.
    &mul($ai == $bi ? "eax" : "edx");

    # Double the 64-bit product; the bit shifted out goes into $c2.
    &add("eax","eax");
    &adc("edx","edx");
    &adc($c2,0);

    # Accumulate the doubled product.  The mov instructions placed inside
    # the carry chain do not modify the flags.
    &add($c0,"eax");
    &adc($c1,"edx");
    &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0 || $pos == 1;   # load next a
    &adc($c2,0);
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;                       # save r[]
    &mov("edx",&DWP(($nb)*4,$ap,"",0)) if ($pos <= 1) && ($na != $nb);
}

# bn_mul_comba - emit a complete function named $name that multiplies two
# $num-word operands and writes 2*$num result words.
#
# The generated function reads its arguments with wparam(): argument 0 is the
# result pointer r, argument 1 is a, argument 2 is b.  The result is produced
# one word at a time: for result word $i every product a[ai]*b[bi] with
# ai+bi == $i is accumulated (via mul_add_c) into a three-register
# accumulator, the low register is stored to r[$i], and the register roles
# are rotated so the two carry words become the low words of the next sum.
#
# Arguments:
#   $name  name of the function to emit
#   $num   number of words in each operand
sub bn_mul_comba {
    my($name,$num)=@_;

    # Accumulator registers (rotated after every result word) and the
    # operand pointers.
    my($c0,$c1,$c2)=("ebx","ecx","ebp");
    my $ap="esi";
    my $bp="edi";

    # $as: a[] index of the first product of the current word.
    # $bs..$be: range of b[] indices used by the current word.
    my($as,$bs,$be)=(0,0,0);
    my $tot=$num+$num-1;        # number of words produced inside the loop
    my $i;                      # still needed after the loop

    &function_begin_B($name,"");

    # The wparam() calls stay interleaved with the pushes exactly as in the
    # original sequence; NOTE(review): wparam() offsets may depend on how
    # many pushes x86asm.pl has seen -- do not reorder.
    &push("esi");
    &mov($ap,&wparam(1));
    &push("edi");
    &mov($bp,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$ap,"",0));   # load the first word of a
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$bp,"",0));   # load the first word of b

    for ($i=0; $i<$tot; $i++) {
        my $ai=$as;
        my $bi=$bs;
        my $end=$be+1;
        # While $i < $num-1 each word starts one a[] index higher and uses
        # one more b[] index; afterwards the b[] start index advances.
        my $growing=($i < ($num-1)) ? 1 : 0;

        &comment("################## Calculate word $i");

        for (my $j=$bs; $j<$end; $j++) {
            my($v,$na,$nb);

            &xor($c2,$c2) if ($j == $bs);
            if (($j+1) == $end) {
                # Last product of this word: 1 normally, 2 for the very
                # last product of the function; the next operands are the
                # first pair of the following word.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+$growing;
                $nb=$bs+(1-$growing);
            }
            else {
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
            }
            #printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($ap,$ai,$bp,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v) {
                # mul_add_c has already emitted the store of r[$i].
                &comment("saved r[$i]");
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }

        if ($growing) {
            $as++;
            $be++;
        }
        else {
            $bs++;
        }
    }

    # $i == $tot here: store the remaining carry word as the top result
    # word.  eax still holds the r pointer loaded by the final mul_add_c.
    &comment("save r[$i]");
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}

# bn_sqr_comba - emit a complete function named $name that squares a
# $num-word operand and writes 2*$num result words.
#
# The generated function reads its arguments with wparam(): argument 0 is the
# result pointer r, argument 1 is a.  For result word $i the products
# a[ai]*a[bi] with ai+bi == $i are walked from the outside in, but only while
# ai >= bi: an off-diagonal product is added twice (sqr_add_c2) and the
# diagonal product, when there is one, is added once (sqr_add_c).  The low
# accumulator register is then stored to r[$i] and the register roles are
# rotated.
#
# Arguments:
#   $name  name of the function to emit
#   $num   number of words in the operand
sub bn_sqr_comba {
    my($name,$num)=@_;

    # Accumulator registers (rotated after every result word) and the
    # operand/result pointers.
    my($c0,$c1,$c2)=("ebx","ecx","ebp");
    my $ap="esi";
    my $r="edi";

    # $as: a[] index of the first (outermost) product of the current word.
    # $bs..$be: range of second-operand indices belonging to the word.
    my($as,$bs,$be)=(0,0,0);
    my $tot=$num+$num-1;        # number of words produced inside the loop
    my $i;                      # still needed after the loop

    &function_begin_B($name,"");

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($ap,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$ap,"",0));   # load the first word

    for ($i=0; $i<$tot; $i++) {
        my $ai=$as;
        my $bi=$bs;
        my $end=$be+1;
        my $growing=($i < ($num-1)) ? 1 : 0;

        &comment("############### Calculate word $i");
        for (my $j=$bs; $j<$end; $j++) {
            my($v,$na,$nb);

            &xor($c2,$c2) if ($j == $bs);
            if (($ai-1) < ($bi+1)) {
                # The next pair would cross the diagonal, so this is the
                # last product needed for the word: 1 normally, 2 for the
                # very last product of the function.  The next operands
                # are the first pair of the following word.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+$growing;
                $nb=$bs+(1-$growing);
            }
            else {
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
            }

            if ($ai == $bi) {
                &sqr_add_c($r,$ap,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else {
                &sqr_add_c2($r,$ap,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }

            if ($v) {
                # The helper has already emitted the store of r[$i].
                &comment("saved r[$i]");
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }

        if ($growing) {
            $as++;
            $be++;
        }
        else {
            $bs++;
        }
    }

    # $i == $tot here: store the remaining carry word as the top result word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}