Alexandre Savard | 1b09e31 | 2012-08-07 20:33:29 -0400 | [diff] [blame] | 1 | #!/usr/local/bin/perl |
| 2 | # x86 assembler |
| 3 | |
# mul_add_c - emit one multiply-accumulate step of a comba multiplication.
#
# Positional arguments:
#   $a, $b         base registers handed to &DWP when addressing a[] and b[]
#   $ai, $bi       indices of the words being multiplied (only used in the
#                  emitted comment; the operands themselves are expected to
#                  be in eax and edx already)
#   $c0, $c1, $c2  registers of the three-word accumulator, low to high
#   $pos           0    - prefetch a[$na] into eax and b[$nb] into edx
#                  1    - store $c0 to r[$i], then prefetch as above
#                  >1   - store $c0 to r[$i] only (nothing is prefetched)
#                  <0   - neither store nor prefetch
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the next a[] / b[] words to preload
#
# Helper calls keep the explicit "&" form used by the rest of this file.
sub mul_add_c
{
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    # x86 MUL: edx:eax <- eax * edx.  Fold the low half in first.
    &mul("edx");
    &add($c0,"eax");

    # eax has been consumed, so it can be reused right away: either for
    # the next a[] word or for the r[] pointer (first function argument).
    if ($pos == 0)
    {
        &mov("eax",&DWP($na*4,$a,"",0));
    }
    elsif ($pos > 0)
    {
        &mov("eax",&wparam(0));
    }

    # Carry the high half into the middle accumulator word.
    &adc($c1,"edx");

    # edx has been consumed as well; preload the next b[] word unless this
    # is the very last product ($pos > 1) or nothing is to be loaded.
    if ($pos == 0 || $pos == 1)
    {
        &mov("edx",&DWP($nb*4,$b,"",0));
    }

    # Propagate the final carry into the top accumulator word.
    &adc($c2,0);

    if ($pos > 0)
    {
        # eax currently holds the r[] pointer: store the finished word.
        &mov(&DWP($i*4,"eax","",0),$c0);

        # Only now can eax be refilled with the next a[] word.
        &mov("eax",&DWP($na*4,$a,"",0)) if $pos == 1;
    }
}
| 31 | |
# sqr_add_c - emit one "add a product once" step of a comba squaring.
#
# Positional arguments:
#   $r             base register handed to &DWP when storing r[$i]
#   $a             base register handed to &DWP when loading a[]
#   $ai, $bi       indices of the two words being multiplied; when they are
#                  equal the emitted MUL squares eax, otherwise it uses edx
#   $c0, $c1, $c2  registers of the three-word accumulator, low to high
#   $pos           0  - reload eax with a[$na] right after the low add
#                  1  - store $c0 to r[$i], reload eax with a[$na], and
#                       reload edx with a[$nb] when $na != $nb
#                  >1 - store $c0 to r[$i] only
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the next operand words to preload
#
# The operands are expected to be in eax (and edx) on entry.
sub sqr_add_c
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # A genuine square has both operands in eax; a mixed product takes its
    # second operand from edx.
    &mul($ai == $bi ? "eax" : "edx");

    &add($c0,"eax");
    if ($pos == 0)
    {
        # eax is free again: fetch the next a[] word early.
        &mov("eax",&DWP($na*4,$a,"",0));
    }

    &adc($c1,"edx");
    if ($pos == 1 && $na != $nb)
    {
        # The next product is a mixed one, so edx needs its own operand.
        &mov("edx",&DWP($nb*4,$a,"",0));
    }

    &adc($c2,0);

    if ($pos > 0)
    {
        # The accumulator's low word is final: write it to r[$i].
        &mov(&DWP($i*4,$r,"",0),$c0);
    }
    if ($pos == 1)
    {
        &mov("eax",&DWP($na*4,$a,"",0));
    }
}
| 60 | |
# sqr_add_c2 - emit one "add a product twice" step of a comba squaring.
#
# The 64-bit product in edx:eax is doubled before it is added to the
# accumulator; the bit shifted out of edx by the doubling is added straight
# into $c2.
#
# Positional arguments:
#   $r             base register handed to &DWP when storing r[$i]
#   $a             base register handed to &DWP when loading a[]
#   $ai, $bi       indices of the two words being multiplied; when they are
#                  equal the emitted MUL squares eax, otherwise it uses edx
#   $c0, $c1, $c2  registers of the three-word accumulator, low to high
#   $pos           0 or 1 - reload eax with a[$na]
#                  > 0    - store $c0 to r[$i]
#                  <= 1   - reload edx with a[$nb] when $na != $nb
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the next operand words to preload
#
# The operands are expected to be in eax and edx on entry.
sub sqr_add_c2
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    &mul($ai == $bi ? "eax" : "edx");

    # Double edx:eax in place; the carry out of the high half is bit 64 of
    # the doubled product and therefore belongs in the top word.
    &add("eax","eax");
    &adc("edx","edx");
    &adc($c2,0);

    # Accumulate the doubled product.
    &add($c0,"eax");
    &adc($c1,"edx");

    # MOV leaves the carry flag alone, so the reload can sit between the
    # two ADCs without disturbing the carry chain.
    if ($pos == 0 || $pos == 1)
    {
        &mov("eax",&DWP($na*4,$a,"",0));
    }
    &adc($c2,0);

    if ($pos > 0)
    {
        # The accumulator's low word is final: write it to r[$i].
        &mov(&DWP($i*4,$r,"",0),$c0);
    }
    if ($pos <= 1 && $na != $nb)
    {
        # The next product is a mixed one, so edx needs its own operand.
        &mov("edx",&DWP($nb*4,$a,"",0));
    }
}
| 92 | |
# bn_mul_comba - emit a fully unrolled comba multiplication routine.
#
#   $name  name of the generated function
#   $num   number of words in each operand; the generated code writes
#          2*$num result words
#
# The generated function takes r[] as argument 0, a[] as argument 1 and
# b[] as argument 2.  It walks the 2*$num-1 product columns from least to
# most significant and calls mul_add_c once per partial product
# a[$ai]*b[$bi] with $ai+$bi == column index.  The three accumulator
# registers rotate after every column, so the word that has just been
# stored becomes the (re-zeroed) top word of the next column.
#
# Every scratch variable, including $j, $v, $na and $nb, is localized so
# that generating a routine does not clobber package globals of the same
# name in the caller.
#
# Helper calls keep the explicit "&" form: xor, push and pop are Perl
# builtins and would not reach the assembler helpers without it.
sub bn_mul_comba
{
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$j,$as,$bs,$be,$ai,$bi);
    local($tot,$end,$v,$na,$nb);

    &function_begin_B($name,"");

    # Register allocation: three accumulator words plus the two operand
    # pointers.  eax/edx carry the multiplicands and the product.
    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    # $as is the a[] index a column starts at, $bs..$be is the range of
    # b[] indices the column covers.
    $as=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;		# number of product columns

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));	# preload a[0]
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));	# preload b[0]

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++)
        {
            # The top accumulator word is cleared once per column.
            &xor($c2,$c2) if ($j == $bs);

            if (($j+1) == $end)
            {
                # Last product of this column: mul_add_c stores r[$i]
                # (1), and on the very last column it also skips the
                # operand prefetch (2).  The next operands are the first
                # pair of the following column.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                # More products follow in this column: step down a[]
                # and up b[].
                $v=0;
                $na=($ai-1);
                $nb=($bi+1);
            }
            #printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v)
            {
                &comment("saved r[$i]");
                # Shift the accumulator down by one word.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }

        # Columns below index $num-1 grow by one product each (start one
        # word further into a[], end one word further into b[]); from
        # there on they shrink from the b[] start.
        if ($i < ($num-1))
        {
            $as++;
            $be++;
        }
        else
        {
            $bs++;
        }
    }

    # Store the top result word.  eax still holds the r[] pointer that the
    # final mul_add_c call loaded, since that call does not reload eax.
    &comment("save r[$i]");
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
| 183 | |
# bn_sqr_comba - emit a fully unrolled comba squaring routine.
#
#   $name  name of the generated function
#   $num   number of words in the operand; the generated code writes
#          2*$num result words
#
# The generated function takes r[] as argument 0 and a[] as argument 1.
# It walks the 2*$num-1 product columns from least to most significant.
# Within a column the index pair ($ai,$bi) moves inwards from the ends;
# pairs with $ai != $bi go through sqr_add_c2, which adds the product
# twice, and the pair with $ai == $bi goes through sqr_add_c.  A column
# ends as soon as the next pair would have $ai-1 < $bi+1.
#
# Every scratch variable, including $j, $v, $na and $nb, is localized so
# that generating a routine does not clobber package globals of the same
# name in the caller, and the register variables are no longer seeded
# from the argument list by accident.
#
# Helper calls keep the explicit "&" form: xor, push and pop are Perl
# builtins and would not reach the assembler helpers without it.
sub bn_sqr_comba
{
    local($name,$num)=@_;
    local($r,$a,$c0,$c1,$c2);
    local($i,$j,$as,$bs,$be,$ai,$bi);
    local($tot,$end,$v,$na,$nb);

    &function_begin_B($name,"");

    # Register allocation: three accumulator words, the source pointer
    # and the result pointer.  eax/edx carry the multiplicands/product.
    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));	# preload a[0]

    # $as is the higher index a column starts at, $bs the lower one;
    # $be bounds the inner loop counter.
    $as=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;		# number of product columns

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++)
        {
            # The top accumulator word is cleared once per column.
            &xor($c2,$c2) if ($j == $bs);

            if (($ai-1) < ($bi+1))
            {
                # The indices are about to cross, so this is the last
                # product of the column: r[$i] gets stored (1), and on
                # the very last column no operands are prefetched (2).
                # The next operands are the first pair of the following
                # column.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                # More products follow: move both indices inwards.
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
            }

            if ($ai == $bi)
            {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else
            {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }

            if ($v)
            {
                &comment("saved r[$i]");
                # Shift the accumulator down by one word and leave the
                # column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }

        # Columns below index $num-1 start one word further up each time;
        # from there on the lower start index advances instead.
        if ($i < ($num-1))
        {
            $as++;
            $be++;
        }
        else
        {
            $bs++;
        }
    }

    # Store the top result word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
| 276 | |
# A file pulled in with require must finish by evaluating to a true value.
| 277 | 1; |