blob: f82c5e18141ea354c76d4759e51cb08a6f7c9ff1 [file] [log] [blame]
Alexandre Savard1b09e312012-08-07 20:33:29 -04001#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
# Needs more work: key setup, page boundaries, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
18
19# February 2010
20#
# Rescheduling instructions to favour Power6 pipeline gives 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour. It selects the
# pointer size ($SIZE_T) and the $STU/$POP/$PUSH mnemonics that the
# prologue/epilogue templates below interpolate. "64" is tested before "32".
$flavour = shift;

if    ($flavour =~ /64/) { ($SIZE_T,$STU,$POP,$PUSH) = (8,"stdu","ld","std");  }
elsif ($flavour =~ /32/) { ($SIZE_T,$STU,$POP,$PUSH) = (4,"stwu","lwz","stw"); }
else                     { die "nonsense $flavour"; }
45
# Locate the ppc-xlate.pl translator: first in the directory this script
# lives in, then in ../../perlasm relative to that directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Reopen STDOUT as a pipe into the translator, so everything printed later
# is post-processed by it. The next command-line argument is appended to
# the translator's command line.
#
# The failure check must use low-precedence "or": with "||" the die was
# bound to the (always true) command string, so a failed open was never
# reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
52
53$FRAME=32*$SIZE_T;
54
# Append one ".long" directive per argument to $code, with the value
# formatted as 8 hex digits and emitted twice on the line
# ("\t.long\t0xXXXXXXXX,0xXXXXXXXX\n"). Processing stops at the first
# undefined argument.
#
# No prototype is declared: the former empty "()" prototype stated that the
# sub takes no arguments, which only worked because callers use the
# "&_data_word(...)" form that bypasses prototype checking.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
}
59
# Symbolic register names interpolated into the assembly templates.

# stack pointer and TOC register
($sp,$toc)=("r1","r2");

# AES_encrypt/AES_decrypt load the block from $inp, store the result to
# $out and read round keys through $key
($inp,$out,$key)=("r3","r4","r5");

# table base pointers; $Tbl0 shares r3 with $inp and $Tbl3 shares r2
# with $toc
($Tbl0,$Tbl1,$Tbl2,$Tbl3)=("r3","r6","r7","r2");

# the four 32-bit words of the block being processed
($s0,$s1,$s2,$s3)=("r8","r9","r10","r11");

# the four round-key words loaded from $key
($t0,$t1,$t2,$t3)=("r12","r13","r14","r15");

# sixteen scratch registers, r16 through r31
($acc00,$acc01,$acc02,$acc03)=("r16","r17","r18","r19");
($acc04,$acc05,$acc06,$acc07)=("r20","r21","r22","r23");
($acc08,$acc09,$acc10,$acc11)=("r24","r25","r26","r27");
($acc12,$acc13,$acc14,$acc15)=("r28","r29","r30","r31");

# stay away from TLS pointer
if ($SIZE_T==8) {
    # 64-bit build: move $t1 off r13 and onto r0
    die if ($t1 ne "r13");
    $t1="r0";
} else {
    # 32-bit build: move $Tbl3 off r2 onto the old $t0, and $t0 onto r0
    die if ($Tbl3 ne "r2");
    $Tbl3=$t0;
    $t0="r0";
}

# the compact routines keep their 0x80808080 / 0x1b1b1b1b masks in two of
# the table registers
$mask80=$Tbl2;
$mask1b=$Tbl3;
106
107$code.=<<___;
108.machine "any"
109.text
110
111.align 7
112LAES_Te:
113 mflr r0
114 bcl 20,31,\$+4
115 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
116 addi $Tbl0,$Tbl0,`128-8`
117 mtlr r0
118 blr
Alexandre Savard75410672012-08-08 09:50:01 -0400119 .space `32-24`
Alexandre Savard1b09e312012-08-07 20:33:29 -0400120LAES_Td:
121 mflr r0
122 bcl 20,31,\$+4
123 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
Alexandre Savard75410672012-08-08 09:50:01 -0400124 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
Alexandre Savard1b09e312012-08-07 20:33:29 -0400125 mtlr r0
126 blr
Alexandre Savard75410672012-08-08 09:50:01 -0400127 .space `128-32-24`
Alexandre Savard1b09e312012-08-07 20:33:29 -0400128___
129&_data_word(
130 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
131 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
132 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
133 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
134 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
135 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
136 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
137 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
138 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
139 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
140 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
141 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
142 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
143 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
144 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
145 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
146 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
147 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
148 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
149 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
150 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
151 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
152 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
153 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
154 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
155 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
156 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
157 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
158 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
159 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
160 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
161 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
162 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
163 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
164 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
165 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
166 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
167 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
168 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
169 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
170 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
171 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
172 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
173 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
174 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
175 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
176 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
177 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
178 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
179 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
180 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
181 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
182 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
183 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
184 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
185 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
186 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
187 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
188 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
189 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
190 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
191 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
192 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
193 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
194$code.=<<___;
195.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
196.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
197.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
198.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
199.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
200.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
201.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
202.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
203.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
204.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
205.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
206.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
207.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
208.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
209.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
210.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
211.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
212.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
213.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
214.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
215.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
216.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
217.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
218.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
219.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
220.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
221.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
222.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
223.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
224.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
225.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
226.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
227___
228&_data_word(
229 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
230 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
231 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
232 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
233 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
234 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
235 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
236 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
237 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
238 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
239 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
240 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
241 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
242 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
243 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
244 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
245 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
246 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
247 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
248 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
249 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
250 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
251 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
252 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
253 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
254 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
255 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
256 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
257 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
258 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
259 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
260 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
261 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
262 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
263 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
264 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
265 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
266 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
267 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
268 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
269 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
270 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
271 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
272 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
273 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
274 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
275 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
276 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
277 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
278 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
279 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
280 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
281 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
282 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
283 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
284 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
285 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
286 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
287 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
288 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
289 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
290 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
291 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
292 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
293$code.=<<___;
294.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
295.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
296.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
297.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
298.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
299.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
300.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
301.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
302.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
303.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
304.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
305.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
306.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
307.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
308.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
309.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
310.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
311.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
312.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
313.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
314.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
315.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
316.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
317.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
318.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
319.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
320.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
321.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
322.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
323.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
324.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
325.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
326
327
328.globl .AES_encrypt
329.align 7
330.AES_encrypt:
Alexandre Savard1b09e312012-08-07 20:33:29 -0400331 mflr r0
Alexandre Savard75410672012-08-08 09:50:01 -0400332 $STU $sp,-$FRAME($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400333
Alexandre Savard75410672012-08-08 09:50:01 -0400334 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400335 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
336 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
337 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
338 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
339 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
340 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
341 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
342 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
343 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
344 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
345 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
346 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
347 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
348 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
349 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
350 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
351 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
352 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
353 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
354 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400355
Alexandre Savard1b09e312012-08-07 20:33:29 -0400356 lwz $s0,0($inp)
357 lwz $s1,4($inp)
358 lwz $s2,8($inp)
359 lwz $s3,12($inp)
360 bl LAES_Te
361 bl Lppc_AES_encrypt_compact
362 stw $s0,0($out)
363 stw $s1,4($out)
364 stw $s2,8($out)
365 stw $s3,12($out)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400366
Alexandre Savard75410672012-08-08 09:50:01 -0400367 $POP r0,`$FRAME-$SIZE_T*21`($sp)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400368 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
369 $POP r13,`$FRAME-$SIZE_T*19`($sp)
370 $POP r14,`$FRAME-$SIZE_T*18`($sp)
371 $POP r15,`$FRAME-$SIZE_T*17`($sp)
372 $POP r16,`$FRAME-$SIZE_T*16`($sp)
373 $POP r17,`$FRAME-$SIZE_T*15`($sp)
374 $POP r18,`$FRAME-$SIZE_T*14`($sp)
375 $POP r19,`$FRAME-$SIZE_T*13`($sp)
376 $POP r20,`$FRAME-$SIZE_T*12`($sp)
377 $POP r21,`$FRAME-$SIZE_T*11`($sp)
378 $POP r22,`$FRAME-$SIZE_T*10`($sp)
379 $POP r23,`$FRAME-$SIZE_T*9`($sp)
380 $POP r24,`$FRAME-$SIZE_T*8`($sp)
381 $POP r25,`$FRAME-$SIZE_T*7`($sp)
382 $POP r26,`$FRAME-$SIZE_T*6`($sp)
383 $POP r27,`$FRAME-$SIZE_T*5`($sp)
384 $POP r28,`$FRAME-$SIZE_T*4`($sp)
385 $POP r29,`$FRAME-$SIZE_T*3`($sp)
386 $POP r30,`$FRAME-$SIZE_T*2`($sp)
387 $POP r31,`$FRAME-$SIZE_T*1`($sp)
388 mtlr r0
389 addi $sp,$sp,$FRAME
390 blr
Alexandre Savard1b09e312012-08-07 20:33:29 -0400391
392.align 5
393Lppc_AES_encrypt:
394 lwz $acc00,240($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400395 lwz $t0,0($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400396 lwz $t1,4($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400397 lwz $t2,8($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400398 lwz $t3,12($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400399 addi $Tbl1,$Tbl0,3
400 addi $Tbl2,$Tbl0,2
401 addi $Tbl3,$Tbl0,1
402 addi $acc00,$acc00,-1
Alexandre Savard1b09e312012-08-07 20:33:29 -0400403 addi $key,$key,16
404 xor $s0,$s0,$t0
405 xor $s1,$s1,$t1
406 xor $s2,$s2,$t2
407 xor $s3,$s3,$t3
408 mtctr $acc00
409.align 4
410Lenc_loop:
411 rlwinm $acc00,$s0,`32-24+3`,21,28
412 rlwinm $acc01,$s1,`32-24+3`,21,28
413 rlwinm $acc02,$s2,`32-24+3`,21,28
414 rlwinm $acc03,$s3,`32-24+3`,21,28
415 lwz $t0,0($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400416 lwz $t1,4($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400417 rlwinm $acc04,$s1,`32-16+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400418 rlwinm $acc05,$s2,`32-16+3`,21,28
419 lwz $t2,8($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400420 lwz $t3,12($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400421 rlwinm $acc06,$s3,`32-16+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400422 rlwinm $acc07,$s0,`32-16+3`,21,28
423 lwzx $acc00,$Tbl0,$acc00
Alexandre Savard1b09e312012-08-07 20:33:29 -0400424 lwzx $acc01,$Tbl0,$acc01
Alexandre Savard75410672012-08-08 09:50:01 -0400425 rlwinm $acc08,$s2,`32-8+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400426 rlwinm $acc09,$s3,`32-8+3`,21,28
427 lwzx $acc02,$Tbl0,$acc02
Alexandre Savard1b09e312012-08-07 20:33:29 -0400428 lwzx $acc03,$Tbl0,$acc03
Alexandre Savard75410672012-08-08 09:50:01 -0400429 rlwinm $acc10,$s0,`32-8+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400430 rlwinm $acc11,$s1,`32-8+3`,21,28
431 lwzx $acc04,$Tbl1,$acc04
Alexandre Savard1b09e312012-08-07 20:33:29 -0400432 lwzx $acc05,$Tbl1,$acc05
Alexandre Savard75410672012-08-08 09:50:01 -0400433 rlwinm $acc12,$s3,`0+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400434 rlwinm $acc13,$s0,`0+3`,21,28
435 lwzx $acc06,$Tbl1,$acc06
Alexandre Savard1b09e312012-08-07 20:33:29 -0400436 lwzx $acc07,$Tbl1,$acc07
Alexandre Savard75410672012-08-08 09:50:01 -0400437 rlwinm $acc14,$s1,`0+3`,21,28
Alexandre Savard1b09e312012-08-07 20:33:29 -0400438 rlwinm $acc15,$s2,`0+3`,21,28
439 lwzx $acc08,$Tbl2,$acc08
Alexandre Savard1b09e312012-08-07 20:33:29 -0400440 lwzx $acc09,$Tbl2,$acc09
Alexandre Savard75410672012-08-08 09:50:01 -0400441 xor $t0,$t0,$acc00
Alexandre Savard1b09e312012-08-07 20:33:29 -0400442 xor $t1,$t1,$acc01
443 lwzx $acc10,$Tbl2,$acc10
Alexandre Savard1b09e312012-08-07 20:33:29 -0400444 lwzx $acc11,$Tbl2,$acc11
Alexandre Savard75410672012-08-08 09:50:01 -0400445 xor $t2,$t2,$acc02
Alexandre Savard1b09e312012-08-07 20:33:29 -0400446 xor $t3,$t3,$acc03
447 lwzx $acc12,$Tbl3,$acc12
Alexandre Savard1b09e312012-08-07 20:33:29 -0400448 lwzx $acc13,$Tbl3,$acc13
Alexandre Savard75410672012-08-08 09:50:01 -0400449 xor $t0,$t0,$acc04
Alexandre Savard1b09e312012-08-07 20:33:29 -0400450 xor $t1,$t1,$acc05
451 lwzx $acc14,$Tbl3,$acc14
Alexandre Savard1b09e312012-08-07 20:33:29 -0400452 lwzx $acc15,$Tbl3,$acc15
Alexandre Savard75410672012-08-08 09:50:01 -0400453 xor $t2,$t2,$acc06
Alexandre Savard1b09e312012-08-07 20:33:29 -0400454 xor $t3,$t3,$acc07
455 xor $t0,$t0,$acc08
456 xor $t1,$t1,$acc09
457 xor $t2,$t2,$acc10
458 xor $t3,$t3,$acc11
459 xor $s0,$t0,$acc12
460 xor $s1,$t1,$acc13
461 xor $s2,$t2,$acc14
462 xor $s3,$t3,$acc15
463 addi $key,$key,16
464 bdnz- Lenc_loop
465
466 addi $Tbl2,$Tbl0,2048
467 nop
468 lwz $t0,0($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400469 lwz $t1,4($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400470 rlwinm $acc00,$s0,`32-24`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400471 rlwinm $acc01,$s1,`32-24`,24,31
472 lwz $t2,8($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400473 lwz $t3,12($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400474 rlwinm $acc02,$s2,`32-24`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400475 rlwinm $acc03,$s3,`32-24`,24,31
476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
Alexandre Savard1b09e312012-08-07 20:33:29 -0400477 lwz $acc09,`2048+32`($Tbl0)
Alexandre Savard75410672012-08-08 09:50:01 -0400478 rlwinm $acc04,$s1,`32-16`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400479 rlwinm $acc05,$s2,`32-16`,24,31
480 lwz $acc10,`2048+64`($Tbl0)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400481 lwz $acc11,`2048+96`($Tbl0)
Alexandre Savard75410672012-08-08 09:50:01 -0400482 rlwinm $acc06,$s3,`32-16`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400483 rlwinm $acc07,$s0,`32-16`,24,31
484 lwz $acc12,`2048+128`($Tbl0)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400485 lwz $acc13,`2048+160`($Tbl0)
Alexandre Savard75410672012-08-08 09:50:01 -0400486 rlwinm $acc08,$s2,`32-8`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400487 rlwinm $acc09,$s3,`32-8`,24,31
488 lwz $acc14,`2048+192`($Tbl0)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400489 lwz $acc15,`2048+224`($Tbl0)
Alexandre Savard75410672012-08-08 09:50:01 -0400490 rlwinm $acc10,$s0,`32-8`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400491 rlwinm $acc11,$s1,`32-8`,24,31
492 lbzx $acc00,$Tbl2,$acc00
Alexandre Savard1b09e312012-08-07 20:33:29 -0400493 lbzx $acc01,$Tbl2,$acc01
Alexandre Savard75410672012-08-08 09:50:01 -0400494 rlwinm $acc12,$s3,`0`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400495 rlwinm $acc13,$s0,`0`,24,31
496 lbzx $acc02,$Tbl2,$acc02
Alexandre Savard1b09e312012-08-07 20:33:29 -0400497 lbzx $acc03,$Tbl2,$acc03
Alexandre Savard75410672012-08-08 09:50:01 -0400498 rlwinm $acc14,$s1,`0`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400499 rlwinm $acc15,$s2,`0`,24,31
500 lbzx $acc04,$Tbl2,$acc04
Alexandre Savard1b09e312012-08-07 20:33:29 -0400501 lbzx $acc05,$Tbl2,$acc05
Alexandre Savard75410672012-08-08 09:50:01 -0400502 rlwinm $s0,$acc00,24,0,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400503 rlwinm $s1,$acc01,24,0,7
504 lbzx $acc06,$Tbl2,$acc06
Alexandre Savard1b09e312012-08-07 20:33:29 -0400505 lbzx $acc07,$Tbl2,$acc07
Alexandre Savard75410672012-08-08 09:50:01 -0400506 rlwinm $s2,$acc02,24,0,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400507 rlwinm $s3,$acc03,24,0,7
508 lbzx $acc08,$Tbl2,$acc08
Alexandre Savard1b09e312012-08-07 20:33:29 -0400509 lbzx $acc09,$Tbl2,$acc09
Alexandre Savard75410672012-08-08 09:50:01 -0400510 rlwimi $s0,$acc04,16,8,15
Alexandre Savard1b09e312012-08-07 20:33:29 -0400511 rlwimi $s1,$acc05,16,8,15
512 lbzx $acc10,$Tbl2,$acc10
Alexandre Savard1b09e312012-08-07 20:33:29 -0400513 lbzx $acc11,$Tbl2,$acc11
Alexandre Savard75410672012-08-08 09:50:01 -0400514 rlwimi $s2,$acc06,16,8,15
Alexandre Savard1b09e312012-08-07 20:33:29 -0400515 rlwimi $s3,$acc07,16,8,15
516 lbzx $acc12,$Tbl2,$acc12
Alexandre Savard1b09e312012-08-07 20:33:29 -0400517 lbzx $acc13,$Tbl2,$acc13
Alexandre Savard75410672012-08-08 09:50:01 -0400518 rlwimi $s0,$acc08,8,16,23
Alexandre Savard1b09e312012-08-07 20:33:29 -0400519 rlwimi $s1,$acc09,8,16,23
520 lbzx $acc14,$Tbl2,$acc14
Alexandre Savard1b09e312012-08-07 20:33:29 -0400521 lbzx $acc15,$Tbl2,$acc15
Alexandre Savard75410672012-08-08 09:50:01 -0400522 rlwimi $s2,$acc10,8,16,23
Alexandre Savard1b09e312012-08-07 20:33:29 -0400523 rlwimi $s3,$acc11,8,16,23
524 or $s0,$s0,$acc12
525 or $s1,$s1,$acc13
526 or $s2,$s2,$acc14
527 or $s3,$s3,$acc15
528 xor $s0,$s0,$t0
529 xor $s1,$s1,$t1
530 xor $s2,$s2,$t2
531 xor $s3,$s3,$t3
532 blr
Alexandre Savard1b09e312012-08-07 20:33:29 -0400533
534.align 4
535Lppc_AES_encrypt_compact:
536 lwz $acc00,240($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400537 lwz $t0,0($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400538 lwz $t1,4($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400539 lwz $t2,8($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400540 lwz $t3,12($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400541 addi $Tbl1,$Tbl0,2048
542 lis $mask80,0x8080
543 lis $mask1b,0x1b1b
Alexandre Savard1b09e312012-08-07 20:33:29 -0400544 addi $key,$key,16
Alexandre Savard75410672012-08-08 09:50:01 -0400545 ori $mask80,$mask80,0x8080
546 ori $mask1b,$mask1b,0x1b1b
Alexandre Savard1b09e312012-08-07 20:33:29 -0400547 mtctr $acc00
548.align 4
549Lenc_compact_loop:
550 xor $s0,$s0,$t0
551 xor $s1,$s1,$t1
Alexandre Savard1b09e312012-08-07 20:33:29 -0400552 xor $s2,$s2,$t2
Alexandre Savard1b09e312012-08-07 20:33:29 -0400553 xor $s3,$s3,$t3
Alexandre Savard75410672012-08-08 09:50:01 -0400554 rlwinm $acc00,$s0,`32-24`,24,31
555 rlwinm $acc01,$s1,`32-24`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400556 rlwinm $acc02,$s2,`32-24`,24,31
557 rlwinm $acc03,$s3,`32-24`,24,31
558 rlwinm $acc04,$s1,`32-16`,24,31
559 rlwinm $acc05,$s2,`32-16`,24,31
560 rlwinm $acc06,$s3,`32-16`,24,31
561 rlwinm $acc07,$s0,`32-16`,24,31
562 lbzx $acc00,$Tbl1,$acc00
Alexandre Savard1b09e312012-08-07 20:33:29 -0400563 lbzx $acc01,$Tbl1,$acc01
Alexandre Savard75410672012-08-08 09:50:01 -0400564 rlwinm $acc08,$s2,`32-8`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400565 rlwinm $acc09,$s3,`32-8`,24,31
566 lbzx $acc02,$Tbl1,$acc02
Alexandre Savard1b09e312012-08-07 20:33:29 -0400567 lbzx $acc03,$Tbl1,$acc03
Alexandre Savard75410672012-08-08 09:50:01 -0400568 rlwinm $acc10,$s0,`32-8`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400569 rlwinm $acc11,$s1,`32-8`,24,31
570 lbzx $acc04,$Tbl1,$acc04
Alexandre Savard1b09e312012-08-07 20:33:29 -0400571 lbzx $acc05,$Tbl1,$acc05
Alexandre Savard75410672012-08-08 09:50:01 -0400572 rlwinm $acc12,$s3,`0`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400573 rlwinm $acc13,$s0,`0`,24,31
574 lbzx $acc06,$Tbl1,$acc06
Alexandre Savard1b09e312012-08-07 20:33:29 -0400575 lbzx $acc07,$Tbl1,$acc07
Alexandre Savard75410672012-08-08 09:50:01 -0400576 rlwinm $acc14,$s1,`0`,24,31
Alexandre Savard1b09e312012-08-07 20:33:29 -0400577 rlwinm $acc15,$s2,`0`,24,31
578 lbzx $acc08,$Tbl1,$acc08
Alexandre Savard1b09e312012-08-07 20:33:29 -0400579 lbzx $acc09,$Tbl1,$acc09
Alexandre Savard75410672012-08-08 09:50:01 -0400580 rlwinm $s0,$acc00,24,0,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400581 rlwinm $s1,$acc01,24,0,7
582 lbzx $acc10,$Tbl1,$acc10
Alexandre Savard1b09e312012-08-07 20:33:29 -0400583 lbzx $acc11,$Tbl1,$acc11
Alexandre Savard75410672012-08-08 09:50:01 -0400584 rlwinm $s2,$acc02,24,0,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400585 rlwinm $s3,$acc03,24,0,7
586 lbzx $acc12,$Tbl1,$acc12
Alexandre Savard1b09e312012-08-07 20:33:29 -0400587 lbzx $acc13,$Tbl1,$acc13
Alexandre Savard75410672012-08-08 09:50:01 -0400588 rlwimi $s0,$acc04,16,8,15
Alexandre Savard1b09e312012-08-07 20:33:29 -0400589 rlwimi $s1,$acc05,16,8,15
590 lbzx $acc14,$Tbl1,$acc14
Alexandre Savard1b09e312012-08-07 20:33:29 -0400591 lbzx $acc15,$Tbl1,$acc15
Alexandre Savard75410672012-08-08 09:50:01 -0400592 rlwimi $s2,$acc06,16,8,15
Alexandre Savard1b09e312012-08-07 20:33:29 -0400593 rlwimi $s3,$acc07,16,8,15
594 rlwimi $s0,$acc08,8,16,23
595 rlwimi $s1,$acc09,8,16,23
596 rlwimi $s2,$acc10,8,16,23
597 rlwimi $s3,$acc11,8,16,23
598 lwz $t0,0($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400599 lwz $t1,4($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400600 or $s0,$s0,$acc12
Alexandre Savard1b09e312012-08-07 20:33:29 -0400601 or $s1,$s1,$acc13
602 lwz $t2,8($key)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400603 lwz $t3,12($key)
Alexandre Savard75410672012-08-08 09:50:01 -0400604 or $s2,$s2,$acc14
Alexandre Savard1b09e312012-08-07 20:33:29 -0400605 or $s3,$s3,$acc15
606
607 addi $key,$key,16
608 bdz Lenc_compact_done
609
610 and $acc00,$s0,$mask80 # r1=r0&0x80808080
611 and $acc01,$s1,$mask80
612 and $acc02,$s2,$mask80
613 and $acc03,$s3,$mask80
614 srwi $acc04,$acc00,7 # r1>>7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400615 srwi $acc05,$acc01,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400616 srwi $acc06,$acc02,7
Alexandre Savard1b09e312012-08-07 20:33:29 -0400617 srwi $acc07,$acc03,7
Alexandre Savard75410672012-08-08 09:50:01 -0400618 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
619 andc $acc09,$s1,$mask80
620 andc $acc10,$s2,$mask80
Alexandre Savard1b09e312012-08-07 20:33:29 -0400621 andc $acc11,$s3,$mask80
622 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
623 sub $acc01,$acc01,$acc05
624 sub $acc02,$acc02,$acc06
625 sub $acc03,$acc03,$acc07
626 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
627 add $acc09,$acc09,$acc09
628 add $acc10,$acc10,$acc10
629 add $acc11,$acc11,$acc11
630 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
631 and $acc01,$acc01,$mask1b
632 and $acc02,$acc02,$mask1b
633 and $acc03,$acc03,$mask1b
634 xor $acc00,$acc00,$acc08 # r2
635 xor $acc01,$acc01,$acc09
Alexandre Savard1b09e312012-08-07 20:33:29 -0400636 xor $acc02,$acc02,$acc10
Alexandre Savard1b09e312012-08-07 20:33:29 -0400637 xor $acc03,$acc03,$acc11
Alexandre Savard1b09e312012-08-07 20:33:29 -0400638
Alexandre Savard75410672012-08-08 09:50:01 -0400639 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
640 rotlwi $acc13,$s1,16
641 rotlwi $acc14,$s2,16
Alexandre Savard1b09e312012-08-07 20:33:29 -0400642 rotlwi $acc15,$s3,16
Alexandre Savard75410672012-08-08 09:50:01 -0400643 xor $s0,$s0,$acc00 # r0^r2
Alexandre Savard1b09e312012-08-07 20:33:29 -0400644 xor $s1,$s1,$acc01
Alexandre Savard1b09e312012-08-07 20:33:29 -0400645 xor $s2,$s2,$acc02
Alexandre Savard1b09e312012-08-07 20:33:29 -0400646 xor $s3,$s3,$acc03
Alexandre Savard75410672012-08-08 09:50:01 -0400647 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
648 rotrwi $s1,$s1,24
Alexandre Savard1b09e312012-08-07 20:33:29 -0400649 rotrwi $s2,$s2,24
Alexandre Savard1b09e312012-08-07 20:33:29 -0400650 rotrwi $s3,$s3,24
Alexandre Savard75410672012-08-08 09:50:01 -0400651 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
Alexandre Savard1b09e312012-08-07 20:33:29 -0400652 xor $s1,$s1,$acc01
653 xor $s2,$s2,$acc02
654 xor $s3,$s3,$acc03
655 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
Alexandre Savard1b09e312012-08-07 20:33:29 -0400656 rotlwi $acc09,$acc13,8
Alexandre Savard1b09e312012-08-07 20:33:29 -0400657 rotlwi $acc10,$acc14,8
Alexandre Savard1b09e312012-08-07 20:33:29 -0400658 rotlwi $acc11,$acc15,8
Alexandre Savard75410672012-08-08 09:50:01 -0400659 xor $s0,$s0,$acc12 #
660 xor $s1,$s1,$acc13
661 xor $s2,$s2,$acc14
Alexandre Savard1b09e312012-08-07 20:33:29 -0400662 xor $s3,$s3,$acc15
663 xor $s0,$s0,$acc08 #
664 xor $s1,$s1,$acc09
665 xor $s2,$s2,$acc10
666 xor $s3,$s3,$acc11
667
668 b Lenc_compact_loop
669.align 4
670Lenc_compact_done:
671 xor $s0,$s0,$t0
672 xor $s1,$s1,$t1
673 xor $s2,$s2,$t2
674 xor $s3,$s3,$t3
675 blr
Alexandre Savard1b09e312012-08-07 20:33:29 -0400676
.globl	.AES_decrypt
.align	7
.AES_decrypt:
	# Public single-block decrypt entry point.
	# Loads four 32-bit state words from inp, runs the compact
	# decryption core on them and stores the four result words to out.
	# The compact core is the one called here, not Lppc_AES_decrypt
	# (the note at the top of this file says the compact routines are
	# preferred for security reasons).
	mflr	r0
	$STU	$sp,-$FRAME($sp)

	# Save the link register (via r0), the toc register and r13-r31
	# in the top 21 slots of the new frame.
	$PUSH	r0,`$FRAME-$SIZE_T*21`($sp)
	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)

	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
	# LAES_Td is defined outside this chunk, presumably it points
	# Tbl0 at the decryption tables - confirm against its definition.
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)

	# Restore everything saved above, reload the return address
	# into the link register and release the frame.
	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr

.align	5
Lppc_AES_decrypt:
	# Table-driven decryption core (internal, reached with bl).
	# In:  s0-s3 hold the state words, key points at the key
	#      schedule with the round count at offset 240, Tbl0 points
	#      at the lookup tables.
	# Out: s0-s3 hold the result. t0-t3, acc00-acc15, Tbl1-Tbl3,
	#      key and ctr are overwritten.
	# .AES_decrypt in this file calls the compact variant instead.
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	# Tbl1-Tbl3 address the same table at byte offsets 3, 2 and 1.
	# Presumably each 8-byte entry stores the word twice so these
	# offsets read rotated copies - table layout is defined outside
	# this chunk, confirm there.
	addi	$Tbl1,$Tbl0,3
	addi	$Tbl2,$Tbl0,2
	addi	$Tbl3,$Tbl0,1
	addi	$acc00,$acc00,-1
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	# The loop runs rounds-1 times, the last round is handled
	# separately after the loop.
	mtctr	$acc00
.align	4
Ldec_loop:
	# Each rlwinm isolates one state byte and scales it by 8
	# (mask bits 21..28) to index the table. Output word n takes
	# its bytes from words n, n-1, n-2, n-3 (inverse ShiftRows).
	# Loads and arithmetic are interleaved for scheduling, keep
	# the instruction order.
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s1,`0+3`,21,28
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s3,`0+3`,21,28
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t0,$t0,$acc00
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t2,$t2,$acc02
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t0,$t0,$acc04
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t2,$t2,$acc06
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz-	Ldec_loop

	# Last round: byte-wise lookups (lbzx) in the 256-byte table at
	# Tbl0+2048. The eight lwz loads at a 32-byte stride only touch
	# that table, their results are overwritten before use.
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc10,$s0,`32-8`,24,31
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc12,$s1,`0`,24,31
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc14,$s3,`0`,24,31
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	lbzx	$acc09,$Tbl2,$acc09
	# Reassemble each output word from its four substituted bytes:
	# top byte first, then insert the two middle bytes, then OR in
	# the low byte.
	rlwinm	$s0,$acc00,24,0,7
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s2,$acc02,24,0,7
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s0,$acc04,16,8,15
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s2,$acc06,16,8,15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	# Final round-key addition.
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr

.align	4
Lppc_AES_decrypt_compact:
	# Compact decryption core (internal, reached with bl from
	# .AES_decrypt). Uses only the 256-byte table at Tbl0+2048 and
	# computes the column mixing arithmetically.
	# In:  s0-s3 hold the state words, key points at the key
	#      schedule with the round count at offset 240, Tbl0 points
	#      at the lookup tables.
	# Out: s0-s3 hold the result. t0-t3, acc00-acc15, Tbl1, mask80,
	#      mask1b, key and ctr are overwritten.
	lwz	$acc00,240($key)
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	addi	$Tbl1,$Tbl0,2048
	# Build the packed byte masks 0x80808080 and 0x1b1b1b1b.
	lis	$mask80,0x8080
	lis	$mask1b,0x1b1b
	addi	$key,$key,16
	ori	$mask80,$mask80,0x8080
	ori	$mask1b,$mask1b,0x1b1b
___
# 64-bit builds process two state words per register, so the byte masks
# are replicated into the upper 32 bits as well.
$code.=<<___	if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
	insrdi	$mask1b,$mask1b,32,0
___
$code.=<<___;
	# ctr holds the full round count, the loop is left through the
	# bdz below on the last round.
	mtctr	$acc00
.align	4
Ldec_compact_loop:
	# Add the round key, then substitute every state byte through
	# the byte table. Output word n takes its bytes from words
	# n, n-1, n-2, n-3 (inverse ShiftRows).
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	rlwinm	$acc00,$s0,`32-24`,24,31
	rlwinm	$acc01,$s1,`32-24`,24,31
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc08,$s2,`32-8`,24,31
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc10,$s0,`32-8`,24,31
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc12,$s1,`0`,24,31
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc14,$s3,`0`,24,31
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	lbzx	$acc09,$Tbl1,$acc09
	# Reassemble each word from its four substituted bytes.
	rlwinm	$s0,$acc00,24,0,7
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s2,$acc02,24,0,7
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s0,$acc04,16,8,15
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s2,$acc06,16,8,15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	# Fetch the next round key while the last bytes are merged.
	lwz	$t0,0($key)
	lwz	$t1,4($key)
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	# The last round skips the column mixing below.
	bdz	Ldec_compact_done
___
# 64-bit flavour: pack s1:s0 and s3:s2 into two registers and compute the
# GF(2^8) multiples r2, r4 and r8 of every state byte on two words at once,
# then split the results back into per-word registers for the common tail.
$code.=<<___	if ($SIZE_T==8);
	# vectorized permutation improves decrypt performance by 10%
	insrdi	$s0,$s1,32,0
	insrdi	$s2,$s3,32,0

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc02,$s2,$mask80
	srdi	$acc04,$acc00,7		# r1>>7
	srdi	$acc06,$acc02,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc10,$s2,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc02,$acc02,$acc06
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc10,$acc10,$acc10
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc02,$acc02,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc02,$acc02,$acc10

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc06,$acc02,$mask80
	srdi	$acc08,$acc04,7		# r1>>7
	srdi	$acc10,$acc06,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc14,$acc02,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc06,$acc06,$acc10
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc06,$acc06,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc06,$acc06,$acc14

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc10,$acc06,$mask80
	srdi	$acc12,$acc08,7		# r1>>7
	srdi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc10,$acc10,$acc14
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc14,$acc06,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc10,$acc10,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc10,$acc10,$acc14

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc02,$acc02,$s2
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc06,$acc06,$s2

	extrdi	$acc01,$acc00,32,0
	extrdi	$acc03,$acc02,32,0
	extrdi	$acc05,$acc04,32,0
	extrdi	$acc07,$acc06,32,0
	extrdi	$acc09,$acc08,32,0
	extrdi	$acc11,$acc10,32,0
___
# 32-bit flavour: the same r2/r4/r8 computation, one state word per register.
$code.=<<___	if ($SIZE_T==4);
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	srwi	$acc05,$acc01,7
	srwi	$acc06,$acc02,7
	srwi	$acc07,$acc03,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc09,$s1,$mask80
	andc	$acc10,$s2,$mask80
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc05,$acc01,$mask80
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	srwi	$acc09,$acc05,7
	srwi	$acc10,$acc06,7
	srwi	$acc11,$acc07,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc13,$acc01,$mask80
	andc	$acc14,$acc02,$mask80
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
	sub	$acc06,$acc06,$acc10
	sub	$acc07,$acc07,$acc11
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc05,$acc05,$mask1b
	and	$acc06,$acc06,$mask1b
	and	$acc07,$acc07,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc05,$acc05,$acc13
	xor	$acc06,$acc06,$acc14
	xor	$acc07,$acc07,$acc15

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	and	$acc10,$acc06,$mask80
	and	$acc11,$acc07,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	srwi	$acc13,$acc09,7
	srwi	$acc14,$acc10,7
	srwi	$acc15,$acc11,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc13,$acc05,$mask80
	andc	$acc14,$acc06,$mask80
	andc	$acc15,$acc07,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc09,$acc09,$mask1b
	and	$acc10,$acc10,$mask1b
	and	$acc11,$acc11,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc09,$acc09,$acc13
	xor	$acc10,$acc10,$acc14
	xor	$acc11,$acc11,$acc15

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc01,$acc01,$s1
	xor	$acc02,$acc02,$s2
	xor	$acc03,$acc03,$s3
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc05,$acc05,$s1
	xor	$acc06,$acc06,$s2
	xor	$acc07,$acc07,$s3
___
# Common tail for both flavours. On entry acc00-03 = r2^r0, acc04-07 = r4^r0
# and acc08-11 = r8, one register per state word; the rotated terms are
# folded into s0-s3 before looping back for the next round.
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	rotrwi	$s2,$s2,8
	rotrwi	$s3,$s3,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	rotrwi	$acc00,$acc00,24
	rotrwi	$acc01,$acc01,24
	rotrwi	$acc02,$acc02,24
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11
	rotrwi	$acc04,$acc04,16
	rotrwi	$acc05,$acc05,16
	rotrwi	$acc06,$acc06,16
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotrwi	$acc08,$acc08,8
	rotrwi	$acc09,$acc09,8
	rotrwi	$acc10,$acc10,8
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Ldec_compact_loop
.align	4
Ldec_compact_done:
	# Final round-key addition.
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
.long	0
.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align	7
___

# Evaluate every `...` span in the generated text as a Perl expression and
# splice in the result; this is how the frame offsets and rotate counts
# written in backticks above become plain numbers in the emitted assembly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Buffered write errors only surface when the handle is closed, so a failed
# close must abort instead of leaving a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";