Blame - jni/openssl/crypto/rc4/asm/rc4-x86_64.pl - jami-client-android

blob: 677be5fe25badfe44a315855a827c65d71543e72 [file] [log] [blame]

Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	1	#!/usr/bin/env perl
				2	#
				3	# ====================================================================
				4	# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
				5	# project. The module is, however, dual licensed under OpenSSL and
				6	# CRYPTOGAMS licenses depending on where you obtain it. For further
				7	# details see http://www.openssl.org/~appro/cryptogams/.
				8	# ====================================================================
				9	#
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	10	# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in
				11	# "hand-coded assembler"] doesn't stand for the whole improvement
				12	# coefficient. It turned out that eliminating RC4_CHAR from config
				13	# line results in ~40% improvement (yes, even for C implementation).
				14	# Presumably it has everything to do with AMD cache architecture and
				15	# RAW or whatever penalties. Once again! The module requires config
				16	# line without RC4_CHAR! As for coding "secret," I bet on partial
				17	# register arithmetics. For example instead of 'inc %r8; and $255,%r8'
				18	# I simply 'inc %r8b'. Even though optimization manual discourages
				19	# to operate on partial registers, it turned out to be the best bet.
				20	# At least for AMD... How IA32E would perform remains to be seen...
				21
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	22	# As was shown by Marc Bevand reordering of couple of load operations
				23	# results in even higher performance gain of 3.3x:-) At least on
				24	# Opteron... For reference, 1x in this case is RC4_CHAR C-code
				25	# compiled with gcc 3.3.2, which performs at ~54MBps per 1GHz clock.
				26	# Latter means that if you want to estimate what to expect from
				27	# your Opteron, then multiply 54 by 3.3 and clock frequency in GHz.
				28
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	29	# Intel P4 EM64T core was found to run the AMD64 code really slow...
				30	# The only way to achieve comparable performance on P4 was to keep
				31	# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
				32	# compose blended code, which would perform even within a 30% margin
				33	# on both AMD and Intel platforms, I implement both cases. See
				34	# rc4_skey.c for further details...
				35
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	36	# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing
				37	# those with add/sub results in 50% performance improvement of folded
				38	# loop...
				39
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	40	# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
				41	# performance by >30% [unlike P4 32-bit case that is]. But this is
				42	# provided that loads are reordered even more aggressively! Both code
				43	# paths, AMD64 and EM64T, reorder loads in essentially the same manner
				44	# as my IA-64 implementation. On Opteron this resulted in modest 5%
				45	# improvement [I had to test it], while final Intel P4 performance
				46	# achieves respectable 432MBps on 2.8GHz processor now. For reference.
				47	# If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than
				48	# RC4_INT code-path. While if executed on Opteron, it's only 25%
				49	# slower than the RC4_INT one [meaning that if CPU µ-arch detection
				50	# is not implemented, then this final RC4_CHAR code-path should be
				51	# preferred, as it provides better all-round performance].
				52
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	53	# Intel Core2 was observed to perform poorly on both code paths:-( It
				54	# apparently suffers from some kind of partial register stall, which
				55	# occurs in 64-bit mode only [as virtually identical 32-bit loop was
				56	# observed to outperform 64-bit one by almost 50%]. Adding two movzb to
				57	# cloop1 boosts its performance by 80%! This loop appears to be optimal
				58	# fit for Core2 and therefore the code was modified to skip cloop8 on
				59	# this CPU.
				60
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	61	$flavour = shift;	# first argument: perlasm flavour (or the output file, see below)
				62	$output = shift;	# second argument: output file name
				63	if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }	# a lone argument containing a dot is the output file
				64
					# Win64 unwind tables are emitted for nasm/masm/mingw64 flavours or a *.asm output.
				65	$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
				66
					# Look for the translator next to this script first, then in ../../perlasm/.
				67	$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
				68	( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
				69	( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
				70	die "can't locate x86_64-xlate.pl";
				71
					# Everything printed from here on is piped through the translator; fail loudly
					# if the pipe cannot be set up instead of silently emitting nothing.
				72	open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
				73
				74	$dat="%rdi"; # arg1
				75	$len="%rsi"; # arg2
				76	$inp="%rdx"; # arg3
				77	$out="%rcx"; # arg4
				78
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	79	@XX=("%r8","%r10");	# first table index: current/next pair, swapped after every unrolled step
				80	@TX=("%r9","%r11");	# table values loaded at the @XX indices, swapped together with @XX
				81	$YY="%r12";	# second table index
				82	$TY="%r13";	# table value loaded at index $YY, later reused for the output byte
				83
					# RC4 entry and dispatch. The function returns at once when $len is zero;
					# otherwise it saves %rbx/%r12/%r13, advances $dat by 8 so that the two
					# index dwords sit at -8($dat) and -4($dat), and loads them. It then picks
					# a code path: .LRC4_CHAR when the dword at 256($dat) equals -1, else
					# .Lloop8 when at least 8 bytes remain, else .Lloop1. On the dword-table
					# path the first index is pre-incremented and its table entry pre-loaded.
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	84	$code=<<___;
				85	.text
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	86
				87	.globl RC4
				88	.type RC4,\@function,4
				89	.align 16
				90	RC4: or $len,$len
				91	jne .Lentry
				92	ret
				93	.Lentry:
				94	push %rbx
				95	push %r12
				96	push %r13
				97	.Lprologue:
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	98
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	99	add \$8,$dat
				100	movl -8($dat),$XX[0]#d
				101	movl -4($dat),$YY#d
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	102	cmpl \$-1,256($dat)
				103	je .LRC4_CHAR
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	104	inc $XX[0]#b
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	105	movl ($dat,$XX[0],4),$TX[0]#d
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	106	test \$-8,$len
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	107	jz .Lloop1
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	108	jmp .Lloop8
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	109	.align 16
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	110	.Lloop8:
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	111	___
				112	for ($i=0;$i<8;$i++) {	# emit the 8 unrolled steps that form the body of .Lloop8
					# One step: add the current table value to the second index, swap the two
					# table entries, and fetch the entry at the sum of both values into %al.
					# %rax is rotated by 8 bits per step so that eight output bytes end up
					# packed in it. The next index and its table value are pre-loaded into
					# the other @XX/@TX slot before the swap is stored; cmove replaces that
					# pre-loaded value with the current one when the next index equals the
					# second index, i.e. when the entry has just been overwritten.
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	113	$code.=<<___;
				114	add $TX[0]#b,$YY#b
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	115	mov $XX[0],$XX[1]
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	116	movl ($dat,$YY,4),$TY#d
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	117	ror \$8,%rax # ror is redundant when $i=0
				118	inc $XX[1]#b
				119	movl ($dat,$XX[1],4),$TX[1]#d
				120	cmp $XX[1],$YY
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	121	movl $TX[0]#d,($dat,$YY,4)
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	122	cmove $TX[0],$TX[1]
				123	movl $TY#d,($dat,$XX[0],4)
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	124	add $TX[0]#b,$TY#b
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	125	movb ($dat,$TY,4),%al
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	126	___
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	127	push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	128	}
					# Tail of .Lloop8: a final rotate, then the 8 packed bytes are XORed with
					# 8 input bytes, stored to the output, both pointers advance by 8 and the
					# loop repeats while 8 or more bytes remain; leftovers go to .Lloop1.
					# .Lloop1 is the same step processing one byte per iteration.
					# .LRC4_CHAR is the entry of the byte-table path: it pre-increments the
					# first index, pre-loads its table byte, and goes to .Lcloop1 when fewer
					# than 8 bytes remain or when the dword at 260($dat) is non-zero (the
					# header notes say cloop8 is skipped on Core2); otherwise it enters
					# .Lcloop8, which starts by loading 8 input bytes into %eax and %ebx.
				129	$code.=<<___;
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	130	ror \$8,%rax
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	131	sub \$8,$len
				132
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	133	xor ($inp),%rax
				134	add \$8,$inp
				135	mov %rax,($out)
				136	add \$8,$out
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	137
				138	test \$-8,$len
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	139	jnz .Lloop8
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	140	cmp \$0,$len
				141	jne .Lloop1
				142	jmp .Lexit
				143
				144	.align 16
				145	.Lloop1:
				146	add $TX[0]#b,$YY#b
				147	movl ($dat,$YY,4),$TY#d
				148	movl $TX[0]#d,($dat,$YY,4)
				149	movl $TY#d,($dat,$XX[0],4)
				150	add $TY#b,$TX[0]#b
				151	inc $XX[0]#b
				152	movl ($dat,$TX[0],4),$TY#d
				153	movl ($dat,$XX[0],4),$TX[0]#d
				154	xorb ($inp),$TY#b
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	155	inc $inp
				156	movb $TY#b,($out)
				157	inc $out
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	158	dec $len
				159	jnz .Lloop1
				160	jmp .Lexit
				161
				162	.align 16
				163	.LRC4_CHAR:
				164	add \$1,$XX[0]#b
				165	movzb ($dat,$XX[0]),$TX[0]#d
				166	test \$-8,$len
				167	jz .Lcloop1
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	168	cmpl \$0,260($dat)
				169	jnz .Lcloop1
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	170	jmp .Lcloop8
				171	.align 16
				172	.Lcloop8:
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	173	mov ($inp),%eax
				174	mov 4($inp),%ebx
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	175	___
				176	# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
					# The two generator loops below emit the same step four times each; they
					# differ only in the accumulator: steps 0..3 XOR their key-stream byte
					# into %al and rotate %eax, steps 4..7 use %bl and %ebx. Each step
					# updates the second index, swaps the two table bytes, pre-loads the next
					# first index and its table byte into the other @XX/@TX slot, and uses a
					# conditional branch (per-step label .Lcmov<i>) instead of cmov to patch
					# the pre-loaded byte when the next index equals the second index.
				177	for ($i=0;$i<4;$i++) {
				178	$code.=<<___;
				179	add $TX[0]#b,$YY#b
				180	lea 1($XX[0]),$XX[1]
				181	movzb ($dat,$YY),$TY#d
				182	movzb $XX[1]#b,$XX[1]#d
				183	movzb ($dat,$XX[1]),$TX[1]#d
				184	movb $TX[0]#b,($dat,$YY)
				185	cmp $XX[1],$YY
				186	movb $TY#b,($dat,$XX[0])
				187	jne .Lcmov$i # Intel cmov is sloooow...
				188	mov $TX[0],$TX[1]
				189	.Lcmov$i:
				190	add $TX[0]#b,$TY#b
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	191	xor ($dat,$TY),%al
				192	ror \$8,%eax
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	193	___
				194	push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
				195	}
				196	for ($i=4;$i<8;$i++) {
				197	$code.=<<___;
				198	add $TX[0]#b,$YY#b
				199	lea 1($XX[0]),$XX[1]
				200	movzb ($dat,$YY),$TY#d
				201	movzb $XX[1]#b,$XX[1]#d
				202	movzb ($dat,$XX[1]),$TX[1]#d
				203	movb $TX[0]#b,($dat,$YY)
				204	cmp $XX[1],$YY
				205	movb $TY#b,($dat,$XX[0])
				206	jne .Lcmov$i # Intel cmov is sloooow...
				207	mov $TX[0],$TX[1]
				208	.Lcmov$i:
				209	add $TX[0]#b,$TY#b
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	210	xor ($dat,$TY),%bl
				211	ror \$8,%ebx
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	212	___
				213	push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
				214	}
					# Tail of .Lcloop8: store the two finished dwords, advance length and
					# pointers by 8 (lea is used for the pointer/length arithmetic), repeat
					# while 8 or more bytes remain, then hand leftovers to .Lcloop1.
				215	$code.=<<___;
				216	lea -8($len),$len
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	217	mov %eax,($out)
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	218	lea 8($inp),$inp
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	219	mov %ebx,4($out)
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	220	lea 8($out),$out
				221
				222	test \$-8,$len
				223	jnz .Lcloop8
				224	cmp \$0,$len
				225	jne .Lcloop1
				226	jmp .Lexit
				227	___
					# .Lcloop1: byte-table step processing one byte per iteration; the two
					# register-to-register movzb instructions are the ones the header notes
					# credit with the Core2 speed-up.
					# .Lexit (shared by all paths): undo the pre-increment of the first
					# index, store both indices back to -8($dat) and -4($dat), reload
					# %r13/%r12/%rbx from the stack and release the 24 bytes pushed on entry.
				228	$code.=<<___;
				229	.align 16
				230	.Lcloop1:
				231	add $TX[0]#b,$YY#b
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	232	movzb ($dat,$YY),$TY#d
				233	movb $TX[0]#b,($dat,$YY)
				234	movb $TY#b,($dat,$XX[0])
				235	add $TX[0]#b,$TY#b
				236	add \$1,$XX[0]#b
				237	movzb $TY#b,$TY#d
				238	movzb $XX[0]#b,$XX[0]#d
				239	movzb ($dat,$TY),$TY#d
				240	movzb ($dat,$XX[0]),$TX[0]#d
				241	xorb ($inp),$TY#b
				242	lea 1($inp),$inp
				243	movb $TY#b,($out)
				244	lea 1($out),$out
				245	sub \$1,$len
				246	jnz .Lcloop1
				247	jmp .Lexit
				248
				249	.align 16
				250	.Lexit:
				251	sub \$1,$XX[0]#b
				252	movl $XX[0]#d,-8($dat)
				253	movl $YY#d,-4($dat)
				254
				255	mov (%rsp),%r13
				256	mov 8(%rsp),%r12
				257	mov 16(%rsp),%rbx
				258	add \$24,%rsp
				259	.Lepilogue:
				260	ret
				261	.size RC4,.-RC4
				262	___
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	263
				264	$idx="%r8";	# running index into the table, accumulated from key bytes and table values
				265	$ido="%r9";	# sequential table index, counts 0..255 in the second loop
				266
					# RC4_set_key (3 arguments: $dat = key structure, $len = key length,
					# $inp = key bytes). $dat is advanced by 8 so the table starts at ($dat);
					# $inp is moved to the end of the key and $len negated, so ($inp,$len)
					# walks the key while $len counts up to zero, at which point it is reset
					# from the copy kept in %rcx (the key is reused cyclically).
					# Bit 20 of OPENSSL_ia32cap_P selects the layout: clear -> dword table
					# (.Lw1stloop/.Lw2ndloop), set -> byte table (.Lc1stloop/.Lc2ndloop). On
					# the byte path bit 30 is stored as 0/1 in the dword at 260($dat) and -1
					# is stored at 256($dat); RC4 tests both values when choosing its loop.
					# NOTE(review): the meaning of capability bits 20 and 30 is defined
					# outside this file - confirm against the OPENSSL_ia32cap documentation.
					# Each path first fills the table with 0..255, then runs 256 swap steps
					# driven by the key. .Lexit_key zeroes both index dwords and returns.
					#
					# RC4_options returns in %rax a pointer to one of the strings at .Lopts:
					# "rc4(8x,int)" when bit 20 is clear, "rc4(8x,char)" (+12 bytes) when bit
					# 20 is set and bit 30 clear, "rc4(1x,char)" (+13 more) when both are set.
				267	$code.=<<___;
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	268	.extern OPENSSL_ia32cap_P
				269	.globl RC4_set_key
				270	.type RC4_set_key,\@function,3
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	271	.align 16
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	272	RC4_set_key:
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	273	lea 8($dat),$dat
				274	lea ($inp,$len),$inp
				275	neg $len
				276	mov $len,%rcx
				277	xor %eax,%eax
				278	xor $ido,$ido
				279	xor %r10,%r10
				280	xor %r11,%r11
				281
				282	mov OPENSSL_ia32cap_P(%rip),$idx#d
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	283	bt \$20,$idx#d
				284	jnc .Lw1stloop
				285	bt \$30,$idx#d
				286	setc $ido#b
				287	mov $ido#d,260($dat)
				288	jmp .Lc1stloop
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	289
				290	.align 16
				291	.Lw1stloop:
				292	mov %eax,($dat,%rax,4)
				293	add \$1,%al
				294	jnc .Lw1stloop
				295
				296	xor $ido,$ido
				297	xor $idx,$idx
				298	.align 16
				299	.Lw2ndloop:
				300	mov ($dat,$ido,4),%r10d
				301	add ($inp,$len,1),$idx#b
				302	add %r10b,$idx#b
				303	add \$1,$len
				304	mov ($dat,$idx,4),%r11d
				305	cmovz %rcx,$len
				306	mov %r10d,($dat,$idx,4)
				307	mov %r11d,($dat,$ido,4)
				308	add \$1,$ido#b
				309	jnc .Lw2ndloop
				310	jmp .Lexit_key
				311
				312	.align 16
				313	.Lc1stloop:
				314	mov %al,($dat,%rax)
				315	add \$1,%al
				316	jnc .Lc1stloop
				317
				318	xor $ido,$ido
				319	xor $idx,$idx
				320	.align 16
				321	.Lc2ndloop:
				322	mov ($dat,$ido),%r10b
				323	add ($inp,$len),$idx#b
				324	add %r10b,$idx#b
				325	add \$1,$len
				326	mov ($dat,$idx),%r11b
				327	jnz .Lcnowrap
				328	mov %rcx,$len
				329	.Lcnowrap:
				330	mov %r10b,($dat,$idx)
				331	mov %r11b,($dat,$ido)
				332	add \$1,$ido#b
				333	jnc .Lc2ndloop
				334	movl \$-1,256($dat)
				335
				336	.align 16
				337	.Lexit_key:
				338	xor %eax,%eax
				339	mov %eax,-8($dat)
				340	mov %eax,-4($dat)
				341	ret
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	342	.size RC4_set_key,.-RC4_set_key
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	343
				344	.globl RC4_options
				345	.type RC4_options,\@abi-omnipotent
				346	.align 16
				347	RC4_options:
				348	lea .Lopts(%rip),%rax
				349	mov OPENSSL_ia32cap_P(%rip),%edx
				350	bt \$20,%edx
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	351	jnc .Ldone
				352	add \$12,%rax
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	353	bt \$30,%edx
				354	jnc .Ldone
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	355	add \$13,%rax
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	356	.Ldone:
				357	ret
				358	.align 64
				359	.Lopts:
				360	.asciz "rc4(8x,int)"
				361	.asciz "rc4(8x,char)"
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	362	.asciz "rc4(1x,char)"
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	363	.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
				364	.align 64
				365	.size RC4_options,.-RC4_options
				366	___
				367
				368	# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
				369	# CONTEXT *context,DISPATCHER_CONTEXT *disp)
				370	if ($win64) {	# exception-unwind support, appended only for Win64 targets
				371	$rec="%rcx";
				372	$frame="%rdx";
				373	$context="%r8";
				374	$disp="%r9";
				375
					# Two language-specific handlers plus their .pdata/.xdata records.
					#
					# stream_se_handler (registered for RC4): when the faulting Rip lies
					# between .Lprologue and .Lepilogue, the three registers pushed by RC4
					# are read back from the 24 bytes above context->Rsp and written to
					# context->Rbx/R12/R13. Outside that range (.Lin_prologue) only
					# Rsp/Rsi/Rdi are fixed up, using context->Rax as the frame base before
					# .Lprologue and context->Rsp at or after .Lepilogue.
					#
					# key_se_handler (registered for RC4_set_key): restores only
					# context->Rsi/Rdi from 8 and 16 bytes above context->Rsp.
					#
					# Both share .Lcommon_seh_exit, which copies the context (154
					# quadwords via rep movsq) into disp->ContextRecord, calls
					# RtlVirtualUnwind and returns 1 (ExceptionContinueSearch).
					#
					# NOTE(review): the Rdi/Rsi save slots at 8/16 off the frame base and
					# the .LSEH_begin_*/.LSEH_end_* labels are not set up in this file -
					# presumably x86_64-xlate.pl generates them; confirm there.
				376	$code.=<<___;
				377	.extern __imp_RtlVirtualUnwind
				378	.type stream_se_handler,\@abi-omnipotent
				379	.align 16
				380	stream_se_handler:
				381	push %rsi
				382	push %rdi
				383	push %rbx
				384	push %rbp
				385	push %r12
				386	push %r13
				387	push %r14
				388	push %r15
				389	pushfq
				390	sub \$64,%rsp
				391
				392	mov 120($context),%rax # pull context->Rax
				393	mov 248($context),%rbx # pull context->Rip
				394
				395	lea .Lprologue(%rip),%r10
				396	cmp %r10,%rbx # context->Rip<prologue label
				397	jb .Lin_prologue
				398
				399	mov 152($context),%rax # pull context->Rsp
				400
				401	lea .Lepilogue(%rip),%r10
				402	cmp %r10,%rbx # context->Rip>=epilogue label
				403	jae .Lin_prologue
				404
				405	lea 24(%rax),%rax
				406
				407	mov -8(%rax),%rbx
				408	mov -16(%rax),%r12
				409	mov -24(%rax),%r13
				410	mov %rbx,144($context) # restore context->Rbx
				411	mov %r12,216($context) # restore context->R12
				412	mov %r13,224($context) # restore context->R13
				413
				414	.Lin_prologue:
				415	mov 8(%rax),%rdi
				416	mov 16(%rax),%rsi
				417	mov %rax,152($context) # restore context->Rsp
				418	mov %rsi,168($context) # restore context->Rsi
				419	mov %rdi,176($context) # restore context->Rdi
				420
				421	jmp .Lcommon_seh_exit
				422	.size stream_se_handler,.-stream_se_handler
				423
				424	.type key_se_handler,\@abi-omnipotent
				425	.align 16
				426	key_se_handler:
				427	push %rsi
				428	push %rdi
				429	push %rbx
				430	push %rbp
				431	push %r12
				432	push %r13
				433	push %r14
				434	push %r15
				435	pushfq
				436	sub \$64,%rsp
				437
				438	mov 152($context),%rax # pull context->Rsp
				439	mov 8(%rax),%rdi
				440	mov 16(%rax),%rsi
				441	mov %rsi,168($context) # restore context->Rsi
				442	mov %rdi,176($context) # restore context->Rdi
				443
				444	.Lcommon_seh_exit:
				445
				446	mov 40($disp),%rdi # disp->ContextRecord
				447	mov $context,%rsi # context
				448	mov \$154,%ecx # sizeof(CONTEXT)
				449	.long 0xa548f3fc # cld; rep movsq
				450
				451	mov $disp,%rsi
				452	xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
				453	mov 8(%rsi),%rdx # arg2, disp->ImageBase
				454	mov 0(%rsi),%r8 # arg3, disp->ControlPc
				455	mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
				456	mov 40(%rsi),%r10 # disp->ContextRecord
				457	lea 56(%rsi),%r11 # &disp->HandlerData
				458	lea 24(%rsi),%r12 # &disp->EstablisherFrame
				459	mov %r10,32(%rsp) # arg5
				460	mov %r11,40(%rsp) # arg6
				461	mov %r12,48(%rsp) # arg7
				462	mov %rcx,56(%rsp) # arg8, (NULL)
				463	call *__imp_RtlVirtualUnwind(%rip)
				464
				465	mov \$1,%eax # ExceptionContinueSearch
				466	add \$64,%rsp
				467	popfq
				468	pop %r15
				469	pop %r14
				470	pop %r13
				471	pop %r12
				472	pop %rbp
				473	pop %rbx
				474	pop %rdi
				475	pop %rsi
				476	ret
				477	.size key_se_handler,.-key_se_handler
				478
				479	.section .pdata
				480	.align 4
				481	.rva .LSEH_begin_RC4
				482	.rva .LSEH_end_RC4
				483	.rva .LSEH_info_RC4
				484
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	485	.rva .LSEH_begin_RC4_set_key
				486	.rva .LSEH_end_RC4_set_key
				487	.rva .LSEH_info_RC4_set_key
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	488
				489	.section .xdata
				490	.align 8
				491	.LSEH_info_RC4:
				492	.byte 9,0,0,0
				493	.rva stream_se_handler
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	494	.LSEH_info_RC4_set_key:
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	495	.byte 9,0,0,0
				496	.rva key_se_handler
				497	___
				498	}
				499
Alexandre Savard	7541067	2012-08-08 09:50:01 -0400	[diff] [blame]	500	$code =~ s/#([bwd])/$1/gm;	# drop the '#' from register size markers: "%r8#b" becomes "%r8b", "%r12#d" becomes "%r12d"
Alexandre Savard	1b09e31	2012-08-07 20:33:29 -0400	[diff] [blame]	501
				502	print $code;	# STDOUT is the pipe into x86_64-xlate.pl opened during setup
				503
					# Closing a pipe waits for the child and reports its failure; check it so
					# that a failing translator aborts the build instead of leaving truncated
					# or empty assembly behind.
				504	close STDOUT or die "error closing STDOUT: $!";