#14465: Update openssl to tag android-4.0.4_r2.1
diff --git a/jni/openssl/crypto/rc4/asm/rc4-586.pl b/jni/openssl/crypto/rc4/asm/rc4-586.pl
index 5c9ac6a..38a44a7 100644
--- a/jni/openssl/crypto/rc4/asm/rc4-586.pl
+++ b/jni/openssl/crypto/rc4/asm/rc4-586.pl
@@ -28,34 +28,6 @@
#
# <appro@fy.chalmers.se>
-# May 2011
-#
-# Optimize for Core2 and Westmere [and incidentally Opteron]. Current
-# performance in cycles per processed byte (less is better) and
-# improvement relative to previous version of this module is:
-#
-# Pentium 10.2 # original numbers
-# Pentium III 7.8(*)
-# Intel P4 7.5
-#
-# Opteron 6.1/+20% # new MMX numbers
-# Core2 5.3/+67%(**)
-# Westmere 5.1/+94%(**)
-# Sandy Bridge 5.0/+8%
-# Atom 12.6/+6%
-#
-# (*) PIII can actually deliver 6.6 cycles per byte with MMX code,
-# but this specific code performs poorly on Core2. And vice
-# versa, below MMX/SSE code delivering 5.8/7.1 on Core2 performs
-# poorly on PIII, at 8.0/14.5:-( As PIII is not a "hot" CPU
-# [anymore], I chose to discard PIII-specific code path and opt
-# for original IALU-only code, which is why MMX/SSE code path
-# is guarded by SSE2 bit (see below), not MMX/SSE.
-# (**) Performance vs. block size on Core2 and Westmere had a maximum
-# at ... 64 bytes block size. And it was quite a maximum, 40-60%
-# in comparison to largest 8KB block size. Above improvement
-# coefficients are for the largest block size.
-
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
@@ -90,68 +62,6 @@
&$func ($out,&DWP(0,$dat,$ty,4));
}
-if ($alt=0) {
- # >20% faster on Atom and Sandy Bridge[!], 8% faster on Opteron,
- # but ~40% slower on Core2 and Westmere... Attempt to add movz
- # brings down Opteron by 25%, Atom and Sandy Bridge by 15%, yet
- # on Core2 with movz it's almost 20% slower than below alternative
- # code... Yes, it's a total mess...
- my @XX=($xx,$out);
- $RC4_loop_mmx = sub { # SSE actually...
- my $i=shift;
- my $j=$i<=0?0:$i>>1;
- my $mm=$i<=0?"mm0":"mm".($i&1);
-
- &add (&LB($yy),&LB($tx));
- &lea (@XX[1],&DWP(1,@XX[0]));
- &pxor ("mm2","mm0") if ($i==0);
- &psllq ("mm1",8) if ($i==0);
- &and (@XX[1],0xff);
- &pxor ("mm0","mm0") if ($i<=0);
- &mov ($ty,&DWP(0,$dat,$yy,4));
- &mov (&DWP(0,$dat,$yy,4),$tx);
- &pxor ("mm1","mm2") if ($i==0);
- &mov (&DWP(0,$dat,$XX[0],4),$ty);
- &add (&LB($ty),&LB($tx));
- &movd (@XX[0],"mm7") if ($i==0);
- &mov ($tx,&DWP(0,$dat,@XX[1],4));
- &pxor ("mm1","mm1") if ($i==1);
- &movq ("mm2",&QWP(0,$inp)) if ($i==1);
- &movq (&QWP(-8,(@XX[0],$inp)),"mm1") if ($i==0);
- &pinsrw ($mm,&DWP(0,$dat,$ty,4),$j);
-
- push (@XX,shift(@XX)) if ($i>=0);
- }
-} else {
- # Using pinsrw here improves performane on Intel CPUs by 2-3%, but
- # brings down AMD by 7%...
- $RC4_loop_mmx = sub {
- my $i=shift;
-
- &add (&LB($yy),&LB($tx));
- &psllq ("mm1",8*(($i-1)&7)) if (abs($i)!=1);
- &mov ($ty,&DWP(0,$dat,$yy,4));
- &mov (&DWP(0,$dat,$yy,4),$tx);
- &mov (&DWP(0,$dat,$xx,4),$ty);
- &inc ($xx);
- &add ($ty,$tx);
- &movz ($xx,&LB($xx)); # (*)
- &movz ($ty,&LB($ty)); # (*)
- &pxor ("mm2",$i==1?"mm0":"mm1") if ($i>=0);
- &movq ("mm0",&QWP(0,$inp)) if ($i<=0);
- &movq (&QWP(-8,($out,$inp)),"mm2") if ($i==0);
- &mov ($tx,&DWP(0,$dat,$xx,4));
- &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));
-
- # (*) This is the key to Core2 and Westmere performance.
- # Whithout movz out-of-order execution logic confuses
- # itself and fails to reorder loads and stores. Problem
- # appears to be fixed in Sandy Bridge...
- }
-}
-
-&external_label("OPENSSL_ia32cap_P");
-
# void RC4(RC4_KEY *key,size_t len,const unsigned char *inp,unsigned char *out);
&function_begin("RC4");
&mov ($dat,&wparam(0)); # load key schedule pointer
@@ -184,56 +94,11 @@
&and ($ty,-4); # how many 4-byte chunks?
&jz (&label("loop1"));
- &test ($ty,-8);
- &mov (&wparam(3),$out); # $out as accumulator in these loops
- &jz (&label("go4loop4"));
-
- &picmeup($out,"OPENSSL_ia32cap_P");
- &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX]
- &jnc (&label("go4loop4"));
-
- &mov ($out,&wparam(3)) if (!$alt);
- &movd ("mm7",&wparam(3)) if ($alt);
- &and ($ty,-8);
- &lea ($ty,&DWP(-8,$inp,$ty));
- &mov (&DWP(-4,$dat),$ty); # save input+(len/8)*8-8
-
- &$RC4_loop_mmx(-1);
- &jmp(&label("loop_mmx_enter"));
-
- &set_label("loop_mmx",16);
- &$RC4_loop_mmx(0);
- &set_label("loop_mmx_enter");
- for ($i=1;$i<8;$i++) { &$RC4_loop_mmx($i); }
- &mov ($ty,$yy);
- &xor ($yy,$yy); # this is second key to Core2
- &mov (&LB($yy),&LB($ty)); # and Westmere performance...
- &cmp ($inp,&DWP(-4,$dat));
- &lea ($inp,&DWP(8,$inp));
- &jb (&label("loop_mmx"));
-
- if ($alt) {
- &movd ($out,"mm7");
- &pxor ("mm2","mm0");
- &psllq ("mm1",8);
- &pxor ("mm1","mm2");
- &movq (&QWP(-8,$out,$inp),"mm1");
- } else {
- &psllq ("mm1",56);
- &pxor ("mm2","mm1");
- &movq (&QWP(-8,$out,$inp),"mm2");
- }
- &emms ();
-
- &cmp ($inp,&wparam(1)); # compare to input+len
- &je (&label("done"));
- &jmp (&label("loop1"));
-
-&set_label("go4loop4",16);
&lea ($ty,&DWP(-4,$inp,$ty));
&mov (&wparam(2),$ty); # save input+(len/4)*4-4
+ &mov (&wparam(3),$out); # $out as accumulator in this loop
- &set_label("loop4");
+ &set_label("loop4",16);
for ($i=0;$i<4;$i++) { RC4_loop($i); }
&ror ($out,8);
&xor ($out,&DWP(0,$inp));
@@ -286,7 +151,7 @@
&set_label("done");
&dec (&LB($xx));
- &mov (&DWP(-4,$dat),$yy); # save key->y
+ &mov (&BP(-4,$dat),&LB($yy)); # save key->y
&mov (&BP(-8,$dat),&LB($xx)); # save key->x
&set_label("abort");
&function_end("RC4");
@@ -299,8 +164,10 @@
$ido="ecx";
$idx="edx";
+&external_label("OPENSSL_ia32cap_P");
+
# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data);
-&function_begin("private_RC4_set_key");
+&function_begin("RC4_set_key");
&mov ($out,&wparam(0)); # load key
&mov ($idi,&wparam(1)); # load len
&mov ($inp,&wparam(2)); # load data
@@ -378,7 +245,7 @@
&xor ("eax","eax");
&mov (&DWP(-8,$out),"eax"); # key->x=0;
&mov (&DWP(-4,$out),"eax"); # key->y=0;
-&function_end("private_RC4_set_key");
+&function_end("RC4_set_key");
# const char *RC4_options(void);
&function_begin_B("RC4_options");
@@ -387,21 +254,14 @@
&blindpop("eax");
&lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
&picmeup("edx","OPENSSL_ia32cap_P");
- &mov ("edx",&DWP(0,"edx"));
- &bt ("edx",20);
- &jc (&label("1xchar"));
- &bt ("edx",26);
- &jnc (&label("ret"));
- &add ("eax",25);
- &ret ();
-&set_label("1xchar");
- &add ("eax",12);
-&set_label("ret");
+ &bt (&DWP(0,"edx"),20);
+ &jnc (&label("skip"));
+ &add ("eax",12);
+ &set_label("skip");
&ret ();
&set_label("opts",64);
&asciz ("rc4(4x,int)");
&asciz ("rc4(1x,char)");
-&asciz ("rc4(8x,mmx)");
&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
&align (64);
&function_end_B("RC4_options");
diff --git a/jni/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl b/jni/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
deleted file mode 100644
index 7f68409..0000000
--- a/jni/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
+++ /dev/null
@@ -1,631 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# June 2011
-#
-# This is RC4+MD5 "stitch" implementation. The idea, as spelled in
-# http://download.intel.com/design/intarch/papers/323686.pdf, is that
-# since both algorithms exhibit instruction-level parallelism, ILP,
-# below theoretical maximum, interleaving them would allow to utilize
-# processor resources better and achieve better performance. RC4
-# instruction sequence is virtually identical to rc4-x86_64.pl, which
-# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin
-# and Jim Guilford of Intel. MD5 is fresh implementation aiming to
-# minimize register usage, which was used as "main thread" with RC4
-# weaved into it, one RC4 round per one MD5 round. In addition to the
-# stiched subroutine the script can generate standalone replacement
-# md5_block_asm_data_order and RC4. Below are performance numbers in
-# cycles per processed byte, less is better, for these the standalone
-# subroutines, sum of them, and stitched one:
-#
-# RC4 MD5 RC4+MD5 stitch gain
-# Opteron 6.5(*) 5.4 11.9 7.0 +70%(*)
-# Core2 6.5 5.8 12.3 7.7 +60%
-# Westmere 4.3 5.2 9.5 7.0 +36%
-# Sandy Bridge 4.2 5.5 9.7 6.8 +43%
-# Atom 9.3 6.5 15.8 11.1 +42%
-#
-# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
-# is +53%...
-
-my ($rc4,$md5)=(1,1); # what to generate?
-my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
- # but its result is discarded. Idea here is
- # to be able to use 'openssl speed rc4' for
- # benchmarking the stitched subroutine...
-
-my $flavour = shift;
-my $output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour $output";
-
-my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);
-
-if ($rc4 && !$md5) {
- ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx");
- $func="RC4"; $nargs=4;
-} elsif ($md5 && !$rc4) {
- ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx");
- $func="md5_block_asm_data_order"; $nargs=3;
-} else {
- ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
- $func="rc4_md5_enc"; $nargs=6;
- # void rc4_md5_enc(
- # RC4_KEY *key, #
- # const void *in0, # RC4 input
- # void *out, # RC4 output
- # MD5_CTX *ctx, #
- # const void *inp, # MD5 input
- # size_t len); # number of 64-byte blocks
-}
-
-my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
- 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
- 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
- 0x6b901122,0xfd987193,0xa679438e,0x49b40821,
-
- 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
- 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
- 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
- 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
-
- 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
- 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
- 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
- 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
-
- 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
- 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
- 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
- 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391 );
-
-my @V=("%r8d","%r9d","%r10d","%r11d"); # MD5 registers
-my $tmp="%r12d";
-
-my @XX=("%rbp","%rsi"); # RC4 registers
-my @TX=("%rax","%rbx");
-my $YY="%rcx";
-my $TY="%rdx";
-
-my $MOD=32; # 16, 32 or 64
-
-$code.=<<___;
-.text
-.align 16
-
-.globl $func
-.type $func,\@function,$nargs
-$func:
- cmp \$0,$len
- je .Labort
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- sub \$40,%rsp
-.Lbody:
-___
-if ($rc4) {
-$code.=<<___;
-$D#md5# mov $ctx,%r11 # reassign arguments
- mov $len,%r12
- mov $in0,%r13
- mov $out,%r14
-$D#md5# mov $inp,%r15
-___
- $ctx="%r11" if ($md5); # reassign arguments
- $len="%r12";
- $in0="%r13";
- $out="%r14";
- $inp="%r15" if ($md5);
- $inp=$in0 if (!$md5);
-$code.=<<___;
- xor $XX[0],$XX[0]
- xor $YY,$YY
-
- lea 8($dat),$dat
- mov -8($dat),$XX[0]#b
- mov -4($dat),$YY#b
-
- inc $XX[0]#b
- sub $in0,$out
- movl ($dat,$XX[0],4),$TX[0]#d
-___
-$code.=<<___ if (!$md5);
- xor $TX[1],$TX[1]
- test \$-128,$len
- jz .Loop1
- sub $XX[0],$TX[1]
- and \$`$MOD-1`,$TX[1]
- jz .Loop${MOD}_is_hot
- sub $TX[1],$len
-.Loop${MOD}_warmup:
- add $TX[0]#b,$YY#b
- movl ($dat,$YY,4),$TY#d
- movl $TX[0]#d,($dat,$YY,4)
- movl $TY#d,($dat,$XX[0],4)
- add $TY#b,$TX[0]#b
- inc $XX[0]#b
- movl ($dat,$TX[0],4),$TY#d
- movl ($dat,$XX[0],4),$TX[0]#d
- xorb ($in0),$TY#b
- movb $TY#b,($out,$in0)
- lea 1($in0),$in0
- dec $TX[1]
- jnz .Loop${MOD}_warmup
-
- mov $YY,$TX[1]
- xor $YY,$YY
- mov $TX[1]#b,$YY#b
-
-.Loop${MOD}_is_hot:
- mov $len,32(%rsp) # save original $len
- shr \$6,$len # number of 64-byte blocks
-___
- if ($D && !$md5) { # stitch in dummy MD5
- $md5=1;
- $ctx="%r11";
- $inp="%r15";
- $code.=<<___;
- mov %rsp,$ctx
- mov $in0,$inp
-___
- }
-}
-$code.=<<___;
-#rc4# add $TX[0]#b,$YY#b
-#rc4# lea ($dat,$XX[0],4),$XX[1]
- shl \$6,$len
- add $inp,$len # pointer to the end of input
- mov $len,16(%rsp)
-
-#md5# mov $ctx,24(%rsp) # save pointer to MD5_CTX
-#md5# mov 0*4($ctx),$V[0] # load current hash value from MD5_CTX
-#md5# mov 1*4($ctx),$V[1]
-#md5# mov 2*4($ctx),$V[2]
-#md5# mov 3*4($ctx),$V[3]
- jmp .Loop
-
-.align 16
-.Loop:
-#md5# mov $V[0],0*4(%rsp) # put aside current hash value
-#md5# mov $V[1],1*4(%rsp)
-#md5# mov $V[2],2*4(%rsp)
-#md5# mov $V[3],$tmp # forward reference
-#md5# mov $V[3],3*4(%rsp)
-___
-
-sub R0 {
- my ($i,$a,$b,$c,$d)=@_;
- my @rot0=(7,12,17,22);
- my $j=$i%16;
- my $k=$i%$MOD;
- my $xmm="%xmm".($j&1);
- $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15);
- $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
- $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
- $code.=<<___;
-#rc4# movl ($dat,$YY,4),$TY#d
-#md5# xor $c,$tmp
-#rc4# movl $TX[0]#d,($dat,$YY,4)
-#md5# and $b,$tmp
-#md5# add 4*`$j`($inp),$a
-#rc4# add $TY#b,$TX[0]#b
-#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
-#md5# add \$$K[$i],$a
-#md5# xor $d,$tmp
-#rc4# movz $TX[0]#b,$TX[0]#d
-#rc4# movl $TY#d,4*$k($XX[1])
-#md5# add $tmp,$a
-#rc4# add $TX[1]#b,$YY#b
-#md5# rol \$$rot0[$j%4],$a
-#md5# mov `$j==15?"$b":"$c"`,$tmp # forward reference
-#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
-#md5# add $b,$a
-___
- $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
- mov $YY,$XX[1]
- xor $YY,$YY # keyword to partial register
- mov $XX[1]#b,$YY#b
- lea ($dat,$XX[0],4),$XX[1]
-___
- $code.=<<___ if ($rc4 && $j==15);
- psllq \$8,%xmm1
- pxor %xmm0,%xmm2
- pxor %xmm1,%xmm2
-___
-}
-sub R1 {
- my ($i,$a,$b,$c,$d)=@_;
- my @rot1=(5,9,14,20);
- my $j=$i%16;
- my $k=$i%$MOD;
- my $xmm="%xmm".($j&1);
- $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15);
- $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
- $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
- $code.=<<___;
-#rc4# movl ($dat,$YY,4),$TY#d
-#md5# xor $b,$tmp
-#rc4# movl $TX[0]#d,($dat,$YY,4)
-#md5# and $d,$tmp
-#md5# add 4*`((1+5*$j)%16)`($inp),$a
-#rc4# add $TY#b,$TX[0]#b
-#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
-#md5# add \$$K[$i],$a
-#md5# xor $c,$tmp
-#rc4# movz $TX[0]#b,$TX[0]#d
-#rc4# movl $TY#d,4*$k($XX[1])
-#md5# add $tmp,$a
-#rc4# add $TX[1]#b,$YY#b
-#md5# rol \$$rot1[$j%4],$a
-#md5# mov `$j==15?"$c":"$b"`,$tmp # forward reference
-#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
-#md5# add $b,$a
-___
- $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
- mov $YY,$XX[1]
- xor $YY,$YY # keyword to partial register
- mov $XX[1]#b,$YY#b
- lea ($dat,$XX[0],4),$XX[1]
-___
- $code.=<<___ if ($rc4 && $j==15);
- psllq \$8,%xmm1
- pxor %xmm0,%xmm3
- pxor %xmm1,%xmm3
-___
-}
-sub R2 {
- my ($i,$a,$b,$c,$d)=@_;
- my @rot2=(4,11,16,23);
- my $j=$i%16;
- my $k=$i%$MOD;
- my $xmm="%xmm".($j&1);
- $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15);
- $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
- $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
- $code.=<<___;
-#rc4# movl ($dat,$YY,4),$TY#d
-#md5# xor $c,$tmp
-#rc4# movl $TX[0]#d,($dat,$YY,4)
-#md5# xor $b,$tmp
-#md5# add 4*`((5+3*$j)%16)`($inp),$a
-#rc4# add $TY#b,$TX[0]#b
-#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
-#md5# add \$$K[$i],$a
-#rc4# movz $TX[0]#b,$TX[0]#d
-#md5# add $tmp,$a
-#rc4# movl $TY#d,4*$k($XX[1])
-#rc4# add $TX[1]#b,$YY#b
-#md5# rol \$$rot2[$j%4],$a
-#md5# mov `$j==15?"\\\$-1":"$c"`,$tmp # forward reference
-#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
-#md5# add $b,$a
-___
- $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
- mov $YY,$XX[1]
- xor $YY,$YY # keyword to partial register
- mov $XX[1]#b,$YY#b
- lea ($dat,$XX[0],4),$XX[1]
-___
- $code.=<<___ if ($rc4 && $j==15);
- psllq \$8,%xmm1
- pxor %xmm0,%xmm4
- pxor %xmm1,%xmm4
-___
-}
-sub R3 {
- my ($i,$a,$b,$c,$d)=@_;
- my @rot3=(6,10,15,21);
- my $j=$i%16;
- my $k=$i%$MOD;
- my $xmm="%xmm".($j&1);
- $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15);
- $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
- $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
- $code.=<<___;
-#rc4# movl ($dat,$YY,4),$TY#d
-#md5# xor $d,$tmp
-#rc4# movl $TX[0]#d,($dat,$YY,4)
-#md5# or $b,$tmp
-#md5# add 4*`((7*$j)%16)`($inp),$a
-#rc4# add $TY#b,$TX[0]#b
-#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
-#md5# add \$$K[$i],$a
-#rc4# movz $TX[0]#b,$TX[0]#d
-#md5# xor $c,$tmp
-#rc4# movl $TY#d,4*$k($XX[1])
-#md5# add $tmp,$a
-#rc4# add $TX[1]#b,$YY#b
-#md5# rol \$$rot3[$j%4],$a
-#md5# mov \$-1,$tmp # forward reference
-#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
-#md5# add $b,$a
-___
- $code.=<<___ if ($rc4 && $j==15);
- mov $XX[0],$XX[1]
- xor $XX[0],$XX[0] # keyword to partial register
- mov $XX[1]#b,$XX[0]#b
- mov $YY,$XX[1]
- xor $YY,$YY # keyword to partial register
- mov $XX[1]#b,$YY#b
- lea ($dat,$XX[0],4),$XX[1]
- psllq \$8,%xmm1
- pxor %xmm0,%xmm5
- pxor %xmm1,%xmm5
-___
-}
-
-my $i=0;
-for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
-for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
-for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
-for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
-
-$code.=<<___;
-#md5# add 0*4(%rsp),$V[0] # accumulate hash value
-#md5# add 1*4(%rsp),$V[1]
-#md5# add 2*4(%rsp),$V[2]
-#md5# add 3*4(%rsp),$V[3]
-
-#rc4# movdqu %xmm2,($out,$in0) # write RC4 output
-#rc4# movdqu %xmm3,16($out,$in0)
-#rc4# movdqu %xmm4,32($out,$in0)
-#rc4# movdqu %xmm5,48($out,$in0)
-#md5# lea 64($inp),$inp
-#rc4# lea 64($in0),$in0
- cmp 16(%rsp),$inp # are we done?
- jb .Loop
-
-#md5# mov 24(%rsp),$len # restore pointer to MD5_CTX
-#rc4# sub $TX[0]#b,$YY#b # correct $YY
-#md5# mov $V[0],0*4($len) # write MD5_CTX
-#md5# mov $V[1],1*4($len)
-#md5# mov $V[2],2*4($len)
-#md5# mov $V[3],3*4($len)
-___
-$code.=<<___ if ($rc4 && (!$md5 || $D));
- mov 32(%rsp),$len # restore original $len
- and \$63,$len # remaining bytes
- jnz .Loop1
- jmp .Ldone
-
-.align 16
-.Loop1:
- add $TX[0]#b,$YY#b
- movl ($dat,$YY,4),$TY#d
- movl $TX[0]#d,($dat,$YY,4)
- movl $TY#d,($dat,$XX[0],4)
- add $TY#b,$TX[0]#b
- inc $XX[0]#b
- movl ($dat,$TX[0],4),$TY#d
- movl ($dat,$XX[0],4),$TX[0]#d
- xorb ($in0),$TY#b
- movb $TY#b,($out,$in0)
- lea 1($in0),$in0
- dec $len
- jnz .Loop1
-
-.Ldone:
-___
-$code.=<<___;
-#rc4# sub \$1,$XX[0]#b
-#rc4# movl $XX[0]#d,-8($dat)
-#rc4# movl $YY#d,-4($dat)
-
- mov 40(%rsp),%r15
- mov 48(%rsp),%r14
- mov 56(%rsp),%r13
- mov 64(%rsp),%r12
- mov 72(%rsp),%rbp
- mov 80(%rsp),%rbx
- lea 88(%rsp),%rsp
-.Lepilogue:
-.Labort:
- ret
-.size $func,.-$func
-___
-
-if ($rc4 && $D) { # sole purpose of this section is to provide
- # option to use the generated module as drop-in
- # replacement for rc4-x86_64.pl for debugging
- # and testing purposes...
-my ($idx,$ido)=("%r8","%r9");
-my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");
-
-$code.=<<___;
-.globl RC4_set_key
-.type RC4_set_key,\@function,3
-.align 16
-RC4_set_key:
- lea 8($dat),$dat
- lea ($inp,$len),$inp
- neg $len
- mov $len,%rcx
- xor %eax,%eax
- xor $ido,$ido
- xor %r10,%r10
- xor %r11,%r11
- jmp .Lw1stloop
-
-.align 16
-.Lw1stloop:
- mov %eax,($dat,%rax,4)
- add \$1,%al
- jnc .Lw1stloop
-
- xor $ido,$ido
- xor $idx,$idx
-.align 16
-.Lw2ndloop:
- mov ($dat,$ido,4),%r10d
- add ($inp,$len,1),$idx#b
- add %r10b,$idx#b
- add \$1,$len
- mov ($dat,$idx,4),%r11d
- cmovz %rcx,$len
- mov %r10d,($dat,$idx,4)
- mov %r11d,($dat,$ido,4)
- add \$1,$ido#b
- jnc .Lw2ndloop
-
- xor %eax,%eax
- mov %eax,-8($dat)
- mov %eax,-4($dat)
- ret
-.size RC4_set_key,.-RC4_set_key
-
-.globl RC4_options
-.type RC4_options,\@abi-omnipotent
-.align 16
-RC4_options:
- lea .Lopts(%rip),%rax
- ret
-.align 64
-.Lopts:
-.asciz "rc4(64x,int)"
-.align 64
-.size RC4_options,.-RC4_options
-___
-}
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-my $rec="%rcx";
-my $frame="%rdx";
-my $context="%r8";
-my $disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type se_handler,\@abi-omnipotent
-.align 16
-se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- lea .Lbody(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lbody
- jb .Lin_prologue
-
- mov 152($context),%rax # pull context->Rsp
-
- lea .Lepilogue(%rip),%r10
- cmp %r10,%rbx # context->Rip>=.Lepilogue
- jae .Lin_prologue
-
- mov 40(%rax),%r15
- mov 48(%rax),%r14
- mov 56(%rax),%r13
- mov 64(%rax),%r12
- mov 72(%rax),%rbp
- mov 80(%rax),%rbx
- lea 88(%rax),%rax
-
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R12
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_prologue:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$154,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size se_handler,.-se_handler
-
-.section .pdata
-.align 4
- .rva .LSEH_begin_$func
- .rva .LSEH_end_$func
- .rva .LSEH_info_$func
-
-.section .xdata
-.align 8
-.LSEH_info_$func:
- .byte 9,0,0,0
- .rva se_handler
-___
-}
-
-sub reg_part {
-my ($reg,$conv)=@_;
- if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
- elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
- elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
- elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
- return $reg;
-}
-
-$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/pinsrw\s+\$0,/movd /gm;
-
-$code =~ s/#md5#//gm if ($md5);
-$code =~ s/#rc4#//gm if ($rc4);
-
-print $code;
-
-close STDOUT;
diff --git a/jni/openssl/crypto/rc4/asm/rc4-parisc.pl b/jni/openssl/crypto/rc4/asm/rc4-parisc.pl
deleted file mode 100644
index 9165067..0000000
--- a/jni/openssl/crypto/rc4/asm/rc4-parisc.pl
+++ /dev/null
@@ -1,313 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# RC4 for PA-RISC.
-
-# June 2009.
-#
-# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
-# For reference, [4x] unrolled loop is >40% faster than folded one.
-# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
-# is believed to be not sufficient to justify the effort...
-#
-# Special thanks to polarhome.com for providing HP-UX account.
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-
-$flavour = shift;
-$output = shift;
-open STDOUT,">$output";
-
-if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
-} else {
- $LEVEL ="1.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
-}
-
-$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
- # [+ argument transfer]
-$SZ=1; # defaults to RC4_CHAR
-if (open CONF,"<${dir}../../opensslconf.h") {
- while(<CONF>) {
- if (m/#\s*define\s+RC4_INT\s+(.*)/) {
- $SZ = ($1=~/char$/) ? 1 : 4;
- last;
- }
- }
- close CONF;
-}
-
-if ($SZ==1) { # RC4_CHAR
- $LD="ldb";
- $LDX="ldbx";
- $MKX="addl";
- $ST="stb";
-} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
- $LD="ldw";
- $LDX="ldwx,s";
- $MKX="sh2addl";
- $ST="stw";
-}
-
-$key="%r26";
-$len="%r25";
-$inp="%r24";
-$out="%r23";
-
-@XX=("%r19","%r20");
-@TX=("%r21","%r22");
-$YY="%r28";
-$TY="%r29";
-
-$acc="%r1";
-$ix="%r2";
-$iy="%r3";
-$dat0="%r4";
-$dat1="%r5";
-$rem="%r6";
-$mask="%r31";
-
-sub unrolledloopbody {
-for ($i=0;$i<4;$i++) {
-$code.=<<___;
- ldo 1($XX[0]),$XX[1]
- `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
- and $mask,$XX[1],$XX[1]
- $LDX $YY($key),$TY
- $MKX $YY,$key,$ix
- $LDX $XX[1]($key),$TX[1]
- $MKX $XX[0],$key,$iy
- $ST $TX[0],0($ix)
- comclr,<> $XX[1],$YY,%r0 ; conditional
- copy $TX[0],$TX[1] ; move
- `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
- $ST $TY,0($iy)
- addl $TX[0],$TY,$TY
- addl $TX[1],$YY,$YY
- and $mask,$TY,$TY
- and $mask,$YY,$YY
-___
-push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
-} }
-
-sub foldedloop {
-my ($label,$count)=@_;
-$code.=<<___;
-$label
- $MKX $YY,$key,$iy
- $LDX $YY($key),$TY
- $MKX $XX[0],$key,$ix
- $ST $TX[0],0($iy)
- ldo 1($XX[0]),$XX[0]
- $ST $TY,0($ix)
- addl $TX[0],$TY,$TY
- ldbx $inp($out),$dat1
- and $mask,$TY,$TY
- and $mask,$XX[0],$XX[0]
- $LDX $TY($key),$acc
- $LDX $XX[0]($key),$TX[0]
- ldo 1($out),$out
- xor $dat1,$acc,$acc
- addl $TX[0],$YY,$YY
- stb $acc,-1($out)
- addib,<> -1,$count,$label ; $count is always small
- and $mask,$YY,$YY
-___
-}
-
-$code=<<___;
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
-
- .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
-RC4
- .PROC
- .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
-
- cmpib,*= 0,$len,L\$abort
- sub $inp,$out,$inp ; distance between $inp and $out
-
- $LD `0*$SZ`($key),$XX[0]
- $LD `1*$SZ`($key),$YY
- ldo `2*$SZ`($key),$key
-
- ldi 0xff,$mask
- ldi 3,$dat0
-
- ldo 1($XX[0]),$XX[0] ; warm up loop
- and $mask,$XX[0],$XX[0]
- $LDX $XX[0]($key),$TX[0]
- addl $TX[0],$YY,$YY
- cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
- and $mask,$YY,$YY
-
- and,<> $out,$dat0,$rem ; is $out aligned?
- b L\$alignedout
- subi 4,$rem,$rem
- sub $len,$rem,$len
-___
-&foldedloop("L\$alignout",$rem); # process till $out is aligned
-
-$code.=<<___;
-L\$alignedout ; $len is at least 4 here
- and,<> $inp,$dat0,$acc ; is $inp aligned?
- b L\$oop4
- sub $inp,$acc,$rem ; align $inp
-
- sh3addl $acc,%r0,$acc
- subi 32,$acc,$acc
- mtctl $acc,%cr11 ; load %sar with vshd align factor
- ldwx $rem($out),$dat0
- ldo 4($rem),$rem
-L\$oop4misalignedinp
-___
-&unrolledloopbody();
-$code.=<<___;
- $LDX $TY($key),$ix
- ldwx $rem($out),$dat1
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- vshd $dat0,$dat1,$iy ; align data
- copy $dat1,$dat0
- xor $iy,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4misalignedinp
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
- b L\$oop1
- nop
-
- .ALIGN 8
-L\$oop4
-___
-&unrolledloopbody();
-$code.=<<___;
- $LDX $TY($key),$ix
- ldwx $inp($out),$dat0
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- xor $dat0,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
-___
-&foldedloop("L\$oop1",$len);
-$code.=<<___;
-L\$done
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2
- ldo -1($XX[0]),$XX[0] ; chill out loop
- sub $YY,$TX[0],$YY
- and $mask,$XX[0],$XX[0]
- and $mask,$YY,$YY
- $ST $XX[0],`-2*$SZ`($key)
- $ST $YY,`-1*$SZ`($key)
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
-L\$abort
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
-___
-
-$code.=<<___;
-
- .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
- .ALIGN 8
-private_RC4_set_key
- .PROC
- .CALLINFO NO_CALLS
- .ENTRY
- $ST %r0,`0*$SZ`($key)
- $ST %r0,`1*$SZ`($key)
- ldo `2*$SZ`($key),$key
- copy %r0,@XX[0]
-L\$1st
- $ST @XX[0],0($key)
- ldo 1(@XX[0]),@XX[0]
- bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
- ldo $SZ($key),$key
-
- ldo `-256*$SZ`($key),$key ; rewind $key
- addl $len,$inp,$inp ; $inp to point at the end
- sub %r0,$len,%r23 ; inverse index
- copy %r0,@XX[0]
- copy %r0,@XX[1]
- ldi 0xff,$mask
-
-L\$2nd
- $LDX @XX[0]($key),@TX[0]
- ldbx %r23($inp),@TX[1]
- addi,nuv 1,%r23,%r23 ; increment and conditional
- sub %r0,$len,%r23 ; inverse index
- addl @TX[0],@XX[1],@XX[1]
- addl @TX[1],@XX[1],@XX[1]
- and $mask,@XX[1],@XX[1]
- $MKX @XX[0],$key,$TY
- $LDX @XX[1]($key),@TX[1]
- $MKX @XX[1],$key,$YY
- ldo 1(@XX[0]),@XX[0]
- $ST @TX[0],0($YY)
- bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
- $ST @TX[1],0($TY)
-
- bv,n (%r2)
- .EXIT
- nop
- .PROCEND
-
- .EXPORT RC4_options,ENTRY
- .ALIGN 8
-RC4_options
- .PROC
- .CALLINFO NO_CALLS
- .ENTRY
- blr %r0,%r28
- ldi 3,%r1
-L\$pic
- andcm %r28,%r1,%r28
- bv (%r2)
- .EXIT
- ldo L\$opts-L\$pic(%r28),%r28
- .PROCEND
- .ALIGN 8
-L\$opts
- .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
- .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
-___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
-
-print $code;
-close STDOUT;
diff --git a/jni/openssl/crypto/rc4/asm/rc4-s390x.pl b/jni/openssl/crypto/rc4/asm/rc4-s390x.pl
index 7528ece..96681fa 100644
--- a/jni/openssl/crypto/rc4/asm/rc4-s390x.pl
+++ b/jni/openssl/crypto/rc4/asm/rc4-s390x.pl
@@ -13,29 +13,6 @@
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.
-# November 2010.
-#
-# Adapt for -m31 build. If kernel supports what's called "highgprs"
-# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
-# instructions and achieve "64-bit" performance even in 31-bit legacy
-# application context. The feature is not specific to any particular
-# processor, as long as it's "z-CPU". Latter implies that the code
-# remains z/Architecture specific. On z990 it was measured to perform
-# 50% better than code generated by gcc 4.3.
-
-$flavour = shift;
-
-if ($flavour =~ /3[12]/) {
- $SIZE_T=4;
- $g="";
-} else {
- $SIZE_T=8;
- $g="g";
-}
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
$rp="%r14";
$sp="%r15";
$code=<<___;
@@ -62,12 +39,7 @@
.type RC4,\@function
.align 64
RC4:
- stm${g} %r6,%r11,6*$SIZE_T($sp)
-___
-$code.=<<___ if ($flavour =~ /3[12]/);
- llgfr $len,$len
-___
-$code.=<<___;
+ stmg %r6,%r11,48($sp)
llgc $XX[0],0($key)
llgc $YY,1($key)
la $XX[0],1($XX[0])
@@ -118,7 +90,7 @@
xgr $acc,$TX[1]
stg $acc,0($out)
la $out,8($out)
- brctg $cnt,.Loop8
+ brct $cnt,.Loop8
.Lshort:
lghi $acc,7
@@ -150,7 +122,7 @@
ahi $XX[0],-1
stc $XX[0],0($key)
stc $YY,1($key)
- lm${g} %r6,%r11,6*$SIZE_T($sp)
+ lmg %r6,%r11,48($sp)
br $rp
.size RC4,.-RC4
.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
@@ -171,11 +143,11 @@
$iinp="%r8";
$code.=<<___;
-.globl private_RC4_set_key
-.type private_RC4_set_key,\@function
+.globl RC4_set_key
+.type RC4_set_key,\@function
.align 64
-private_RC4_set_key:
- stm${g} %r6,%r8,6*$SIZE_T($sp)
+RC4_set_key:
+ stmg %r6,%r8,48($sp)
lhi $cnt,256
la $idx,0(%r0)
sth $idx,0($key)
@@ -208,9 +180,9 @@
la $iinp,0(%r0)
j .L2ndloop
.Ldone:
- lm${g} %r6,%r8,6*$SIZE_T($sp)
+ lmg %r6,%r8,48($sp)
br $rp
-.size private_RC4_set_key,.-private_RC4_set_key
+.size RC4_set_key,.-RC4_set_key
___
}
@@ -231,4 +203,3 @@
___
print $code;
-close STDOUT; # force flush
diff --git a/jni/openssl/crypto/rc4/asm/rc4-x86_64.pl b/jni/openssl/crypto/rc4/asm/rc4-x86_64.pl
index d6eac20..677be5f 100755
--- a/jni/openssl/crypto/rc4/asm/rc4-x86_64.pl
+++ b/jni/openssl/crypto/rc4/asm/rc4-x86_64.pl
@@ -7,8 +7,6 @@
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
-# July 2004
-#
# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in
# "hand-coded assembler"] doesn't stand for the whole improvement
# coefficient. It turned out that eliminating RC4_CHAR from config
@@ -21,8 +19,6 @@
# to operate on partial registers, it turned out to be the best bet.
# At least for AMD... How IA32E would perform remains to be seen...
-# November 2004
-#
# As was shown by Marc Bevand reordering of couple of load operations
# results in even higher performance gain of 3.3x:-) At least on
# Opteron... For reference, 1x in this case is RC4_CHAR C-code
@@ -30,8 +26,6 @@
# Latter means that if you want to *estimate* what to expect from
# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz.
-# November 2004
-#
# Intel P4 EM64T core was found to run the AMD64 code really slow...
# The only way to achieve comparable performance on P4 was to keep
# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
@@ -39,14 +33,10 @@
# on either AMD and Intel platforms, I implement both cases. See
# rc4_skey.c for further details...
-# April 2005
-#
# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing
# those with add/sub results in 50% performance improvement of folded
# loop...
-# May 2005
-#
# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
# performance by >30% [unlike P4 32-bit case that is]. But this is
# provided that loads are reordered even more aggressively! Both code
@@ -60,8 +50,6 @@
# is not implemented, then this final RC4_CHAR code-path should be
# preferred, as it provides better *all-round* performance].
-# March 2007
-#
# Intel Core2 was observed to perform poorly on both code paths:-( It
# apparently suffers from some kind of partial register stall, which
# occurs in 64-bit mode only [as virtually identical 32-bit loop was
@@ -70,37 +58,6 @@
# fit for Core2 and therefore the code was modified to skip cloop8 on
# this CPU.
-# May 2010
-#
-# Intel Westmere was observed to perform suboptimally. Adding yet
-# another movzb to cloop1 improved performance by almost 50%! Core2
-# performance is improved too, but nominally...
-
-# May 2011
-#
-# The only code path that was not modified is P4-specific one. Non-P4
-# Intel code path optimization is heavily based on submission by Maxim
-# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used
-# some of the ideas even in attempt to optmize the original RC4_INT
-# code path... Current performance in cycles per processed byte (less
-# is better) and improvement coefficients relative to previous
-# version of this module are:
-#
-# Opteron 5.3/+0%(*)
-# P4 6.5
-# Core2 6.2/+15%(**)
-# Westmere 4.2/+60%
-# Sandy Bridge 4.2/+120%
-# Atom 9.3/+80%
-#
-# (*) But corresponding loop has less instructions, which should have
-# positive effect on upcoming Bulldozer, which has one less ALU.
-# For reference, Intel code runs at 6.8 cpb rate on Opteron.
-# (**) Note that Core2 result is ~15% lower than corresponding result
-# for 32-bit code, meaning that it's possible to improve it,
-# but more than likely at the cost of the others (see rc4-586.pl
-# to get the idea)...
-
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -119,10 +76,13 @@
$inp="%rdx"; # arg3
$out="%rcx"; # arg4
-{
+@XX=("%r8","%r10");
+@TX=("%r9","%r11");
+$YY="%r12";
+$TY="%r13";
+
$code=<<___;
.text
-.extern OPENSSL_ia32cap_P
.globl RC4
.type RC4,\@function,4
@@ -135,173 +95,48 @@
push %r12
push %r13
.Lprologue:
- mov $len,%r11
- mov $inp,%r12
- mov $out,%r13
-___
-my $len="%r11"; # reassign input arguments
-my $inp="%r12";
-my $out="%r13";
-my @XX=("%r10","%rsi");
-my @TX=("%rax","%rbx");
-my $YY="%rcx";
-my $TY="%rdx";
-
-$code.=<<___;
- xor $XX[0],$XX[0]
- xor $YY,$YY
-
- lea 8($dat),$dat
- mov -8($dat),$XX[0]#b
- mov -4($dat),$YY#b
+ add \$8,$dat
+ movl -8($dat),$XX[0]#d
+ movl -4($dat),$YY#d
cmpl \$-1,256($dat)
je .LRC4_CHAR
- mov OPENSSL_ia32cap_P(%rip),%r8d
- xor $TX[1],$TX[1]
inc $XX[0]#b
- sub $XX[0],$TX[1]
- sub $inp,$out
movl ($dat,$XX[0],4),$TX[0]#d
- test \$-16,$len
+ test \$-8,$len
jz .Lloop1
- bt \$30,%r8d # Intel CPU?
- jc .Lintel
- and \$7,$TX[1]
- lea 1($XX[0]),$XX[1]
- jz .Loop8
- sub $TX[1],$len
-.Loop8_warmup:
- add $TX[0]#b,$YY#b
- movl ($dat,$YY,4),$TY#d
- movl $TX[0]#d,($dat,$YY,4)
- movl $TY#d,($dat,$XX[0],4)
- add $TY#b,$TX[0]#b
- inc $XX[0]#b
- movl ($dat,$TX[0],4),$TY#d
- movl ($dat,$XX[0],4),$TX[0]#d
- xorb ($inp),$TY#b
- movb $TY#b,($out,$inp)
- lea 1($inp),$inp
- dec $TX[1]
- jnz .Loop8_warmup
-
- lea 1($XX[0]),$XX[1]
- jmp .Loop8
+ jmp .Lloop8
.align 16
-.Loop8:
+.Lloop8:
___
for ($i=0;$i<8;$i++) {
-$code.=<<___ if ($i==7);
- add \$8,$XX[1]#b
-___
$code.=<<___;
add $TX[0]#b,$YY#b
+ mov $XX[0],$XX[1]
movl ($dat,$YY,4),$TY#d
+ ror \$8,%rax # ror is redundant when $i=0
+ inc $XX[1]#b
+ movl ($dat,$XX[1],4),$TX[1]#d
+ cmp $XX[1],$YY
movl $TX[0]#d,($dat,$YY,4)
- movl `4*($i==7?-1:$i)`($dat,$XX[1],4),$TX[1]#d
- ror \$8,%r8 # ror is redundant when $i=0
- movl $TY#d,4*$i($dat,$XX[0],4)
+ cmove $TX[0],$TX[1]
+ movl $TY#d,($dat,$XX[0],4)
add $TX[0]#b,$TY#b
- movb ($dat,$TY,4),%r8b
+ movb ($dat,$TY,4),%al
___
-push(@TX,shift(@TX)); #push(@XX,shift(@XX)); # "rotate" registers
+push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
}
$code.=<<___;
- add \$8,$XX[0]#b
- ror \$8,%r8
+ ror \$8,%rax
sub \$8,$len
- xor ($inp),%r8
- mov %r8,($out,$inp)
- lea 8($inp),$inp
+ xor ($inp),%rax
+ add \$8,$inp
+ mov %rax,($out)
+ add \$8,$out
test \$-8,$len
- jnz .Loop8
- cmp \$0,$len
- jne .Lloop1
- jmp .Lexit
-
-.align 16
-.Lintel:
- test \$-32,$len
- jz .Lloop1
- and \$15,$TX[1]
- jz .Loop16_is_hot
- sub $TX[1],$len
-.Loop16_warmup:
- add $TX[0]#b,$YY#b
- movl ($dat,$YY,4),$TY#d
- movl $TX[0]#d,($dat,$YY,4)
- movl $TY#d,($dat,$XX[0],4)
- add $TY#b,$TX[0]#b
- inc $XX[0]#b
- movl ($dat,$TX[0],4),$TY#d
- movl ($dat,$XX[0],4),$TX[0]#d
- xorb ($inp),$TY#b
- movb $TY#b,($out,$inp)
- lea 1($inp),$inp
- dec $TX[1]
- jnz .Loop16_warmup
-
- mov $YY,$TX[1]
- xor $YY,$YY
- mov $TX[1]#b,$YY#b
-
-.Loop16_is_hot:
- lea ($dat,$XX[0],4),$XX[1]
-___
-sub RC4_loop {
- my $i=shift;
- my $j=$i<0?0:$i;
- my $xmm="%xmm".($j&1);
-
- $code.=" add \$16,$XX[0]#b\n" if ($i==15);
- $code.=" movdqu ($inp),%xmm2\n" if ($i==15);
- $code.=" add $TX[0]#b,$YY#b\n" if ($i<=0);
- $code.=" movl ($dat,$YY,4),$TY#d\n";
- $code.=" pxor %xmm0,%xmm2\n" if ($i==0);
- $code.=" psllq \$8,%xmm1\n" if ($i==0);
- $code.=" pxor $xmm,$xmm\n" if ($i<=1);
- $code.=" movl $TX[0]#d,($dat,$YY,4)\n";
- $code.=" add $TY#b,$TX[0]#b\n";
- $code.=" movl `4*($j+1)`($XX[1]),$TX[1]#d\n" if ($i<15);
- $code.=" movz $TX[0]#b,$TX[0]#d\n";
- $code.=" movl $TY#d,4*$j($XX[1])\n";
- $code.=" pxor %xmm1,%xmm2\n" if ($i==0);
- $code.=" lea ($dat,$XX[0],4),$XX[1]\n" if ($i==15);
- $code.=" add $TX[1]#b,$YY#b\n" if ($i<15);
- $code.=" pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n";
- $code.=" movdqu %xmm2,($out,$inp)\n" if ($i==0);
- $code.=" lea 16($inp),$inp\n" if ($i==0);
- $code.=" movl ($XX[1]),$TX[1]#d\n" if ($i==15);
-}
- RC4_loop(-1);
-$code.=<<___;
- jmp .Loop16_enter
-.align 16
-.Loop16:
-___
-
-for ($i=0;$i<16;$i++) {
- $code.=".Loop16_enter:\n" if ($i==1);
- RC4_loop($i);
- push(@TX,shift(@TX)); # "rotate" registers
-}
-$code.=<<___;
- mov $YY,$TX[1]
- xor $YY,$YY # keyword to partial register
- sub \$16,$len
- mov $TX[1]#b,$YY#b
- test \$-16,$len
- jnz .Loop16
-
- psllq \$8,%xmm1
- pxor %xmm0,%xmm2
- pxor %xmm1,%xmm2
- movdqu %xmm2,($out,$inp)
- lea 16($inp),$inp
-
+ jnz .Lloop8
cmp \$0,$len
jne .Lloop1
jmp .Lexit
@@ -317,8 +152,9 @@
movl ($dat,$TX[0],4),$TY#d
movl ($dat,$XX[0],4),$TX[0]#d
xorb ($inp),$TY#b
- movb $TY#b,($out,$inp)
- lea 1($inp),$inp
+ inc $inp
+ movb $TY#b,($out)
+ inc $out
dec $len
jnz .Lloop1
jmp .Lexit
@@ -329,11 +165,13 @@
movzb ($dat,$XX[0]),$TX[0]#d
test \$-8,$len
jz .Lcloop1
+ cmpl \$0,260($dat)
+ jnz .Lcloop1
jmp .Lcloop8
.align 16
.Lcloop8:
- mov ($inp),%r8d
- mov 4($inp),%r9d
+ mov ($inp),%eax
+ mov 4($inp),%ebx
___
# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
for ($i=0;$i<4;$i++) {
@@ -350,8 +188,8 @@
mov $TX[0],$TX[1]
.Lcmov$i:
add $TX[0]#b,$TY#b
- xor ($dat,$TY),%r8b
- ror \$8,%r8d
+ xor ($dat,$TY),%al
+ ror \$8,%eax
___
push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
}
@@ -369,16 +207,16 @@
mov $TX[0],$TX[1]
.Lcmov$i:
add $TX[0]#b,$TY#b
- xor ($dat,$TY),%r9b
- ror \$8,%r9d
+ xor ($dat,$TY),%bl
+ ror \$8,%ebx
___
push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
}
$code.=<<___;
lea -8($len),$len
- mov %r8d,($out)
+ mov %eax,($out)
lea 8($inp),$inp
- mov %r9d,4($out)
+ mov %ebx,4($out)
lea 8($out),$out
test \$-8,$len
@@ -391,7 +229,6 @@
.align 16
.Lcloop1:
add $TX[0]#b,$YY#b
- movzb $YY#b,$YY#d
movzb ($dat,$YY),$TY#d
movb $TX[0]#b,($dat,$YY)
movb $TY#b,($dat,$XX[0])
@@ -423,16 +260,16 @@
ret
.size RC4,.-RC4
___
-}
$idx="%r8";
$ido="%r9";
$code.=<<___;
-.globl private_RC4_set_key
-.type private_RC4_set_key,\@function,3
+.extern OPENSSL_ia32cap_P
+.globl RC4_set_key
+.type RC4_set_key,\@function,3
.align 16
-private_RC4_set_key:
+RC4_set_key:
lea 8($dat),$dat
lea ($inp,$len),$inp
neg $len
@@ -443,9 +280,12 @@
xor %r11,%r11
mov OPENSSL_ia32cap_P(%rip),$idx#d
- bt \$20,$idx#d # RC4_CHAR?
- jc .Lc1stloop
- jmp .Lw1stloop
+ bt \$20,$idx#d
+ jnc .Lw1stloop
+ bt \$30,$idx#d
+ setc $ido#b
+ mov $ido#d,260($dat)
+ jmp .Lc1stloop
.align 16
.Lw1stloop:
@@ -499,7 +339,7 @@
mov %eax,-8($dat)
mov %eax,-4($dat)
ret
-.size private_RC4_set_key,.-private_RC4_set_key
+.size RC4_set_key,.-RC4_set_key
.globl RC4_options
.type RC4_options,\@abi-omnipotent
@@ -508,20 +348,18 @@
lea .Lopts(%rip),%rax
mov OPENSSL_ia32cap_P(%rip),%edx
bt \$20,%edx
- jc .L8xchar
+ jnc .Ldone
+ add \$12,%rax
bt \$30,%edx
jnc .Ldone
- add \$25,%rax
- ret
-.L8xchar:
- add \$12,%rax
+ add \$13,%rax
.Ldone:
ret
.align 64
.Lopts:
.asciz "rc4(8x,int)"
.asciz "rc4(8x,char)"
-.asciz "rc4(16x,int)"
+.asciz "rc4(1x,char)"
.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
.size RC4_options,.-RC4_options
@@ -644,32 +482,22 @@
.rva .LSEH_end_RC4
.rva .LSEH_info_RC4
- .rva .LSEH_begin_private_RC4_set_key
- .rva .LSEH_end_private_RC4_set_key
- .rva .LSEH_info_private_RC4_set_key
+ .rva .LSEH_begin_RC4_set_key
+ .rva .LSEH_end_RC4_set_key
+ .rva .LSEH_info_RC4_set_key
.section .xdata
.align 8
.LSEH_info_RC4:
.byte 9,0,0,0
.rva stream_se_handler
-.LSEH_info_private_RC4_set_key:
+.LSEH_info_RC4_set_key:
.byte 9,0,0,0
.rva key_se_handler
___
}
-sub reg_part {
-my ($reg,$conv)=@_;
- if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
- elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
- elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
- elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
- return $reg;
-}
-
-$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+$code =~ s/#([bwd])/$1/gm;
print $code;
diff --git a/jni/openssl/crypto/rc4/rc4.h b/jni/openssl/crypto/rc4/rc4.h
index 88ceb46..29d1acc 100644
--- a/jni/openssl/crypto/rc4/rc4.h
+++ b/jni/openssl/crypto/rc4/rc4.h
@@ -79,7 +79,6 @@
const char *RC4_options(void);
void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
-void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data);
void RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
unsigned char *outdata);
diff --git a/jni/openssl/crypto/rc4/rc4_skey.c b/jni/openssl/crypto/rc4/rc4_skey.c
index fda2763..b22c40b 100644
--- a/jni/openssl/crypto/rc4/rc4_skey.c
+++ b/jni/openssl/crypto/rc4/rc4_skey.c
@@ -85,7 +85,7 @@
* Date: Wed, 14 Sep 1994 06:35:31 GMT
*/
-void private_RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
+void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
{
register RC4_INT tmp;
register int id1,id2;
@@ -104,6 +104,40 @@
d[(n)]=d[id2]; \
d[id2]=tmp; }
+#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ defined(__INTEL__) || \
+ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64)
+ if (sizeof(RC4_INT) > 1) {
+ /*
+ * Unlike all other x86 [and x86_64] implementations,
+ * Intel P4 core [including EM64T] was found to perform
+ * poorly with wider RC4_INT. Performance improvement
+ * for IA-32 hand-coded assembler turned out to be 2.8x
+ * if re-coded for RC4_CHAR! It's however inappropriate
+ * to just switch to RC4_CHAR for x86[_64], as non-P4
+ * implementations suffer from significant performance
+ * losses then, e.g. PIII exhibits >2x deterioration,
+ * and so does Opteron. In order to assure optimal
+ * all-round performance, let us [try to] detect P4 at
+ * run-time by checking upon HTT bit in CPU capability
+ * vector and set up compressed key schedule, which is
+ * recognized by correspondingly updated assembler
+ * module...
+ * <appro@fy.chalmers.se>
+ */
+ if (OPENSSL_ia32cap_P & (1<<28)) {
+ unsigned char *cp=(unsigned char *)d;
+
+ for (i=0;i<256;i++) cp[i]=i;
+ for (i=0;i<256;i++) SK_LOOP(cp,i);
+ /* mark schedule as compressed! */
+ d[256/sizeof(RC4_INT)]=-1;
+ return;
+ }
+ }
+# endif
+#endif
for (i=0; i < 256; i++) d[i]=i;
for (i=0; i < 256; i+=4)
{
diff --git a/jni/openssl/crypto/rc4/rc4_utl.c b/jni/openssl/crypto/rc4/rc4_utl.c
deleted file mode 100644
index ab3f02f..0000000
--- a/jni/openssl/crypto/rc4/rc4_utl.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* crypto/rc4/rc4_utl.c -*- mode:C; c-file-style: "eay" -*- */
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- */
-
-#include <openssl/opensslv.h>
-#include <openssl/crypto.h>
-#include <openssl/rc4.h>
-
-void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
- {
-#ifdef OPENSSL_FIPS
- fips_cipher_abort(RC4);
-#endif
- private_RC4_set_key(key, len, data);
- }
diff --git a/jni/openssl/crypto/rc4/rc4test.c b/jni/openssl/crypto/rc4/rc4test.c
index 4312605..633a79e 100644
--- a/jni/openssl/crypto/rc4/rc4test.c
+++ b/jni/openssl/crypto/rc4/rc4test.c
@@ -120,12 +120,6 @@
RC4_KEY key;
unsigned char obuf[512];
-#if !defined(OPENSSL_PIC)
- void OPENSSL_cpuid_setup(void);
-
- OPENSSL_cpuid_setup();
-#endif
-
for (i=0; i<6; i++)
{
RC4_set_key(&key,keys[i][0],&(keys[i][1]));