summaryrefslogtreecommitdiff
path: root/drivers/builtin_openssl2/crypto/aes/asm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/builtin_openssl2/crypto/aes/asm')
-rwxr-xr-xdrivers/builtin_openssl2/crypto/aes/asm/aes-586.pl6
-rw-r--r--drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl2
-rw-r--r--drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl2
-rw-r--r--drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl52
4 files changed, 32 insertions, 30 deletions
diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl
index 687ed811be..51b500ddef 100755
--- a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl
+++ b/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl
@@ -45,7 +45,7 @@
# the undertaken effort was that it appeared that in tight IA-32
# register window little-endian flavor could achieve slightly higher
# Instruction Level Parallelism, and it indeed resulted in up to 15%
-# better performance on most recent µ-archs...
+# better performance on most recent µ-archs...
#
# Third version adds AES_cbc_encrypt implementation, which resulted in
# up to 40% performance imrovement of CBC benchmark results. 40% was
@@ -223,7 +223,7 @@ sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } }
$speed_limit=512; # chunks smaller than $speed_limit are
# processed with compact routine in CBC mode
$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
- # recent µ-archs], but ~5 times smaller!
+ # recent µ-archs], but ~5 times smaller!
# I favor compact code to minimize cache
# contention and in hope to "collect" 5% back
# in real-life applications...
@@ -562,7 +562,7 @@ sub enctransform()
# Performance is not actually extraordinary in comparison to pure
# x86 code. In particular encrypt performance is virtually the same.
# Decrypt performance on the other hand is 15-20% better on newer
-# µ-archs [but we're thankful for *any* improvement here], and ~50%
+# µ-archs [but we're thankful for *any* improvement here], and ~50%
# better on PIII:-) And additionally on the pros side this code
# eliminates redundant references to stack and thus relieves/
# minimizes the pressure on the memory bus.
diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl
index e52395421b..537c8d3172 100644
--- a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl
+++ b/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl
@@ -70,7 +70,7 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
#
######################################################################
-$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
+$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
open STDOUT,">$output";
diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl
index 3dc345b585..8c1d0b5bed 100644
--- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl
+++ b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl
@@ -74,7 +74,7 @@ $inout3="xmm5"; $in1="xmm5";
$inout4="xmm6"; $in0="xmm6";
$inout5="xmm7"; $ivec="xmm7";
-# AESNI extenstion
+# AESNI extension
sub aeskeygenassist
{ my($dst,$src,$imm)=@_;
if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl
index 0dbb194b8d..c9270dfddc 100644
--- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl
+++ b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl
@@ -525,6 +525,16 @@ $code.=<<___;
.type aesni_ecb_encrypt,\@function,5
.align 16
aesni_ecb_encrypt:
+___
+$code.=<<___ if ($win64);
+ lea -0x58(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,0x10(%rsp)
+ movaps %xmm8,0x20(%rsp)
+ movaps %xmm9,0x30(%rsp)
+.Lecb_enc_body:
+___
+$code.=<<___;
and \$-16,$len
jz .Lecb_ret
@@ -805,6 +815,16 @@ $code.=<<___;
movups $inout5,0x50($out)
.Lecb_ret:
+___
+$code.=<<___ if ($win64);
+ movaps (%rsp),%xmm6
+ movaps 0x10(%rsp),%xmm7
+ movaps 0x20(%rsp),%xmm8
+ movaps 0x30(%rsp),%xmm9
+ lea 0x58(%rsp),%rsp
+.Lecb_enc_ret:
+___
+$code.=<<___;
ret
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
___
@@ -2730,28 +2750,9 @@ $code.=<<___;
.extern __imp_RtlVirtualUnwind
___
$code.=<<___ if ($PREFIX eq "aesni");
-.type ecb_se_handler,\@abi-omnipotent
-.align 16
-ecb_se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 152($context),%rax # pull context->Rsp
-
- jmp .Lcommon_seh_tail
-.size ecb_se_handler,.-ecb_se_handler
-
-.type ccm64_se_handler,\@abi-omnipotent
+.type ecb_ccm64_se_handler,\@abi-omnipotent
.align 16
-ccm64_se_handler:
+ecb_ccm64_se_handler:
push %rsi
push %rdi
push %rbx
@@ -2788,7 +2789,7 @@ ccm64_se_handler:
lea 0x58(%rax),%rax # adjust stack pointer
jmp .Lcommon_seh_tail
-.size ccm64_se_handler,.-ccm64_se_handler
+.size ecb_ccm64_se_handler,.-ecb_ccm64_se_handler
.type ctr32_se_handler,\@abi-omnipotent
.align 16
@@ -2993,14 +2994,15 @@ ___
$code.=<<___ if ($PREFIX eq "aesni");
.LSEH_info_ecb:
.byte 9,0,0,0
- .rva ecb_se_handler
+ .rva ecb_ccm64_se_handler
+ .rva .Lecb_enc_body,.Lecb_enc_ret # HandlerData[]
.LSEH_info_ccm64_enc:
.byte 9,0,0,0
- .rva ccm64_se_handler
+ .rva ecb_ccm64_se_handler
.rva .Lccm64_enc_body,.Lccm64_enc_ret # HandlerData[]
.LSEH_info_ccm64_dec:
.byte 9,0,0,0
- .rva ccm64_se_handler
+ .rva ecb_ccm64_se_handler
.rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[]
.LSEH_info_ctr32:
.byte 9,0,0,0