diff options
Diffstat (limited to 'drivers/builtin_openssl2/crypto/aes/asm')
4 files changed, 32 insertions, 30 deletions
diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl index 687ed811be..51b500ddef 100755 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl +++ b/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl @@ -45,7 +45,7 @@ # the undertaken effort was that it appeared that in tight IA-32 # register window little-endian flavor could achieve slightly higher # Instruction Level Parallelism, and it indeed resulted in up to 15% -# better performance on most recent µ-archs... +# better performance on most recent µ-archs... # # Third version adds AES_cbc_encrypt implementation, which resulted in # up to 40% performance imrovement of CBC benchmark results. 40% was @@ -223,7 +223,7 @@ sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } } $speed_limit=512; # chunks smaller than $speed_limit are # processed with compact routine in CBC mode $small_footprint=1; # $small_footprint=1 code is ~5% slower [on - # recent µ-archs], but ~5 times smaller! + # recent µ-archs], but ~5 times smaller! # I favor compact code to minimize cache # contention and in hope to "collect" 5% back # in real-life applications... @@ -562,7 +562,7 @@ sub enctransform() # Performance is not actually extraordinary in comparison to pure # x86 code. In particular encrypt performance is virtually the same. # Decrypt performance on the other hand is 15-20% better on newer -# µ-archs [but we're thankful for *any* improvement here], and ~50% +# µ-archs [but we're thankful for *any* improvement here], and ~50% # better on PIII:-) And additionally on the pros side this code # eliminates redundant references to stack and thus relieves/ # minimizes the pressure on the memory bus. diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl index e52395421b..537c8d3172 100644 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl +++ b/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl @@ -70,7 +70,7 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl index 3dc345b585..8c1d0b5bed 100644 --- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl +++ b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl @@ -74,7 +74,7 @@ $inout3="xmm5"; $in1="xmm5"; $inout4="xmm6"; $in0="xmm6"; $inout5="xmm7"; $ivec="xmm7"; -# AESNI extenstion +# AESNI extension sub aeskeygenassist { my($dst,$src,$imm)=@_; if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl index 0dbb194b8d..c9270dfddc 100644 --- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl +++ b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl @@ -525,6 +525,16 @@ $code.=<<___; .type aesni_ecb_encrypt,\@function,5 .align 16 aesni_ecb_encrypt: +___ +$code.=<<___ if ($win64); + lea -0x58(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) +.Lecb_enc_body: +___ +$code.=<<___; and \$-16,$len jz .Lecb_ret @@ -805,6 +815,16 @@ $code.=<<___; movups $inout5,0x50($out) .Lecb_ret: +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + lea 0x58(%rsp),%rsp +.Lecb_enc_ret: +___ +$code.=<<___; ret .size aesni_ecb_encrypt,.-aesni_ecb_encrypt ___ @@ -2730,28 +2750,9 @@ $code.=<<___; .extern __imp_RtlVirtualUnwind ___ $code.=<<___ if ($PREFIX eq "aesni"); -.type ecb_se_handler,\@abi-omnipotent -.align 16 -ecb_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 152($context),%rax # pull context->Rsp - - jmp .Lcommon_seh_tail -.size ecb_se_handler,.-ecb_se_handler - -.type ccm64_se_handler,\@abi-omnipotent +.type ecb_ccm64_se_handler,\@abi-omnipotent .align 16 -ccm64_se_handler: +ecb_ccm64_se_handler: push %rsi push %rdi push %rbx @@ -2788,7 +2789,7 @@ ccm64_se_handler: lea 0x58(%rax),%rax # adjust stack pointer jmp .Lcommon_seh_tail -.size ccm64_se_handler,.-ccm64_se_handler +.size ecb_ccm64_se_handler,.-ecb_ccm64_se_handler .type ctr32_se_handler,\@abi-omnipotent .align 16 @@ -2993,14 +2994,15 @@ ___ $code.=<<___ if ($PREFIX eq "aesni"); .LSEH_info_ecb: .byte 9,0,0,0 - .rva ecb_se_handler + .rva ecb_ccm64_se_handler + .rva .Lecb_enc_body,.Lecb_enc_ret # HandlerData[] .LSEH_info_ccm64_enc: .byte 9,0,0,0 - .rva ccm64_se_handler + .rva ecb_ccm64_se_handler .rva .Lccm64_enc_body,.Lccm64_enc_ret # HandlerData[] .LSEH_info_ccm64_dec: .byte 9,0,0,0 - .rva ccm64_se_handler + .rva ecb_ccm64_se_handler .rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[] .LSEH_info_ctr32: .byte 9,0,0,0 |