summaryrefslogtreecommitdiff
path: root/drivers/builtin_openssl2/crypto/des/asm
diff options
context:
space:
mode:
authorRémi Verschelde <rverschelde@gmail.com>2016-10-12 23:06:17 +0200
committerRémi Verschelde <rverschelde@gmail.com>2016-10-15 11:50:41 +0200
commit422196759f93df249db38619f136cabd5dcf42cd (patch)
tree1e5846507af0f8f1bc7ca294ccfb0d4ac3392d17 /drivers/builtin_openssl2/crypto/des/asm
parentd9a291f6411f2e571c181da0ac89f550ba73f681 (diff)
openssl: Move to a module and split thirdparty lib
Same rationale as the previous commits.
Diffstat (limited to 'drivers/builtin_openssl2/crypto/des/asm')
-rw-r--r--drivers/builtin_openssl2/crypto/des/asm/des_enc.m42101
-rw-r--r--drivers/builtin_openssl2/crypto/des/asm/readme131
2 files changed, 0 insertions, 2232 deletions
diff --git a/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 b/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4
deleted file mode 100644
index dda08e126d..0000000000
--- a/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4
+++ /dev/null
@@ -1,2101 +0,0 @@
-! des_enc.m4
-! des_enc.S (generated from des_enc.m4)
-!
-! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
-!
-! Version 1.0. 32-bit version.
-!
-! June 8, 2000.
-!
-! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
-! by Andy Polyakov.
-!
-! January 1, 2003.
-!
-! Assembler version: Copyright Svend Olaf Mikkelsen.
-!
-! Original C code: Copyright Eric A. Young.
-!
-! This code can be freely used by LibDES/SSLeay/OpenSSL users.
-!
-! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
-!
-! This version can be redistributed.
-!
-! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
-!
-! Global registers 1 to 5 are used. This is the same as done by the
-! cc compiler. The UltraSPARC load/store little endian feature is used.
-!
-! Instruction grouping often refers to one CPU cycle.
-!
-! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
-!
-! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
-!
-! Performance improvement according to './apps/openssl speed des'
-!
-! 32-bit build:
-! 23% faster than cc-5.2 -xarch=v8plus -xO5
-! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
-! 64-bit build:
-! 50% faster than cc-5.2 -xarch=v9 -xO5
-! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
-!
-
-.ident "des_enc.m4 2.1"
-.file "des_enc-sparc.S"
-
-#include <openssl/opensslconf.h>
-
-#if defined(__SUNPRO_C) && defined(__sparcv9)
-# define ABI64 /* They've said -xarch=v9 at command line */
-#elif defined(__GNUC__) && defined(__arch64__)
-# define ABI64 /* They've said -m64 at command line */
-#endif
-
-#ifdef ABI64
- .register %g2,#scratch
- .register %g3,#scratch
-# define FRAME -192
-# define BIAS 2047
-# define LDPTR ldx
-# define STPTR stx
-# define ARG0 128
-# define ARGSZ 8
-# ifndef OPENSSL_SYSNAME_ULTRASPARC
-# define OPENSSL_SYSNAME_ULTRASPARC
-# endif
-#else
-# define FRAME -96
-# define BIAS 0
-# define LDPTR ld
-# define STPTR st
-# define ARG0 68
-# define ARGSZ 4
-#endif
-
-#define LOOPS 7
-
-#define global0 %g0
-#define global1 %g1
-#define global2 %g2
-#define global3 %g3
-#define global4 %g4
-#define global5 %g5
-
-#define local0 %l0
-#define local1 %l1
-#define local2 %l2
-#define local3 %l3
-#define local4 %l4
-#define local5 %l5
-#define local7 %l6
-#define local6 %l7
-
-#define in0 %i0
-#define in1 %i1
-#define in2 %i2
-#define in3 %i3
-#define in4 %i4
-#define in5 %i5
-#define in6 %i6
-#define in7 %i7
-
-#define out0 %o0
-#define out1 %o1
-#define out2 %o2
-#define out3 %o3
-#define out4 %o4
-#define out5 %o5
-#define out6 %o6
-#define out7 %o7
-
-#define stub stb
-
-changequote({,})
-
-
-! Macro definitions:
-
-
-! {ip_macro}
-!
-! The logic used in initial and final permutations is the same as in
-! the C code. The permutations are done with a clever shift, xor, and
-! technique.
-!
-! The macro also loads address sbox 1 to 5 to global 1 to 5, address
-! sbox 6 to local6, and addres sbox 8 to out3.
-!
-! Rotates the halfs 3 left to bring the sbox bits in convenient positions.
-!
-! Loads key first round from address in parameter 5 to out0, out1.
-!
-! After the the original LibDES initial permutation, the resulting left
-! is in the variable initially used for right and vice versa. The macro
-! implements the possibility to keep the halfs in the original registers.
-!
-! parameter 1 left
-! parameter 2 right
-! parameter 3 result left (modify in first round)
-! parameter 4 result right (use in first round)
-! parameter 5 key address
-! parameter 6 1/2 for include encryption/decryption
-! parameter 7 1 for move in1 to in3
-! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
-! parameter 9 1 for load ks3 and ks2 to in4 and in3
-
-define(ip_macro, {
-
-! {ip_macro}
-! $1 $2 $4 $3 $5 $6 $7 $8 $9
-
- ld [out2+256], local1
- srl $2, 4, local4
-
- xor local4, $1, local4
- ifelse($7,1,{mov in1, in3},{nop})
-
- ld [out2+260], local2
- and local4, local1, local4
- ifelse($8,1,{mov in3, in4},{})
- ifelse($8,2,{mov in4, in3},{})
-
- ld [out2+280], out4 ! loop counter
- sll local4, 4, local1
- xor $1, local4, $1
-
- ld [out2+264], local3
- srl $1, 16, local4
- xor $2, local1, $2
-
- ifelse($9,1,{LDPTR KS3, in4},{})
- xor local4, $2, local4
- nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
-
- ifelse($9,1,{LDPTR KS2, in3},{})
- and local4, local2, local4
- nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
-
- sll local4, 16, local1
- xor $2, local4, $2
-
- srl $2, 2, local4
- xor $1, local1, $1
-
- sethi %hi(16711680), local5
- xor local4, $1, local4
-
- and local4, local3, local4
- or local5, 255, local5
-
- sll local4, 2, local2
- xor $1, local4, $1
-
- srl $1, 8, local4
- xor $2, local2, $2
-
- xor local4, $2, local4
- add global1, 768, global4
-
- and local4, local5, local4
- add global1, 1024, global5
-
- ld [out2+272], local7
- sll local4, 8, local1
- xor $2, local4, $2
-
- srl $2, 1, local4
- xor $1, local1, $1
-
- ld [$5], out0 ! key 7531
- xor local4, $1, local4
- add global1, 256, global2
-
- ld [$5+4], out1 ! key 8642
- and local4, local7, local4
- add global1, 512, global3
-
- sll local4, 1, local1
- xor $1, local4, $1
-
- sll $1, 3, local3
- xor $2, local1, $2
-
- sll $2, 3, local2
- add global1, 1280, local6 ! address sbox 8
-
- srl $1, 29, local4
- add global1, 1792, out3 ! address sbox 8
-
- srl $2, 29, local1
- or local4, local3, $4
-
- or local2, local1, $3
-
- ifelse($6, 1, {
-
- ld [out2+284], local5 ! 0x0000FC00 used in the rounds
- or local2, local1, $3
- xor $4, out0, local1
-
- call .des_enc.1
- and local1, 252, local1
-
- },{})
-
- ifelse($6, 2, {
-
- ld [out2+284], local5 ! 0x0000FC00 used in the rounds
- or local2, local1, $3
- xor $4, out0, local1
-
- call .des_dec.1
- and local1, 252, local1
-
- },{})
-})
-
-
-! {rounds_macro}
-!
-! The logic used in the DES rounds is the same as in the C code,
-! except that calculations for sbox 1 and sbox 5 begin before
-! the previous round is finished.
-!
-! In each round one half (work) is modified based on key and the
-! other half (use).
-!
-! In this version we do two rounds in a loop repeated 7 times
-! and two rounds seperately.
-!
-! One half has the bits for the sboxes in the following positions:
-!
-! 777777xx555555xx333333xx111111xx
-!
-! 88xx666666xx444444xx222222xx8888
-!
-! The bits for each sbox are xor-ed with the key bits for that box.
-! The above xx bits are cleared, and the result used for lookup in
-! the sbox table. Each sbox entry contains the 4 output bits permuted
-! into 32 bits according to the P permutation.
-!
-! In the description of DES, left and right are switched after
-! each round, except after last round. In this code the original
-! left and right are kept in the same register in all rounds, meaning
-! that after the 16 rounds the result for right is in the register
-! originally used for left.
-!
-! parameter 1 first work (left in first round)
-! parameter 2 first use (right in first round)
-! parameter 3 enc/dec 1/-1
-! parameter 4 loop label
-! parameter 5 key address register
-! parameter 6 optional address for key next encryption/decryption
-! parameter 7 not empty for include retl
-!
-! also compares in2 to 8
-
-define(rounds_macro, {
-
-! {rounds_macro}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- xor $2, out0, local1
-
- ld [out2+284], local5 ! 0x0000FC00
- ba $4
- and local1, 252, local1
-
- .align 32
-
-$4:
- ! local6 is address sbox 6
- ! out3 is address sbox 8
- ! out4 is loop counter
-
- ld [global1+local1], local1
- xor $2, out1, out1 ! 8642
- xor $2, out0, out0 ! 7531
- ! fmovs %f0, %f0 ! fxor used for alignment
-
- srl out1, 4, local0 ! rotate 4 right
- and out0, local5, local3 ! 3
- ! fmovs %f0, %f0
-
- ld [$5+$3*8], local7 ! key 7531 next round
- srl local3, 8, local3 ! 3
- and local0, 252, local2 ! 2
- ! fmovs %f0, %f0
-
- ld [global3+local3],local3 ! 3
- sll out1, 28, out1 ! rotate
- xor $1, local1, $1 ! 1 finished, local1 now sbox 7
-
- ld [global2+local2], local2 ! 2
- srl out0, 24, local1 ! 7
- or out1, local0, out1 ! rotate
-
- ldub [out2+local1], local1 ! 7 (and 0xFC)
- srl out1, 24, local0 ! 8
- and out1, local5, local4 ! 4
-
- ldub [out2+local0], local0 ! 8 (and 0xFC)
- srl local4, 8, local4 ! 4
- xor $1, local2, $1 ! 2 finished local2 now sbox 6
-
- ld [global4+local4],local4 ! 4
- srl out1, 16, local2 ! 6
- xor $1, local3, $1 ! 3 finished local3 now sbox 5
-
- ld [out3+local0],local0 ! 8
- and local2, 252, local2 ! 6
- add global1, 1536, local5 ! address sbox 7
-
- ld [local6+local2], local2 ! 6
- srl out0, 16, local3 ! 5
- xor $1, local4, $1 ! 4 finished
-
- ld [local5+local1],local1 ! 7
- and local3, 252, local3 ! 5
- xor $1, local0, $1 ! 8 finished
-
- ld [global5+local3],local3 ! 5
- xor $1, local2, $1 ! 6 finished
- subcc out4, 1, out4
-
- ld [$5+$3*8+4], out0 ! key 8642 next round
- xor $1, local7, local2 ! sbox 5 next round
- xor $1, local1, $1 ! 7 finished
-
- srl local2, 16, local2 ! sbox 5 next round
- xor $1, local3, $1 ! 5 finished
-
- ld [$5+$3*16+4], out1 ! key 8642 next round again
- and local2, 252, local2 ! sbox5 next round
-! next round
- xor $1, local7, local7 ! 7531
-
- ld [global5+local2], local2 ! 5
- srl local7, 24, local3 ! 7
- xor $1, out0, out0 ! 8642
-
- ldub [out2+local3], local3 ! 7 (and 0xFC)
- srl out0, 4, local0 ! rotate 4 right
- and local7, 252, local1 ! 1
-
- sll out0, 28, out0 ! rotate
- xor $2, local2, $2 ! 5 finished local2 used
-
- srl local0, 8, local4 ! 4
- and local0, 252, local2 ! 2
- ld [local5+local3], local3 ! 7
-
- srl local0, 16, local5 ! 6
- or out0, local0, out0 ! rotate
- ld [global2+local2], local2 ! 2
-
- srl out0, 24, local0
- ld [$5+$3*16], out0 ! key 7531 next round
- and local4, 252, local4 ! 4
-
- and local5, 252, local5 ! 6
- ld [global4+local4], local4 ! 4
- xor $2, local3, $2 ! 7 finished local3 used
-
- and local0, 252, local0 ! 8
- ld [local6+local5], local5 ! 6
- xor $2, local2, $2 ! 2 finished local2 now sbox 3
-
- srl local7, 8, local2 ! 3 start
- ld [out3+local0], local0 ! 8
- xor $2, local4, $2 ! 4 finished
-
- and local2, 252, local2 ! 3
- ld [global1+local1], local1 ! 1
- xor $2, local5, $2 ! 6 finished local5 used
-
- ld [global3+local2], local2 ! 3
- xor $2, local0, $2 ! 8 finished
- add $5, $3*16, $5 ! enc add 8, dec add -8 to key pointer
-
- ld [out2+284], local5 ! 0x0000FC00
- xor $2, out0, local4 ! sbox 1 next round
- xor $2, local1, $2 ! 1 finished
-
- xor $2, local2, $2 ! 3 finished
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bne,pt %icc, $4
-#else
- bne $4
-#endif
- and local4, 252, local1 ! sbox 1 next round
-
-! two rounds more:
-
- ld [global1+local1], local1
- xor $2, out1, out1
- xor $2, out0, out0
-
- srl out1, 4, local0 ! rotate
- and out0, local5, local3
-
- ld [$5+$3*8], local7 ! key 7531
- srl local3, 8, local3
- and local0, 252, local2
-
- ld [global3+local3],local3
- sll out1, 28, out1 ! rotate
- xor $1, local1, $1 ! 1 finished, local1 now sbox 7
-
- ld [global2+local2], local2
- srl out0, 24, local1
- or out1, local0, out1 ! rotate
-
- ldub [out2+local1], local1
- srl out1, 24, local0
- and out1, local5, local4
-
- ldub [out2+local0], local0
- srl local4, 8, local4
- xor $1, local2, $1 ! 2 finished local2 now sbox 6
-
- ld [global4+local4],local4
- srl out1, 16, local2
- xor $1, local3, $1 ! 3 finished local3 now sbox 5
-
- ld [out3+local0],local0
- and local2, 252, local2
- add global1, 1536, local5 ! address sbox 7
-
- ld [local6+local2], local2
- srl out0, 16, local3
- xor $1, local4, $1 ! 4 finished
-
- ld [local5+local1],local1
- and local3, 252, local3
- xor $1, local0, $1
-
- ld [global5+local3],local3
- xor $1, local2, $1 ! 6 finished
- cmp in2, 8
-
- ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
- xor $1, local7, local2 ! sbox 5 next round
- xor $1, local1, $1 ! 7 finished
-
- ld [$5+$3*8+4], out0
- srl local2, 16, local2 ! sbox 5 next round
- xor $1, local3, $1 ! 5 finished
-
- and local2, 252, local2
-! next round (two rounds more)
- xor $1, local7, local7 ! 7531
-
- ld [global5+local2], local2
- srl local7, 24, local3
- xor $1, out0, out0 ! 8642
-
- ldub [out2+local3], local3
- srl out0, 4, local0 ! rotate
- and local7, 252, local1
-
- sll out0, 28, out0 ! rotate
- xor $2, local2, $2 ! 5 finished local2 used
-
- srl local0, 8, local4
- and local0, 252, local2
- ld [local5+local3], local3
-
- srl local0, 16, local5
- or out0, local0, out0 ! rotate
- ld [global2+local2], local2
-
- srl out0, 24, local0
- ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
- and local4, 252, local4
-
- and local5, 252, local5
- ld [global4+local4], local4
- xor $2, local3, $2 ! 7 finished local3 used
-
- and local0, 252, local0
- ld [local6+local5], local5
- xor $2, local2, $2 ! 2 finished local2 now sbox 3
-
- srl local7, 8, local2 ! 3 start
- ld [out3+local0], local0
- xor $2, local4, $2
-
- and local2, 252, local2
- ld [global1+local1], local1
- xor $2, local5, $2 ! 6 finished local5 used
-
- ld [global3+local2], local2
- srl $1, 3, local3
- xor $2, local0, $2
-
- ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
- sll $1, 29, local4
- xor $2, local1, $2
-
- ifelse($7,{}, {}, {retl})
- xor $2, local2, $2
-})
-
-
-! {fp_macro}
-!
-! parameter 1 right (original left)
-! parameter 2 left (original right)
-! parameter 3 1 for optional store to [in0]
-! parameter 4 1 for load input/output address to local5/7
-!
-! The final permutation logic switches the halfes, meaning that
-! left and right ends up the the registers originally used.
-
-define(fp_macro, {
-
-! {fp_macro}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- ! initially undo the rotate 3 left done after initial permutation
- ! original left is received shifted 3 right and 29 left in local3/4
-
- sll $2, 29, local1
- or local3, local4, $1
-
- srl $2, 3, $2
- sethi %hi(0x55555555), local2
-
- or $2, local1, $2
- or local2, %lo(0x55555555), local2
-
- srl $2, 1, local3
- sethi %hi(0x00ff00ff), local1
- xor local3, $1, local3
- or local1, %lo(0x00ff00ff), local1
- and local3, local2, local3
- sethi %hi(0x33333333), local4
- sll local3, 1, local2
-
- xor $1, local3, $1
-
- srl $1, 8, local3
- xor $2, local2, $2
- xor local3, $2, local3
- or local4, %lo(0x33333333), local4
- and local3, local1, local3
- sethi %hi(0x0000ffff), local1
- sll local3, 8, local2
-
- xor $2, local3, $2
-
- srl $2, 2, local3
- xor $1, local2, $1
- xor local3, $1, local3
- or local1, %lo(0x0000ffff), local1
- and local3, local4, local3
- sethi %hi(0x0f0f0f0f), local4
- sll local3, 2, local2
-
- ifelse($4,1, {LDPTR INPUT, local5})
- xor $1, local3, $1
-
- ifelse($4,1, {LDPTR OUTPUT, local7})
- srl $1, 16, local3
- xor $2, local2, $2
- xor local3, $2, local3
- or local4, %lo(0x0f0f0f0f), local4
- and local3, local1, local3
- sll local3, 16, local2
-
- xor $2, local3, local1
-
- srl local1, 4, local3
- xor $1, local2, $1
- xor local3, $1, local3
- and local3, local4, local3
- sll local3, 4, local2
-
- xor $1, local3, $1
-
- ! optional store:
-
- ifelse($3,1, {st $1, [in0]})
-
- xor local1, local2, $2
-
- ifelse($3,1, {st $2, [in0+4]})
-
-})
-
-
-! {fp_ip_macro}
-!
-! Does initial permutation for next block mixed with
-! final permutation for current block.
-!
-! parameter 1 original left
-! parameter 2 original right
-! parameter 3 left ip
-! parameter 4 right ip
-! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
-! 2: mov in4 to in3
-!
-! also adds -8 to length in2 and loads loop counter to out4
-
-define(fp_ip_macro, {
-
-! {fp_ip_macro}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- define({temp1},{out4})
- define({temp2},{local3})
-
- define({ip1},{local1})
- define({ip2},{local2})
- define({ip4},{local4})
- define({ip5},{local5})
-
- ! $1 in local3, local4
-
- ld [out2+256], ip1
- sll out5, 29, temp1
- or local3, local4, $1
-
- srl out5, 3, $2
- ifelse($5,2,{mov in4, in3})
-
- ld [out2+272], ip5
- srl $4, 4, local0
- or $2, temp1, $2
-
- srl $2, 1, temp1
- xor temp1, $1, temp1
-
- and temp1, ip5, temp1
- xor local0, $3, local0
-
- sll temp1, 1, temp2
- xor $1, temp1, $1
-
- and local0, ip1, local0
- add in2, -8, in2
-
- sll local0, 4, local7
- xor $3, local0, $3
-
- ld [out2+268], ip4
- srl $1, 8, temp1
- xor $2, temp2, $2
- ld [out2+260], ip2
- srl $3, 16, local0
- xor $4, local7, $4
- xor temp1, $2, temp1
- xor local0, $4, local0
- and temp1, ip4, temp1
- and local0, ip2, local0
- sll temp1, 8, temp2
- xor $2, temp1, $2
- sll local0, 16, local7
- xor $4, local0, $4
-
- srl $2, 2, temp1
- xor $1, temp2, $1
-
- ld [out2+264], temp2 ! ip3
- srl $4, 2, local0
- xor $3, local7, $3
- xor temp1, $1, temp1
- xor local0, $3, local0
- and temp1, temp2, temp1
- and local0, temp2, local0
- sll temp1, 2, temp2
- xor $1, temp1, $1
- sll local0, 2, local7
- xor $3, local0, $3
-
- srl $1, 16, temp1
- xor $2, temp2, $2
- srl $3, 8, local0
- xor $4, local7, $4
- xor temp1, $2, temp1
- xor local0, $4, local0
- and temp1, ip2, temp1
- and local0, ip4, local0
- sll temp1, 16, temp2
- xor $2, temp1, local4
- sll local0, 8, local7
- xor $4, local0, $4
-
- srl $4, 1, local0
- xor $3, local7, $3
-
- srl local4, 4, temp1
- xor local0, $3, local0
-
- xor $1, temp2, $1
- and local0, ip5, local0
-
- sll local0, 1, local7
- xor temp1, $1, temp1
-
- xor $3, local0, $3
- xor $4, local7, $4
-
- sll $3, 3, local5
- and temp1, ip1, temp1
-
- sll temp1, 4, temp2
- xor $1, temp1, $1
-
- ifelse($5,1,{LDPTR KS2, in4})
- sll $4, 3, local2
- xor local4, temp2, $2
-
- ! reload since used as temporar:
-
- ld [out2+280], out4 ! loop counter
-
- srl $3, 29, local0
- ifelse($5,1,{add in4, 120, in4})
-
- ifelse($5,1,{LDPTR KS1, in3})
- srl $4, 29, local7
-
- or local0, local5, $4
- or local2, local7, $3
-
-})
-
-
-
-! {load_little_endian}
-!
-! parameter 1 address
-! parameter 2 destination left
-! parameter 3 destination right
-! parameter 4 temporar
-! parameter 5 label
-
-define(load_little_endian, {
-
-! {load_little_endian}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- ! first in memory to rightmost in register
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- lda [$1] 0x88, $2
- add $1, 4, $4
-
- ba,pt %icc, $5a
- lda [$4] 0x88, $3
-#endif
-
-$5:
- ldub [$1+3], $2
-
- ldub [$1+2], $4
- sll $2, 8, $2
- or $2, $4, $2
-
- ldub [$1+1], $4
- sll $2, 8, $2
- or $2, $4, $2
-
- ldub [$1+0], $4
- sll $2, 8, $2
- or $2, $4, $2
-
-
- ldub [$1+3+4], $3
-
- ldub [$1+2+4], $4
- sll $3, 8, $3
- or $3, $4, $3
-
- ldub [$1+1+4], $4
- sll $3, 8, $3
- or $3, $4, $3
-
- ldub [$1+0+4], $4
- sll $3, 8, $3
- or $3, $4, $3
-$5a:
-
-})
-
-
-! {load_little_endian_inc}
-!
-! parameter 1 address
-! parameter 2 destination left
-! parameter 3 destination right
-! parameter 4 temporar
-! parameter 4 label
-!
-! adds 8 to address
-
-define(load_little_endian_inc, {
-
-! {load_little_endian_inc}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- ! first in memory to rightmost in register
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- lda [$1] 0x88, $2
- add $1, 4, $1
-
- lda [$1] 0x88, $3
- ba,pt %icc, $5a
- add $1, 4, $1
-#endif
-
-$5:
- ldub [$1+3], $2
-
- ldub [$1+2], $4
- sll $2, 8, $2
- or $2, $4, $2
-
- ldub [$1+1], $4
- sll $2, 8, $2
- or $2, $4, $2
-
- ldub [$1+0], $4
- sll $2, 8, $2
- or $2, $4, $2
-
- ldub [$1+3+4], $3
- add $1, 8, $1
-
- ldub [$1+2+4-8], $4
- sll $3, 8, $3
- or $3, $4, $3
-
- ldub [$1+1+4-8], $4
- sll $3, 8, $3
- or $3, $4, $3
-
- ldub [$1+0+4-8], $4
- sll $3, 8, $3
- or $3, $4, $3
-$5a:
-
-})
-
-
-! {load_n_bytes}
-!
-! Loads 1 to 7 bytes little endian
-! Remaining bytes are zeroed.
-!
-! parameter 1 address
-! parameter 2 length
-! parameter 3 destination register left
-! parameter 4 destination register right
-! parameter 5 temp
-! parameter 6 temp2
-! parameter 7 label
-! parameter 8 return label
-
-define(load_n_bytes, {
-
-! {load_n_bytes}
-! $1 $2 $5 $6 $7 $8 $7 $8 $9
-
-$7.0: call .+8
- sll $2, 2, $6
-
- add %o7,$7.jmp.table-$7.0,$5
-
- add $5, $6, $5
- mov 0, $4
-
- ld [$5], $5
-
- jmp %o7+$5
- mov 0, $3
-
-$7.7:
- ldub [$1+6], $5
- sll $5, 16, $5
- or $3, $5, $3
-$7.6:
- ldub [$1+5], $5
- sll $5, 8, $5
- or $3, $5, $3
-$7.5:
- ldub [$1+4], $5
- or $3, $5, $3
-$7.4:
- ldub [$1+3], $5
- sll $5, 24, $5
- or $4, $5, $4
-$7.3:
- ldub [$1+2], $5
- sll $5, 16, $5
- or $4, $5, $4
-$7.2:
- ldub [$1+1], $5
- sll $5, 8, $5
- or $4, $5, $4
-$7.1:
- ldub [$1+0], $5
- ba $8
- or $4, $5, $4
-
- .align 4
-
-$7.jmp.table:
- .word 0
- .word $7.1-$7.0
- .word $7.2-$7.0
- .word $7.3-$7.0
- .word $7.4-$7.0
- .word $7.5-$7.0
- .word $7.6-$7.0
- .word $7.7-$7.0
-})
-
-
-! {store_little_endian}
-!
-! parameter 1 address
-! parameter 2 source left
-! parameter 3 source right
-! parameter 4 temporar
-
-define(store_little_endian, {
-
-! {store_little_endian}
-! $1 $2 $3 $4 $5 $6 $7 $8 $9
-
- ! rightmost in register to first in memory
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- sta $2, [$1] 0x88
- add $1, 4, $4
-
- ba,pt %icc, $5a
- sta $3, [$4] 0x88
-#endif
-
-$5:
- and $2, 255, $4
- stub $4, [$1+0]
-
- srl $2, 8, $4
- and $4, 255, $4
- stub $4, [$1+1]
-
- srl $2, 16, $4
- and $4, 255, $4
- stub $4, [$1+2]
-
- srl $2, 24, $4
- stub $4, [$1+3]
-
-
- and $3, 255, $4
- stub $4, [$1+0+4]
-
- srl $3, 8, $4
- and $4, 255, $4
- stub $4, [$1+1+4]
-
- srl $3, 16, $4
- and $4, 255, $4
- stub $4, [$1+2+4]
-
- srl $3, 24, $4
- stub $4, [$1+3+4]
-
-$5a:
-
-})
-
-
-! {store_n_bytes}
-!
-! Stores 1 to 7 bytes little endian
-!
-! parameter 1 address
-! parameter 2 length
-! parameter 3 source register left
-! parameter 4 source register right
-! parameter 5 temp
-! parameter 6 temp2
-! parameter 7 label
-! parameter 8 return label
-
-define(store_n_bytes, {
-
-! {store_n_bytes}
-! $1 $2 $5 $6 $7 $8 $7 $8 $9
-
-$7.0: call .+8
- sll $2, 2, $6
-
- add %o7,$7.jmp.table-$7.0,$5
-
- add $5, $6, $5
-
- ld [$5], $5
-
- jmp %o7+$5
- nop
-
-$7.7:
- srl $3, 16, $5
- and $5, 0xff, $5
- stub $5, [$1+6]
-$7.6:
- srl $3, 8, $5
- and $5, 0xff, $5
- stub $5, [$1+5]
-$7.5:
- and $3, 0xff, $5
- stub $5, [$1+4]
-$7.4:
- srl $4, 24, $5
- stub $5, [$1+3]
-$7.3:
- srl $4, 16, $5
- and $5, 0xff, $5
- stub $5, [$1+2]
-$7.2:
- srl $4, 8, $5
- and $5, 0xff, $5
- stub $5, [$1+1]
-$7.1:
- and $4, 0xff, $5
-
-
- ba $8
- stub $5, [$1]
-
- .align 4
-
-$7.jmp.table:
-
- .word 0
- .word $7.1-$7.0
- .word $7.2-$7.0
- .word $7.3-$7.0
- .word $7.4-$7.0
- .word $7.5-$7.0
- .word $7.6-$7.0
- .word $7.7-$7.0
-})
-
-
-define(testvalue,{1})
-
-define(register_init, {
-
-! For test purposes:
-
- sethi %hi(testvalue), local0
- or local0, %lo(testvalue), local0
-
- ifelse($1,{},{}, {mov local0, $1})
- ifelse($2,{},{}, {mov local0, $2})
- ifelse($3,{},{}, {mov local0, $3})
- ifelse($4,{},{}, {mov local0, $4})
- ifelse($5,{},{}, {mov local0, $5})
- ifelse($6,{},{}, {mov local0, $6})
- ifelse($7,{},{}, {mov local0, $7})
- ifelse($8,{},{}, {mov local0, $8})
-
- mov local0, local1
- mov local0, local2
- mov local0, local3
- mov local0, local4
- mov local0, local5
- mov local0, local7
- mov local0, local6
- mov local0, out0
- mov local0, out1
- mov local0, out2
- mov local0, out3
- mov local0, out4
- mov local0, out5
- mov local0, global1
- mov local0, global2
- mov local0, global3
- mov local0, global4
- mov local0, global5
-
-})
-
-.section ".text"
-
- .align 32
-
-.des_enc:
-
- ! key address in3
- ! loads key next encryption/decryption first round from [in4]
-
- rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
-
-
- .align 32
-
-.des_dec:
-
- ! implemented with out5 as first parameter to avoid
- ! register exchange in ede modes
-
- ! key address in4
- ! loads key next encryption/decryption first round from [in3]
-
- rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
-
-
-
-! void DES_encrypt1(data, ks, enc)
-! *******************************
-
- .align 32
- .global DES_encrypt1
- .type DES_encrypt1,#function
-
-DES_encrypt1:
-
- save %sp, FRAME, %sp
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- ld [in0], in5 ! left
- cmp in2, 0 ! enc
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- be,pn %icc, .encrypt.dec ! enc/dec
-#else
- be .encrypt.dec
-#endif
- ld [in0+4], out5 ! right
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for move in1 to in3
- ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
-
- ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
-
- rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
-
- fp_macro(in5, out5, 1) ! 1 for store to [in0]
-
- ret
- restore
-
-.encrypt.dec:
-
- add in1, 120, in3 ! use last subkey for first round
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for move in1 to in3
- ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
-
- ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
-
- fp_macro(out5, in5, 1) ! 1 for store to [in0]
-
- ret
- restore
-
-.DES_encrypt1.end:
- .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
-
-
-! void DES_encrypt2(data, ks, enc)
-!*********************************
-
- ! encrypts/decrypts without initial/final permutation
-
- .align 32
- .global DES_encrypt2
- .type DES_encrypt2,#function
-
-DES_encrypt2:
-
- save %sp, FRAME, %sp
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- ! Set sbox address 1 to 6 and rotate halfs 3 left
- ! Errors caught by destest? Yes. Still? *NO*
-
- !sethi %hi(DES_SPtrans), global1 ! address sbox 1
-
- !or global1, %lo(DES_SPtrans), global1 ! sbox 1
-
- add global1, 256, global2 ! sbox 2
- add global1, 512, global3 ! sbox 3
-
- ld [in0], out5 ! right
- add global1, 768, global4 ! sbox 4
- add global1, 1024, global5 ! sbox 5
-
- ld [in0+4], in5 ! left
- add global1, 1280, local6 ! sbox 6
- add global1, 1792, out3 ! sbox 8
-
- ! rotate
-
- sll in5, 3, local5
- mov in1, in3 ! key address to in3
-
- sll out5, 3, local7
- srl in5, 29, in5
-
- srl out5, 29, out5
- add in5, local5, in5
-
- add out5, local7, out5
- cmp in2, 0
-
- ! we use our own stackframe
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- be,pn %icc, .encrypt2.dec ! decryption
-#else
- be .encrypt2.dec
-#endif
- STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
-
- ld [in3], out0 ! key 7531 first round
- mov LOOPS, out4 ! loop counter
-
- ld [in3+4], out1 ! key 8642 first round
- sethi %hi(0x0000FC00), local5
-
- call .des_enc
- mov in3, in4
-
- ! rotate
- sll in5, 29, in0
- srl in5, 3, in5
- sll out5, 29, in1
- add in5, in0, in5
- srl out5, 3, out5
- LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
- add out5, in1, out5
- st in5, [in0]
- st out5, [in0+4]
-
- ret
- restore
-
-
-.encrypt2.dec:
-
- add in3, 120, in4
-
- ld [in4], out0 ! key 7531 first round
- mov LOOPS, out4 ! loop counter
-
- ld [in4+4], out1 ! key 8642 first round
- sethi %hi(0x0000FC00), local5
-
- mov in5, local1 ! left expected in out5
- mov out5, in5
-
- call .des_dec
- mov local1, out5
-
-.encrypt2.finish:
-
- ! rotate
- sll in5, 29, in0
- srl in5, 3, in5
- sll out5, 29, in1
- add in5, in0, in5
- srl out5, 3, out5
- LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
- add out5, in1, out5
- st out5, [in0]
- st in5, [in0+4]
-
- ret
- restore
-
-.DES_encrypt2.end:
- .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
-
-
-! void DES_encrypt3(data, ks1, ks2, ks3)
-! **************************************
-
- .align 32
- .global DES_encrypt3
- .type DES_encrypt3,#function
-
-DES_encrypt3:
-
- save %sp, FRAME, %sp
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- ld [in0], in5 ! left
- add in2, 120, in4 ! ks2
-
- ld [in0+4], out5 ! right
- mov in3, in2 ! save ks3
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for mov in1 to in3
- ! parameter 8 1 for mov in3 to in4
- ! parameter 9 1 for load ks3 and ks2 to in4 and in3
-
- ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
-
- call .des_dec
- mov in2, in3 ! preload ks3
-
- call .des_enc
- nop
-
- fp_macro(in5, out5, 1)
-
- ret
- restore
-
-.DES_encrypt3.end:
- .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
-
-
-! void DES_decrypt3(data, ks1, ks2, ks3)
-! **************************************
-
- .align 32
- .global DES_decrypt3
- .type DES_decrypt3,#function
-
-DES_decrypt3:
-
- save %sp, FRAME, %sp
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- ld [in0], in5 ! left
- add in3, 120, in4 ! ks3
-
- ld [in0+4], out5 ! right
- mov in2, in3 ! ks2
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for mov in1 to in3
- ! parameter 8 1 for mov in3 to in4
- ! parameter 9 1 for load ks3 and ks2 to in4 and in3
-
- ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
-
- call .des_enc
- add in1, 120, in4 ! preload ks1
-
- call .des_dec
- nop
-
- fp_macro(out5, in5, 1)
-
- ret
- restore
-
-.DES_decrypt3.end:
- .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
-
-! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
-! *****************************************************************
-
-
- .align 32
- .global DES_ncbc_encrypt
- .type DES_ncbc_encrypt,#function
-
-DES_ncbc_encrypt:
-
- save %sp, FRAME, %sp
-
- define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
- define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
- define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- cmp in5, 0 ! enc
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- be,pn %icc, .ncbc.dec
-#else
- be .ncbc.dec
-#endif
- STPTR in4, IVEC
-
- ! addr left right temp label
- load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
-
- addcc in2, -8, in2 ! bytes missing when first block done
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ncbc.enc.seven.or.less
-#else
- bl .ncbc.enc.seven.or.less
-#endif
- mov in3, in4 ! schedule
-
-.ncbc.enc.next.block:
-
- load_little_endian(in0, out4, global4, local3, .LLE2) ! block
-
-.ncbc.enc.next.block_1:
-
- xor in5, out4, in5 ! iv xor
- xor out5, global4, out5 ! iv xor
-
- ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
- ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
-
-.ncbc.enc.next.block_2:
-
-!// call .des_enc ! compares in2 to 8
-! rounds inlined for alignment purposes
-
- add global1, 768, global4 ! address sbox 4 since register used below
-
- rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ncbc.enc.next.block_fp
-#else
- bl .ncbc.enc.next.block_fp
-#endif
- add in0, 8, in0 ! input address
-
- ! If 8 or more bytes are to be encrypted after this block,
- ! we combine final permutation for this block with initial
- ! permutation for next block. Load next block:
-
- load_little_endian(in0, global3, global4, local5, .LLE12)
-
- ! parameter 1 original left
- ! parameter 2 original right
- ! parameter 3 left ip
- ! parameter 4 right ip
- ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
- ! 2: mov in4 to in3
- !
- ! also adds -8 to length in2 and loads loop counter to out4
-
- fp_ip_macro(out0, out1, global3, global4, 2)
-
- store_little_endian(in1, out0, out1, local3, .SLE10) ! block
-
- ld [in3], out0 ! key 7531 first round next block
- mov in5, local1
- xor global3, out5, in5 ! iv xor next block
-
- ld [in3+4], out1 ! key 8642
- add global1, 512, global3 ! address sbox 3 since register used
- xor global4, local1, out5 ! iv xor next block
-
- ba .ncbc.enc.next.block_2
- add in1, 8, in1 ! output adress
-
-.ncbc.enc.next.block_fp:
-
- fp_macro(in5, out5)
-
- store_little_endian(in1, in5, out5, local3, .SLE1) ! block
-
- addcc in2, -8, in2 ! bytes missing when next block done
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
-#else
- bpos .ncbc.enc.next.block
-#endif
- add in1, 8, in1
-
-.ncbc.enc.seven.or.less:
-
- cmp in2, -8
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- ble,pt %icc, .ncbc.enc.finish
-#else
- ble .ncbc.enc.finish
-#endif
- nop
-
- add in2, 8, local1 ! bytes to load
-
- ! addr, length, dest left, dest right, temp, temp2, label, ret label
- load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
-
- ! Loads 1 to 7 bytes little endian to global4, out4
-
-
-.ncbc.enc.finish:
-
- LDPTR IVEC, local4
- store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
-
- ret
- restore
-
-
-.ncbc.dec:
-
- STPTR in0, INPUT
- cmp in2, 0 ! length
- add in3, 120, in3
-
- LDPTR IVEC, local7 ! ivec
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- ble,pn %icc, .ncbc.dec.finish
-#else
- ble .ncbc.dec.finish
-#endif
- mov in3, in4 ! schedule
-
- STPTR in1, OUTPUT
- mov in0, local5 ! input
-
- load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
-
-.ncbc.dec.next.block:
-
- load_little_endian(local5, in5, out5, local3, .LLE4) ! block
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for mov in1 to in3
- ! parameter 8 1 for mov in3 to in4
-
- ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4
-
- fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
-
- ! in2 is bytes left to be stored
- ! in2 is compared to 8 in the rounds
-
- xor out5, in0, out4 ! iv xor
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ncbc.dec.seven.or.less
-#else
- bl .ncbc.dec.seven.or.less
-#endif
- xor in5, in1, global4 ! iv xor
-
- ! Load ivec next block now, since input and output address might be the same.
-
- load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
-
- store_little_endian(local7, out4, global4, local3, .SLE3)
-
- STPTR local5, INPUT
- add local7, 8, local7
- addcc in2, -8, in2
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bg,pt %icc, .ncbc.dec.next.block
-#else
- bg .ncbc.dec.next.block
-#endif
- STPTR local7, OUTPUT
-
-
-.ncbc.dec.store.iv:
-
- LDPTR IVEC, local4 ! ivec
- store_little_endian(local4, in0, in1, local5, .SLE4)
-
-.ncbc.dec.finish:
-
- ret
- restore
-
-.ncbc.dec.seven.or.less:
-
- load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
-
- store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
-
-
-.DES_ncbc_encrypt.end:
- .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
-
-
-! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc)
-! **************************************************************************
-
-
- .align 32
- .global DES_ede3_cbc_encrypt
- .type DES_ede3_cbc_encrypt,#function
-
-DES_ede3_cbc_encrypt:
-
- save %sp, FRAME, %sp
-
- define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
- define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
- define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
-
- sethi %hi(.PIC.DES_SPtrans-1f),global1
- or global1,%lo(.PIC.DES_SPtrans-1f),global1
-1: call .+8
- add %o7,global1,global1
- sub global1,.PIC.DES_SPtrans-.des_and,out2
-
- LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
- LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
- cmp local3, 0 ! enc
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- be,pn %icc, .ede3.dec
-#else
- be .ede3.dec
-#endif
- STPTR in4, KS2
-
- STPTR in5, KS3
-
- load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
-
- addcc in2, -8, in2 ! bytes missing after next block
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ede3.enc.seven.or.less
-#else
- bl .ede3.enc.seven.or.less
-#endif
- STPTR in3, KS1
-
-.ede3.enc.next.block:
-
- load_little_endian(in0, out4, global4, local3, .LLE7)
-
-.ede3.enc.next.block_1:
-
- LDPTR KS2, in4
- xor in5, out4, in5 ! iv xor
- xor out5, global4, out5 ! iv xor
-
- LDPTR KS1, in3
- add in4, 120, in4 ! for decryption we use last subkey first
- nop
-
- ip_macro(in5, out5, in5, out5, in3)
-
-.ede3.enc.next.block_2:
-
- call .des_enc ! ks1 in3
- nop
-
- call .des_dec ! ks2 in4
- LDPTR KS3, in3
-
- call .des_enc ! ks3 in3 compares in2 to 8
- nop
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ede3.enc.next.block_fp
-#else
- bl .ede3.enc.next.block_fp
-#endif
- add in0, 8, in0
-
- ! If 8 or more bytes are to be encrypted after this block,
- ! we combine final permutation for this block with initial
- ! permutation for next block. Load next block:
-
- load_little_endian(in0, global3, global4, local5, .LLE11)
-
- ! parameter 1 original left
- ! parameter 2 original right
- ! parameter 3 left ip
- ! parameter 4 right ip
- ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
- ! 2: mov in4 to in3
- !
- ! also adds -8 to length in2 and loads loop counter to out4
-
- fp_ip_macro(out0, out1, global3, global4, 1)
-
- store_little_endian(in1, out0, out1, local3, .SLE9) ! block
-
- mov in5, local1
- xor global3, out5, in5 ! iv xor next block
-
- ld [in3], out0 ! key 7531
- add global1, 512, global3 ! address sbox 3
- xor global4, local1, out5 ! iv xor next block
-
- ld [in3+4], out1 ! key 8642
- add global1, 768, global4 ! address sbox 4
- ba .ede3.enc.next.block_2
- add in1, 8, in1
-
-.ede3.enc.next.block_fp:
-
- fp_macro(in5, out5)
-
- store_little_endian(in1, in5, out5, local3, .SLE5) ! block
-
- addcc in2, -8, in2 ! bytes missing when next block done
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bpos,pt %icc, .ede3.enc.next.block
-#else
- bpos .ede3.enc.next.block
-#endif
- add in1, 8, in1
-
-.ede3.enc.seven.or.less:
-
- cmp in2, -8
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- ble,pt %icc, .ede3.enc.finish
-#else
- ble .ede3.enc.finish
-#endif
- nop
-
- add in2, 8, local1 ! bytes to load
-
- ! addr, length, dest left, dest right, temp, temp2, label, ret label
- load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
-
-.ede3.enc.finish:
-
- LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
- store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
-
- ret
- restore
-
-.ede3.dec:
-
- STPTR in0, INPUT
- add in5, 120, in5
-
- STPTR in1, OUTPUT
- mov in0, local5
- add in3, 120, in3
-
- STPTR in3, KS1
- cmp in2, 0
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- ble %icc, .ede3.dec.finish
-#else
- ble .ede3.dec.finish
-#endif
- STPTR in5, KS3
-
- LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
- load_little_endian(local7, in0, in1, local3, .LLE8)
-
-.ede3.dec.next.block:
-
- load_little_endian(local5, in5, out5, local3, .LLE9)
-
- ! parameter 6 1/2 for include encryption/decryption
- ! parameter 7 1 for mov in1 to in3
- ! parameter 8 1 for mov in3 to in4
- ! parameter 9 1 for load ks3 and ks2 to in4 and in3
-
- ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
-
- call .des_enc ! ks2 in3
- LDPTR KS1, in4
-
- call .des_dec ! ks1 in4
- nop
-
- fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
-
- ! in2 is bytes left to be stored
- ! in2 is compared to 8 in the rounds
-
- xor out5, in0, out4
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bl,pn %icc, .ede3.dec.seven.or.less
-#else
- bl .ede3.dec.seven.or.less
-#endif
- xor in5, in1, global4
-
- load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
-
- store_little_endian(local7, out4, global4, local3, .SLE7) ! block
-
- STPTR local5, INPUT
- addcc in2, -8, in2
- add local7, 8, local7
-
-#ifdef OPENSSL_SYSNAME_ULTRASPARC
- bg,pt %icc, .ede3.dec.next.block
-#else
- bg .ede3.dec.next.block
-#endif
- STPTR local7, OUTPUT
-
-.ede3.dec.store.iv:
-
- LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
- store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
-
-.ede3.dec.finish:
-
- ret
- restore
-
-.ede3.dec.seven.or.less:
-
- load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
-
- store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
-
-
-.DES_ede3_cbc_encrypt.end:
- .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
-
- .align 256
- .type .des_and,#object
- .size .des_and,284
-
-.des_and:
-
-! This table is used for AND 0xFC when it is known that register
-! bits 8-31 are zero. Makes it possible to do three arithmetic
-! operations in one cycle.
-
- .byte 0, 0, 0, 0, 4, 4, 4, 4
- .byte 8, 8, 8, 8, 12, 12, 12, 12
- .byte 16, 16, 16, 16, 20, 20, 20, 20
- .byte 24, 24, 24, 24, 28, 28, 28, 28
- .byte 32, 32, 32, 32, 36, 36, 36, 36
- .byte 40, 40, 40, 40, 44, 44, 44, 44
- .byte 48, 48, 48, 48, 52, 52, 52, 52
- .byte 56, 56, 56, 56, 60, 60, 60, 60
- .byte 64, 64, 64, 64, 68, 68, 68, 68
- .byte 72, 72, 72, 72, 76, 76, 76, 76
- .byte 80, 80, 80, 80, 84, 84, 84, 84
- .byte 88, 88, 88, 88, 92, 92, 92, 92
- .byte 96, 96, 96, 96, 100, 100, 100, 100
- .byte 104, 104, 104, 104, 108, 108, 108, 108
- .byte 112, 112, 112, 112, 116, 116, 116, 116
- .byte 120, 120, 120, 120, 124, 124, 124, 124
- .byte 128, 128, 128, 128, 132, 132, 132, 132
- .byte 136, 136, 136, 136, 140, 140, 140, 140
- .byte 144, 144, 144, 144, 148, 148, 148, 148
- .byte 152, 152, 152, 152, 156, 156, 156, 156
- .byte 160, 160, 160, 160, 164, 164, 164, 164
- .byte 168, 168, 168, 168, 172, 172, 172, 172
- .byte 176, 176, 176, 176, 180, 180, 180, 180
- .byte 184, 184, 184, 184, 188, 188, 188, 188
- .byte 192, 192, 192, 192, 196, 196, 196, 196
- .byte 200, 200, 200, 200, 204, 204, 204, 204
- .byte 208, 208, 208, 208, 212, 212, 212, 212
- .byte 216, 216, 216, 216, 220, 220, 220, 220
- .byte 224, 224, 224, 224, 228, 228, 228, 228
- .byte 232, 232, 232, 232, 236, 236, 236, 236
- .byte 240, 240, 240, 240, 244, 244, 244, 244
- .byte 248, 248, 248, 248, 252, 252, 252, 252
-
- ! 5 numbers for initil/final permutation
-
- .word 0x0f0f0f0f ! offset 256
- .word 0x0000ffff ! 260
- .word 0x33333333 ! 264
- .word 0x00ff00ff ! 268
- .word 0x55555555 ! 272
-
- .word 0 ! 276
- .word LOOPS ! 280
- .word 0x0000FC00 ! 284
-
- .global DES_SPtrans
- .type DES_SPtrans,#object
- .size DES_SPtrans,2048
-.align 64
-DES_SPtrans:
-.PIC.DES_SPtrans:
- ! nibble 0
- .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
- .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
- .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
- .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
- .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
- .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
- .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
- .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
- .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
- .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
- .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
- .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
- .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
- .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
- .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
- .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
- ! nibble 1
- .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
- .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
- .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
- .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
- .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
- .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
- .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
- .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
- .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
- .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
- .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
- .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
- .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
- .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
- .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
- .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
- ! nibble 2
- .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
- .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
- .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
- .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
- .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
- .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
- .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
- .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
- .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
- .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
- .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
- .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
- .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
- .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
- .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
- .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
- ! nibble 3
- .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
- .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
- .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
- .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
- .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
- .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
- .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
- .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
- .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
- .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
- .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
- .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
- .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
- .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
- .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
- .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
- ! nibble 4
- .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
- .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
- .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
- .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
- .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
- .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
- .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
- .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
- .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
- .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
- .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
- .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
- .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
- .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
- .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
- .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
- ! nibble 5
- .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
- .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
- .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
- .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
- .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
- .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
- .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
- .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
- .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
- .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
- .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
- .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
- .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
- .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
- .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
- .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
- ! nibble 6
- .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
- .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
- .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
- .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
- .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
- .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
- .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
- .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
- .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
- .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
- .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
- .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
- .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
- .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
- .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
- .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
- ! nibble 7
- .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
- .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
- .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
- .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
- .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
- .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
- .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
- .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
- .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
- .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
- .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
- .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
- .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
- .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
- .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
- .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
-
diff --git a/drivers/builtin_openssl2/crypto/des/asm/readme b/drivers/builtin_openssl2/crypto/des/asm/readme
deleted file mode 100644
index 1beafe253b..0000000000
--- a/drivers/builtin_openssl2/crypto/des/asm/readme
+++ /dev/null
@@ -1,131 +0,0 @@
-First up, let me say I don't like writing in assembler. It is not portable,
-dependant on the particular CPU architecture release and is generally a pig
-to debug and get right. Having said that, the x86 architecture is probably
-the most important for speed due to number of boxes and since
-it appears to be the worst architecture to to get
-good C compilers for. So due to this, I have lowered myself to do
-assembler for the inner DES routines in libdes :-).
-
-The file to implement in assembler is des_enc.c. Replace the following
-4 functions
-des_encrypt1(DES_LONG data[2],des_key_schedule ks, int encrypt);
-des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt);
-des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-
-They encrypt/decrypt the 64 bits held in 'data' using
-the 'ks' key schedules. The only difference between the 4 functions is that
-des_encrypt2() does not perform IP() or FP() on the data (this is an
-optimization for when doing triple DES and des_encrypt3() and des_decrypt3()
-perform triple des. The triple DES routines are in here because it does
-make a big difference to have them located near the des_encrypt2 function
-at link time..
-
-Now as we all know, there are lots of different operating systems running on
-x86 boxes, and unfortunately they normally try to make sure their assembler
-formating is not the same as the other peoples.
-The 4 main formats I know of are
-Microsoft Windows 95/Windows NT
-Elf Includes Linux and FreeBSD(?).
-a.out The older Linux.
-Solaris Same as Elf but different comments :-(.
-
-Now I was not overly keen to write 4 different copies of the same code,
-so I wrote a few perl routines to output the correct assembler, given
-a target assembler type. This code is ugly and is just a hack.
-The libraries are x86unix.pl and x86ms.pl.
-des586.pl, des686.pl and des-som[23].pl are the programs to actually
-generate the assembler.
-
-So to generate elf assembler
-perl des-som3.pl elf >dx86-elf.s
-For Windows 95/NT
-perl des-som2.pl win32 >win32.asm
-
-[ update 4 Jan 1996 ]
-I have added another way to do things.
-perl des-som3.pl cpp >dx86-cpp.s
-generates a file that will be included by dx86unix.cpp when it is compiled.
-To build for elf, a.out, solaris, bsdi etc,
-cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o
-cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o
-cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o
-cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o
-This was done to cut down the number of files in the distribution.
-
-Now the ugly part. I acquired my copy of Intels
-"Optimization's For Intel's 32-Bit Processors" and found a few interesting
-things. First, the aim of the exersize is to 'extract' one byte at a time
-from a word and do an array lookup. This involves getting the byte from
-the 4 locations in the word and moving it to a new word and doing the lookup.
-The most obvious way to do this is
-xor eax, eax # clear word
-movb al, cl # get low byte
-xor edi DWORD PTR 0x100+des_SP[eax] # xor in word
-movb al, ch # get next byte
-xor edi DWORD PTR 0x300+des_SP[eax] # xor in word
-shr ecx 16
-which seems ok. For the pentium, this system appears to be the best.
-One has to do instruction interleaving to keep both functional units
-operating, but it is basically very efficient.
-
-Now the crunch. When a full register is used after a partial write, eg.
-mov al, cl
-xor edi, DWORD PTR 0x100+des_SP[eax]
-386 - 1 cycle stall
-486 - 1 cycle stall
-586 - 0 cycle stall
-686 - at least 7 cycle stall (page 22 of the above mentioned document).
-
-So the technique that produces the best results on a pentium, according to
-the documentation, will produce hideous results on a pentium pro.
-
-To get around this, des686.pl will generate code that is not as fast on
-a pentium, should be very good on a pentium pro.
-mov eax, ecx # copy word
-shr ecx, 8 # line up next byte
-and eax, 0fch # mask byte
-xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup
-mov eax, ecx # get word
-shr ecx 8 # line up next byte
-and eax, 0fch # mask byte
-xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup
-
-Due to the execution units in the pentium, this actually works quite well.
-For a pentium pro it should be very good. This is the type of output
-Visual C++ generates.
-
-There is a third option. instead of using
-mov al, ch
-which is bad on the pentium pro, one may be able to use
-movzx eax, ch
-which may not incur the partial write penalty. On the pentium,
-this instruction takes 4 cycles so is not worth using but on the
-pentium pro it appears it may be worth while. I need access to one to
-experiment :-).
-
-eric (20 Oct 1996)
-
-22 Nov 1996 - I have asked people to run the 2 different version on pentium
-pros and it appears that the intel documentation is wrong. The
-mov al,bh is still faster on a pentium pro, so just use the des586.pl
-install des686.pl
-
-3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these
-functions into des_enc.c because it does make a massive performance
-difference on some boxes to have the functions code located close to
-the des_encrypt2() function.
-
-9 Jan 1997 - des-som2.pl is now the correct perl script to use for
-pentiums. It contains an inner loop from
-Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at
-273,000 per second. He had a previous version at 250,000 and the best
-I was able to get was 203,000. The content has not changed, this is all
-due to instruction sequencing (and actual instructions choice) which is able
-to keep both functional units of the pentium going.
-We may have lost the ugly register usage restrictions when x86 went 32 bit
-but for the pentium it has been replaced by evil instruction ordering tricks.
-
-13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf.
-raw DES at 281,000 per second on a pentium 100.
-