From e97922f22038e9049ed4c2db5b3736dfaa0edde3 Mon Sep 17 00:00:00 2001 From: mrezai Date: Fri, 15 Apr 2016 19:03:35 +0430 Subject: Update OpenSSL to version 1.0.2g --- drivers/builtin_openssl2/crypto/aes/aes_wrap.c | 186 +- drivers/builtin_openssl2/crypto/aes/aes_x86core.c | 6 +- drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl | 2980 ------ .../builtin_openssl2/crypto/aes/asm/aes-armv4.pl | 1134 --- drivers/builtin_openssl2/crypto/aes/asm/aes-ia64.S | 1123 --- .../builtin_openssl2/crypto/aes/asm/aes-mips.pl | 1611 ---- .../builtin_openssl2/crypto/aes/asm/aes-parisc.pl | 1022 --- drivers/builtin_openssl2/crypto/aes/asm/aes-ppc.pl | 1365 --- .../builtin_openssl2/crypto/aes/asm/aes-s390x.pl | 2237 ----- .../builtin_openssl2/crypto/aes/asm/aes-sparcv9.pl | 1182 --- .../builtin_openssl2/crypto/aes/asm/aes-x86_64.pl | 2819 ------ .../crypto/aes/asm/aesni-sha1-x86_64.pl | 1250 --- .../builtin_openssl2/crypto/aes/asm/aesni-x86.pl | 2189 ----- .../crypto/aes/asm/aesni-x86_64.pl | 3071 ------- .../crypto/aes/asm/bsaes-x86_64.pl | 3108 ------- .../builtin_openssl2/crypto/aes/asm/vpaes-x86.pl | 903 -- .../crypto/aes/asm/vpaes-x86_64.pl | 1207 --- drivers/builtin_openssl2/crypto/alphacpuid.pl | 126 - drivers/builtin_openssl2/crypto/arm_arch.h | 35 +- drivers/builtin_openssl2/crypto/armcap.c | 97 +- drivers/builtin_openssl2/crypto/armv4cpuid.S | 154 - drivers/builtin_openssl2/crypto/asn1/a_gentm.c | 46 +- drivers/builtin_openssl2/crypto/asn1/a_time.c | 30 + drivers/builtin_openssl2/crypto/asn1/a_utctm.c | 92 +- drivers/builtin_openssl2/crypto/asn1/ameth_lib.c | 24 +- drivers/builtin_openssl2/crypto/asn1/asn1_locl.h | 3 + drivers/builtin_openssl2/crypto/asn1/charmap.pl | 83 - drivers/builtin_openssl2/crypto/asn1/t_x509.c | 15 + drivers/builtin_openssl2/crypto/asn1/tasn_dec.c | 14 +- drivers/builtin_openssl2/crypto/asn1/x_crl.c | 4 +- drivers/builtin_openssl2/crypto/asn1/x_x509.c | 20 + drivers/builtin_openssl2/crypto/asn1/x_x509a.c | 7 +- drivers/builtin_openssl2/crypto/bf/asm/bf-586.pl | 137 - drivers/builtin_openssl2/crypto/bf/asm/bf-686.pl | 127 - drivers/builtin_openssl2/crypto/bf/bftest.c | 538 -- drivers/builtin_openssl2/crypto/bio/b_dump.c | 25 + drivers/builtin_openssl2/crypto/bio/b_sock.c | 8 +- drivers/builtin_openssl2/crypto/bio/bio_err.c | 2 +- drivers/builtin_openssl2/crypto/bio/bss_acpt.c | 2 +- drivers/builtin_openssl2/crypto/bio/bss_conn.c | 2 +- drivers/builtin_openssl2/crypto/bio/bss_dgram.c | 84 +- drivers/builtin_openssl2/crypto/bio/bss_fd.c | 22 +- drivers/builtin_openssl2/crypto/bio/bss_mem.c | 6 +- .../builtin_openssl2/crypto/bn/asm/alpha-mont.pl | 321 - .../builtin_openssl2/crypto/bn/asm/armv4-gf2m.pl | 278 - .../builtin_openssl2/crypto/bn/asm/armv4-mont.pl | 204 - drivers/builtin_openssl2/crypto/bn/asm/bn-586.pl | 774 -- drivers/builtin_openssl2/crypto/bn/asm/co-586.pl | 287 - .../builtin_openssl2/crypto/bn/asm/ia64-mont.pl | 851 -- drivers/builtin_openssl2/crypto/bn/asm/ia64.S | 1555 ---- .../builtin_openssl2/crypto/bn/asm/mips-mont.pl | 426 - drivers/builtin_openssl2/crypto/bn/asm/mips.pl | 2234 ----- .../builtin_openssl2/crypto/bn/asm/mips3-mont.pl | 327 - drivers/builtin_openssl2/crypto/bn/asm/mips3.s | 2201 ----- .../crypto/bn/asm/modexp512-x86_64.pl | 1497 --- drivers/builtin_openssl2/crypto/bn/asm/pa-risc2.s | 1618 ---- drivers/builtin_openssl2/crypto/bn/asm/pa-risc2W.s | 1605 ---- .../builtin_openssl2/crypto/bn/asm/parisc-mont.pl | 995 -- drivers/builtin_openssl2/crypto/bn/asm/ppc-mont.pl | 334 - drivers/builtin_openssl2/crypto/bn/asm/ppc.pl | 1998 ---- .../builtin_openssl2/crypto/bn/asm/ppc64-mont.pl | 1088 --- .../builtin_openssl2/crypto/bn/asm/s390x-gf2m.pl | 221 - .../builtin_openssl2/crypto/bn/asm/s390x-mont.pl | 277 - drivers/builtin_openssl2/crypto/bn/asm/s390x.S | 678 -- drivers/builtin_openssl2/crypto/bn/asm/sparcv8.S | 1458 --- .../builtin_openssl2/crypto/bn/asm/sparcv8plus.S | 1558 ---- .../builtin_openssl2/crypto/bn/asm/sparcv9-mont.pl | 606 -- .../crypto/bn/asm/sparcv9a-mont.pl | 882 -- drivers/builtin_openssl2/crypto/bn/asm/via-mont.pl | 242 - drivers/builtin_openssl2/crypto/bn/asm/x86-gf2m.pl | 313 - drivers/builtin_openssl2/crypto/bn/asm/x86-mont.pl | 593 -- drivers/builtin_openssl2/crypto/bn/asm/x86.pl | 28 - drivers/builtin_openssl2/crypto/bn/asm/x86/add.pl | 76 - .../builtin_openssl2/crypto/bn/asm/x86/comba.pl | 277 - drivers/builtin_openssl2/crypto/bn/asm/x86/div.pl | 15 - drivers/builtin_openssl2/crypto/bn/asm/x86/mul.pl | 77 - .../builtin_openssl2/crypto/bn/asm/x86/mul_add.pl | 87 - drivers/builtin_openssl2/crypto/bn/asm/x86/sqr.pl | 60 - drivers/builtin_openssl2/crypto/bn/asm/x86/sub.pl | 76 - .../builtin_openssl2/crypto/bn/asm/x86_64-gcc.c | 110 +- .../builtin_openssl2/crypto/bn/asm/x86_64-gf2m.pl | 390 - .../builtin_openssl2/crypto/bn/asm/x86_64-mont.pl | 1681 ---- .../builtin_openssl2/crypto/bn/asm/x86_64-mont5.pl | 1186 --- drivers/builtin_openssl2/crypto/bn/bn_asm.c | 243 +- drivers/builtin_openssl2/crypto/bn/bn_const.c | 0 drivers/builtin_openssl2/crypto/bn/bn_exp.c | 318 +- drivers/builtin_openssl2/crypto/bn/bn_gf2m.c | 3 +- drivers/builtin_openssl2/crypto/bn/bn_lcl.h | 27 + drivers/builtin_openssl2/crypto/bn/bn_prime.pl | 119 - drivers/builtin_openssl2/crypto/bn/bntest.c | 2137 ----- drivers/builtin_openssl2/crypto/bn/divtest.c | 42 - drivers/builtin_openssl2/crypto/bn/exptest.c | 313 - drivers/builtin_openssl2/crypto/bn/rsaz_exp.c | 346 + drivers/builtin_openssl2/crypto/bn/rsaz_exp.h | 68 + drivers/builtin_openssl2/crypto/buffer/buf_str.c | 11 + .../crypto/camellia/asm/cmll-x86.pl | 1138 --- .../crypto/camellia/asm/cmll-x86_64.pl | 1081 --- .../builtin_openssl2/crypto/cast/asm/cast-586.pl | 177 - drivers/builtin_openssl2/crypto/cast/cast_lcl.h | 2 + drivers/builtin_openssl2/crypto/cast/casttest.c | 241 - drivers/builtin_openssl2/crypto/cmac/cmac.c | 8 + drivers/builtin_openssl2/crypto/cms/cms_asn1.c | 86 +- drivers/builtin_openssl2/crypto/cms/cms_env.c | 320 +- drivers/builtin_openssl2/crypto/cms/cms_err.c | 18 +- drivers/builtin_openssl2/crypto/cms/cms_kari.c | 465 + drivers/builtin_openssl2/crypto/cms/cms_lcl.h | 32 +- drivers/builtin_openssl2/crypto/cms/cms_lib.c | 59 +- drivers/builtin_openssl2/crypto/cms/cms_sd.c | 207 +- drivers/builtin_openssl2/crypto/cms/cms_smime.c | 54 +- drivers/builtin_openssl2/crypto/conf/keysets.pl | 185 - drivers/builtin_openssl2/crypto/conf/test.c | 97 - drivers/builtin_openssl2/crypto/cryptlib.c | 41 +- drivers/builtin_openssl2/crypto/crypto-lib.com | 28 +- drivers/builtin_openssl2/crypto/cversion.c | 4 + .../builtin_openssl2/crypto/des/asm/crypt586.pl | 209 - drivers/builtin_openssl2/crypto/des/asm/des-586.pl | 453 - drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 | 2 + drivers/builtin_openssl2/crypto/des/asm/desboth.pl | 79 - drivers/builtin_openssl2/crypto/des/des.pod | 217 - drivers/builtin_openssl2/crypto/des/des_locl.h | 4 +- drivers/builtin_openssl2/crypto/des/destest.c | 929 -- drivers/builtin_openssl2/crypto/des/read_pwd.c | 2 +- drivers/builtin_openssl2/crypto/dh/dh_ameth.c | 519 +- drivers/builtin_openssl2/crypto/dh/dh_asn1.c | 98 +- drivers/builtin_openssl2/crypto/dh/dh_check.c | 80 +- drivers/builtin_openssl2/crypto/dh/dh_err.c | 8 +- drivers/builtin_openssl2/crypto/dh/dh_kdf.c | 187 + drivers/builtin_openssl2/crypto/dh/dh_key.c | 14 + drivers/builtin_openssl2/crypto/dh/dh_pmeth.c | 322 +- drivers/builtin_openssl2/crypto/dh/dh_rfc5114.c | 285 + drivers/builtin_openssl2/crypto/dh/dhtest.c | 241 - drivers/builtin_openssl2/crypto/dsa/dsa_ameth.c | 8 +- drivers/builtin_openssl2/crypto/dsa/dsa_err.c | 5 +- drivers/builtin_openssl2/crypto/dsa/dsa_gen.c | 369 + drivers/builtin_openssl2/crypto/dsa/dsa_locl.h | 8 + drivers/builtin_openssl2/crypto/dsa/dsa_ossl.c | 6 +- drivers/builtin_openssl2/crypto/dsa/dsa_pmeth.c | 4 + drivers/builtin_openssl2/crypto/dsa/dsatest.c | 268 - drivers/builtin_openssl2/crypto/dso/dso_lib.c | 1 + drivers/builtin_openssl2/crypto/dso/dso_win32.c | 39 +- drivers/builtin_openssl2/crypto/ebcdic.c | 2 +- drivers/builtin_openssl2/crypto/ec/ec2_smpl.c | 1 + drivers/builtin_openssl2/crypto/ec/ec_ameth.c | 340 +- drivers/builtin_openssl2/crypto/ec/ec_curve.c | 643 +- drivers/builtin_openssl2/crypto/ec/ec_cvt.c | 12 + drivers/builtin_openssl2/crypto/ec/ec_err.c | 15 +- drivers/builtin_openssl2/crypto/ec/ec_key.c | 2 + drivers/builtin_openssl2/crypto/ec/ec_lcl.h | 25 + drivers/builtin_openssl2/crypto/ec/ec_lib.c | 81 +- drivers/builtin_openssl2/crypto/ec/ec_pmeth.c | 220 +- drivers/builtin_openssl2/crypto/ec/eck_prn.c | 8 + drivers/builtin_openssl2/crypto/ec/ecp_nistp224.c | 4 +- drivers/builtin_openssl2/crypto/ec/ecp_nistp256.c | 4 +- drivers/builtin_openssl2/crypto/ec/ecp_nistp521.c | 14 +- drivers/builtin_openssl2/crypto/ec/ecp_nistz256.c | 1521 ++++ .../crypto/ec/ecp_nistz256_table.c | 9533 ++++++++++++++++++++ drivers/builtin_openssl2/crypto/ec/ectest.c | 1861 ---- drivers/builtin_openssl2/crypto/ecdh/ecdhtest.c | 410 - drivers/builtin_openssl2/crypto/ecdh/ech_kdf.c | 111 + drivers/builtin_openssl2/crypto/ecdh/ech_ossl.c | 10 + drivers/builtin_openssl2/crypto/ecdsa/ecdsatest.c | 556 -- drivers/builtin_openssl2/crypto/ecdsa/ecs_err.c | 1 + drivers/builtin_openssl2/crypto/ecdsa/ecs_lib.c | 77 + drivers/builtin_openssl2/crypto/ecdsa/ecs_locl.h | 6 +- drivers/builtin_openssl2/crypto/ecdsa/ecs_ossl.c | 28 +- drivers/builtin_openssl2/crypto/engine/eng_all.c | 3 - .../builtin_openssl2/crypto/engine/eng_cryptodev.c | 70 +- drivers/builtin_openssl2/crypto/engine/eng_dyn.c | 4 +- drivers/builtin_openssl2/crypto/engine/eng_rsax.c | 701 -- .../builtin_openssl2/crypto/engine/enginetest.c | 269 - drivers/builtin_openssl2/crypto/err/openssl.ec | 1 + drivers/builtin_openssl2/crypto/evp/c_allc.c | 11 + drivers/builtin_openssl2/crypto/evp/digest.c | 12 + drivers/builtin_openssl2/crypto/evp/e_aes.c | 962 +- .../crypto/evp/e_aes_cbc_hmac_sha1.c | 460 +- .../crypto/evp/e_aes_cbc_hmac_sha256.c | 982 ++ drivers/builtin_openssl2/crypto/evp/e_camellia.c | 321 +- drivers/builtin_openssl2/crypto/evp/e_des.c | 60 +- drivers/builtin_openssl2/crypto/evp/e_des3.c | 274 +- drivers/builtin_openssl2/crypto/evp/e_null.c | 3 - drivers/builtin_openssl2/crypto/evp/evp_enc.c | 17 +- drivers/builtin_openssl2/crypto/evp/evp_err.c | 5 +- drivers/builtin_openssl2/crypto/evp/evp_fips.c | 310 - drivers/builtin_openssl2/crypto/evp/evp_lib.c | 55 + drivers/builtin_openssl2/crypto/evp/evp_locl.h | 3 + drivers/builtin_openssl2/crypto/evp/evp_test.c | 424 - drivers/builtin_openssl2/crypto/evp/evptests.txt | 67 + drivers/builtin_openssl2/crypto/evp/m_dss.c | 2 - drivers/builtin_openssl2/crypto/evp/m_dss1.c | 3 - drivers/builtin_openssl2/crypto/evp/m_ecdsa.c | 2 - drivers/builtin_openssl2/crypto/evp/m_sha1.c | 28 +- drivers/builtin_openssl2/crypto/evp/m_sigver.c | 44 +- drivers/builtin_openssl2/crypto/evp/p_lib.c | 2 +- drivers/builtin_openssl2/crypto/evp/pmeth_lib.c | 6 +- drivers/builtin_openssl2/crypto/hmac/hm_ameth.c | 2 +- drivers/builtin_openssl2/crypto/hmac/hmac.c | 10 + drivers/builtin_openssl2/crypto/hmac/hmactest.c | 332 - drivers/builtin_openssl2/crypto/ia64cpuid.S | 167 - drivers/builtin_openssl2/crypto/idea/ideatest.c | 232 - drivers/builtin_openssl2/crypto/install-crypto.com | 5 +- drivers/builtin_openssl2/crypto/jpake/jpake.c | 1 + drivers/builtin_openssl2/crypto/jpake/jpaketest.c | 185 - drivers/builtin_openssl2/crypto/lhash/lh_test.c | 88 - drivers/builtin_openssl2/crypto/lhash/num.pl | 17 - drivers/builtin_openssl2/crypto/md2/md2test.c | 142 - drivers/builtin_openssl2/crypto/md32_common.h | 38 +- drivers/builtin_openssl2/crypto/md4/md4test.c | 133 - drivers/builtin_openssl2/crypto/md5/asm/md5-586.pl | 307 - drivers/builtin_openssl2/crypto/md5/asm/md5-ia64.S | 992 -- .../builtin_openssl2/crypto/md5/asm/md5-x86_64.pl | 370 - drivers/builtin_openssl2/crypto/md5/md5_locl.h | 2 + drivers/builtin_openssl2/crypto/md5/md5test.c | 138 - drivers/builtin_openssl2/crypto/mdc2/mdc2test.c | 146 - .../crypto/modes/asm/ghash-alpha.pl | 460 - .../crypto/modes/asm/ghash-armv4.pl | 429 - .../crypto/modes/asm/ghash-ia64.pl | 463 - .../crypto/modes/asm/ghash-parisc.pl | 731 -- .../crypto/modes/asm/ghash-s390x.pl | 262 - .../crypto/modes/asm/ghash-sparcv9.pl | 330 - .../builtin_openssl2/crypto/modes/asm/ghash-x86.pl | 1342 --- .../crypto/modes/asm/ghash-x86_64.pl | 806 -- drivers/builtin_openssl2/crypto/modes/cbc128.c | 2 +- drivers/builtin_openssl2/crypto/modes/gcm128.c | 98 +- drivers/builtin_openssl2/crypto/modes/modes_lcl.h | 50 +- drivers/builtin_openssl2/crypto/modes/wrap128.c | 138 + drivers/builtin_openssl2/crypto/o_dir_test.c | 68 - drivers/builtin_openssl2/crypto/o_str.c | 2 +- drivers/builtin_openssl2/crypto/o_time.c | 126 +- drivers/builtin_openssl2/crypto/o_time.h | 2 + drivers/builtin_openssl2/crypto/objects/obj_dat.h | 234 +- drivers/builtin_openssl2/crypto/objects/obj_dat.pl | 307 - .../builtin_openssl2/crypto/objects/obj_mac.num | 38 + drivers/builtin_openssl2/crypto/objects/obj_xref.h | 25 + .../builtin_openssl2/crypto/objects/obj_xref.txt | 12 + drivers/builtin_openssl2/crypto/objects/objects.pl | 240 - .../builtin_openssl2/crypto/objects/objects.txt | 58 + drivers/builtin_openssl2/crypto/objects/objxref.pl | 115 - drivers/builtin_openssl2/crypto/ocsp/ocsp_ht.c | 151 +- drivers/builtin_openssl2/crypto/ocsp/ocsp_lib.c | 2 +- drivers/builtin_openssl2/crypto/opensslconf.h.bak | 229 - drivers/builtin_openssl2/crypto/pariscid.pl | 225 - drivers/builtin_openssl2/crypto/pem/pem_all.c | 5 +- drivers/builtin_openssl2/crypto/pem/pem_err.c | 4 +- drivers/builtin_openssl2/crypto/pem/pem_lib.c | 17 +- drivers/builtin_openssl2/crypto/pem/pem_pkey.c | 50 + drivers/builtin_openssl2/crypto/perlasm/cbc.pl | 349 - .../builtin_openssl2/crypto/perlasm/ppc-xlate.pl | 159 - .../crypto/perlasm/x86_64-xlate.pl | 1080 --- drivers/builtin_openssl2/crypto/perlasm/x86asm.pl | 260 - drivers/builtin_openssl2/crypto/perlasm/x86gas.pl | 253 - drivers/builtin_openssl2/crypto/perlasm/x86masm.pl | 198 - drivers/builtin_openssl2/crypto/perlasm/x86nasm.pl | 177 - drivers/builtin_openssl2/crypto/pkcs12/p12_decr.c | 12 +- drivers/builtin_openssl2/crypto/pkcs12/p12_p8e.c | 6 +- drivers/builtin_openssl2/crypto/pkcs7/bio_ber.c | 466 - drivers/builtin_openssl2/crypto/pkcs7/dec.c | 248 - drivers/builtin_openssl2/crypto/pkcs7/des.pem | 15 - drivers/builtin_openssl2/crypto/pkcs7/doc | 24 - drivers/builtin_openssl2/crypto/pkcs7/enc.c | 174 - drivers/builtin_openssl2/crypto/pkcs7/es1.pem | 66 - drivers/builtin_openssl2/crypto/pkcs7/example.c | 329 - drivers/builtin_openssl2/crypto/pkcs7/example.h | 57 - drivers/builtin_openssl2/crypto/pkcs7/info.pem | 57 - drivers/builtin_openssl2/crypto/pkcs7/infokey.pem | 9 - drivers/builtin_openssl2/crypto/pkcs7/p7/a1 | 2 - drivers/builtin_openssl2/crypto/pkcs7/p7/a2 | 1 - drivers/builtin_openssl2/crypto/pkcs7/p7/cert.p7c | Bin 1728 -> 0 bytes drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7m | Bin 4894 -> 0 bytes drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7s | Bin 2625 -> 0 bytes drivers/builtin_openssl2/crypto/pkcs7/pk7_smime.c | 32 +- drivers/builtin_openssl2/crypto/pkcs7/server.pem | 52 - drivers/builtin_openssl2/crypto/pkcs7/sign.c | 160 - drivers/builtin_openssl2/crypto/pkcs7/t/3des.pem | 16 - drivers/builtin_openssl2/crypto/pkcs7/t/3dess.pem | 32 - drivers/builtin_openssl2/crypto/pkcs7/t/c.pem | 48 - drivers/builtin_openssl2/crypto/pkcs7/t/ff | 32 - drivers/builtin_openssl2/crypto/pkcs7/t/msie-e | 20 - drivers/builtin_openssl2/crypto/pkcs7/t/msie-e.pem | 22 - .../builtin_openssl2/crypto/pkcs7/t/msie-enc-01 | 62 - .../crypto/pkcs7/t/msie-enc-01.pem | 66 - .../builtin_openssl2/crypto/pkcs7/t/msie-enc-02 | 90 - .../crypto/pkcs7/t/msie-enc-02.pem | 106 - drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e | 91 - .../builtin_openssl2/crypto/pkcs7/t/msie-s-a-e.pem | 106 - drivers/builtin_openssl2/crypto/pkcs7/t/nav-smime | 157 - drivers/builtin_openssl2/crypto/pkcs7/t/s.pem | 57 - drivers/builtin_openssl2/crypto/pkcs7/t/server.pem | 57 - drivers/builtin_openssl2/crypto/pkcs7/verify.c | 347 - drivers/builtin_openssl2/crypto/ppc_arch.h | 10 + drivers/builtin_openssl2/crypto/ppccap.c | 56 +- drivers/builtin_openssl2/crypto/ppccpuid.pl | 132 - drivers/builtin_openssl2/crypto/pqueue/pq_test.c | 94 - drivers/builtin_openssl2/crypto/rand/rand_win.c | 37 +- drivers/builtin_openssl2/crypto/rand/randtest.c | 209 - drivers/builtin_openssl2/crypto/rc2/rc2test.c | 274 - drivers/builtin_openssl2/crypto/rc4/asm/rc4-586.pl | 410 - .../builtin_openssl2/crypto/rc4/asm/rc4-ia64.pl | 755 -- .../crypto/rc4/asm/rc4-md5-x86_64.pl | 632 -- .../builtin_openssl2/crypto/rc4/asm/rc4-parisc.pl | 314 - .../builtin_openssl2/crypto/rc4/asm/rc4-s390x.pl | 234 - .../builtin_openssl2/crypto/rc4/asm/rc4-x86_64.pl | 677 -- drivers/builtin_openssl2/crypto/rc4/rc4_enc.c | 2 +- drivers/builtin_openssl2/crypto/rc4/rc4test.c | 235 - drivers/builtin_openssl2/crypto/rc5/asm/rc5-586.pl | 110 - drivers/builtin_openssl2/crypto/rc5/rc5_locl.h | 5 +- drivers/builtin_openssl2/crypto/rc5/rc5test.c | 381 - .../builtin_openssl2/crypto/ripemd/asm/rmd-586.pl | 591 -- drivers/builtin_openssl2/crypto/ripemd/rmdtest.c | 143 - drivers/builtin_openssl2/crypto/rsa/rsa_ameth.c | 538 +- drivers/builtin_openssl2/crypto/rsa/rsa_asn1.c | 8 + drivers/builtin_openssl2/crypto/rsa/rsa_err.c | 18 +- drivers/builtin_openssl2/crypto/rsa/rsa_oaep.c | 110 +- drivers/builtin_openssl2/crypto/rsa/rsa_pmeth.c | 159 +- drivers/builtin_openssl2/crypto/rsa/rsa_sign.c | 15 +- drivers/builtin_openssl2/crypto/rsa/rsa_test.c | 339 - drivers/builtin_openssl2/crypto/s390xcpuid.S | 99 - .../builtin_openssl2/crypto/sha/asm/sha1-586.pl | 1229 --- .../builtin_openssl2/crypto/sha/asm/sha1-alpha.pl | 322 - .../crypto/sha/asm/sha1-armv4-large.pl | 248 - .../builtin_openssl2/crypto/sha/asm/sha1-ia64.pl | 305 - .../builtin_openssl2/crypto/sha/asm/sha1-mips.pl | 354 - .../builtin_openssl2/crypto/sha/asm/sha1-parisc.pl | 260 - .../builtin_openssl2/crypto/sha/asm/sha1-ppc.pl | 326 - .../builtin_openssl2/crypto/sha/asm/sha1-s390x.pl | 246 - .../crypto/sha/asm/sha1-sparcv9.pl | 284 - .../crypto/sha/asm/sha1-sparcv9a.pl | 601 -- .../builtin_openssl2/crypto/sha/asm/sha1-thumb.pl | 259 - .../builtin_openssl2/crypto/sha/asm/sha1-x86_64.pl | 1261 --- .../builtin_openssl2/crypto/sha/asm/sha256-586.pl | 249 - .../crypto/sha/asm/sha256-armv4.pl | 211 - .../builtin_openssl2/crypto/sha/asm/sha512-586.pl | 644 -- .../crypto/sha/asm/sha512-armv4.pl | 582 -- .../builtin_openssl2/crypto/sha/asm/sha512-ia64.pl | 672 -- .../builtin_openssl2/crypto/sha/asm/sha512-mips.pl | 455 - .../crypto/sha/asm/sha512-parisc.pl | 793 -- .../builtin_openssl2/crypto/sha/asm/sha512-ppc.pl | 460 - .../crypto/sha/asm/sha512-s390x.pl | 322 - .../crypto/sha/asm/sha512-sparcv9.pl | 594 -- .../crypto/sha/asm/sha512-x86_64.pl | 451 - drivers/builtin_openssl2/crypto/sha/sha1test.c | 174 - drivers/builtin_openssl2/crypto/sha/sha512.c | 13 + drivers/builtin_openssl2/crypto/sha/shatest.c | 174 - drivers/builtin_openssl2/crypto/sparc_arch.h | 101 + drivers/builtin_openssl2/crypto/sparccpuid.S | 402 - drivers/builtin_openssl2/crypto/sparcv9cap.c | 204 +- drivers/builtin_openssl2/crypto/srp/srptest.c | 154 - drivers/builtin_openssl2/crypto/stack/stack.c | 36 +- drivers/builtin_openssl2/crypto/threads/mttest.c | 1211 --- drivers/builtin_openssl2/crypto/ts/ts_rsp_sign.c | 7 +- drivers/builtin_openssl2/crypto/ts/ts_rsp_verify.c | 2 +- drivers/builtin_openssl2/crypto/ui/ui_openssl.c | 2 +- drivers/builtin_openssl2/crypto/vms_rms.h | 0 .../builtin_openssl2/crypto/whrlpool/asm/wp-mmx.pl | 493 - .../crypto/whrlpool/asm/wp-x86_64.pl | 590 -- drivers/builtin_openssl2/crypto/whrlpool/wp_test.c | 241 - drivers/builtin_openssl2/crypto/x509/vpm_int.h | 70 + drivers/builtin_openssl2/crypto/x509/x509_cmp.c | 146 +- drivers/builtin_openssl2/crypto/x509/x509_err.c | 10 +- drivers/builtin_openssl2/crypto/x509/x509_lu.c | 26 + drivers/builtin_openssl2/crypto/x509/x509_set.c | 5 + drivers/builtin_openssl2/crypto/x509/x509_trs.c | 8 + drivers/builtin_openssl2/crypto/x509/x509_txt.c | 20 + drivers/builtin_openssl2/crypto/x509/x509_vfy.c | 387 +- drivers/builtin_openssl2/crypto/x509/x509_vpm.c | 253 +- drivers/builtin_openssl2/crypto/x509/x_all.c | 14 + drivers/builtin_openssl2/crypto/x509v3/ext_dat.h | 3 + drivers/builtin_openssl2/crypto/x509v3/tabtest.c | 92 - drivers/builtin_openssl2/crypto/x509v3/v3_lib.c | 22 + drivers/builtin_openssl2/crypto/x509v3/v3_purp.c | 46 +- drivers/builtin_openssl2/crypto/x509v3/v3_scts.c | 334 + drivers/builtin_openssl2/crypto/x509v3/v3_utl.c | 432 + drivers/builtin_openssl2/crypto/x509v3/v3err.c | 9 +- drivers/builtin_openssl2/crypto/x86_64cpuid.pl | 284 - drivers/builtin_openssl2/crypto/x86cpuid.pl | 358 - 374 files changed, 23879 insertions(+), 107375 deletions(-) delete mode 100755 drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-armv4.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-ia64.S delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-parisc.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-ppc.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aes-s390x.pl delete mode 100755 drivers/builtin_openssl2/crypto/aes/asm/aes-sparcv9.pl delete mode 100755 drivers/builtin_openssl2/crypto/aes/asm/aes-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aesni-sha1-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/bsaes-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86.pl delete mode 100644 drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/alphacpuid.pl delete mode 100644 drivers/builtin_openssl2/crypto/armv4cpuid.S delete mode 100644 drivers/builtin_openssl2/crypto/asn1/charmap.pl delete mode 100644 drivers/builtin_openssl2/crypto/bf/asm/bf-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/bf/asm/bf-686.pl delete mode 100644 drivers/builtin_openssl2/crypto/bf/bftest.c delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/alpha-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/armv4-gf2m.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/armv4-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/bn-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/co-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/ia64-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/ia64.S delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/mips-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/mips.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/mips3-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/mips3.s delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/modexp512-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/pa-risc2.s delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/pa-risc2W.s delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/parisc-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/ppc-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/ppc.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/ppc64-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/s390x-gf2m.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/s390x-mont.pl delete mode 100755 drivers/builtin_openssl2/crypto/bn/asm/s390x.S delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/sparcv8.S delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/sparcv8plus.S delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/sparcv9-mont.pl delete mode 100755 drivers/builtin_openssl2/crypto/bn/asm/sparcv9a-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/via-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86-gf2m.pl delete mode 100755 drivers/builtin_openssl2/crypto/bn/asm/x86-mont.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/add.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/comba.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/div.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/mul.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/mul_add.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/sqr.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86/sub.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/asm/x86_64-gf2m.pl delete mode 100755 drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont.pl delete mode 100755 drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont5.pl mode change 100755 => 100644 drivers/builtin_openssl2/crypto/bn/bn_const.c delete mode 100644 drivers/builtin_openssl2/crypto/bn/bn_prime.pl delete mode 100644 drivers/builtin_openssl2/crypto/bn/bntest.c delete mode 100644 drivers/builtin_openssl2/crypto/bn/divtest.c delete mode 100644 drivers/builtin_openssl2/crypto/bn/exptest.c create mode 100644 drivers/builtin_openssl2/crypto/bn/rsaz_exp.c create mode 100644 drivers/builtin_openssl2/crypto/bn/rsaz_exp.h delete mode 100644 drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86.pl delete mode 100644 drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/cast/asm/cast-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/cast/casttest.c create mode 100644 drivers/builtin_openssl2/crypto/cms/cms_kari.c delete mode 100644 drivers/builtin_openssl2/crypto/conf/keysets.pl delete mode 100644 drivers/builtin_openssl2/crypto/conf/test.c delete mode 100644 drivers/builtin_openssl2/crypto/des/asm/crypt586.pl delete mode 100644 drivers/builtin_openssl2/crypto/des/asm/des-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/des/asm/desboth.pl delete mode 100644 drivers/builtin_openssl2/crypto/des/des.pod delete mode 100644 drivers/builtin_openssl2/crypto/des/destest.c create mode 100644 drivers/builtin_openssl2/crypto/dh/dh_kdf.c create mode 100644 drivers/builtin_openssl2/crypto/dh/dh_rfc5114.c delete mode 100644 drivers/builtin_openssl2/crypto/dh/dhtest.c delete mode 100644 drivers/builtin_openssl2/crypto/dsa/dsatest.c create mode 100644 drivers/builtin_openssl2/crypto/ec/ecp_nistz256.c create mode 100644 drivers/builtin_openssl2/crypto/ec/ecp_nistz256_table.c delete mode 100644 drivers/builtin_openssl2/crypto/ec/ectest.c delete mode 100644 drivers/builtin_openssl2/crypto/ecdh/ecdhtest.c create mode 100644 drivers/builtin_openssl2/crypto/ecdh/ech_kdf.c delete mode 100644 drivers/builtin_openssl2/crypto/ecdsa/ecdsatest.c delete mode 100644 drivers/builtin_openssl2/crypto/engine/eng_rsax.c delete mode 100644 drivers/builtin_openssl2/crypto/engine/enginetest.c create mode 100644 drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha256.c delete mode 100644 drivers/builtin_openssl2/crypto/evp/evp_fips.c delete mode 100644 drivers/builtin_openssl2/crypto/evp/evp_test.c delete mode 100644 drivers/builtin_openssl2/crypto/hmac/hmactest.c delete mode 100644 drivers/builtin_openssl2/crypto/ia64cpuid.S delete mode 100644 drivers/builtin_openssl2/crypto/idea/ideatest.c delete mode 100644 drivers/builtin_openssl2/crypto/jpake/jpaketest.c delete mode 100644 drivers/builtin_openssl2/crypto/lhash/lh_test.c delete mode 100644 drivers/builtin_openssl2/crypto/lhash/num.pl delete mode 100644 drivers/builtin_openssl2/crypto/md2/md2test.c delete mode 100644 drivers/builtin_openssl2/crypto/md4/md4test.c delete mode 100644 drivers/builtin_openssl2/crypto/md5/asm/md5-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/md5/asm/md5-ia64.S delete mode 100755 drivers/builtin_openssl2/crypto/md5/asm/md5-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/md5/md5test.c delete mode 100644 drivers/builtin_openssl2/crypto/mdc2/mdc2test.c delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-alpha.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-armv4.pl delete mode 100755 drivers/builtin_openssl2/crypto/modes/asm/ghash-ia64.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-parisc.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-s390x.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-sparcv9.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-x86.pl delete mode 100644 drivers/builtin_openssl2/crypto/modes/asm/ghash-x86_64.pl create mode 100644 drivers/builtin_openssl2/crypto/modes/wrap128.c delete mode 100644 drivers/builtin_openssl2/crypto/o_dir_test.c delete mode 100644 drivers/builtin_openssl2/crypto/objects/obj_dat.pl delete mode 100644 drivers/builtin_openssl2/crypto/objects/objects.pl delete mode 100644 drivers/builtin_openssl2/crypto/objects/objxref.pl delete mode 100644 drivers/builtin_openssl2/crypto/opensslconf.h.bak delete mode 100644 drivers/builtin_openssl2/crypto/pariscid.pl delete mode 100644 drivers/builtin_openssl2/crypto/perlasm/cbc.pl delete mode 100755 drivers/builtin_openssl2/crypto/perlasm/ppc-xlate.pl delete mode 100755 drivers/builtin_openssl2/crypto/perlasm/x86_64-xlate.pl delete mode 100644 drivers/builtin_openssl2/crypto/perlasm/x86asm.pl delete mode 100644 drivers/builtin_openssl2/crypto/perlasm/x86gas.pl delete mode 100644 drivers/builtin_openssl2/crypto/perlasm/x86masm.pl delete mode 100644 drivers/builtin_openssl2/crypto/perlasm/x86nasm.pl delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/bio_ber.c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/dec.c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/des.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/doc delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/enc.c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/es1.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/example.c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/example.h delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/info.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/infokey.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/p7/a1 delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/p7/a2 delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/p7/cert.p7c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7m delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7s delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/server.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/sign.c delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/3des.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/3dess.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/c.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/ff delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-e delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-e.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01 delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02 delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/nav-smime delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/s.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/t/server.pem delete mode 100644 drivers/builtin_openssl2/crypto/pkcs7/verify.c create mode 100644 drivers/builtin_openssl2/crypto/ppc_arch.h delete mode 100755 drivers/builtin_openssl2/crypto/ppccpuid.pl delete mode 100644 drivers/builtin_openssl2/crypto/pqueue/pq_test.c delete mode 100644 drivers/builtin_openssl2/crypto/rand/randtest.c delete mode 100644 drivers/builtin_openssl2/crypto/rc2/rc2test.c delete mode 100644 drivers/builtin_openssl2/crypto/rc4/asm/rc4-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc4/asm/rc4-ia64.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc4/asm/rc4-md5-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc4/asm/rc4-parisc.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc4/asm/rc4-s390x.pl delete mode 100755 drivers/builtin_openssl2/crypto/rc4/asm/rc4-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc4/rc4test.c delete mode 100644 drivers/builtin_openssl2/crypto/rc5/asm/rc5-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/rc5/rc5test.c delete mode 100644 drivers/builtin_openssl2/crypto/ripemd/asm/rmd-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/ripemd/rmdtest.c delete mode 100644 drivers/builtin_openssl2/crypto/rsa/rsa_test.c delete mode 100644 drivers/builtin_openssl2/crypto/s390xcpuid.S delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-alpha.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-armv4-large.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-ia64.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-mips.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-parisc.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha1-ppc.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-s390x.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9a.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha1-thumb.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha1-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha256-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha256-armv4.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha512-586.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha512-armv4.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha512-ia64.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha512-mips.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha512-parisc.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha512-ppc.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha512-s390x.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/asm/sha512-sparcv9.pl delete mode 100755 drivers/builtin_openssl2/crypto/sha/asm/sha512-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/sha/sha1test.c delete mode 100644 drivers/builtin_openssl2/crypto/sha/shatest.c create mode 100644 drivers/builtin_openssl2/crypto/sparc_arch.h delete mode 100644 drivers/builtin_openssl2/crypto/sparccpuid.S delete mode 100644 drivers/builtin_openssl2/crypto/srp/srptest.c delete mode 100644 drivers/builtin_openssl2/crypto/threads/mttest.c mode change 100755 => 100644 drivers/builtin_openssl2/crypto/vms_rms.h delete mode 100644 drivers/builtin_openssl2/crypto/whrlpool/asm/wp-mmx.pl delete mode 100644 drivers/builtin_openssl2/crypto/whrlpool/asm/wp-x86_64.pl delete mode 100644 drivers/builtin_openssl2/crypto/whrlpool/wp_test.c create mode 100644 drivers/builtin_openssl2/crypto/x509/vpm_int.h delete mode 100644 drivers/builtin_openssl2/crypto/x509v3/tabtest.c create mode 100644 drivers/builtin_openssl2/crypto/x509v3/v3_scts.c delete mode 100644 drivers/builtin_openssl2/crypto/x86_64cpuid.pl delete mode 100644 drivers/builtin_openssl2/crypto/x86cpuid.pl (limited to 'drivers/builtin_openssl2/crypto') diff --git a/drivers/builtin_openssl2/crypto/aes/aes_wrap.c b/drivers/builtin_openssl2/crypto/aes/aes_wrap.c index b1ab8e25ef..b7b64d57a4 100644 --- a/drivers/builtin_openssl2/crypto/aes/aes_wrap.c +++ b/drivers/builtin_openssl2/crypto/aes/aes_wrap.c @@ -54,197 +54,19 @@ #include "cryptlib.h" #include -#include - -static const unsigned char default_iv[] = { - 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, -}; +#include int AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, const unsigned char *in, unsigned int inlen) { - unsigned char *A, B[16], *R; - unsigned int i, j, t; - if ((inlen & 0x7) || (inlen < 8)) - return -1; - A = B; - t = 1; - memcpy(out + 8, in, inlen); - if (!iv) - iv = default_iv; - - memcpy(A, iv, 8); - - for (j = 0; j < 6; j++) { - R = out + 8; - for (i = 0; i < inlen; i += 8, t++, R += 8) { - memcpy(B + 8, R, 8); - AES_encrypt(B, B, key); - A[7] ^= (unsigned char)(t & 0xff); - if (t > 0xff) { - A[6] ^= (unsigned char)((t >> 8) & 0xff); - A[5] ^= (unsigned char)((t >> 16) & 0xff); - A[4] ^= (unsigned char)((t >> 24) & 0xff); - } - memcpy(R, B + 8, 8); - } - } - memcpy(out, A, 8); - return inlen + 8; + return CRYPTO_128_wrap(key, iv, out, in, inlen, (block128_f) AES_encrypt); } int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, const unsigned char *in, unsigned int inlen) { - unsigned char *A, B[16], *R; - unsigned int i, j, t; - inlen -= 8; - if (inlen & 0x7) - return -1; - if (inlen < 8) - return -1; - A = B; - t = 6 * (inlen >> 3); - memcpy(A, in, 8); - memcpy(out, in + 8, inlen); - for (j = 0; j < 6; j++) { - R = out + inlen - 8; - for (i = 0; i < inlen; i += 8, t--, R -= 8) { - A[7] ^= (unsigned char)(t & 0xff); - if (t > 0xff) { - A[6] ^= (unsigned char)((t >> 8) & 0xff); - A[5] ^= (unsigned char)((t >> 16) & 0xff); - A[4] ^= (unsigned char)((t >> 24) & 0xff); - } - memcpy(B + 8, R, 8); - AES_decrypt(B, B, key); - memcpy(R, B + 8, 8); - } - } - if (!iv) - iv = default_iv; - if (memcmp(A, iv, 8)) { - OPENSSL_cleanse(out, inlen); - return 0; - } - return inlen; -} - -#ifdef AES_WRAP_TEST - -int AES_wrap_unwrap_test(const unsigned char *kek, int keybits, - const unsigned char *iv, - const unsigned char *eout, - const unsigned char *key, int keylen) -{ - unsigned char *otmp = NULL, *ptmp = NULL; - int r, ret = 0; - AES_KEY wctx; - otmp = OPENSSL_malloc(keylen + 8); - ptmp = OPENSSL_malloc(keylen); - if (!otmp || !ptmp) - return 0; - if (AES_set_encrypt_key(kek, keybits, &wctx)) - goto err; - r = AES_wrap_key(&wctx, iv, otmp, key, keylen); - if (r <= 0) - goto err; - - if (eout && memcmp(eout, otmp, keylen)) - goto err; - - if (AES_set_decrypt_key(kek, keybits, &wctx)) - goto err; - r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r); - - if (memcmp(key, ptmp, keylen)) - goto err; - - ret = 1; - - err: - if (otmp) - OPENSSL_free(otmp); - if (ptmp) - OPENSSL_free(ptmp); - - return ret; - + return CRYPTO_128_unwrap(key, iv, out, in, inlen, + (block128_f) AES_decrypt); } - -int main(int argc, char **argv) -{ - - static const unsigned char kek[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - }; - - static const unsigned char key[] = { - 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - }; - - static const unsigned char e1[] = { - 0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47, - 0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82, - 0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5 - }; - - static const unsigned char e2[] = { - 0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35, - 0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2, - 0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d - }; - - static const unsigned char e3[] = { - 0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2, - 0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a, - 0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7 - }; - - static const unsigned char e4[] = { - 0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32, - 0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc, - 0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93, - 0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2 - }; - - static const unsigned char e5[] = { - 0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f, - 0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4, - 0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95, - 0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1 - }; - - static const unsigned char e6[] = { - 0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4, - 0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26, - 0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26, - 0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b, - 0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21 - }; - - AES_KEY wctx, xctx; - int ret; - ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32); - fprintf(stderr, "Key test result %d\n", ret); -} - -#endif diff --git a/drivers/builtin_openssl2/crypto/aes/aes_x86core.c b/drivers/builtin_openssl2/crypto/aes/aes_x86core.c index 428bd58d38..b5dd697677 100644 --- a/drivers/builtin_openssl2/crypto/aes/aes_x86core.c +++ b/drivers/builtin_openssl2/crypto/aes/aes_x86core.c @@ -89,8 +89,10 @@ typedef unsigned long long u64; #endif #undef ROTATE -#if defined(_MSC_VER) || defined(__ICC) -# define ROTATE(a,n) _lrotl(a,n) +#if defined(_MSC_VER) +# define ROTATE(a,n) _lrotl(a,n) +#elif defined(__ICC) +# define ROTATE(a,n) _rotl(a,n) #elif defined(__GNUC__) && __GNUC__>=2 # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE(a,n) ({ register unsigned int ret; \ diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl deleted file mode 100755 index 51b500ddef..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-586.pl +++ /dev/null @@ -1,2980 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# Version 4.3. -# -# You might fail to appreciate this module performance from the first -# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered -# to be *the* best Intel C compiler without -KPIC, performance appears -# to be virtually identical... But try to re-configure with shared -# library support... Aha! Intel compiler "suddenly" lags behind by 30% -# [on P4, more on others]:-) And if compared to position-independent -# code generated by GNU C, this code performs *more* than *twice* as -# fast! Yes, all this buzz about PIC means that unlike other hand- -# coded implementations, this one was explicitly designed to be safe -# to use even in shared library context... This also means that this -# code isn't necessarily absolutely fastest "ever," because in order -# to achieve position independence an extra register has to be -# off-loaded to stack, which affects the benchmark result. -# -# Special note about instruction choice. Do you recall RC4_INT code -# performing poorly on P4? It might be the time to figure out why. -# RC4_INT code implies effective address calculations in base+offset*4 -# form. Trouble is that it seems that offset scaling turned to be -# critical path... At least eliminating scaling resulted in 2.8x RC4 -# performance improvement [as you might recall]. As AES code is hungry -# for scaling too, I [try to] avoid the latter by favoring off-by-2 -# shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF. -# -# As was shown by Dean Gaudet , the above note turned -# void. Performance improvement with off-by-2 shifts was observed on -# intermediate implementation, which was spilling yet another register -# to stack... Final offset*4 code below runs just a tad faster on P4, -# but exhibits up to 10% improvement on other cores. -# -# Second version is "monolithic" replacement for aes_core.c, which in -# addition to AES_[de|en]crypt implements private_AES_set_[de|en]cryption_key. -# This made it possible to implement little-endian variant of the -# algorithm without modifying the base C code. Motivating factor for -# the undertaken effort was that it appeared that in tight IA-32 -# register window little-endian flavor could achieve slightly higher -# Instruction Level Parallelism, and it indeed resulted in up to 15% -# better performance on most recent µ-archs... -# -# Third version adds AES_cbc_encrypt implementation, which resulted in -# up to 40% performance imrovement of CBC benchmark results. 40% was -# observed on P4 core, where "overall" imrovement coefficient, i.e. if -# compared to PIC generated by GCC and in CBC mode, was observed to be -# as large as 4x:-) CBC performance is virtually identical to ECB now -# and on some platforms even better, e.g. 17.6 "small" cycles/byte on -# Opteron, because certain function prologues and epilogues are -# effectively taken out of the loop... -# -# Version 3.2 implements compressed tables and prefetch of these tables -# in CBC[!] mode. Former means that 3/4 of table references are now -# misaligned, which unfortunately has negative impact on elder IA-32 -# implementations, Pentium suffered 30% penalty, PIII - 10%. -# -# Version 3.3 avoids L1 cache aliasing between stack frame and -# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The -# latter is achieved by copying the key schedule to controlled place in -# stack. This unfortunately has rather strong impact on small block CBC -# performance, ~2x deterioration on 16-byte block if compared to 3.3. -# -# Version 3.5 checks if there is L1 cache aliasing between user-supplied -# key schedule and S-boxes and abstains from copying the former if -# there is no. This allows end-user to consciously retain small block -# performance by aligning key schedule in specific manner. -# -# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB. -# -# Current ECB performance numbers for 128-bit key in CPU cycles per -# processed byte [measure commonly used by AES benchmarkers] are: -# -# small footprint fully unrolled -# P4 24 22 -# AMD K8 20 19 -# PIII 25 23 -# Pentium 81 78 -# -# Version 3.7 reimplements outer rounds as "compact." Meaning that -# first and last rounds reference compact 256 bytes S-box. This means -# that first round consumes a lot more CPU cycles and that encrypt -# and decrypt performance becomes asymmetric. Encrypt performance -# drops by 10-12%, while decrypt - by 20-25%:-( 256 bytes S-box is -# aggressively pre-fetched. -# -# Version 4.0 effectively rolls back to 3.6 and instead implements -# additional set of functions, _[x86|sse]_AES_[en|de]crypt_compact, -# which use exclusively 256 byte S-box. These functions are to be -# called in modes not concealing plain text, such as ECB, or when -# we're asked to process smaller amount of data [or unconditionally -# on hyper-threading CPU]. Currently it's called unconditionally from -# AES_[en|de]crypt, which affects all modes, but CBC. CBC routine -# still needs to be modified to switch between slower and faster -# mode when appropriate... But in either case benchmark landscape -# changes dramatically and below numbers are CPU cycles per processed -# byte for 128-bit key. -# -# ECB encrypt ECB decrypt CBC large chunk -# P4 56[60] 84[100] 23 -# AMD K8 48[44] 70[79] 18 -# PIII 41[50] 61[91] 24 -# Core 2 32[38] 45[70] 18.5 -# Pentium 120 160 77 -# -# Version 4.1 switches to compact S-box even in key schedule setup. -# -# Version 4.2 prefetches compact S-box in every SSE round or in other -# words every cache-line is *guaranteed* to be accessed within ~50 -# cycles window. Why just SSE? Because it's needed on hyper-threading -# CPU! Which is also why it's prefetched with 64 byte stride. Best -# part is that it has no negative effect on performance:-) -# -# Version 4.3 implements switch between compact and non-compact block -# functions in AES_cbc_encrypt depending on how much data was asked -# to be processed in one stroke. -# -###################################################################### -# Timing attacks are classified in two classes: synchronous when -# attacker consciously initiates cryptographic operation and collects -# timing data of various character afterwards, and asynchronous when -# malicious code is executed on same CPU simultaneously with AES, -# instruments itself and performs statistical analysis of this data. -# -# As far as synchronous attacks go the root to the AES timing -# vulnerability is twofold. Firstly, of 256 S-box elements at most 160 -# are referred to in single 128-bit block operation. Well, in C -# implementation with 4 distinct tables it's actually as little as 40 -# references per 256 elements table, but anyway... Secondly, even -# though S-box elements are clustered into smaller amount of cache- -# lines, smaller than 160 and even 40, it turned out that for certain -# plain-text pattern[s] or simply put chosen plain-text and given key -# few cache-lines remain unaccessed during block operation. Now, if -# attacker can figure out this access pattern, he can deduct the key -# [or at least part of it]. The natural way to mitigate this kind of -# attacks is to minimize the amount of cache-lines in S-box and/or -# prefetch them to ensure that every one is accessed for more uniform -# timing. But note that *if* plain-text was concealed in such way that -# input to block function is distributed *uniformly*, then attack -# wouldn't apply. Now note that some encryption modes, most notably -# CBC, do mask the plain-text in this exact way [secure cipher output -# is distributed uniformly]. Yes, one still might find input that -# would reveal the information about given key, but if amount of -# candidate inputs to be tried is larger than amount of possible key -# combinations then attack becomes infeasible. This is why revised -# AES_cbc_encrypt "dares" to switch to larger S-box when larger chunk -# of data is to be processed in one stroke. The current size limit of -# 512 bytes is chosen to provide same [diminishigly low] probability -# for cache-line to remain untouched in large chunk operation with -# large S-box as for single block operation with compact S-box and -# surely needs more careful consideration... -# -# As for asynchronous attacks. There are two flavours: attacker code -# being interleaved with AES on hyper-threading CPU at *instruction* -# level, and two processes time sharing single core. As for latter. -# Two vectors. 1. Given that attacker process has higher priority, -# yield execution to process performing AES just before timer fires -# off the scheduler, immediately regain control of CPU and analyze the -# cache state. For this attack to be efficient attacker would have to -# effectively slow down the operation by several *orders* of magnitute, -# by ratio of time slice to duration of handful of AES rounds, which -# unlikely to remain unnoticed. Not to mention that this also means -# that he would spend correspondigly more time to collect enough -# statistical data to mount the attack. It's probably appropriate to -# say that if adeversary reckons that this attack is beneficial and -# risks to be noticed, you probably have larger problems having him -# mere opportunity. In other words suggested code design expects you -# to preclude/mitigate this attack by overall system security design. -# 2. Attacker manages to make his code interrupt driven. In order for -# this kind of attack to be feasible, interrupt rate has to be high -# enough, again comparable to duration of handful of AES rounds. But -# is there interrupt source of such rate? Hardly, not even 1Gbps NIC -# generates interrupts at such raging rate... -# -# And now back to the former, hyper-threading CPU or more specifically -# Intel P4. Recall that asynchronous attack implies that malicious -# code instruments itself. And naturally instrumentation granularity -# has be noticeably lower than duration of codepath accessing S-box. -# Given that all cache-lines are accessed during that time that is. -# Current implementation accesses *all* cache-lines within ~50 cycles -# window, which is actually *less* than RDTSC latency on Intel P4! - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386"); -&static_label("AES_Te"); -&static_label("AES_Td"); - -$s0="eax"; -$s1="ebx"; -$s2="ecx"; -$s3="edx"; -$key="edi"; -$acc="esi"; -$tbl="ebp"; - -# stack frame layout in _[x86|sse]_AES_* routines, frame is allocated -# by caller -$__ra=&DWP(0,"esp"); # return address -$__s0=&DWP(4,"esp"); # s0 backing store -$__s1=&DWP(8,"esp"); # s1 backing store -$__s2=&DWP(12,"esp"); # s2 backing store -$__s3=&DWP(16,"esp"); # s3 backing store -$__key=&DWP(20,"esp"); # pointer to key schedule -$__end=&DWP(24,"esp"); # pointer to end of key schedule -$__tbl=&DWP(28,"esp"); # %ebp backing store - -# stack frame layout in AES_[en|crypt] routines, which differs from -# above by 4 and overlaps by %ebp backing store -$_tbl=&DWP(24,"esp"); -$_esp=&DWP(28,"esp"); - -sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } } - -$speed_limit=512; # chunks smaller than $speed_limit are - # processed with compact routine in CBC mode -$small_footprint=1; # $small_footprint=1 code is ~5% slower [on - # recent µ-archs], but ~5 times smaller! - # I favor compact code to minimize cache - # contention and in hope to "collect" 5% back - # in real-life applications... - -$vertical_spin=0; # shift "verticaly" defaults to 0, because of - # its proof-of-concept status... -# Note that there is no decvert(), as well as last encryption round is -# performed with "horizontal" shifts. This is because this "vertical" -# implementation [one which groups shifts on a given $s[i] to form a -# "column," unlike "horizontal" one, which groups shifts on different -# $s[i] to form a "row"] is work in progress. It was observed to run -# few percents faster on Intel cores, but not AMD. On AMD K8 core it's -# whole 12% slower:-( So we face a trade-off... Shall it be resolved -# some day? Till then the code is considered experimental and by -# default remains dormant... - -sub encvert() -{ my ($te,@s) = @_; - my $v0 = $acc, $v1 = $key; - - &mov ($v0,$s[3]); # copy s3 - &mov (&DWP(4,"esp"),$s[2]); # save s2 - &mov ($v1,$s[0]); # copy s0 - &mov (&DWP(8,"esp"),$s[1]); # save s1 - - &movz ($s[2],&HB($s[0])); - &and ($s[0],0xFF); - &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0 - &shr ($v1,16); - &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8 - &movz ($s[1],&HB($v1)); - &and ($v1,0xFF); - &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16 - &mov ($v1,$v0); - &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24 - - &and ($v0,0xFF); - &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0 - &movz ($v0,&HB($v1)); - &shr ($v1,16); - &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8 - &movz ($v0,&HB($v1)); - &and ($v1,0xFF); - &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16 - &mov ($v1,&DWP(4,"esp")); # restore s2 - &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24 - - &mov ($v0,$v1); - &and ($v1,0xFF); - &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0 - &movz ($v1,&HB($v0)); - &shr ($v0,16); - &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8 - &movz ($v1,&HB($v0)); - &and ($v0,0xFF); - &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16 - &mov ($v0,&DWP(8,"esp")); # restore s1 - &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24 - - &mov ($v1,$v0); - &and ($v0,0xFF); - &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0 - &movz ($v0,&HB($v1)); - &shr ($v1,16); - &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8 - &movz ($v0,&HB($v1)); - &and ($v1,0xFF); - &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16 - &mov ($key,$__key); # reincarnate v1 as key - &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24 -} - -# Another experimental routine, which features "horizontal spin," but -# eliminates one reference to stack. Strangely enough runs slower... -sub enchoriz() -{ my $v0 = $key, $v1 = $acc; - - &movz ($v0,&LB($s0)); # 3, 2, 1, 0* - &rotr ($s2,8); # 8,11,10, 9 - &mov ($v1,&DWP(0,$te,$v0,8)); # 0 - &movz ($v0,&HB($s1)); # 7, 6, 5*, 4 - &rotr ($s3,16); # 13,12,15,14 - &xor ($v1,&DWP(3,$te,$v0,8)); # 5 - &movz ($v0,&HB($s2)); # 8,11,10*, 9 - &rotr ($s0,16); # 1, 0, 3, 2 - &xor ($v1,&DWP(2,$te,$v0,8)); # 10 - &movz ($v0,&HB($s3)); # 13,12,15*,14 - &xor ($v1,&DWP(1,$te,$v0,8)); # 15, t[0] collected - &mov ($__s0,$v1); # t[0] saved - - &movz ($v0,&LB($s1)); # 7, 6, 5, 4* - &shr ($s1,16); # -, -, 7, 6 - &mov ($v1,&DWP(0,$te,$v0,8)); # 4 - &movz ($v0,&LB($s3)); # 13,12,15,14* - &xor ($v1,&DWP(2,$te,$v0,8)); # 14 - &movz ($v0,&HB($s0)); # 1, 0, 3*, 2 - &and ($s3,0xffff0000); # 13,12, -, - - &xor ($v1,&DWP(1,$te,$v0,8)); # 3 - &movz ($v0,&LB($s2)); # 8,11,10, 9* - &or ($s3,$s1); # 13,12, 7, 6 - &xor ($v1,&DWP(3,$te,$v0,8)); # 9, t[1] collected - &mov ($s1,$v1); # s[1]=t[1] - - &movz ($v0,&LB($s0)); # 1, 0, 3, 2* - &shr ($s2,16); # -, -, 8,11 - &mov ($v1,&DWP(2,$te,$v0,8)); # 2 - &movz ($v0,&HB($s3)); # 13,12, 7*, 6 - &xor ($v1,&DWP(1,$te,$v0,8)); # 7 - &movz ($v0,&HB($s2)); # -, -, 8*,11 - &xor ($v1,&DWP(0,$te,$v0,8)); # 8 - &mov ($v0,$s3); - &shr ($v0,24); # 13 - &xor ($v1,&DWP(3,$te,$v0,8)); # 13, t[2] collected - - &movz ($v0,&LB($s2)); # -, -, 8,11* - &shr ($s0,24); # 1* - &mov ($s2,&DWP(1,$te,$v0,8)); # 11 - &xor ($s2,&DWP(3,$te,$s0,8)); # 1 - &mov ($s0,$__s0); # s[0]=t[0] - &movz ($v0,&LB($s3)); # 13,12, 7, 6* - &shr ($s3,16); # , ,13,12 - &xor ($s2,&DWP(2,$te,$v0,8)); # 6 - &mov ($key,$__key); # reincarnate v0 as key - &and ($s3,0xff); # , ,13,12* - &mov ($s3,&DWP(0,$te,$s3,8)); # 12 - &xor ($s3,$s2); # s[2]=t[3] collected - &mov ($s2,$v1); # s[2]=t[2] -} - -# More experimental code... SSE one... Even though this one eliminates -# *all* references to stack, it's not faster... -sub sse_encbody() -{ - &movz ($acc,&LB("eax")); # 0 - &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 0 - &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2 - &movz ("edx",&HB("eax")); # 1 - &mov ("edx",&DWP(3,$tbl,"edx",8)); # 1 - &shr ("eax",16); # 5, 4 - - &movz ($acc,&LB("ebx")); # 10 - &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 10 - &pshufw ("mm6","mm4",0x08); # 13,12, 9, 8 - &movz ($acc,&HB("ebx")); # 11 - &xor ("edx",&DWP(1,$tbl,$acc,8)); # 11 - &shr ("ebx",16); # 15,14 - - &movz ($acc,&HB("eax")); # 5 - &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 5 - &movq ("mm3",QWP(16,$key)); - &movz ($acc,&HB("ebx")); # 15 - &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 15 - &movd ("mm0","ecx"); # t[0] collected - - &movz ($acc,&LB("eax")); # 4 - &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 4 - &movd ("eax","mm2"); # 7, 6, 3, 2 - &movz ($acc,&LB("ebx")); # 14 - &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 14 - &movd ("ebx","mm6"); # 13,12, 9, 8 - - &movz ($acc,&HB("eax")); # 3 - &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 3 - &movz ($acc,&HB("ebx")); # 9 - &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 9 - &movd ("mm1","ecx"); # t[1] collected - - &movz ($acc,&LB("eax")); # 2 - &mov ("ecx",&DWP(2,$tbl,$acc,8)); # 2 - &shr ("eax",16); # 7, 6 - &punpckldq ("mm0","mm1"); # t[0,1] collected - &movz ($acc,&LB("ebx")); # 8 - &xor ("ecx",&DWP(0,$tbl,$acc,8)); # 8 - &shr ("ebx",16); # 13,12 - - &movz ($acc,&HB("eax")); # 7 - &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 7 - &pxor ("mm0","mm3"); - &movz ("eax",&LB("eax")); # 6 - &xor ("edx",&DWP(2,$tbl,"eax",8)); # 6 - &pshufw ("mm1","mm0",0x08); # 5, 4, 1, 0 - &movz ($acc,&HB("ebx")); # 13 - &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 13 - &xor ("ecx",&DWP(24,$key)); # t[2] - &movd ("mm4","ecx"); # t[2] collected - &movz ("ebx",&LB("ebx")); # 12 - &xor ("edx",&DWP(0,$tbl,"ebx",8)); # 12 - &shr ("ecx",16); - &movd ("eax","mm1"); # 5, 4, 1, 0 - &mov ("ebx",&DWP(28,$key)); # t[3] - &xor ("ebx","edx"); - &movd ("mm5","ebx"); # t[3] collected - &and ("ebx",0xffff0000); - &or ("ebx","ecx"); - - &punpckldq ("mm4","mm5"); # t[2,3] collected -} - -###################################################################### -# "Compact" block function -###################################################################### - -sub enccompact() -{ my $Fn = mov; - while ($#_>5) { pop(@_); $Fn=sub{}; } - my ($i,$te,@s)=@_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - # $Fn is used in first compact round and its purpose is to - # void restoration of some values from stack, so that after - # 4xenccompact with extra argument $key value is left there... - if ($i==3) { &$Fn ($key,$__key); }##%edx - else { &mov ($out,$s[0]); } - &and ($out,0xFF); - if ($i==1) { &shr ($s[0],16); }#%ebx[1] - if ($i==2) { &shr ($s[0],24); }#%ecx[2] - &movz ($out,&BP(-128,$te,$out,1)); - - if ($i==3) { $tmp=$s[1]; }##%eax - &movz ($tmp,&HB($s[1])); - &movz ($tmp,&BP(-128,$te,$tmp,1)); - &shl ($tmp,8); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx - else { &mov ($tmp,$s[2]); - &shr ($tmp,16); } - if ($i==2) { &and ($s[1],0xFF); }#%edx[2] - &and ($tmp,0xFF); - &movz ($tmp,&BP(-128,$te,$tmp,1)); - &shl ($tmp,16); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx - elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] - else { &mov ($tmp,$s[3]); - &shr ($tmp,24); } - &movz ($tmp,&BP(-128,$te,$tmp,1)); - &shl ($tmp,24); - &xor ($out,$tmp); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &mov ($s[3],$acc); } - &comment(); -} - -sub enctransform() -{ my @s = ($s0,$s1,$s2,$s3); - my $i = shift; - my $tmp = $tbl; - my $r2 = $key ; - - &mov ($acc,$s[$i]); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($r2,&DWP(0,$s[$i],$s[$i])); - &sub ($acc,$tmp); - &and ($r2,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &mov ($tmp,$s[$i]); - &xor ($acc,$r2); # r2 - - &xor ($s[$i],$acc); # r0 ^ r2 - &rotl ($s[$i],24); - &xor ($s[$i],$acc) # ROTATE(r2^r0,24) ^ r2 - &rotr ($tmp,16); - &xor ($s[$i],$tmp); - &rotr ($tmp,8); - &xor ($s[$i],$tmp); -} - -&function_begin_B("_x86_AES_encrypt_compact"); - # note that caller is expected to allocate stack frame for me! - &mov ($__key,$key); # save key - - &xor ($s0,&DWP(0,$key)); # xor with key - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov ($acc,&DWP(240,$key)); # load key->rounds - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - - # prefetch Te4 - &mov ($key,&DWP(0-128,$tbl)); - &mov ($acc,&DWP(32-128,$tbl)); - &mov ($key,&DWP(64-128,$tbl)); - &mov ($acc,&DWP(96-128,$tbl)); - &mov ($key,&DWP(128-128,$tbl)); - &mov ($acc,&DWP(160-128,$tbl)); - &mov ($key,&DWP(192-128,$tbl)); - &mov ($acc,&DWP(224-128,$tbl)); - - &set_label("loop",16); - - &enccompact(0,$tbl,$s0,$s1,$s2,$s3,1); - &enccompact(1,$tbl,$s1,$s2,$s3,$s0,1); - &enccompact(2,$tbl,$s2,$s3,$s0,$s1,1); - &enccompact(3,$tbl,$s3,$s0,$s1,$s2,1); - &enctransform(2); - &enctransform(3); - &enctransform(0); - &enctransform(1); - &mov ($key,$__key); - &mov ($tbl,$__tbl); - &add ($key,16); # advance rd_key - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &cmp ($key,$__end); - &mov ($__key,$key); - &jb (&label("loop")); - - &enccompact(0,$tbl,$s0,$s1,$s2,$s3); - &enccompact(1,$tbl,$s1,$s2,$s3,$s0); - &enccompact(2,$tbl,$s2,$s3,$s0,$s1); - &enccompact(3,$tbl,$s3,$s0,$s1,$s2); - - &xor ($s0,&DWP(16,$key)); - &xor ($s1,&DWP(20,$key)); - &xor ($s2,&DWP(24,$key)); - &xor ($s3,&DWP(28,$key)); - - &ret (); -&function_end_B("_x86_AES_encrypt_compact"); - -###################################################################### -# "Compact" SSE block function. -###################################################################### -# -# Performance is not actually extraordinary in comparison to pure -# x86 code. In particular encrypt performance is virtually the same. -# Decrypt performance on the other hand is 15-20% better on newer -# µ-archs [but we're thankful for *any* improvement here], and ~50% -# better on PIII:-) And additionally on the pros side this code -# eliminates redundant references to stack and thus relieves/ -# minimizes the pressure on the memory bus. -# -# MMX register layout lsb -# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ -# | mm4 | mm0 | -# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ -# | s3 | s2 | s1 | s0 | -# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ -# |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0| -# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ -# -# Indexes translate as s[N/4]>>(8*(N%4)), e.g. 5 means s1>>8. -# In this terms encryption and decryption "compact" permutation -# matrices can be depicted as following: -# -# encryption lsb # decryption lsb -# +----++----+----+----+----+ # +----++----+----+----+----+ -# | t0 || 15 | 10 | 5 | 0 | # | t0 || 7 | 10 | 13 | 0 | -# +----++----+----+----+----+ # +----++----+----+----+----+ -# | t1 || 3 | 14 | 9 | 4 | # | t1 || 11 | 14 | 1 | 4 | -# +----++----+----+----+----+ # +----++----+----+----+----+ -# | t2 || 7 | 2 | 13 | 8 | # | t2 || 15 | 2 | 5 | 8 | -# +----++----+----+----+----+ # +----++----+----+----+----+ -# | t3 || 11 | 6 | 1 | 12 | # | t3 || 3 | 6 | 9 | 12 | -# +----++----+----+----+----+ # +----++----+----+----+----+ -# -###################################################################### -# Why not xmm registers? Short answer. It was actually tested and -# was not any faster, but *contrary*, most notably on Intel CPUs. -# Longer answer. Main advantage of using mm registers is that movd -# latency is lower, especially on Intel P4. While arithmetic -# instructions are twice as many, they can be scheduled every cycle -# and not every second one when they are operating on xmm register, -# so that "arithmetic throughput" remains virtually the same. And -# finally the code can be executed even on elder SSE-only CPUs:-) - -sub sse_enccompact() -{ - &pshufw ("mm1","mm0",0x08); # 5, 4, 1, 0 - &pshufw ("mm5","mm4",0x0d); # 15,14,11,10 - &movd ("eax","mm1"); # 5, 4, 1, 0 - &movd ("ebx","mm5"); # 15,14,11,10 - - &movz ($acc,&LB("eax")); # 0 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 - &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2 - &movz ("edx",&HB("eax")); # 1 - &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1 - &shl ("edx",8); # 1 - &shr ("eax",16); # 5, 4 - - &movz ($acc,&LB("ebx")); # 10 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 10 - &shl ($acc,16); # 10 - &or ("ecx",$acc); # 10 - &pshufw ("mm6","mm4",0x08); # 13,12, 9, 8 - &movz ($acc,&HB("ebx")); # 11 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 11 - &shl ($acc,24); # 11 - &or ("edx",$acc); # 11 - &shr ("ebx",16); # 15,14 - - &movz ($acc,&HB("eax")); # 5 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 5 - &shl ($acc,8); # 5 - &or ("ecx",$acc); # 5 - &movz ($acc,&HB("ebx")); # 15 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 15 - &shl ($acc,24); # 15 - &or ("ecx",$acc); # 15 - &movd ("mm0","ecx"); # t[0] collected - - &movz ($acc,&LB("eax")); # 4 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 4 - &movd ("eax","mm2"); # 7, 6, 3, 2 - &movz ($acc,&LB("ebx")); # 14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 14 - &shl ($acc,16); # 14 - &or ("ecx",$acc); # 14 - - &movd ("ebx","mm6"); # 13,12, 9, 8 - &movz ($acc,&HB("eax")); # 3 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 3 - &shl ($acc,24); # 3 - &or ("ecx",$acc); # 3 - &movz ($acc,&HB("ebx")); # 9 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 9 - &shl ($acc,8); # 9 - &or ("ecx",$acc); # 9 - &movd ("mm1","ecx"); # t[1] collected - - &movz ($acc,&LB("ebx")); # 8 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 8 - &shr ("ebx",16); # 13,12 - &movz ($acc,&LB("eax")); # 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 2 - &shl ($acc,16); # 2 - &or ("ecx",$acc); # 2 - &shr ("eax",16); # 7, 6 - - &punpckldq ("mm0","mm1"); # t[0,1] collected - - &movz ($acc,&HB("eax")); # 7 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 7 - &shl ($acc,24); # 7 - &or ("ecx",$acc); # 7 - &and ("eax",0xff); # 6 - &movz ("eax",&BP(-128,$tbl,"eax",1)); # 6 - &shl ("eax",16); # 6 - &or ("edx","eax"); # 6 - &movz ($acc,&HB("ebx")); # 13 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 13 - &shl ($acc,8); # 13 - &or ("ecx",$acc); # 13 - &movd ("mm4","ecx"); # t[2] collected - &and ("ebx",0xff); # 12 - &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 12 - &or ("edx","ebx"); # 12 - &movd ("mm5","edx"); # t[3] collected - - &punpckldq ("mm4","mm5"); # t[2,3] collected -} - - if (!$x86only) { -&function_begin_B("_sse_AES_encrypt_compact"); - &pxor ("mm0",&QWP(0,$key)); # 7, 6, 5, 4, 3, 2, 1, 0 - &pxor ("mm4",&QWP(8,$key)); # 15,14,13,12,11,10, 9, 8 - - # note that caller is expected to allocate stack frame for me! - &mov ($acc,&DWP(240,$key)); # load key->rounds - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - - &mov ($s0,0x1b1b1b1b); # magic constant - &mov (&DWP(8,"esp"),$s0); - &mov (&DWP(12,"esp"),$s0); - - # prefetch Te4 - &mov ($s0,&DWP(0-128,$tbl)); - &mov ($s1,&DWP(32-128,$tbl)); - &mov ($s2,&DWP(64-128,$tbl)); - &mov ($s3,&DWP(96-128,$tbl)); - &mov ($s0,&DWP(128-128,$tbl)); - &mov ($s1,&DWP(160-128,$tbl)); - &mov ($s2,&DWP(192-128,$tbl)); - &mov ($s3,&DWP(224-128,$tbl)); - - &set_label("loop",16); - &sse_enccompact(); - &add ($key,16); - &cmp ($key,$__end); - &ja (&label("out")); - - &movq ("mm2",&QWP(8,"esp")); - &pxor ("mm3","mm3"); &pxor ("mm7","mm7"); - &movq ("mm1","mm0"); &movq ("mm5","mm4"); # r0 - &pcmpgtb("mm3","mm0"); &pcmpgtb("mm7","mm4"); - &pand ("mm3","mm2"); &pand ("mm7","mm2"); - &pshufw ("mm2","mm0",0xb1); &pshufw ("mm6","mm4",0xb1);# ROTATE(r0,16) - &paddb ("mm0","mm0"); &paddb ("mm4","mm4"); - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # = r2 - &pshufw ("mm3","mm2",0xb1); &pshufw ("mm7","mm6",0xb1);# r0 - &pxor ("mm1","mm0"); &pxor ("mm5","mm4"); # r0^r2 - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= ROTATE(r0,16) - - &movq ("mm2","mm3"); &movq ("mm6","mm7"); - &pslld ("mm3",8); &pslld ("mm7",8); - &psrld ("mm2",24); &psrld ("mm6",24); - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= r0<<8 - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= r0>>24 - - &movq ("mm3","mm1"); &movq ("mm7","mm5"); - &movq ("mm2",&QWP(0,$key)); &movq ("mm6",&QWP(8,$key)); - &psrld ("mm1",8); &psrld ("mm5",8); - &mov ($s0,&DWP(0-128,$tbl)); - &pslld ("mm3",24); &pslld ("mm7",24); - &mov ($s1,&DWP(64-128,$tbl)); - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= (r2^r0)<<8 - &mov ($s2,&DWP(128-128,$tbl)); - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= (r2^r0)>>24 - &mov ($s3,&DWP(192-128,$tbl)); - - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); - &jmp (&label("loop")); - - &set_label("out",16); - &pxor ("mm0",&QWP(0,$key)); - &pxor ("mm4",&QWP(8,$key)); - - &ret (); -&function_end_B("_sse_AES_encrypt_compact"); - } - -###################################################################### -# Vanilla block function. -###################################################################### - -sub encstep() -{ my ($i,$te,@s) = @_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - # lines marked with #%e?x[i] denote "reordered" instructions... - if ($i==3) { &mov ($key,$__key); }##%edx - else { &mov ($out,$s[0]); - &and ($out,0xFF); } - if ($i==1) { &shr ($s[0],16); }#%ebx[1] - if ($i==2) { &shr ($s[0],24); }#%ecx[2] - &mov ($out,&DWP(0,$te,$out,8)); - - if ($i==3) { $tmp=$s[1]; }##%eax - &movz ($tmp,&HB($s[1])); - &xor ($out,&DWP(3,$te,$tmp,8)); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx - else { &mov ($tmp,$s[2]); - &shr ($tmp,16); } - if ($i==2) { &and ($s[1],0xFF); }#%edx[2] - &and ($tmp,0xFF); - &xor ($out,&DWP(2,$te,$tmp,8)); - - if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx - elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] - else { &mov ($tmp,$s[3]); - &shr ($tmp,24) } - &xor ($out,&DWP(1,$te,$tmp,8)); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &mov ($s[3],$acc); } - &comment(); -} - -sub enclast() -{ my ($i,$te,@s)=@_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - if ($i==3) { &mov ($key,$__key); }##%edx - else { &mov ($out,$s[0]); } - &and ($out,0xFF); - if ($i==1) { &shr ($s[0],16); }#%ebx[1] - if ($i==2) { &shr ($s[0],24); }#%ecx[2] - &mov ($out,&DWP(2,$te,$out,8)); - &and ($out,0x000000ff); - - if ($i==3) { $tmp=$s[1]; }##%eax - &movz ($tmp,&HB($s[1])); - &mov ($tmp,&DWP(0,$te,$tmp,8)); - &and ($tmp,0x0000ff00); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx - else { &mov ($tmp,$s[2]); - &shr ($tmp,16); } - if ($i==2) { &and ($s[1],0xFF); }#%edx[2] - &and ($tmp,0xFF); - &mov ($tmp,&DWP(0,$te,$tmp,8)); - &and ($tmp,0x00ff0000); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx - elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2] - else { &mov ($tmp,$s[3]); - &shr ($tmp,24); } - &mov ($tmp,&DWP(2,$te,$tmp,8)); - &and ($tmp,0xff000000); - &xor ($out,$tmp); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &mov ($s[3],$acc); } -} - -&function_begin_B("_x86_AES_encrypt"); - if ($vertical_spin) { - # I need high parts of volatile registers to be accessible... - &exch ($s1="edi",$key="ebx"); - &mov ($s2="esi",$acc="ecx"); - } - - # note that caller is expected to allocate stack frame for me! - &mov ($__key,$key); # save key - - &xor ($s0,&DWP(0,$key)); # xor with key - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov ($acc,&DWP(240,$key)); # load key->rounds - - if ($small_footprint) { - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - - &set_label("loop",16); - if ($vertical_spin) { - &encvert($tbl,$s0,$s1,$s2,$s3); - } else { - &encstep(0,$tbl,$s0,$s1,$s2,$s3); - &encstep(1,$tbl,$s1,$s2,$s3,$s0); - &encstep(2,$tbl,$s2,$s3,$s0,$s1); - &encstep(3,$tbl,$s3,$s0,$s1,$s2); - } - &add ($key,16); # advance rd_key - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - &cmp ($key,$__end); - &mov ($__key,$key); - &jb (&label("loop")); - } - else { - &cmp ($acc,10); - &jle (&label("10rounds")); - &cmp ($acc,12); - &jle (&label("12rounds")); - - &set_label("14rounds",4); - for ($i=1;$i<3;$i++) { - if ($vertical_spin) { - &encvert($tbl,$s0,$s1,$s2,$s3); - } else { - &encstep(0,$tbl,$s0,$s1,$s2,$s3); - &encstep(1,$tbl,$s1,$s2,$s3,$s0); - &encstep(2,$tbl,$s2,$s3,$s0,$s1); - &encstep(3,$tbl,$s3,$s0,$s1,$s2); - } - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - &add ($key,32); - &mov ($__key,$key); # advance rd_key - &set_label("12rounds",4); - for ($i=1;$i<3;$i++) { - if ($vertical_spin) { - &encvert($tbl,$s0,$s1,$s2,$s3); - } else { - &encstep(0,$tbl,$s0,$s1,$s2,$s3); - &encstep(1,$tbl,$s1,$s2,$s3,$s0); - &encstep(2,$tbl,$s2,$s3,$s0,$s1); - &encstep(3,$tbl,$s3,$s0,$s1,$s2); - } - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - &add ($key,32); - &mov ($__key,$key); # advance rd_key - &set_label("10rounds",4); - for ($i=1;$i<10;$i++) { - if ($vertical_spin) { - &encvert($tbl,$s0,$s1,$s2,$s3); - } else { - &encstep(0,$tbl,$s0,$s1,$s2,$s3); - &encstep(1,$tbl,$s1,$s2,$s3,$s0); - &encstep(2,$tbl,$s2,$s3,$s0,$s1); - &encstep(3,$tbl,$s3,$s0,$s1,$s2); - } - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - } - - if ($vertical_spin) { - # "reincarnate" some registers for "horizontal" spin... - &mov ($s1="ebx",$key="edi"); - &mov ($s2="ecx",$acc="esi"); - } - &enclast(0,$tbl,$s0,$s1,$s2,$s3); - &enclast(1,$tbl,$s1,$s2,$s3,$s0); - &enclast(2,$tbl,$s2,$s3,$s0,$s1); - &enclast(3,$tbl,$s3,$s0,$s1,$s2); - - &add ($key,$small_footprint?16:160); - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &ret (); - -&set_label("AES_Te",64); # Yes! I keep it in the code segment! - &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); - &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); - &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); - &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec); - &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa); - &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb); - &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45); - &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b); - &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c); - &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83); - &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9); - &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a); - &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d); - &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f); - &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df); - &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea); - &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34); - &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b); - &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d); - &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413); - &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1); - &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6); - &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972); - &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85); - &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed); - &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511); - &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe); - &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b); - &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05); - &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1); - &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142); - &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf); - &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3); - &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e); - &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a); - &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6); - &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3); - &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b); - &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428); - &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad); - &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14); - &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8); - &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4); - &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2); - &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda); - &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949); - &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf); - &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810); - &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c); - &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697); - &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e); - &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f); - &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc); - &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c); - &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969); - &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27); - &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122); - &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433); - &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9); - &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5); - &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a); - &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0); - &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e); - &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c); - -#Te4 # four copies of Te4 to choose from to avoid L1 aliasing - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); -#rcon: - &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008); - &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); - &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); - &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); -&function_end_B("_x86_AES_encrypt"); - -# void AES_encrypt (const void *inp,void *out,const AES_KEY *key); -&function_begin("AES_encrypt"); - &mov ($acc,&wparam(0)); # load inp - &mov ($key,&wparam(2)); # load key - - &mov ($s0,"esp"); - &sub ("esp",36); - &and ("esp",-64); # align to cache-line - - # place stack frame just "above" the key schedule - &lea ($s1,&DWP(-64-63,$key)); - &sub ($s1,"esp"); - &neg ($s1); - &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp",$s1); - &add ("esp",4); # 4 is reserved for caller's return address - &mov ($_esp,$s0); # save stack pointer - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); - - # pick Te4 copy which can't "overlap" with stack frame or key schedule - &lea ($s1,&DWP(768-4,"esp")); - &sub ($s1,$tbl); - &and ($s1,0x300); - &lea ($tbl,&DWP(2048+128,$tbl,$s1)); - - if (!$x86only) { - &bt (&DWP(0,$s0),25); # check for SSE bit - &jnc (&label("x86")); - - &movq ("mm0",&QWP(0,$acc)); - &movq ("mm4",&QWP(8,$acc)); - &call ("_sse_AES_encrypt_compact"); - &mov ("esp",$_esp); # restore stack pointer - &mov ($acc,&wparam(1)); # load out - &movq (&QWP(0,$acc),"mm0"); # write output data - &movq (&QWP(8,$acc),"mm4"); - &emms (); - &function_end_A(); - } - &set_label("x86",16); - &mov ($_tbl,$tbl); - &mov ($s0,&DWP(0,$acc)); # load input data - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - &call ("_x86_AES_encrypt_compact"); - &mov ("esp",$_esp); # restore stack pointer - &mov ($acc,&wparam(1)); # load out - &mov (&DWP(0,$acc),$s0); # write output data - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); -&function_end("AES_encrypt"); - -#--------------------------------------------------------------------# - -###################################################################### -# "Compact" block function -###################################################################### - -sub deccompact() -{ my $Fn = mov; - while ($#_>5) { pop(@_); $Fn=sub{}; } - my ($i,$td,@s)=@_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - # $Fn is used in first compact round and its purpose is to - # void restoration of some values from stack, so that after - # 4xdeccompact with extra argument $key, $s0 and $s1 values - # are left there... - if($i==3) { &$Fn ($key,$__key); } - else { &mov ($out,$s[0]); } - &and ($out,0xFF); - &movz ($out,&BP(-128,$td,$out,1)); - - if ($i==3) { $tmp=$s[1]; } - &movz ($tmp,&HB($s[1])); - &movz ($tmp,&BP(-128,$td,$tmp,1)); - &shl ($tmp,8); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } - else { mov ($tmp,$s[2]); } - &shr ($tmp,16); - &and ($tmp,0xFF); - &movz ($tmp,&BP(-128,$td,$tmp,1)); - &shl ($tmp,16); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[3]; &$Fn ($s[2],$__s1); } - else { &mov ($tmp,$s[3]); } - &shr ($tmp,24); - &movz ($tmp,&BP(-128,$td,$tmp,1)); - &shl ($tmp,24); - &xor ($out,$tmp); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &$Fn ($s[3],$__s0); } -} - -# must be called with 2,3,0,1 as argument sequence!!! -sub dectransform() -{ my @s = ($s0,$s1,$s2,$s3); - my $i = shift; - my $tmp = $key; - my $tp2 = @s[($i+2)%4]; $tp2 = @s[2] if ($i==1); - my $tp4 = @s[($i+3)%4]; $tp4 = @s[3] if ($i==1); - my $tp8 = $tbl; - - &mov ($acc,$s[$i]); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp2,&DWP(0,$s[$i],$s[$i])); - &sub ($acc,$tmp); - &and ($tp2,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &xor ($acc,$tp2); - &mov ($tp2,$acc); - - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp4,&DWP(0,$tp2,$tp2)); - &sub ($acc,$tmp); - &and ($tp4,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &xor ($tp2,$s[$i]); # tp2^tp1 - &xor ($acc,$tp4); - &mov ($tp4,$acc); - - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp8,&DWP(0,$tp4,$tp4)); - &sub ($acc,$tmp); - &and ($tp8,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &xor ($tp4,$s[$i]); # tp4^tp1 - &rotl ($s[$i],8); # = ROTATE(tp1,8) - &xor ($tp8,$acc); - - &xor ($s[$i],$tp2); - &xor ($tp2,$tp8); - &rotl ($tp2,24); - &xor ($s[$i],$tp4); - &xor ($tp4,$tp8); - &rotl ($tp4,16); - &xor ($s[$i],$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1) - &rotl ($tp8,8); - &xor ($s[$i],$tp2); # ^= ROTATE(tp8^tp2^tp1,24) - &xor ($s[$i],$tp4); # ^= ROTATE(tp8^tp4^tp1,16) - &mov ($s[0],$__s0) if($i==2); #prefetch $s0 - &mov ($s[1],$__s1) if($i==3); #prefetch $s1 - &mov ($s[2],$__s2) if($i==1); - &xor ($s[$i],$tp8); # ^= ROTATE(tp8,8) - - &mov ($s[3],$__s3) if($i==1); - &mov (&DWP(4+4*$i,"esp"),$s[$i]) if($i>=2); -} - -&function_begin_B("_x86_AES_decrypt_compact"); - # note that caller is expected to allocate stack frame for me! - &mov ($__key,$key); # save key - - &xor ($s0,&DWP(0,$key)); # xor with key - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov ($acc,&DWP(240,$key)); # load key->rounds - - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - - # prefetch Td4 - &mov ($key,&DWP(0-128,$tbl)); - &mov ($acc,&DWP(32-128,$tbl)); - &mov ($key,&DWP(64-128,$tbl)); - &mov ($acc,&DWP(96-128,$tbl)); - &mov ($key,&DWP(128-128,$tbl)); - &mov ($acc,&DWP(160-128,$tbl)); - &mov ($key,&DWP(192-128,$tbl)); - &mov ($acc,&DWP(224-128,$tbl)); - - &set_label("loop",16); - - &deccompact(0,$tbl,$s0,$s3,$s2,$s1,1); - &deccompact(1,$tbl,$s1,$s0,$s3,$s2,1); - &deccompact(2,$tbl,$s2,$s1,$s0,$s3,1); - &deccompact(3,$tbl,$s3,$s2,$s1,$s0,1); - &dectransform(2); - &dectransform(3); - &dectransform(0); - &dectransform(1); - &mov ($key,$__key); - &mov ($tbl,$__tbl); - &add ($key,16); # advance rd_key - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &cmp ($key,$__end); - &mov ($__key,$key); - &jb (&label("loop")); - - &deccompact(0,$tbl,$s0,$s3,$s2,$s1); - &deccompact(1,$tbl,$s1,$s0,$s3,$s2); - &deccompact(2,$tbl,$s2,$s1,$s0,$s3); - &deccompact(3,$tbl,$s3,$s2,$s1,$s0); - - &xor ($s0,&DWP(16,$key)); - &xor ($s1,&DWP(20,$key)); - &xor ($s2,&DWP(24,$key)); - &xor ($s3,&DWP(28,$key)); - - &ret (); -&function_end_B("_x86_AES_decrypt_compact"); - -###################################################################### -# "Compact" SSE block function. -###################################################################### - -sub sse_deccompact() -{ - &pshufw ("mm1","mm0",0x0c); # 7, 6, 1, 0 - &movd ("eax","mm1"); # 7, 6, 1, 0 - - &pshufw ("mm5","mm4",0x09); # 13,12,11,10 - &movz ($acc,&LB("eax")); # 0 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 - &movd ("ebx","mm5"); # 13,12,11,10 - &movz ("edx",&HB("eax")); # 1 - &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1 - &shl ("edx",8); # 1 - - &pshufw ("mm2","mm0",0x06); # 3, 2, 5, 4 - &movz ($acc,&LB("ebx")); # 10 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 10 - &shl ($acc,16); # 10 - &or ("ecx",$acc); # 10 - &shr ("eax",16); # 7, 6 - &movz ($acc,&HB("ebx")); # 11 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 11 - &shl ($acc,24); # 11 - &or ("edx",$acc); # 11 - &shr ("ebx",16); # 13,12 - - &pshufw ("mm6","mm4",0x03); # 9, 8,15,14 - &movz ($acc,&HB("eax")); # 7 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 7 - &shl ($acc,24); # 7 - &or ("ecx",$acc); # 7 - &movz ($acc,&HB("ebx")); # 13 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 13 - &shl ($acc,8); # 13 - &or ("ecx",$acc); # 13 - &movd ("mm0","ecx"); # t[0] collected - - &movz ($acc,&LB("eax")); # 6 - &movd ("eax","mm2"); # 3, 2, 5, 4 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 6 - &shl ("ecx",16); # 6 - &movz ($acc,&LB("ebx")); # 12 - &movd ("ebx","mm6"); # 9, 8,15,14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 12 - &or ("ecx",$acc); # 12 - - &movz ($acc,&LB("eax")); # 4 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 4 - &or ("edx",$acc); # 4 - &movz ($acc,&LB("ebx")); # 14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 14 - &shl ($acc,16); # 14 - &or ("edx",$acc); # 14 - &movd ("mm1","edx"); # t[1] collected - - &movz ($acc,&HB("eax")); # 5 - &movz ("edx",&BP(-128,$tbl,$acc,1)); # 5 - &shl ("edx",8); # 5 - &movz ($acc,&HB("ebx")); # 15 - &shr ("eax",16); # 3, 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 15 - &shl ($acc,24); # 15 - &or ("edx",$acc); # 15 - &shr ("ebx",16); # 9, 8 - - &punpckldq ("mm0","mm1"); # t[0,1] collected - - &movz ($acc,&HB("ebx")); # 9 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 9 - &shl ($acc,8); # 9 - &or ("ecx",$acc); # 9 - &and ("ebx",0xff); # 8 - &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 8 - &or ("edx","ebx"); # 8 - &movz ($acc,&LB("eax")); # 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 2 - &shl ($acc,16); # 2 - &or ("edx",$acc); # 2 - &movd ("mm4","edx"); # t[2] collected - &movz ("eax",&HB("eax")); # 3 - &movz ("eax",&BP(-128,$tbl,"eax",1)); # 3 - &shl ("eax",24); # 3 - &or ("ecx","eax"); # 3 - &movd ("mm5","ecx"); # t[3] collected - - &punpckldq ("mm4","mm5"); # t[2,3] collected -} - - if (!$x86only) { -&function_begin_B("_sse_AES_decrypt_compact"); - &pxor ("mm0",&QWP(0,$key)); # 7, 6, 5, 4, 3, 2, 1, 0 - &pxor ("mm4",&QWP(8,$key)); # 15,14,13,12,11,10, 9, 8 - - # note that caller is expected to allocate stack frame for me! - &mov ($acc,&DWP(240,$key)); # load key->rounds - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - - &mov ($s0,0x1b1b1b1b); # magic constant - &mov (&DWP(8,"esp"),$s0); - &mov (&DWP(12,"esp"),$s0); - - # prefetch Td4 - &mov ($s0,&DWP(0-128,$tbl)); - &mov ($s1,&DWP(32-128,$tbl)); - &mov ($s2,&DWP(64-128,$tbl)); - &mov ($s3,&DWP(96-128,$tbl)); - &mov ($s0,&DWP(128-128,$tbl)); - &mov ($s1,&DWP(160-128,$tbl)); - &mov ($s2,&DWP(192-128,$tbl)); - &mov ($s3,&DWP(224-128,$tbl)); - - &set_label("loop",16); - &sse_deccompact(); - &add ($key,16); - &cmp ($key,$__end); - &ja (&label("out")); - - # ROTATE(x^y,N) == ROTATE(x,N)^ROTATE(y,N) - &movq ("mm3","mm0"); &movq ("mm7","mm4"); - &movq ("mm2","mm0",1); &movq ("mm6","mm4",1); - &movq ("mm1","mm0"); &movq ("mm5","mm4"); - &pshufw ("mm0","mm0",0xb1); &pshufw ("mm4","mm4",0xb1);# = ROTATE(tp0,16) - &pslld ("mm2",8); &pslld ("mm6",8); - &psrld ("mm3",8); &psrld ("mm7",8); - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp0<<8 - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp0>>8 - &pslld ("mm2",16); &pslld ("mm6",16); - &psrld ("mm3",16); &psrld ("mm7",16); - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp0<<24 - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp0>>24 - - &movq ("mm3",&QWP(8,"esp")); - &pxor ("mm2","mm2"); &pxor ("mm6","mm6"); - &pcmpgtb("mm2","mm1"); &pcmpgtb("mm6","mm5"); - &pand ("mm2","mm3"); &pand ("mm6","mm3"); - &paddb ("mm1","mm1"); &paddb ("mm5","mm5"); - &pxor ("mm1","mm2"); &pxor ("mm5","mm6"); # tp2 - &movq ("mm3","mm1"); &movq ("mm7","mm5"); - &movq ("mm2","mm1"); &movq ("mm6","mm5"); - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp2 - &pslld ("mm3",24); &pslld ("mm7",24); - &psrld ("mm2",8); &psrld ("mm6",8); - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp2<<24 - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp2>>8 - - &movq ("mm2",&QWP(8,"esp")); - &pxor ("mm3","mm3"); &pxor ("mm7","mm7"); - &pcmpgtb("mm3","mm1"); &pcmpgtb("mm7","mm5"); - &pand ("mm3","mm2"); &pand ("mm7","mm2"); - &paddb ("mm1","mm1"); &paddb ("mm5","mm5"); - &pxor ("mm1","mm3"); &pxor ("mm5","mm7"); # tp4 - &pshufw ("mm3","mm1",0xb1); &pshufw ("mm7","mm5",0xb1); - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp4 - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= ROTATE(tp4,16) - - &pxor ("mm3","mm3"); &pxor ("mm7","mm7"); - &pcmpgtb("mm3","mm1"); &pcmpgtb("mm7","mm5"); - &pand ("mm3","mm2"); &pand ("mm7","mm2"); - &paddb ("mm1","mm1"); &paddb ("mm5","mm5"); - &pxor ("mm1","mm3"); &pxor ("mm5","mm7"); # tp8 - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8 - &movq ("mm3","mm1"); &movq ("mm7","mm5"); - &pshufw ("mm2","mm1",0xb1); &pshufw ("mm6","mm5",0xb1); - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= ROTATE(tp8,16) - &pslld ("mm1",8); &pslld ("mm5",8); - &psrld ("mm3",8); &psrld ("mm7",8); - &movq ("mm2",&QWP(0,$key)); &movq ("mm6",&QWP(8,$key)); - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8<<8 - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp8>>8 - &mov ($s0,&DWP(0-128,$tbl)); - &pslld ("mm1",16); &pslld ("mm5",16); - &mov ($s1,&DWP(64-128,$tbl)); - &psrld ("mm3",16); &psrld ("mm7",16); - &mov ($s2,&DWP(128-128,$tbl)); - &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8<<24 - &mov ($s3,&DWP(192-128,$tbl)); - &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp8>>24 - - &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); - &jmp (&label("loop")); - - &set_label("out",16); - &pxor ("mm0",&QWP(0,$key)); - &pxor ("mm4",&QWP(8,$key)); - - &ret (); -&function_end_B("_sse_AES_decrypt_compact"); - } - -###################################################################### -# Vanilla block function. -###################################################################### - -sub decstep() -{ my ($i,$td,@s) = @_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - # no instructions are reordered, as performance appears - # optimal... or rather that all attempts to reorder didn't - # result in better performance [which by the way is not a - # bit lower than ecryption]. - if($i==3) { &mov ($key,$__key); } - else { &mov ($out,$s[0]); } - &and ($out,0xFF); - &mov ($out,&DWP(0,$td,$out,8)); - - if ($i==3) { $tmp=$s[1]; } - &movz ($tmp,&HB($s[1])); - &xor ($out,&DWP(3,$td,$tmp,8)); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } - else { &mov ($tmp,$s[2]); } - &shr ($tmp,16); - &and ($tmp,0xFF); - &xor ($out,&DWP(2,$td,$tmp,8)); - - if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); } - else { &mov ($tmp,$s[3]); } - &shr ($tmp,24); - &xor ($out,&DWP(1,$td,$tmp,8)); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &mov ($s[3],$__s0); } - &comment(); -} - -sub declast() -{ my ($i,$td,@s)=@_; - my $tmp = $key; - my $out = $i==3?$s[0]:$acc; - - if($i==0) { &lea ($td,&DWP(2048+128,$td)); - &mov ($tmp,&DWP(0-128,$td)); - &mov ($acc,&DWP(32-128,$td)); - &mov ($tmp,&DWP(64-128,$td)); - &mov ($acc,&DWP(96-128,$td)); - &mov ($tmp,&DWP(128-128,$td)); - &mov ($acc,&DWP(160-128,$td)); - &mov ($tmp,&DWP(192-128,$td)); - &mov ($acc,&DWP(224-128,$td)); - &lea ($td,&DWP(-128,$td)); } - if($i==3) { &mov ($key,$__key); } - else { &mov ($out,$s[0]); } - &and ($out,0xFF); - &movz ($out,&BP(0,$td,$out,1)); - - if ($i==3) { $tmp=$s[1]; } - &movz ($tmp,&HB($s[1])); - &movz ($tmp,&BP(0,$td,$tmp,1)); - &shl ($tmp,8); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } - else { mov ($tmp,$s[2]); } - &shr ($tmp,16); - &and ($tmp,0xFF); - &movz ($tmp,&BP(0,$td,$tmp,1)); - &shl ($tmp,16); - &xor ($out,$tmp); - - if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); } - else { &mov ($tmp,$s[3]); } - &shr ($tmp,24); - &movz ($tmp,&BP(0,$td,$tmp,1)); - &shl ($tmp,24); - &xor ($out,$tmp); - if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } - if ($i==3) { &mov ($s[3],$__s0); - &lea ($td,&DWP(-2048,$td)); } -} - -&function_begin_B("_x86_AES_decrypt"); - # note that caller is expected to allocate stack frame for me! - &mov ($__key,$key); # save key - - &xor ($s0,&DWP(0,$key)); # xor with key - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov ($acc,&DWP(240,$key)); # load key->rounds - - if ($small_footprint) { - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov ($__end,$acc); # end of key schedule - &set_label("loop",16); - &decstep(0,$tbl,$s0,$s3,$s2,$s1); - &decstep(1,$tbl,$s1,$s0,$s3,$s2); - &decstep(2,$tbl,$s2,$s1,$s0,$s3); - &decstep(3,$tbl,$s3,$s2,$s1,$s0); - &add ($key,16); # advance rd_key - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - &cmp ($key,$__end); - &mov ($__key,$key); - &jb (&label("loop")); - } - else { - &cmp ($acc,10); - &jle (&label("10rounds")); - &cmp ($acc,12); - &jle (&label("12rounds")); - - &set_label("14rounds",4); - for ($i=1;$i<3;$i++) { - &decstep(0,$tbl,$s0,$s3,$s2,$s1); - &decstep(1,$tbl,$s1,$s0,$s3,$s2); - &decstep(2,$tbl,$s2,$s1,$s0,$s3); - &decstep(3,$tbl,$s3,$s2,$s1,$s0); - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - &add ($key,32); - &mov ($__key,$key); # advance rd_key - &set_label("12rounds",4); - for ($i=1;$i<3;$i++) { - &decstep(0,$tbl,$s0,$s3,$s2,$s1); - &decstep(1,$tbl,$s1,$s0,$s3,$s2); - &decstep(2,$tbl,$s2,$s1,$s0,$s3); - &decstep(3,$tbl,$s3,$s2,$s1,$s0); - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - &add ($key,32); - &mov ($__key,$key); # advance rd_key - &set_label("10rounds",4); - for ($i=1;$i<10;$i++) { - &decstep(0,$tbl,$s0,$s3,$s2,$s1); - &decstep(1,$tbl,$s1,$s0,$s3,$s2); - &decstep(2,$tbl,$s2,$s1,$s0,$s3); - &decstep(3,$tbl,$s3,$s2,$s1,$s0); - &xor ($s0,&DWP(16*$i+0,$key)); - &xor ($s1,&DWP(16*$i+4,$key)); - &xor ($s2,&DWP(16*$i+8,$key)); - &xor ($s3,&DWP(16*$i+12,$key)); - } - } - - &declast(0,$tbl,$s0,$s3,$s2,$s1); - &declast(1,$tbl,$s1,$s0,$s3,$s2); - &declast(2,$tbl,$s2,$s1,$s0,$s3); - &declast(3,$tbl,$s3,$s2,$s1,$s0); - - &add ($key,$small_footprint?16:160); - &xor ($s0,&DWP(0,$key)); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &ret (); - -&set_label("AES_Td",64); # Yes! I keep it in the code segment! - &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); - &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); - &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); - &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5); - &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d); - &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b); - &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295); - &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e); - &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927); - &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d); - &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362); - &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9); - &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52); - &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566); - &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3); - &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed); - &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e); - &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4); - &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4); - &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd); - &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d); - &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060); - &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967); - &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879); - &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000); - &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c); - &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36); - &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624); - &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b); - &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c); - &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12); - &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14); - &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3); - &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b); - &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8); - &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684); - &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7); - &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177); - &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947); - &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322); - &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498); - &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f); - &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54); - &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382); - &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf); - &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb); - &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83); - &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef); - &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029); - &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235); - &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733); - &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117); - &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4); - &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546); - &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb); - &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d); - &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb); - &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a); - &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773); - &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478); - &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2); - &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff); - &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664); - &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0); - -#Td4: # four copies of Td4 to choose from to avoid L1 aliasing - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); - - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); - - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); - - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -&function_end_B("_x86_AES_decrypt"); - -# void AES_decrypt (const void *inp,void *out,const AES_KEY *key); -&function_begin("AES_decrypt"); - &mov ($acc,&wparam(0)); # load inp - &mov ($key,&wparam(2)); # load key - - &mov ($s0,"esp"); - &sub ("esp",36); - &and ("esp",-64); # align to cache-line - - # place stack frame just "above" the key schedule - &lea ($s1,&DWP(-64-63,$key)); - &sub ($s1,"esp"); - &neg ($s1); - &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp",$s1); - &add ("esp",4); # 4 is reserved for caller's return address - &mov ($_esp,$s0); # save stack pointer - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); - &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl)); - - # pick Td4 copy which can't "overlap" with stack frame or key schedule - &lea ($s1,&DWP(768-4,"esp")); - &sub ($s1,$tbl); - &and ($s1,0x300); - &lea ($tbl,&DWP(2048+128,$tbl,$s1)); - - if (!$x86only) { - &bt (&DWP(0,$s0),25); # check for SSE bit - &jnc (&label("x86")); - - &movq ("mm0",&QWP(0,$acc)); - &movq ("mm4",&QWP(8,$acc)); - &call ("_sse_AES_decrypt_compact"); - &mov ("esp",$_esp); # restore stack pointer - &mov ($acc,&wparam(1)); # load out - &movq (&QWP(0,$acc),"mm0"); # write output data - &movq (&QWP(8,$acc),"mm4"); - &emms (); - &function_end_A(); - } - &set_label("x86",16); - &mov ($_tbl,$tbl); - &mov ($s0,&DWP(0,$acc)); # load input data - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - &call ("_x86_AES_decrypt_compact"); - &mov ("esp",$_esp); # restore stack pointer - &mov ($acc,&wparam(1)); # load out - &mov (&DWP(0,$acc),$s0); # write output data - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); -&function_end("AES_decrypt"); - -# void AES_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivp,const int enc); -{ -# stack frame layout -# -4(%esp) # return address 0(%esp) -# 0(%esp) # s0 backing store 4(%esp) -# 4(%esp) # s1 backing store 8(%esp) -# 8(%esp) # s2 backing store 12(%esp) -# 12(%esp) # s3 backing store 16(%esp) -# 16(%esp) # key backup 20(%esp) -# 20(%esp) # end of key schedule 24(%esp) -# 24(%esp) # %ebp backup 28(%esp) -# 28(%esp) # %esp backup -my $_inp=&DWP(32,"esp"); # copy of wparam(0) -my $_out=&DWP(36,"esp"); # copy of wparam(1) -my $_len=&DWP(40,"esp"); # copy of wparam(2) -my $_key=&DWP(44,"esp"); # copy of wparam(3) -my $_ivp=&DWP(48,"esp"); # copy of wparam(4) -my $_tmp=&DWP(52,"esp"); # volatile variable -# -my $ivec=&DWP(60,"esp"); # ivec[16] -my $aes_key=&DWP(76,"esp"); # copy of aes_key -my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds - -&function_begin("AES_cbc_encrypt"); - &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len - &cmp ($s2,0); - &je (&label("drop_out")); - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tbl); - &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); - - &cmp (&wparam(5),0); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); - &jne (&label("picked_te")); - &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); - &set_label("picked_te"); - - # one can argue if this is required - &pushf (); - &cld (); - - &cmp ($s2,$speed_limit); - &jb (&label("slow_way")); - &test ($s2,15); - &jnz (&label("slow_way")); - if (!$x86only) { - &bt (&DWP(0,$s0),28); # check for hyper-threading bit - &jc (&label("slow_way")); - } - # pre-allocate aligned stack frame... - &lea ($acc,&DWP(-80-244,"esp")); - &and ($acc,-64); - - # ... and make sure it doesn't alias with $tbl modulo 4096 - &mov ($s0,$tbl); - &lea ($s1,&DWP(2048+256,$tbl)); - &mov ($s3,$acc); - &and ($s0,0xfff); # s = %ebp&0xfff - &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff - &and ($s3,0xfff); # p = %esp&0xfff - - &cmp ($s3,$s1); # if (p>=e) %esp =- (p-e); - &jb (&label("tbl_break_out")); - &sub ($s3,$s1); - &sub ($acc,$s3); - &jmp (&label("tbl_ok")); - &set_label("tbl_break_out",4); # else %esp -= (p-s)&0xfff + framesz; - &sub ($s3,$s0); - &and ($s3,0xfff); - &add ($s3,384); - &sub ($acc,$s3); - &set_label("tbl_ok",4); - - &lea ($s3,&wparam(0)); # obtain pointer to parameter block - &exch ("esp",$acc); # allocate stack frame - &add ("esp",4); # reserve for return address! - &mov ($_tbl,$tbl); # save %ebp - &mov ($_esp,$acc); # save %esp - - &mov ($s0,&DWP(0,$s3)); # load inp - &mov ($s1,&DWP(4,$s3)); # load out - #&mov ($s2,&DWP(8,$s3)); # load len - &mov ($key,&DWP(12,$s3)); # load key - &mov ($acc,&DWP(16,$s3)); # load ivp - &mov ($s3,&DWP(20,$s3)); # load enc flag - - &mov ($_inp,$s0); # save copy of inp - &mov ($_out,$s1); # save copy of out - &mov ($_len,$s2); # save copy of len - &mov ($_key,$key); # save copy of key - &mov ($_ivp,$acc); # save copy of ivp - - &mov ($mark,0); # copy of aes_key->rounds = 0; - # do we copy key schedule to stack? - &mov ($s1 eq "ebx" ? $s1 : "",$key); - &mov ($s2 eq "ecx" ? $s2 : "",244/4); - &sub ($s1,$tbl); - &mov ("esi",$key); - &and ($s1,0xfff); - &lea ("edi",$aes_key); - &cmp ($s1,2048+256); - &jb (&label("do_copy")); - &cmp ($s1,4096-244); - &jb (&label("skip_copy")); - &set_label("do_copy",4); - &mov ($_key,"edi"); - &data_word(0xA5F3F689); # rep movsd - &set_label("skip_copy"); - - &mov ($key,16); - &set_label("prefetch_tbl",4); - &mov ($s0,&DWP(0,$tbl)); - &mov ($s1,&DWP(32,$tbl)); - &mov ($s2,&DWP(64,$tbl)); - &mov ($acc,&DWP(96,$tbl)); - &lea ($tbl,&DWP(128,$tbl)); - &sub ($key,1); - &jnz (&label("prefetch_tbl")); - &sub ($tbl,2048); - - &mov ($acc,$_inp); - &mov ($key,$_ivp); - - &cmp ($s3,0); - &je (&label("fast_decrypt")); - -#----------------------------- ENCRYPT -----------------------------# - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - - &set_label("fast_enc_loop",16); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - - &xor ($s0,&DWP(0,$acc)); # xor input data - &xor ($s1,&DWP(4,$acc)); - &xor ($s2,&DWP(8,$acc)); - &xor ($s3,&DWP(12,$acc)); - - &mov ($key,$_key); # load key - &call ("_x86_AES_encrypt"); - - &mov ($acc,$_inp); # load inp - &mov ($key,$_out); # load out - - &mov (&DWP(0,$key),$s0); # save output data - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($s2,$_len); # load len - &mov ($_inp,$acc); # save inp - &lea ($s3,&DWP(16,$key)); # advance out - &mov ($_out,$s3); # save out - &sub ($s2,16); # decrease len - &mov ($_len,$s2); # save len - &jnz (&label("fast_enc_loop")); - &mov ($acc,$_ivp); # load ivp - &mov ($s2,&DWP(8,$key)); # restore last 2 dwords - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$acc),$s0); # save ivec - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - - &cmp ($mark,0); # was the key schedule copied? - &mov ("edi",$_key); - &je (&label("skip_ezero")); - # zero copy of key schedule - &mov ("ecx",240/4); - &xor ("eax","eax"); - &align (4); - &data_word(0xABF3F689); # rep stosd - &set_label("skip_ezero") - &mov ("esp",$_esp); - &popf (); - &set_label("drop_out"); - &function_end_A(); - &pushf (); # kludge, never executed - -#----------------------------- DECRYPT -----------------------------# -&set_label("fast_decrypt",16); - - &cmp ($acc,$_out); - &je (&label("fast_dec_in_place")); # in-place processing... - - &mov ($_tmp,$key); - - &align (4); - &set_label("fast_dec_loop",16); - &mov ($s0,&DWP(0,$acc)); # read input - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &mov ($key,$_key); # load key - &call ("_x86_AES_decrypt"); - - &mov ($key,$_tmp); # load ivp - &mov ($acc,$_len); # load len - &xor ($s0,&DWP(0,$key)); # xor iv - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov ($key,$_out); # load out - &mov ($acc,$_inp); # load inp - - &mov (&DWP(0,$key),$s0); # write output - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($s2,$_len); # load len - &mov ($_tmp,$acc); # save ivp - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &lea ($key,&DWP(16,$key)); # advance out - &mov ($_out,$key); # save out - &sub ($s2,16); # decrease len - &mov ($_len,$s2); # save len - &jnz (&label("fast_dec_loop")); - &mov ($key,$_tmp); # load temp ivp - &mov ($acc,$_ivp); # load user ivp - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$acc),$s0); # copy back to user - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - &jmp (&label("fast_dec_out")); - - &set_label("fast_dec_in_place",16); - &set_label("fast_dec_in_place_loop"); - &mov ($s0,&DWP(0,$acc)); # read input - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &lea ($key,$ivec); - &mov (&DWP(0,$key),$s0); # copy to temp - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($key,$_key); # load key - &call ("_x86_AES_decrypt"); - - &mov ($key,$_ivp); # load ivp - &mov ($acc,$_out); # load out - &xor ($s0,&DWP(0,$key)); # xor iv - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov (&DWP(0,$acc),$s0); # write output - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - - &lea ($acc,&DWP(16,$acc)); # advance out - &mov ($_out,$acc); # save out - - &lea ($acc,$ivec); - &mov ($s0,&DWP(0,$acc)); # read temp - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &mov (&DWP(0,$key),$s0); # copy iv - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($acc,$_inp); # load inp - &mov ($s2,$_len); # load len - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &sub ($s2,16); # decrease len - &mov ($_len,$s2); # save len - &jnz (&label("fast_dec_in_place_loop")); - - &set_label("fast_dec_out",4); - &cmp ($mark,0); # was the key schedule copied? - &mov ("edi",$_key); - &je (&label("skip_dzero")); - # zero copy of key schedule - &mov ("ecx",240/4); - &xor ("eax","eax"); - &align (4); - &data_word(0xABF3F689); # rep stosd - &set_label("skip_dzero") - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - -#--------------------------- SLOW ROUTINE ---------------------------# -&set_label("slow_way",16); - - &mov ($s0,&DWP(0,$s0)) if (!$x86only);# load OPENSSL_ia32cap - &mov ($key,&wparam(3)); # load key - - # pre-allocate aligned stack frame... - &lea ($acc,&DWP(-80,"esp")); - &and ($acc,-64); - - # ... and make sure it doesn't alias with $key modulo 1024 - &lea ($s1,&DWP(-80-63,$key)); - &sub ($s1,$acc); - &neg ($s1); - &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line - &sub ($acc,$s1); - - # pick S-box copy which can't overlap with stack frame or $key - &lea ($s1,&DWP(768,$acc)); - &sub ($s1,$tbl); - &and ($s1,0x300); - &lea ($tbl,&DWP(2048+128,$tbl,$s1)); - - &lea ($s3,&wparam(0)); # pointer to parameter block - - &exch ("esp",$acc); - &add ("esp",4); # reserve for return address! - &mov ($_tbl,$tbl); # save %ebp - &mov ($_esp,$acc); # save %esp - &mov ($_tmp,$s0); # save OPENSSL_ia32cap - - &mov ($s0,&DWP(0,$s3)); # load inp - &mov ($s1,&DWP(4,$s3)); # load out - #&mov ($s2,&DWP(8,$s3)); # load len - #&mov ($key,&DWP(12,$s3)); # load key - &mov ($acc,&DWP(16,$s3)); # load ivp - &mov ($s3,&DWP(20,$s3)); # load enc flag - - &mov ($_inp,$s0); # save copy of inp - &mov ($_out,$s1); # save copy of out - &mov ($_len,$s2); # save copy of len - &mov ($_key,$key); # save copy of key - &mov ($_ivp,$acc); # save copy of ivp - - &mov ($key,$acc); - &mov ($acc,$s0); - - &cmp ($s3,0); - &je (&label("slow_decrypt")); - -#--------------------------- SLOW ENCRYPT ---------------------------# - &cmp ($s2,16); - &mov ($s3,$s1); - &jb (&label("slow_enc_tail")); - - if (!$x86only) { - &bt ($_tmp,25); # check for SSE bit - &jnc (&label("slow_enc_x86")); - - &movq ("mm0",&QWP(0,$key)); # load iv - &movq ("mm4",&QWP(8,$key)); - - &set_label("slow_enc_loop_sse",16); - &pxor ("mm0",&QWP(0,$acc)); # xor input data - &pxor ("mm4",&QWP(8,$acc)); - - &mov ($key,$_key); - &call ("_sse_AES_encrypt_compact"); - - &mov ($acc,$_inp); # load inp - &mov ($key,$_out); # load out - &mov ($s2,$_len); # load len - - &movq (&QWP(0,$key),"mm0"); # save output data - &movq (&QWP(8,$key),"mm4"); - - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &lea ($s3,&DWP(16,$key)); # advance out - &mov ($_out,$s3); # save out - &sub ($s2,16); # decrease len - &cmp ($s2,16); - &mov ($_len,$s2); # save len - &jae (&label("slow_enc_loop_sse")); - &test ($s2,15); - &jnz (&label("slow_enc_tail")); - &mov ($acc,$_ivp); # load ivp - &movq (&QWP(0,$acc),"mm0"); # save ivec - &movq (&QWP(8,$acc),"mm4"); - &emms (); - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - } - &set_label("slow_enc_x86",16); - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - - &set_label("slow_enc_loop_x86",4); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - - &xor ($s0,&DWP(0,$acc)); # xor input data - &xor ($s1,&DWP(4,$acc)); - &xor ($s2,&DWP(8,$acc)); - &xor ($s3,&DWP(12,$acc)); - - &mov ($key,$_key); # load key - &call ("_x86_AES_encrypt_compact"); - - &mov ($acc,$_inp); # load inp - &mov ($key,$_out); # load out - - &mov (&DWP(0,$key),$s0); # save output data - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($s2,$_len); # load len - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &lea ($s3,&DWP(16,$key)); # advance out - &mov ($_out,$s3); # save out - &sub ($s2,16); # decrease len - &cmp ($s2,16); - &mov ($_len,$s2); # save len - &jae (&label("slow_enc_loop_x86")); - &test ($s2,15); - &jnz (&label("slow_enc_tail")); - &mov ($acc,$_ivp); # load ivp - &mov ($s2,&DWP(8,$key)); # restore last dwords - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$acc),$s0); # save ivec - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - - &set_label("slow_enc_tail",16); - &emms () if (!$x86only); - &mov ($key eq "edi"? $key:"",$s3); # load out to edi - &mov ($s1,16); - &sub ($s1,$s2); - &cmp ($key,$acc eq "esi"? $acc:""); # compare with inp - &je (&label("enc_in_place")); - &align (4); - &data_word(0xA4F3F689); # rep movsb # copy input - &jmp (&label("enc_skip_in_place")); - &set_label("enc_in_place"); - &lea ($key,&DWP(0,$key,$s2)); - &set_label("enc_skip_in_place"); - &mov ($s2,$s1); - &xor ($s0,$s0); - &align (4); - &data_word(0xAAF3F689); # rep stosb # zero tail - - &mov ($key,$_ivp); # restore ivp - &mov ($acc,$s3); # output as input - &mov ($s0,&DWP(0,$key)); - &mov ($s1,&DWP(4,$key)); - &mov ($_len,16); # len=16 - &jmp (&label("slow_enc_loop_x86")); # one more spin... - -#--------------------------- SLOW DECRYPT ---------------------------# -&set_label("slow_decrypt",16); - if (!$x86only) { - &bt ($_tmp,25); # check for SSE bit - &jnc (&label("slow_dec_loop_x86")); - - &set_label("slow_dec_loop_sse",4); - &movq ("mm0",&QWP(0,$acc)); # read input - &movq ("mm4",&QWP(8,$acc)); - - &mov ($key,$_key); - &call ("_sse_AES_decrypt_compact"); - - &mov ($acc,$_inp); # load inp - &lea ($s0,$ivec); - &mov ($s1,$_out); # load out - &mov ($s2,$_len); # load len - &mov ($key,$_ivp); # load ivp - - &movq ("mm1",&QWP(0,$acc)); # re-read input - &movq ("mm5",&QWP(8,$acc)); - - &pxor ("mm0",&QWP(0,$key)); # xor iv - &pxor ("mm4",&QWP(8,$key)); - - &movq (&QWP(0,$key),"mm1"); # copy input to iv - &movq (&QWP(8,$key),"mm5"); - - &sub ($s2,16); # decrease len - &jc (&label("slow_dec_partial_sse")); - - &movq (&QWP(0,$s1),"mm0"); # write output - &movq (&QWP(8,$s1),"mm4"); - - &lea ($s1,&DWP(16,$s1)); # advance out - &mov ($_out,$s1); # save out - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &mov ($_len,$s2); # save len - &jnz (&label("slow_dec_loop_sse")); - &emms (); - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - - &set_label("slow_dec_partial_sse",16); - &movq (&QWP(0,$s0),"mm0"); # save output to temp - &movq (&QWP(8,$s0),"mm4"); - &emms (); - - &add ($s2 eq "ecx" ? "ecx":"",16); - &mov ("edi",$s1); # out - &mov ("esi",$s0); # temp - &align (4); - &data_word(0xA4F3F689); # rep movsb # copy partial output - - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - } - &set_label("slow_dec_loop_x86",16); - &mov ($s0,&DWP(0,$acc)); # read input - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &lea ($key,$ivec); - &mov (&DWP(0,$key),$s0); # copy to temp - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($key,$_key); # load key - &call ("_x86_AES_decrypt_compact"); - - &mov ($key,$_ivp); # load ivp - &mov ($acc,$_len); # load len - &xor ($s0,&DWP(0,$key)); # xor iv - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &sub ($acc,16); - &jc (&label("slow_dec_partial_x86")); - - &mov ($_len,$acc); # save len - &mov ($acc,$_out); # load out - - &mov (&DWP(0,$acc),$s0); # write output - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - - &lea ($acc,&DWP(16,$acc)); # advance out - &mov ($_out,$acc); # save out - - &lea ($acc,$ivec); - &mov ($s0,&DWP(0,$acc)); # read temp - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &mov (&DWP(0,$key),$s0); # copy it to iv - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($acc,$_inp); # load inp - &lea ($acc,&DWP(16,$acc)); # advance inp - &mov ($_inp,$acc); # save inp - &jnz (&label("slow_dec_loop_x86")); - &mov ("esp",$_esp); - &popf (); - &function_end_A(); - &pushf (); # kludge, never executed - - &set_label("slow_dec_partial_x86",16); - &lea ($acc,$ivec); - &mov (&DWP(0,$acc),$s0); # save output to temp - &mov (&DWP(4,$acc),$s1); - &mov (&DWP(8,$acc),$s2); - &mov (&DWP(12,$acc),$s3); - - &mov ($acc,$_inp); - &mov ($s0,&DWP(0,$acc)); # re-read input - &mov ($s1,&DWP(4,$acc)); - &mov ($s2,&DWP(8,$acc)); - &mov ($s3,&DWP(12,$acc)); - - &mov (&DWP(0,$key),$s0); # copy it to iv - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ("ecx",$_len); - &mov ("edi",$_out); - &lea ("esi",$ivec); - &align (4); - &data_word(0xA4F3F689); # rep movsb # copy partial output - - &mov ("esp",$_esp); - &popf (); -&function_end("AES_cbc_encrypt"); -} - -#------------------------------------------------------------------# - -sub enckey() -{ - &movz ("esi",&LB("edx")); # rk[i]>>0 - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &movz ("esi",&HB("edx")); # rk[i]>>8 - &shl ("ebx",24); - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &shr ("edx",16); - &movz ("esi",&LB("edx")); # rk[i]>>16 - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &movz ("esi",&HB("edx")); # rk[i]>>24 - &shl ("ebx",8); - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &shl ("ebx",16); - &xor ("eax","ebx"); - - &xor ("eax",&DWP(1024-128,$tbl,"ecx",4)); # rcon -} - -&function_begin("_x86_AES_set_encrypt_key"); - &mov ("esi",&wparam(1)); # user supplied key - &mov ("edi",&wparam(3)); # private key schedule - - &test ("esi",-1); - &jz (&label("badpointer")); - &test ("edi",-1); - &jz (&label("badpointer")); - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($tbl); - &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); - &lea ($tbl,&DWP(2048+128,$tbl)); - - # prefetch Te4 - &mov ("eax",&DWP(0-128,$tbl)); - &mov ("ebx",&DWP(32-128,$tbl)); - &mov ("ecx",&DWP(64-128,$tbl)); - &mov ("edx",&DWP(96-128,$tbl)); - &mov ("eax",&DWP(128-128,$tbl)); - &mov ("ebx",&DWP(160-128,$tbl)); - &mov ("ecx",&DWP(192-128,$tbl)); - &mov ("edx",&DWP(224-128,$tbl)); - - &mov ("ecx",&wparam(2)); # number of bits in key - &cmp ("ecx",128); - &je (&label("10rounds")); - &cmp ("ecx",192); - &je (&label("12rounds")); - &cmp ("ecx",256); - &je (&label("14rounds")); - &mov ("eax",-2); # invalid number of bits - &jmp (&label("exit")); - - &set_label("10rounds"); - &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords - &mov ("ebx",&DWP(4,"esi")); - &mov ("ecx",&DWP(8,"esi")); - &mov ("edx",&DWP(12,"esi")); - &mov (&DWP(0,"edi"),"eax"); - &mov (&DWP(4,"edi"),"ebx"); - &mov (&DWP(8,"edi"),"ecx"); - &mov (&DWP(12,"edi"),"edx"); - - &xor ("ecx","ecx"); - &jmp (&label("10shortcut")); - - &align (4); - &set_label("10loop"); - &mov ("eax",&DWP(0,"edi")); # rk[0] - &mov ("edx",&DWP(12,"edi")); # rk[3] - &set_label("10shortcut"); - &enckey (); - - &mov (&DWP(16,"edi"),"eax"); # rk[4] - &xor ("eax",&DWP(4,"edi")); - &mov (&DWP(20,"edi"),"eax"); # rk[5] - &xor ("eax",&DWP(8,"edi")); - &mov (&DWP(24,"edi"),"eax"); # rk[6] - &xor ("eax",&DWP(12,"edi")); - &mov (&DWP(28,"edi"),"eax"); # rk[7] - &inc ("ecx"); - &add ("edi",16); - &cmp ("ecx",10); - &jl (&label("10loop")); - - &mov (&DWP(80,"edi"),10); # setup number of rounds - &xor ("eax","eax"); - &jmp (&label("exit")); - - &set_label("12rounds"); - &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords - &mov ("ebx",&DWP(4,"esi")); - &mov ("ecx",&DWP(8,"esi")); - &mov ("edx",&DWP(12,"esi")); - &mov (&DWP(0,"edi"),"eax"); - &mov (&DWP(4,"edi"),"ebx"); - &mov (&DWP(8,"edi"),"ecx"); - &mov (&DWP(12,"edi"),"edx"); - &mov ("ecx",&DWP(16,"esi")); - &mov ("edx",&DWP(20,"esi")); - &mov (&DWP(16,"edi"),"ecx"); - &mov (&DWP(20,"edi"),"edx"); - - &xor ("ecx","ecx"); - &jmp (&label("12shortcut")); - - &align (4); - &set_label("12loop"); - &mov ("eax",&DWP(0,"edi")); # rk[0] - &mov ("edx",&DWP(20,"edi")); # rk[5] - &set_label("12shortcut"); - &enckey (); - - &mov (&DWP(24,"edi"),"eax"); # rk[6] - &xor ("eax",&DWP(4,"edi")); - &mov (&DWP(28,"edi"),"eax"); # rk[7] - &xor ("eax",&DWP(8,"edi")); - &mov (&DWP(32,"edi"),"eax"); # rk[8] - &xor ("eax",&DWP(12,"edi")); - &mov (&DWP(36,"edi"),"eax"); # rk[9] - - &cmp ("ecx",7); - &je (&label("12break")); - &inc ("ecx"); - - &xor ("eax",&DWP(16,"edi")); - &mov (&DWP(40,"edi"),"eax"); # rk[10] - &xor ("eax",&DWP(20,"edi")); - &mov (&DWP(44,"edi"),"eax"); # rk[11] - - &add ("edi",24); - &jmp (&label("12loop")); - - &set_label("12break"); - &mov (&DWP(72,"edi"),12); # setup number of rounds - &xor ("eax","eax"); - &jmp (&label("exit")); - - &set_label("14rounds"); - &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords - &mov ("ebx",&DWP(4,"esi")); - &mov ("ecx",&DWP(8,"esi")); - &mov ("edx",&DWP(12,"esi")); - &mov (&DWP(0,"edi"),"eax"); - &mov (&DWP(4,"edi"),"ebx"); - &mov (&DWP(8,"edi"),"ecx"); - &mov (&DWP(12,"edi"),"edx"); - &mov ("eax",&DWP(16,"esi")); - &mov ("ebx",&DWP(20,"esi")); - &mov ("ecx",&DWP(24,"esi")); - &mov ("edx",&DWP(28,"esi")); - &mov (&DWP(16,"edi"),"eax"); - &mov (&DWP(20,"edi"),"ebx"); - &mov (&DWP(24,"edi"),"ecx"); - &mov (&DWP(28,"edi"),"edx"); - - &xor ("ecx","ecx"); - &jmp (&label("14shortcut")); - - &align (4); - &set_label("14loop"); - &mov ("edx",&DWP(28,"edi")); # rk[7] - &set_label("14shortcut"); - &mov ("eax",&DWP(0,"edi")); # rk[0] - - &enckey (); - - &mov (&DWP(32,"edi"),"eax"); # rk[8] - &xor ("eax",&DWP(4,"edi")); - &mov (&DWP(36,"edi"),"eax"); # rk[9] - &xor ("eax",&DWP(8,"edi")); - &mov (&DWP(40,"edi"),"eax"); # rk[10] - &xor ("eax",&DWP(12,"edi")); - &mov (&DWP(44,"edi"),"eax"); # rk[11] - - &cmp ("ecx",6); - &je (&label("14break")); - &inc ("ecx"); - - &mov ("edx","eax"); - &mov ("eax",&DWP(16,"edi")); # rk[4] - &movz ("esi",&LB("edx")); # rk[11]>>0 - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &movz ("esi",&HB("edx")); # rk[11]>>8 - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &shr ("edx",16); - &shl ("ebx",8); - &movz ("esi",&LB("edx")); # rk[11]>>16 - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &movz ("esi",&HB("edx")); # rk[11]>>24 - &shl ("ebx",16); - &xor ("eax","ebx"); - - &movz ("ebx",&BP(-128,$tbl,"esi",1)); - &shl ("ebx",24); - &xor ("eax","ebx"); - - &mov (&DWP(48,"edi"),"eax"); # rk[12] - &xor ("eax",&DWP(20,"edi")); - &mov (&DWP(52,"edi"),"eax"); # rk[13] - &xor ("eax",&DWP(24,"edi")); - &mov (&DWP(56,"edi"),"eax"); # rk[14] - &xor ("eax",&DWP(28,"edi")); - &mov (&DWP(60,"edi"),"eax"); # rk[15] - - &add ("edi",32); - &jmp (&label("14loop")); - - &set_label("14break"); - &mov (&DWP(48,"edi"),14); # setup number of rounds - &xor ("eax","eax"); - &jmp (&label("exit")); - - &set_label("badpointer"); - &mov ("eax",-1); - &set_label("exit"); -&function_end("_x86_AES_set_encrypt_key"); - -# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, -# AES_KEY *key) -&function_begin_B("private_AES_set_encrypt_key"); - &call ("_x86_AES_set_encrypt_key"); - &ret (); -&function_end_B("private_AES_set_encrypt_key"); - -sub deckey() -{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_; - my $tmp = $tbl; - - &mov ($acc,$tp1); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp2,&DWP(0,$tp1,$tp1)); - &sub ($acc,$tmp); - &and ($tp2,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &xor ($acc,$tp2); - &mov ($tp2,$acc); - - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp4,&DWP(0,$tp2,$tp2)); - &sub ($acc,$tmp); - &and ($tp4,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &xor ($tp2,$tp1); # tp2^tp1 - &xor ($acc,$tp4); - &mov ($tp4,$acc); - - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); - &lea ($tp8,&DWP(0,$tp4,$tp4)); - &xor ($tp4,$tp1); # tp4^tp1 - &sub ($acc,$tmp); - &and ($tp8,0xfefefefe); - &and ($acc,0x1b1b1b1b); - &rotl ($tp1,8); # = ROTATE(tp1,8) - &xor ($tp8,$acc); - - &mov ($tmp,&DWP(4*($i+1),$key)); # modulo-scheduled load - - &xor ($tp1,$tp2); - &xor ($tp2,$tp8); - &xor ($tp1,$tp4); - &rotl ($tp2,24); - &xor ($tp4,$tp8); - &xor ($tp1,$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1) - &rotl ($tp4,16); - &xor ($tp1,$tp2); # ^= ROTATE(tp8^tp2^tp1,24) - &rotl ($tp8,8); - &xor ($tp1,$tp4); # ^= ROTATE(tp8^tp4^tp1,16) - &mov ($tp2,$tmp); - &xor ($tp1,$tp8); # ^= ROTATE(tp8,8) - - &mov (&DWP(4*$i,$key),$tp1); -} - -# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, -# AES_KEY *key) -&function_begin_B("private_AES_set_decrypt_key"); - &call ("_x86_AES_set_encrypt_key"); - &cmp ("eax",0); - &je (&label("proceed")); - &ret (); - - &set_label("proceed"); - &push ("ebp"); - &push ("ebx"); - &push ("esi"); - &push ("edi"); - - &mov ("esi",&wparam(2)); - &mov ("ecx",&DWP(240,"esi")); # pull number of rounds - &lea ("ecx",&DWP(0,"","ecx",4)); - &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk - - &set_label("invert",4); # invert order of chunks - &mov ("eax",&DWP(0,"esi")); - &mov ("ebx",&DWP(4,"esi")); - &mov ("ecx",&DWP(0,"edi")); - &mov ("edx",&DWP(4,"edi")); - &mov (&DWP(0,"edi"),"eax"); - &mov (&DWP(4,"edi"),"ebx"); - &mov (&DWP(0,"esi"),"ecx"); - &mov (&DWP(4,"esi"),"edx"); - &mov ("eax",&DWP(8,"esi")); - &mov ("ebx",&DWP(12,"esi")); - &mov ("ecx",&DWP(8,"edi")); - &mov ("edx",&DWP(12,"edi")); - &mov (&DWP(8,"edi"),"eax"); - &mov (&DWP(12,"edi"),"ebx"); - &mov (&DWP(8,"esi"),"ecx"); - &mov (&DWP(12,"esi"),"edx"); - &add ("esi",16); - &sub ("edi",16); - &cmp ("esi","edi"); - &jne (&label("invert")); - - &mov ($key,&wparam(2)); - &mov ($acc,&DWP(240,$key)); # pull number of rounds - &lea ($acc,&DWP(-2,$acc,$acc)); - &lea ($acc,&DWP(0,$key,$acc,8)); - &mov (&wparam(2),$acc); - - &mov ($s0,&DWP(16,$key)); # modulo-scheduled load - &set_label("permute",4); # permute the key schedule - &add ($key,16); - &deckey (0,$key,$s0,$s1,$s2,$s3); - &deckey (1,$key,$s1,$s2,$s3,$s0); - &deckey (2,$key,$s2,$s3,$s0,$s1); - &deckey (3,$key,$s3,$s0,$s1,$s2); - &cmp ($key,&wparam(2)); - &jb (&label("permute")); - - &xor ("eax","eax"); # return success -&function_end("private_AES_set_decrypt_key"); -&asciz("AES for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-armv4.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-armv4.pl deleted file mode 100644 index 86b86c4a0f..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-armv4.pl +++ /dev/null @@ -1,1134 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for ARMv4 - -# January 2007. -# -# Code uses single 1K S-box and is >2 times faster than code generated -# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -# allows to merge logical or arithmetic operation with shift or rotate -# in one instruction and emit combined result every cycle. The module -# is endian-neutral. The performance is ~42 cycles/byte for 128-bit -# key [on single-issue Xscale PXA250 core]. - -# May 2007. -# -# AES_set_[en|de]crypt_key is added. - -# July 2010. -# -# Rescheduling for dual-issue pipeline resulted in 12% improvement on -# Cortex A8 core and ~25 cycles per byte processed with 128-bit key. - -# February 2011. -# -# Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~21.5 cycles per byte. - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$s0="r0"; -$s1="r1"; -$s2="r2"; -$s3="r3"; -$t1="r4"; -$t2="r5"; -$t3="r6"; -$i1="r7"; -$i2="r8"; -$i3="r9"; - -$tbl="r10"; -$key="r11"; -$rounds="r12"; - -$code=<<___; -#include "arm_arch.h" -.text -.code 32 - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_encrypt -.type AES_encrypt,%function -.align 5 -AES_encrypt: - sub r3,pc,#8 @ AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_encrypt-AES_Te @ Te -#if __ARM_ARCH__<7 - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - ldrb $s1,[$rounds,#7] - orr $s0,$s0,$t2,lsl#16 - ldrb $t1,[$rounds,#6] - orr $s0,$s0,$t3,lsl#24 - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - ldrb $s2,[$rounds,#11] - orr $s1,$s1,$t2,lsl#16 - ldrb $t1,[$rounds,#10] - orr $s1,$s1,$t3,lsl#24 - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - ldrb $s3,[$rounds,#15] - orr $s2,$s2,$t2,lsl#16 - ldrb $t1,[$rounds,#14] - orr $s2,$s2,$t3,lsl#24 - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 -#else - ldr $s0,[$rounds,#0] - ldr $s1,[$rounds,#4] - ldr $s2,[$rounds,#8] - ldr $s3,[$rounds,#12] -#ifdef __ARMEL__ - rev $s0,$s0 - rev $s1,$s1 - rev $s2,$s2 - rev $s3,$s3 -#endif -#endif - bl _armv4_AES_encrypt - - ldr $rounds,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev $s0,$s0 - rev $s1,$s1 - rev $s2,$s2 - rev $s3,$s3 -#endif - str $s0,[$rounds,#0] - str $s1,[$rounds,#4] - str $s2,[$rounds,#8] - str $s3,[$rounds,#12] -#else - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... - mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - mov $t1,$s1,lsr#24 - strb $t3,[$rounds,#2] - mov $t2,$s1,lsr#16 - strb $s0,[$rounds,#3] - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - mov $t1,$s2,lsr#24 - strb $t3,[$rounds,#6] - mov $t2,$s2,lsr#16 - strb $s1,[$rounds,#7] - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - mov $t1,$s3,lsr#24 - strb $t3,[$rounds,#10] - mov $t2,$s3,lsr#16 - strb $s2,[$rounds,#11] - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] -#endif -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size AES_encrypt,.-AES_encrypt - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! @ push lr - ldmia $key!,{$t1-$i1} - eor $s0,$s0,$t1 - ldr $rounds,[$key,#240-16] - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - - and $i1,lr,$s0 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0,lsr#16 - mov $s0,$s0,lsr#24 -.Lenc_loop: - ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0] - and $i1,lr,$s1,lsr#16 @ i0 - ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8] - and $i2,lr,$s1 - ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16] - and $i3,lr,$s1,lsr#8 - ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24] - mov $s1,$s1,lsr#24 - - ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16] - ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0] - ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8] - eor $s0,$s0,$i1,ror#8 - ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24] - and $i1,lr,$s2,lsr#8 @ i0 - eor $t2,$t2,$i2,ror#8 - and $i2,lr,$s2,lsr#16 @ i1 - eor $t3,$t3,$i3,ror#8 - and $i3,lr,$s2 - ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] - eor $s1,$s1,$t1,ror#24 - ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] - mov $s2,$s2,lsr#24 - - ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] - eor $s0,$s0,$i1,ror#16 - ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] - and $i1,lr,$s3 @ i0 - eor $s1,$s1,$i2,ror#8 - and $i2,lr,$s3,lsr#8 @ i1 - eor $t3,$t3,$i3,ror#16 - and $i3,lr,$s3,lsr#16 @ i2 - ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] - eor $s2,$s2,$t2,ror#16 - ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] - mov $s3,$s3,lsr#24 - - ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] - eor $s0,$s0,$i1,ror#24 - ldr $i1,[$key],#16 - eor $s1,$s1,$i2,ror#16 - ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] - eor $s2,$s2,$i3,ror#8 - ldr $t1,[$key,#-12] - eor $s3,$s3,$t3,ror#8 - - ldr $t2,[$key,#-8] - eor $s0,$s0,$i1 - ldr $t3,[$key,#-4] - and $i1,lr,$s0 - eor $s1,$s1,$t1 - and $i2,lr,$s0,lsr#8 - eor $s2,$s2,$t2 - and $i3,lr,$s0,lsr#16 - eor $s3,$s3,$t3 - mov $s0,$s0,lsr#24 - - subs $rounds,$rounds,#1 - bne .Lenc_loop - - add $tbl,$tbl,#2 - - ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0] - and $i1,lr,$s1,lsr#16 @ i0 - ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8] - and $i2,lr,$s1 - ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16] - and $i3,lr,$s1,lsr#8 - ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24] - mov $s1,$s1,lsr#24 - - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8] - eor $s0,$i1,$s0,lsl#8 - ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24] - and $i1,lr,$s2,lsr#8 @ i0 - eor $t2,$i2,$t2,lsl#8 - and $i2,lr,$s2,lsr#16 @ i1 - eor $t3,$i3,$t3,lsl#8 - and $i3,lr,$s2 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] - eor $s1,$t1,$s1,lsl#24 - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] - mov $s2,$s2,lsr#24 - - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] - eor $s0,$i1,$s0,lsl#8 - ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] - and $i1,lr,$s3 @ i0 - eor $s1,$s1,$i2,lsl#16 - and $i2,lr,$s3,lsr#8 @ i1 - eor $t3,$i3,$t3,lsl#8 - and $i3,lr,$s3,lsr#16 @ i2 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] - eor $s2,$t2,$s2,lsl#24 - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] - mov $s3,$s3,lsr#24 - - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] - eor $s0,$i1,$s0,lsl#8 - ldr $i1,[$key,#0] - ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] - eor $s1,$s1,$i2,lsl#8 - ldr $t1,[$key,#4] - eor $s2,$s2,$i3,lsl#16 - ldr $t2,[$key,#8] - eor $s3,$t3,$s3,lsl#24 - ldr $t3,[$key,#12] - - eor $s0,$s0,$i1 - eor $s1,$s1,$t1 - eor $s2,$s2,$t2 - eor $s3,$s3,$t3 - - sub $tbl,$tbl,#2 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.global private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,%function -.align 5 -private_AES_set_encrypt_key: -_armv4_AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov $rounds,r0 @ inp - mov lr,r1 @ bits - mov $key,r2 @ key - -#if __ARM_ARCH__<7 - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - ldrb $s1,[$rounds,#7] - orr $s0,$s0,$t2,lsl#16 - ldrb $t1,[$rounds,#6] - orr $s0,$s0,$t3,lsl#24 - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - ldrb $s2,[$rounds,#11] - orr $s1,$s1,$t2,lsl#16 - ldrb $t1,[$rounds,#10] - orr $s1,$s1,$t3,lsl#24 - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - ldrb $s3,[$rounds,#15] - orr $s2,$s2,$t2,lsl#16 - ldrb $t1,[$rounds,#14] - orr $s2,$s2,$t3,lsl#24 - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - str $s0,[$key],#16 - orr $s3,$s3,$t2,lsl#16 - str $s1,[$key,#-12] - orr $s3,$s3,$t3,lsl#24 - str $s2,[$key,#-8] - str $s3,[$key,#-4] -#else - ldr $s0,[$rounds,#0] - ldr $s1,[$rounds,#4] - ldr $s2,[$rounds,#8] - ldr $s3,[$rounds,#12] -#ifdef __ARMEL__ - rev $s0,$s0 - rev $s1,$s1 - rev $s2,$s2 - rev $s3,$s3 -#endif - str $s0,[$key],#16 - str $s1,[$key,#-12] - str $s2,[$key,#-8] - str $s3,[$key,#-4] -#endif - - teq lr,#128 - bne .Lnot128 - mov $rounds,#10 - str $rounds,[$key,#240-16] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - -.L128_loop: - and $t2,lr,$s3,lsr#24 - and $i1,lr,$s3,lsr#16 - ldrb $t2,[$tbl,$t2] - and $i2,lr,$s3,lsr#8 - ldrb $i1,[$tbl,$i1] - and $i3,lr,$s3 - ldrb $i2,[$tbl,$i2] - orr $t2,$t2,$i1,lsl#24 - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i2,lsl#16 - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i3,lsl#8 - eor $t2,$t2,$t1 - eor $s0,$s0,$t2 @ rk[4]=rk[0]^... - eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4] - str $s0,[$key],#16 - eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5] - str $s1,[$key,#-12] - eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6] - str $s2,[$key,#-8] - subs $rounds,$rounds,#1 - str $s3,[$key,#-4] - bne .L128_loop - sub r2,$key,#176 - b .Ldone - -.Lnot128: -#if __ARM_ARCH__<7 - ldrb $i2,[$rounds,#19] - ldrb $t1,[$rounds,#18] - ldrb $t2,[$rounds,#17] - ldrb $t3,[$rounds,#16] - orr $i2,$i2,$t1,lsl#8 - ldrb $i3,[$rounds,#23] - orr $i2,$i2,$t2,lsl#16 - ldrb $t1,[$rounds,#22] - orr $i2,$i2,$t3,lsl#24 - ldrb $t2,[$rounds,#21] - ldrb $t3,[$rounds,#20] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - str $i2,[$key],#8 - orr $i3,$i3,$t3,lsl#24 - str $i3,[$key,#-4] -#else - ldr $i2,[$rounds,#16] - ldr $i3,[$rounds,#20] -#ifdef __ARMEL__ - rev $i2,$i2 - rev $i3,$i3 -#endif - str $i2,[$key],#8 - str $i3,[$key,#-4] -#endif - - teq lr,#192 - bne .Lnot192 - mov $rounds,#12 - str $rounds,[$key,#240-24] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#8 - -.L192_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - ldrb $t2,[$tbl,$t2] - and $i2,lr,$i3,lsr#8 - ldrb $i1,[$tbl,$i1] - and $i3,lr,$i3 - ldrb $i2,[$tbl,$i2] - orr $t2,$t2,$i1,lsl#24 - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i2,lsl#16 - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[6]=rk[0]^... - eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6] - str $s0,[$key],#24 - eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7] - str $s1,[$key,#-20] - eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8] - str $s2,[$key,#-16] - subs $rounds,$rounds,#1 - str $s3,[$key,#-12] - subeq r2,$key,#216 - beq .Ldone - - ldr $i1,[$key,#-32] - ldr $i2,[$key,#-28] - eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9] - eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10] - str $i1,[$key,#-8] - str $i3,[$key,#-4] - b .L192_loop - -.Lnot192: -#if __ARM_ARCH__<7 - ldrb $i2,[$rounds,#27] - ldrb $t1,[$rounds,#26] - ldrb $t2,[$rounds,#25] - ldrb $t3,[$rounds,#24] - orr $i2,$i2,$t1,lsl#8 - ldrb $i3,[$rounds,#31] - orr $i2,$i2,$t2,lsl#16 - ldrb $t1,[$rounds,#30] - orr $i2,$i2,$t3,lsl#24 - ldrb $t2,[$rounds,#29] - ldrb $t3,[$rounds,#28] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - str $i2,[$key],#8 - orr $i3,$i3,$t3,lsl#24 - str $i3,[$key,#-4] -#else - ldr $i2,[$rounds,#24] - ldr $i3,[$rounds,#28] -#ifdef __ARMEL__ - rev $i2,$i2 - rev $i3,$i3 -#endif - str $i2,[$key],#8 - str $i3,[$key,#-4] -#endif - - mov $rounds,#14 - str $rounds,[$key,#240-32] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#7 - -.L256_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - ldrb $t2,[$tbl,$t2] - and $i2,lr,$i3,lsr#8 - ldrb $i1,[$tbl,$i1] - and $i3,lr,$i3 - ldrb $i2,[$tbl,$i2] - orr $t2,$t2,$i1,lsl#24 - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i2,lsl#16 - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[8]=rk[0]^... - eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8] - str $s0,[$key],#32 - eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9] - str $s1,[$key,#-28] - eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10] - str $s2,[$key,#-24] - subs $rounds,$rounds,#1 - str $s3,[$key,#-20] - subeq r2,$key,#256 - beq .Ldone - - and $t2,lr,$s3 - and $i1,lr,$s3,lsr#8 - ldrb $t2,[$tbl,$t2] - and $i2,lr,$s3,lsr#16 - ldrb $i1,[$tbl,$i1] - and $i3,lr,$s3,lsr#24 - ldrb $i2,[$tbl,$i2] - orr $t2,$t2,$i1,lsl#8 - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i2,lsl#16 - ldr $t1,[$key,#-48] - orr $t2,$t2,$i3,lsl#24 - - ldr $i1,[$key,#-44] - ldr $i2,[$key,#-40] - eor $t1,$t1,$t2 @ rk[12]=rk[4]^... - ldr $i3,[$key,#-36] - eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12] - str $t1,[$key,#-16] - eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13] - str $i1,[$key,#-12] - eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14] - str $i2,[$key,#-8] - str $i3,[$key,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key - -.global private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,%function -.align 5 -private_AES_set_decrypt_key: - str lr,[sp,#-4]! @ push lr - bl _armv4_AES_set_encrypt_key - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov $key,r2 @ which is AES_KEY *key - mov $i1,r2 - add $i2,r2,$rounds,lsl#4 - -.Linv: ldr $s0,[$i1] - ldr $s1,[$i1,#4] - ldr $s2,[$i1,#8] - ldr $s3,[$i1,#12] - ldr $t1,[$i2] - ldr $t2,[$i2,#4] - ldr $t3,[$i2,#8] - ldr $i3,[$i2,#12] - str $s0,[$i2],#-16 - str $s1,[$i2,#16+4] - str $s2,[$i2,#16+8] - str $s3,[$i2,#16+12] - str $t1,[$i1],#16 - str $t2,[$i1,#-12] - str $t3,[$i1,#-8] - str $i3,[$i1,#-4] - teq $i1,$i2 - bne .Linv -___ -$mask80=$i1; -$mask1b=$i2; -$mask7f=$i3; -$code.=<<___; - ldr $s0,[$key,#16]! @ prefetch tp1 - mov $mask80,#0x80 - mov $mask1b,#0x1b - orr $mask80,$mask80,#0x8000 - orr $mask1b,$mask1b,#0x1b00 - orr $mask80,$mask80,$mask80,lsl#16 - orr $mask1b,$mask1b,$mask1b,lsl#16 - sub $rounds,$rounds,#1 - mvn $mask7f,$mask80 - mov $rounds,$rounds,lsl#2 @ (rounds-1)*4 - -.Lmix: and $t1,$s0,$mask80 - and $s1,$s0,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s1,$t1,$s1,lsl#1 @ tp2 - - and $t1,$s1,$mask80 - and $s2,$s1,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s2,$t1,$s2,lsl#1 @ tp4 - - and $t1,$s2,$mask80 - and $s3,$s2,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s3,$t1,$s3,lsl#1 @ tp8 - - eor $t1,$s1,$s2 - eor $t2,$s0,$s3 @ tp9 - eor $t1,$t1,$s3 @ tpe - eor $t1,$t1,$s1,ror#24 - eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor $t1,$t1,$s2,ror#16 - eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24) - - ldr $s0,[$key,#4] @ prefetch tp1 - str $t1,[$key],#4 - subs $rounds,$rounds,#1 - bne .Lmix - - mov r0,#0 -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_decrypt -.type AES_decrypt,%function -.align 5 -AES_decrypt: - sub r3,pc,#8 @ AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_decrypt-AES_Td @ Td -#if __ARM_ARCH__<7 - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - ldrb $s1,[$rounds,#7] - orr $s0,$s0,$t2,lsl#16 - ldrb $t1,[$rounds,#6] - orr $s0,$s0,$t3,lsl#24 - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - ldrb $s2,[$rounds,#11] - orr $s1,$s1,$t2,lsl#16 - ldrb $t1,[$rounds,#10] - orr $s1,$s1,$t3,lsl#24 - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - ldrb $s3,[$rounds,#15] - orr $s2,$s2,$t2,lsl#16 - ldrb $t1,[$rounds,#14] - orr $s2,$s2,$t3,lsl#24 - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 -#else - ldr $s0,[$rounds,#0] - ldr $s1,[$rounds,#4] - ldr $s2,[$rounds,#8] - ldr $s3,[$rounds,#12] -#ifdef __ARMEL__ - rev $s0,$s0 - rev $s1,$s1 - rev $s2,$s2 - rev $s3,$s3 -#endif -#endif - bl _armv4_AES_decrypt - - ldr $rounds,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev $s0,$s0 - rev $s1,$s1 - rev $s2,$s2 - rev $s3,$s3 -#endif - str $s0,[$rounds,#0] - str $s1,[$rounds,#4] - str $s2,[$rounds,#8] - str $s3,[$rounds,#12] -#else - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... - mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - mov $t1,$s1,lsr#24 - strb $t3,[$rounds,#2] - mov $t2,$s1,lsr#16 - strb $s0,[$rounds,#3] - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - mov $t1,$s2,lsr#24 - strb $t3,[$rounds,#6] - mov $t2,$s2,lsr#16 - strb $s1,[$rounds,#7] - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - mov $t1,$s3,lsr#24 - strb $t3,[$rounds,#10] - mov $t2,$s3,lsr#16 - strb $s2,[$rounds,#11] - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] -#endif -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size AES_decrypt,.-AES_decrypt - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldmia $key!,{$t1-$i1} - eor $s0,$s0,$t1 - ldr $rounds,[$key,#240-16] - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - - and $i1,lr,$s0,lsr#16 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0 - mov $s0,$s0,lsr#24 -.Ldec_loop: - ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16] - and $i1,lr,$s1 @ i0 - ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8] - and $i2,lr,$s1,lsr#16 - ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0] - and $i3,lr,$s1,lsr#8 - ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24] - mov $s1,$s1,lsr#24 - - ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0] - ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16] - ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8] - eor $s0,$s0,$i1,ror#24 - ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24] - and $i1,lr,$s2,lsr#8 @ i0 - eor $t2,$i2,$t2,ror#8 - and $i2,lr,$s2 @ i1 - eor $t3,$i3,$t3,ror#8 - and $i3,lr,$s2,lsr#16 - ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] - eor $s1,$s1,$t1,ror#8 - ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] - mov $s2,$s2,lsr#24 - - ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] - eor $s0,$s0,$i1,ror#16 - ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] - and $i1,lr,$s3,lsr#16 @ i0 - eor $s1,$s1,$i2,ror#24 - and $i2,lr,$s3,lsr#8 @ i1 - eor $t3,$i3,$t3,ror#8 - and $i3,lr,$s3 @ i2 - ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] - eor $s2,$s2,$t2,ror#8 - ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] - mov $s3,$s3,lsr#24 - - ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] - eor $s0,$s0,$i1,ror#8 - ldr $i1,[$key],#16 - eor $s1,$s1,$i2,ror#16 - ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] - eor $s2,$s2,$i3,ror#24 - - ldr $t1,[$key,#-12] - eor $s0,$s0,$i1 - ldr $t2,[$key,#-8] - eor $s3,$s3,$t3,ror#8 - ldr $t3,[$key,#-4] - and $i1,lr,$s0,lsr#16 - eor $s1,$s1,$t1 - and $i2,lr,$s0,lsr#8 - eor $s2,$s2,$t2 - and $i3,lr,$s0 - eor $s3,$s3,$t3 - mov $s0,$s0,lsr#24 - - subs $rounds,$rounds,#1 - bne .Ldec_loop - - add $tbl,$tbl,#1024 - - ldr $t2,[$tbl,#0] @ prefetch Td4 - ldr $t3,[$tbl,#32] - ldr $t1,[$tbl,#64] - ldr $t2,[$tbl,#96] - ldr $t3,[$tbl,#128] - ldr $t1,[$tbl,#160] - ldr $t2,[$tbl,#192] - ldr $t3,[$tbl,#224] - - ldrb $s0,[$tbl,$s0] @ Td4[s0>>24] - ldrb $t1,[$tbl,$i1] @ Td4[s0>>16] - and $i1,lr,$s1 @ i0 - ldrb $t2,[$tbl,$i2] @ Td4[s0>>8] - and $i2,lr,$s1,lsr#16 - ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] - and $i3,lr,$s1,lsr#8 - - ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] - ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] - eor $s0,$i1,$s0,lsl#24 - ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] - eor $s1,$t1,$s1,lsl#8 - and $i1,lr,$s2,lsr#8 @ i0 - eor $t2,$t2,$i2,lsl#8 - and $i2,lr,$s2 @ i1 - ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] - eor $t3,$t3,$i3,lsl#8 - ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] - and $i3,lr,$s2,lsr#16 - - ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] - eor $s0,$s0,$i1,lsl#8 - ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] - eor $s1,$i2,$s1,lsl#16 - and $i1,lr,$s3,lsr#16 @ i0 - eor $s2,$t2,$s2,lsl#16 - and $i2,lr,$s3,lsr#8 @ i1 - ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] - eor $t3,$t3,$i3,lsl#16 - ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] - and $i3,lr,$s3 @ i2 - - ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] - eor $s0,$s0,$i1,lsl#16 - ldr $i1,[$key,#0] - eor $s1,$s1,$i2,lsl#8 - ldr $t1,[$key,#4] - eor $s2,$i3,$s2,lsl#8 - ldr $t2,[$key,#8] - eor $s3,$t3,$s3,lsl#24 - ldr $t3,[$key,#12] - - eor $s0,$s0,$i1 - eor $s1,$s1,$t1 - eor $s2,$s2,$t2 - eor $s3,$s3,$t3 - - sub $tbl,$tbl,#1024 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by " -.align 2 -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-ia64.S b/drivers/builtin_openssl2/crypto/aes/asm/aes-ia64.S deleted file mode 100644 index 7f6c4c3662..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-ia64.S +++ /dev/null @@ -1,1123 +0,0 @@ -// ==================================================================== -// Written by Andy Polyakov for the OpenSSL -// project. Rights for redistribution and usage in source and binary -// forms are granted according to the OpenSSL license. -// ==================================================================== -// -// What's wrong with compiler generated code? Compiler never uses -// variable 'shr' which is pairable with 'extr'/'dep' instructions. -// Then it uses 'zxt' which is an I-type, but can be replaced with -// 'and' which in turn can be assigned to M-port [there're double as -// much M-ports as there're I-ports on Itanium 2]. By sacrificing few -// registers for small constants (255, 24 and 16) to be used with -// 'shr' and 'and' instructions I can achieve better ILP, Intruction -// Level Parallelism, and performance. This code outperforms GCC 3.3 -// generated code by over factor of 2 (two), GCC 3.4 - by 70% and -// HP C - by 40%. Measured best-case scenario, i.e. aligned -// big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds) -// ticks per block, or 9.25 CPU cycles per byte for 128 bit key. - -// Version 1.2 mitigates the hazard of cache-timing attacks by -// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling -// references to S-boxes for L2 cache latency, c) prefetching T[ed]4 -// prior last round. As result performance dropped to (26 + 15*rounds) -// ticks per block or 11 cycles per byte processed with 128-bit key. -// This is ~16% deterioration. For reference Itanium 2 L1 cache has -// 64 bytes line size and L2 - 128 bytes... - -.ident "aes-ia64.S, version 1.2" -.ident "IA-64 ISA artwork by Andy Polyakov " -.explicit -.text - -rk0=r8; rk1=r9; - -pfssave=r2; -lcsave=r10; -prsave=r3; -maskff=r11; -twenty4=r14; -sixteen=r15; - -te00=r16; te11=r17; te22=r18; te33=r19; -te01=r20; te12=r21; te23=r22; te30=r23; -te02=r24; te13=r25; te20=r26; te31=r27; -te03=r28; te10=r29; te21=r30; te32=r31; - -// these are rotating... -t0=r32; s0=r33; -t1=r34; s1=r35; -t2=r36; s2=r37; -t3=r38; s3=r39; - -te0=r40; te1=r41; te2=r42; te3=r43; - -#if defined(_HPUX_SOURCE) && !defined(_LP64) -# define ADDP addp4 -#else -# define ADDP add -#endif - -// Offsets from Te0 -#define TE0 0 -#define TE2 2 -#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) -#define TE1 3 -#define TE3 1 -#else -#define TE1 1 -#define TE3 3 -#endif - -// This implies that AES_KEY comprises 32-bit key schedule elements -// even on LP64 platforms. -#ifndef KSZ -# define KSZ 4 -# define LDKEY ld4 -#endif - -.proc _ia64_AES_encrypt# -// Input: rk0-rk1 -// te0 -// te3 as AES_KEY->rounds!!! -// s0-s3 -// maskff,twenty4,sixteen -// Output: r16,r20,r24,r28 as s0-s3 -// Clobber: r16-r31,rk0-rk1,r32-r43 -.align 32 -_ia64_AES_encrypt: - .prologue - .altrp b6 - .body -{ .mmi; alloc r16=ar.pfs,12,0,0,8 - LDKEY t0=[rk0],2*KSZ - mov pr.rot=1<<16 } -{ .mmi; LDKEY t1=[rk1],2*KSZ - add te1=TE1,te0 - add te3=-3,te3 };; -{ .mib; LDKEY t2=[rk0],2*KSZ - mov ar.ec=2 } -{ .mib; LDKEY t3=[rk1],2*KSZ - add te2=TE2,te0 - brp.loop.imp .Le_top,.Le_end-16 };; - -{ .mmi; xor s0=s0,t0 - xor s1=s1,t1 - mov ar.lc=te3 } -{ .mmi; xor s2=s2,t2 - xor s3=s3,t3 - add te3=TE3,te0 };; - -.align 32 -.Le_top: -{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] - (p0) and te33=s3,maskff // 0/0:s3&0xff - (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff -{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] - (p0) and te30=s0,maskff // 0/1:s0&0xff - (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24 -{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] - (p0) shladd te33=te33,3,te3 // 1/0:te0+s0>>24 - (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff -{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] - (p0) shladd te30=te30,3,te3 // 1/1:te3+s0 - (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24 -{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff] - (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff - (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff -{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0] - (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8 - (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24 -{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8] - (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8 - (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff -{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8] - (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24 - (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24 -{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8] - (p0) shladd te21=te21,3,te2 // 4/3:te3+s2 - (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff -{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24] - (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24 - (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16 -{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8] - (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16 - (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff -{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24] - (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24 - (p0) and te31=s1,maskff };; // 5/2:s1&0xff -{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16] - (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16 - (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff -{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24] - (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16 - (p0) and te32=s2,maskff };; // 6/3:s2&0xff - -{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16] - (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff - (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff -{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24] - (p0) shladd te32=te32,3,te3 // 7/3:te3+s2 - (p0) xor t0=t0,te33 };; // 7/0: -{ .mmi; (p0) ld4 te31=[te31] // 8/2:te3[s1] - (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16 - (p0) xor t0=t0,te22 } // 8/0: -{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2] - (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16 - (p0) xor t1=t1,te30 };; // 8/1: -{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16] - (p0) ld4 te10=[te10] // 9/3:te1[s0>>16] - (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling -{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1: - (p0) xor t2=t2,te20 // 10[9]/2: - (p0) xor t3=t3,te21 };; // 10[9]/3: -{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done! - (p0) xor t1=t1,te01 // 11[10]/1: - (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling -{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3: - (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17) -{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done! - (p0) xor t2=t2,te31 // 13[11]/2: - (p0) xor t3=t3,te32 } // 13[11]/3: -{ .mmi; (p17) add te0=2048,te0 // 13[11]/ - (p17) add te1=2048+64-TE1,te1};; // 13[11]/ -{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done! - (p17) add te2=2048+128-TE2,te2} // 14[12]/ -{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done! - (p17) add te3=2048+192-TE3,te3 // 14[12]/ - br.ctop.sptk .Le_top };; -.Le_end: - - -{ .mmi; ld8 te12=[te0] // prefetch Te4 - ld8 te31=[te1] } -{ .mmi; ld8 te10=[te2] - ld8 te32=[te3] } - -{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] - and te33=s3,maskff // 0/0:s3&0xff - extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff -{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] - and te30=s0,maskff // 0/1:s0&0xff - shr.u te00=s0,twenty4 };; // 0/0:s0>>24 -{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] - add te33=te33,te0 // 1/0:te0+s0>>24 - extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff -{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] - add te30=te30,te0 // 1/1:te0+s0 - shr.u te01=s1,twenty4 };; // 1/1:s1>>24 -{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff] - add te22=te22,te0 // 2/0:te0+s2>>8&0xff - extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff -{ .mmi; ld1 te30=[te30] // 2/1:te0[s0] - add te23=te23,te0 // 2/1:te0+s3>>8 - shr.u te02=s2,twenty4 };; // 2/2:s2>>24 -{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8] - add te20=te20,te0 // 3/2:te0+s0>>8 - extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff -{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8] - add te00=te00,te0 // 3/0:te0+s0>>24 - shr.u te03=s3,twenty4 };; // 3/3:s3>>24 -{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8] - add te21=te21,te0 // 4/3:te0+s2 - extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff -{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24] - add te01=te01,te0 // 4/1:te0+s1>>24 - shr.u te13=s3,sixteen };; // 4/2:s3>>16 -{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8] - add te11=te11,te0 // 5/0:te0+s1>>16 - extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff -{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24] - add te02=te02,te0 // 5/2:te0+s2>>24 - and te31=s1,maskff };; // 5/2:s1&0xff -{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16] - add te12=te12,te0 // 6/1:te0+s2>>16 - extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff -{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24] - add te03=te03,te0 // 6/3:te0+s0>>16 - and te32=s2,maskff };; // 6/3:s2&0xff - -{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16] - add te31=te31,te0 // 7/2:te0+s1&0xff - dep te33=te22,te33,8,8} // 7/0: -{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24] - add te32=te32,te0 // 7/3:te0+s2 - and te13=te13,maskff};; // 7/2:s3>>16&0xff -{ .mmi; ld1 te31=[te31] // 8/2:te0[s1] - add te13=te13,te0 // 8/2:te0+s3>>16 - dep te30=te23,te30,8,8} // 8/1: -{ .mmi; ld1 te32=[te32] // 8/3:te0[s2] - add te10=te10,te0 // 8/3:te0+s0>>16 - shl te00=te00,twenty4};; // 8/0: -{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16] - dep te33=te11,te33,16,8 // 9/0: - shl te01=te01,twenty4};; // 9/1: -{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16] - dep te31=te20,te31,8,8 // 10/2: - shl te02=te02,twenty4};; // 10/2: -{ .mii; xor t0=t0,te33 // 11/0: - dep te32=te21,te32,8,8 // 11/3: - shl te12=te12,sixteen};; // 11/1: -{ .mii; xor r16=t0,te00 // 12/0:done! - dep te31=te13,te31,16,8 // 12/2: - shl te03=te03,twenty4};; // 12/3: -{ .mmi; xor t1=t1,te01 // 13/1: - xor t2=t2,te02 // 13/2: - dep te32=te10,te32,16,8};; // 13/3: -{ .mmi; xor t1=t1,te30 // 14/1: - xor r24=t2,te31 // 14/2:done! - xor t3=t3,te32 };; // 14/3: -{ .mib; xor r20=t1,te12 // 15/1:done! - xor r28=t3,te03 // 15/3:done! - br.ret.sptk b6 };; -.endp _ia64_AES_encrypt# - -// void AES_encrypt (const void *in,void *out,const AES_KEY *key); -.global AES_encrypt# -.proc AES_encrypt# -.align 32 -AES_encrypt: - .prologue - .save ar.pfs,pfssave -{ .mmi; alloc pfssave=ar.pfs,3,1,12,0 - and out0=3,in0 - mov r3=ip } -{ .mmi; ADDP in0=0,in0 - mov loc0=psr.um - ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds - -{ .mmi; ld4 out11=[out11] // AES_KEY->rounds - add out8=(AES_Te#-AES_encrypt#),r3 // Te0 - .save pr,prsave - mov prsave=pr } -{ .mmi; rum 1<<3 // clear um.ac - .save ar.lc,lcsave - mov lcsave=ar.lc };; - - .body -#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... -{ .mib; cmp.ne p6,p0=out0,r0 - add out0=4,in0 -(p6) br.dpnt.many .Le_i_unaligned };; - -{ .mmi; ld4 out1=[in0],8 // s0 - and out9=3,in1 - mov twenty4=24 } -{ .mmi; ld4 out3=[out0],8 // s1 - ADDP rk0=0,in2 - mov sixteen=16 };; -{ .mmi; ld4 out5=[in0] // s2 - cmp.ne p6,p0=out9,r0 - mov maskff=0xff } -{ .mmb; ld4 out7=[out0] // s3 - ADDP rk1=KSZ,in2 - br.call.sptk.many b6=_ia64_AES_encrypt };; - -{ .mib; ADDP in0=4,in1 - ADDP in1=0,in1 -(p6) br.spnt .Le_o_unaligned };; - -{ .mii; mov psr.um=loc0 - mov ar.pfs=pfssave - mov ar.lc=lcsave };; -{ .mmi; st4 [in1]=r16,8 // s0 - st4 [in0]=r20,8 // s1 - mov pr=prsave,0x1ffff };; -{ .mmb; st4 [in1]=r24 // s2 - st4 [in0]=r28 // s3 - br.ret.sptk.many b0 };; -#endif - -.align 32 -.Le_i_unaligned: -{ .mmi; add out0=1,in0 - add out2=2,in0 - add out4=3,in0 };; -{ .mmi; ld1 r16=[in0],4 - ld1 r17=[out0],4 }//;; -{ .mmi; ld1 r18=[out2],4 - ld1 out1=[out4],4 };; // s0 -{ .mmi; ld1 r20=[in0],4 - ld1 r21=[out0],4 }//;; -{ .mmi; ld1 r22=[out2],4 - ld1 out3=[out4],4 };; // s1 -{ .mmi; ld1 r24=[in0],4 - ld1 r25=[out0],4 }//;; -{ .mmi; ld1 r26=[out2],4 - ld1 out5=[out4],4 };; // s2 -{ .mmi; ld1 r28=[in0] - ld1 r29=[out0] }//;; -{ .mmi; ld1 r30=[out2] - ld1 out7=[out4] };; // s3 - -{ .mii; - dep out1=r16,out1,24,8 //;; - dep out3=r20,out3,24,8 }//;; -{ .mii; ADDP rk0=0,in2 - dep out5=r24,out5,24,8 //;; - dep out7=r28,out7,24,8 };; -{ .mii; ADDP rk1=KSZ,in2 - dep out1=r17,out1,16,8 //;; - dep out3=r21,out3,16,8 }//;; -{ .mii; mov twenty4=24 - dep out5=r25,out5,16,8 //;; - dep out7=r29,out7,16,8 };; -{ .mii; mov sixteen=16 - dep out1=r18,out1,8,8 //;; - dep out3=r22,out3,8,8 }//;; -{ .mii; mov maskff=0xff - dep out5=r26,out5,8,8 //;; - dep out7=r30,out7,8,8 };; - -{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };; - -.Le_o_unaligned: -{ .mii; ADDP out0=0,in1 - extr.u r17=r16,8,8 // s0 - shr.u r19=r16,twenty4 }//;; -{ .mii; ADDP out1=1,in1 - extr.u r18=r16,16,8 - shr.u r23=r20,twenty4 }//;; // s1 -{ .mii; ADDP out2=2,in1 - extr.u r21=r20,8,8 - shr.u r22=r20,sixteen }//;; -{ .mii; ADDP out3=3,in1 - extr.u r25=r24,8,8 // s2 - shr.u r27=r24,twenty4 };; -{ .mii; st1 [out3]=r16,4 - extr.u r26=r24,16,8 - shr.u r31=r28,twenty4 }//;; // s3 -{ .mii; st1 [out2]=r17,4 - extr.u r29=r28,8,8 - shr.u r30=r28,sixteen }//;; - -{ .mmi; st1 [out1]=r18,4 - st1 [out0]=r19,4 };; -{ .mmi; st1 [out3]=r20,4 - st1 [out2]=r21,4 }//;; -{ .mmi; st1 [out1]=r22,4 - st1 [out0]=r23,4 };; -{ .mmi; st1 [out3]=r24,4 - st1 [out2]=r25,4 - mov pr=prsave,0x1ffff }//;; -{ .mmi; st1 [out1]=r26,4 - st1 [out0]=r27,4 - mov ar.pfs=pfssave };; -{ .mmi; st1 [out3]=r28 - st1 [out2]=r29 - mov ar.lc=lcsave }//;; -{ .mmi; st1 [out1]=r30 - st1 [out0]=r31 } -{ .mfb; mov psr.um=loc0 // restore user mask - br.ret.sptk.many b0 };; -.endp AES_encrypt# - -// *AES_decrypt are autogenerated by the following script: -#if 0 -#!/usr/bin/env perl -print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n"; -open(PROG,'<'.$0); while() { print; } close(PROG); -print "#endif\n"; -while(<>) { - $process=1 if (/\.proc\s+_ia64_AES_encrypt/); - next if (!$process); - - #s/te00=s0/td00=s0/; s/te00/td00/g; - s/te11=s1/td13=s3/; s/te11/td13/g; - #s/te22=s2/td22=s2/; s/te22/td22/g; - s/te33=s3/td31=s1/; s/te33/td31/g; - - #s/te01=s1/td01=s1/; s/te01/td01/g; - s/te12=s2/td10=s0/; s/te12/td10/g; - #s/te23=s3/td23=s3/; s/te23/td23/g; - s/te30=s0/td32=s2/; s/te30/td32/g; - - #s/te02=s2/td02=s2/; s/te02/td02/g; - s/te13=s3/td11=s1/; s/te13/td11/g; - #s/te20=s0/td20=s0/; s/te20/td20/g; - s/te31=s1/td33=s3/; s/te31/td33/g; - - #s/te03=s3/td03=s3/; s/te03/td03/g; - s/te10=s0/td12=s2/; s/te10/td12/g; - #s/te21=s1/td21=s1/; s/te21/td21/g; - s/te32=s2/td30=s0/; s/te32/td30/g; - - s/td/te/g; - - s/AES_encrypt/AES_decrypt/g; - s/\.Le_/.Ld_/g; - s/AES_Te#/AES_Td#/g; - - print; - - exit if (/\.endp\s+AES_decrypt/); -} -#endif -.proc _ia64_AES_decrypt# -// Input: rk0-rk1 -// te0 -// te3 as AES_KEY->rounds!!! -// s0-s3 -// maskff,twenty4,sixteen -// Output: r16,r20,r24,r28 as s0-s3 -// Clobber: r16-r31,rk0-rk1,r32-r43 -.align 32 -_ia64_AES_decrypt: - .prologue - .altrp b6 - .body -{ .mmi; alloc r16=ar.pfs,12,0,0,8 - LDKEY t0=[rk0],2*KSZ - mov pr.rot=1<<16 } -{ .mmi; LDKEY t1=[rk1],2*KSZ - add te1=TE1,te0 - add te3=-3,te3 };; -{ .mib; LDKEY t2=[rk0],2*KSZ - mov ar.ec=2 } -{ .mib; LDKEY t3=[rk1],2*KSZ - add te2=TE2,te0 - brp.loop.imp .Ld_top,.Ld_end-16 };; - -{ .mmi; xor s0=s0,t0 - xor s1=s1,t1 - mov ar.lc=te3 } -{ .mmi; xor s2=s2,t2 - xor s3=s3,t3 - add te3=TE3,te0 };; - -.align 32 -.Ld_top: -{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] - (p0) and te31=s1,maskff // 0/0:s3&0xff - (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff -{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] - (p0) and te32=s2,maskff // 0/1:s0&0xff - (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24 -{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] - (p0) shladd te31=te31,3,te3 // 1/0:te0+s0>>24 - (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff -{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] - (p0) shladd te32=te32,3,te3 // 1/1:te3+s0 - (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24 -{ .mmi; (p0) ld4 te31=[te31] // 2/0:te3[s3&0xff] - (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff - (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff -{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s0] - (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8 - (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24 -{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8] - (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8 - (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff -{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8] - (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24 - (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24 -{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8] - (p0) shladd te21=te21,3,te2 // 4/3:te3+s2 - (p0) extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff -{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24] - (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24 - (p0) shr.u te11=s1,sixteen };; // 4/2:s3>>16 -{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8] - (p0) shladd te13=te13,3,te1 // 5/0:te1+s1>>16 - (p0) extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff -{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24] - (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24 - (p0) and te33=s3,maskff };; // 5/2:s1&0xff -{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s1>>16] - (p0) shladd te10=te10,3,te1 // 6/1:te1+s2>>16 - (p0) extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff -{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24] - (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16 - (p0) and te30=s0,maskff };; // 6/3:s2&0xff - -{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s2>>16] - (p0) shladd te33=te33,3,te3 // 7/2:te3+s1&0xff - (p0) and te11=te11,maskff} // 7/2:s3>>16&0xff -{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24] - (p0) shladd te30=te30,3,te3 // 7/3:te3+s2 - (p0) xor t0=t0,te31 };; // 7/0: -{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s1] - (p0) shladd te11=te11,3,te1 // 8/2:te1+s3>>16 - (p0) xor t0=t0,te22 } // 8/0: -{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s2] - (p0) shladd te12=te12,3,te1 // 8/3:te1+s0>>16 - (p0) xor t1=t1,te32 };; // 8/1: -{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s3>>16] - (p0) ld4 te12=[te12] // 9/3:te1[s0>>16] - (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling -{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1: - (p0) xor t2=t2,te20 // 10[9]/2: - (p0) xor t3=t3,te21 };; // 10[9]/3: -{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done! - (p0) xor t1=t1,te01 // 11[10]/1: - (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling -{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3: - (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17) -{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done! - (p0) xor t2=t2,te33 // 13[11]/2: - (p0) xor t3=t3,te30 } // 13[11]/3: -{ .mmi; (p17) add te0=2048,te0 // 13[11]/ - (p17) add te1=2048+64-TE1,te1};; // 13[11]/ -{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done! - (p17) add te2=2048+128-TE2,te2} // 14[12]/ -{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done! - (p17) add te3=2048+192-TE3,te3 // 14[12]/ - br.ctop.sptk .Ld_top };; -.Ld_end: - - -{ .mmi; ld8 te10=[te0] // prefetch Td4 - ld8 te33=[te1] } -{ .mmi; ld8 te12=[te2] - ld8 te30=[te3] } - -{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] - and te31=s1,maskff // 0/0:s3&0xff - extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff -{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] - and te32=s2,maskff // 0/1:s0&0xff - shr.u te00=s0,twenty4 };; // 0/0:s0>>24 -{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] - add te31=te31,te0 // 1/0:te0+s0>>24 - extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff -{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] - add te32=te32,te0 // 1/1:te0+s0 - shr.u te01=s1,twenty4 };; // 1/1:s1>>24 -{ .mmi; ld1 te31=[te31] // 2/0:te0[s3&0xff] - add te22=te22,te0 // 2/0:te0+s2>>8&0xff - extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff -{ .mmi; ld1 te32=[te32] // 2/1:te0[s0] - add te23=te23,te0 // 2/1:te0+s3>>8 - shr.u te02=s2,twenty4 };; // 2/2:s2>>24 -{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8] - add te20=te20,te0 // 3/2:te0+s0>>8 - extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff -{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8] - add te00=te00,te0 // 3/0:te0+s0>>24 - shr.u te03=s3,twenty4 };; // 3/3:s3>>24 -{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8] - add te21=te21,te0 // 4/3:te0+s2 - extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff -{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24] - add te01=te01,te0 // 4/1:te0+s1>>24 - shr.u te11=s1,sixteen };; // 4/2:s3>>16 -{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8] - add te13=te13,te0 // 5/0:te0+s1>>16 - extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff -{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24] - add te02=te02,te0 // 5/2:te0+s2>>24 - and te33=s3,maskff };; // 5/2:s1&0xff -{ .mmi; ld1 te13=[te13] // 6/0:te0[s1>>16] - add te10=te10,te0 // 6/1:te0+s2>>16 - extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff -{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24] - add te03=te03,te0 // 6/3:te0+s0>>16 - and te30=s0,maskff };; // 6/3:s2&0xff - -{ .mmi; ld1 te10=[te10] // 7/1:te0[s2>>16] - add te33=te33,te0 // 7/2:te0+s1&0xff - dep te31=te22,te31,8,8} // 7/0: -{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24] - add te30=te30,te0 // 7/3:te0+s2 - and te11=te11,maskff};; // 7/2:s3>>16&0xff -{ .mmi; ld1 te33=[te33] // 8/2:te0[s1] - add te11=te11,te0 // 8/2:te0+s3>>16 - dep te32=te23,te32,8,8} // 8/1: -{ .mmi; ld1 te30=[te30] // 8/3:te0[s2] - add te12=te12,te0 // 8/3:te0+s0>>16 - shl te00=te00,twenty4};; // 8/0: -{ .mii; ld1 te11=[te11] // 9/2:te0[s3>>16] - dep te31=te13,te31,16,8 // 9/0: - shl te01=te01,twenty4};; // 9/1: -{ .mii; ld1 te12=[te12] // 10/3:te0[s0>>16] - dep te33=te20,te33,8,8 // 10/2: - shl te02=te02,twenty4};; // 10/2: -{ .mii; xor t0=t0,te31 // 11/0: - dep te30=te21,te30,8,8 // 11/3: - shl te10=te10,sixteen};; // 11/1: -{ .mii; xor r16=t0,te00 // 12/0:done! - dep te33=te11,te33,16,8 // 12/2: - shl te03=te03,twenty4};; // 12/3: -{ .mmi; xor t1=t1,te01 // 13/1: - xor t2=t2,te02 // 13/2: - dep te30=te12,te30,16,8};; // 13/3: -{ .mmi; xor t1=t1,te32 // 14/1: - xor r24=t2,te33 // 14/2:done! - xor t3=t3,te30 };; // 14/3: -{ .mib; xor r20=t1,te10 // 15/1:done! - xor r28=t3,te03 // 15/3:done! - br.ret.sptk b6 };; -.endp _ia64_AES_decrypt# - -// void AES_decrypt (const void *in,void *out,const AES_KEY *key); -.global AES_decrypt# -.proc AES_decrypt# -.align 32 -AES_decrypt: - .prologue - .save ar.pfs,pfssave -{ .mmi; alloc pfssave=ar.pfs,3,1,12,0 - and out0=3,in0 - mov r3=ip } -{ .mmi; ADDP in0=0,in0 - mov loc0=psr.um - ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds - -{ .mmi; ld4 out11=[out11] // AES_KEY->rounds - add out8=(AES_Td#-AES_decrypt#),r3 // Te0 - .save pr,prsave - mov prsave=pr } -{ .mmi; rum 1<<3 // clear um.ac - .save ar.lc,lcsave - mov lcsave=ar.lc };; - - .body -#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... -{ .mib; cmp.ne p6,p0=out0,r0 - add out0=4,in0 -(p6) br.dpnt.many .Ld_i_unaligned };; - -{ .mmi; ld4 out1=[in0],8 // s0 - and out9=3,in1 - mov twenty4=24 } -{ .mmi; ld4 out3=[out0],8 // s1 - ADDP rk0=0,in2 - mov sixteen=16 };; -{ .mmi; ld4 out5=[in0] // s2 - cmp.ne p6,p0=out9,r0 - mov maskff=0xff } -{ .mmb; ld4 out7=[out0] // s3 - ADDP rk1=KSZ,in2 - br.call.sptk.many b6=_ia64_AES_decrypt };; - -{ .mib; ADDP in0=4,in1 - ADDP in1=0,in1 -(p6) br.spnt .Ld_o_unaligned };; - -{ .mii; mov psr.um=loc0 - mov ar.pfs=pfssave - mov ar.lc=lcsave };; -{ .mmi; st4 [in1]=r16,8 // s0 - st4 [in0]=r20,8 // s1 - mov pr=prsave,0x1ffff };; -{ .mmb; st4 [in1]=r24 // s2 - st4 [in0]=r28 // s3 - br.ret.sptk.many b0 };; -#endif - -.align 32 -.Ld_i_unaligned: -{ .mmi; add out0=1,in0 - add out2=2,in0 - add out4=3,in0 };; -{ .mmi; ld1 r16=[in0],4 - ld1 r17=[out0],4 }//;; -{ .mmi; ld1 r18=[out2],4 - ld1 out1=[out4],4 };; // s0 -{ .mmi; ld1 r20=[in0],4 - ld1 r21=[out0],4 }//;; -{ .mmi; ld1 r22=[out2],4 - ld1 out3=[out4],4 };; // s1 -{ .mmi; ld1 r24=[in0],4 - ld1 r25=[out0],4 }//;; -{ .mmi; ld1 r26=[out2],4 - ld1 out5=[out4],4 };; // s2 -{ .mmi; ld1 r28=[in0] - ld1 r29=[out0] }//;; -{ .mmi; ld1 r30=[out2] - ld1 out7=[out4] };; // s3 - -{ .mii; - dep out1=r16,out1,24,8 //;; - dep out3=r20,out3,24,8 }//;; -{ .mii; ADDP rk0=0,in2 - dep out5=r24,out5,24,8 //;; - dep out7=r28,out7,24,8 };; -{ .mii; ADDP rk1=KSZ,in2 - dep out1=r17,out1,16,8 //;; - dep out3=r21,out3,16,8 }//;; -{ .mii; mov twenty4=24 - dep out5=r25,out5,16,8 //;; - dep out7=r29,out7,16,8 };; -{ .mii; mov sixteen=16 - dep out1=r18,out1,8,8 //;; - dep out3=r22,out3,8,8 }//;; -{ .mii; mov maskff=0xff - dep out5=r26,out5,8,8 //;; - dep out7=r30,out7,8,8 };; - -{ .mib; br.call.sptk.many b6=_ia64_AES_decrypt };; - -.Ld_o_unaligned: -{ .mii; ADDP out0=0,in1 - extr.u r17=r16,8,8 // s0 - shr.u r19=r16,twenty4 }//;; -{ .mii; ADDP out1=1,in1 - extr.u r18=r16,16,8 - shr.u r23=r20,twenty4 }//;; // s1 -{ .mii; ADDP out2=2,in1 - extr.u r21=r20,8,8 - shr.u r22=r20,sixteen }//;; -{ .mii; ADDP out3=3,in1 - extr.u r25=r24,8,8 // s2 - shr.u r27=r24,twenty4 };; -{ .mii; st1 [out3]=r16,4 - extr.u r26=r24,16,8 - shr.u r31=r28,twenty4 }//;; // s3 -{ .mii; st1 [out2]=r17,4 - extr.u r29=r28,8,8 - shr.u r30=r28,sixteen }//;; - -{ .mmi; st1 [out1]=r18,4 - st1 [out0]=r19,4 };; -{ .mmi; st1 [out3]=r20,4 - st1 [out2]=r21,4 }//;; -{ .mmi; st1 [out1]=r22,4 - st1 [out0]=r23,4 };; -{ .mmi; st1 [out3]=r24,4 - st1 [out2]=r25,4 - mov pr=prsave,0x1ffff }//;; -{ .mmi; st1 [out1]=r26,4 - st1 [out0]=r27,4 - mov ar.pfs=pfssave };; -{ .mmi; st1 [out3]=r28 - st1 [out2]=r29 - mov ar.lc=lcsave }//;; -{ .mmi; st1 [out1]=r30 - st1 [out0]=r31 } -{ .mfb; mov psr.um=loc0 // restore user mask - br.ret.sptk.many b0 };; -.endp AES_decrypt# - -// leave it in .text segment... -.align 64 -.global AES_Te# -.type AES_Te#,@object -AES_Te: data4 0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84 - data4 0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d - data4 0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd - data4 0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554 - data4 0x60303050,0x60303050, 0x02010103,0x02010103 - data4 0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d - data4 0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762 - data4 0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a - data4 0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d - data4 0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87 - data4 0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb - data4 0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b - data4 0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467 - data4 0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea - data4 0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7 - data4 0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b - data4 0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c - data4 0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a - data4 0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41 - data4 0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f - data4 0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4 - data4 0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108 - data4 0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873 - data4 0x62313153,0x62313153, 0x2a15153f,0x2a15153f - data4 0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752 - data4 0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e - data4 0x30181828,0x30181828, 0x379696a1,0x379696a1 - data4 0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5 - data4 0x0e070709,0x0e070709, 0x24121236,0x24121236 - data4 0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d - data4 0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769 - data4 0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f - data4 0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e - data4 0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e - data4 0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2 - data4 0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb - data4 0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d - data4 0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce - data4 0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e - data4 0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497 - data4 0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168 - data4 0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c - data4 0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f - data4 0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed - data4 0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46 - data4 0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b - data4 0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4 - data4 0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a - data4 0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a - data4 0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16 - data4 0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7 - data4 0x66333355,0x66333355, 0x11858594,0x11858594 - data4 0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910 - data4 0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81 - data4 0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44 - data4 0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3 - data4 0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe - data4 0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a - data4 0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc - data4 0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504 - data4 0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1 - data4 0xafdada75,0xafdada75, 0x42212163,0x42212163 - data4 0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a - data4 0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d - data4 0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14 - data4 0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f - data4 0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2 - data4 0x884444cc,0x884444cc, 0x2e171739,0x2e171739 - data4 0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2 - data4 0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47 - data4 0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7 - data4 0x3219192b,0x3219192b, 0xe6737395,0xe6737395 - data4 0xc06060a0,0xc06060a0, 0x19818198,0x19818198 - data4 0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f - data4 0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e - data4 0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883 - data4 0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29 - data4 0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c - data4 0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2 - data4 0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76 - data4 0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256 - data4 0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e - data4 0x924949db,0x924949db, 0x0c06060a,0x0c06060a - data4 0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4 - data4 0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e - data4 0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6 - data4 0x399191a8,0x399191a8, 0x319595a4,0x319595a4 - data4 0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b - data4 0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843 - data4 0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7 - data4 0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564 - data4 0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0 - data4 0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa - data4 0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25 - data4 0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e - data4 0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818 - data4 0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888 - data4 0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72 - data4 0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1 - data4 0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651 - data4 0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c - data4 0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21 - data4 0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc - data4 0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85 - data4 0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42 - data4 0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa - data4 0x904848d8,0x904848d8, 0x06030305,0x06030305 - data4 0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12 - data4 0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f - data4 0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0 - data4 0x17868691,0x17868691, 0x99c1c158,0x99c1c158 - data4 0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9 - data4 0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813 - data4 0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133 - data4 0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970 - data4 0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7 - data4 0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22 - data4 0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920 - data4 0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff - data4 0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a - data4 0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8 - data4 0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17 - data4 0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631 - data4 0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8 - data4 0x824141c3,0x824141c3, 0x299999b0,0x299999b0 - data4 0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11 - data4 0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc - data4 0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a -// Te4: - data1 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - data1 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - data1 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - data1 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - data1 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - data1 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - data1 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - data1 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - data1 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - data1 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - data1 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - data1 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - data1 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - data1 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - data1 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - data1 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - data1 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - data1 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - data1 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - data1 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - data1 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - data1 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - data1 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - data1 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - data1 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - data1 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - data1 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - data1 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - data1 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - data1 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - data1 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - data1 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -.size AES_Te#,2048+256 // HP-UX assembler fails to ".-AES_Te#" - -.align 64 -.global AES_Td# -.type AES_Td#,@object -AES_Td: data4 0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553 - data4 0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96 - data4 0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1 - data4 0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393 - data4 0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6 - data4 0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25 - data4 0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7 - data4 0x26354480,0x26354480, 0xb562a38f,0xb562a38f - data4 0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67 - data4 0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1 - data4 0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012 - data4 0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6 - data4 0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95 - data4 0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da - data4 0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3 - data4 0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844 - data4 0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978 - data4 0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd - data4 0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17 - data4 0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4 - data4 0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182 - data4 0x97513360,0x97513360, 0x62537f45,0x62537f45 - data4 0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84 - data4 0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94 - data4 0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19 - data4 0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7 - data4 0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2 - data4 0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a - data4 0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203 - data4 0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5 - data4 0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2 - data4 0x02036aba,0x02036aba, 0xed16825c,0xed16825c - data4 0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492 - data4 0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1 - data4 0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5 - data4 0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a - data4 0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0 - data4 0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75 - data4 0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa - data4 0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051 - data4 0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d - data4 0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46 - data4 0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05 - data4 0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff - data4 0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997 - data4 0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77 - data4 0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88 - data4 0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb - data4 0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9 - data4 0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000 - data4 0x09808683,0x09808683, 0x322bed48,0x322bed48 - data4 0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e - data4 0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856 - data4 0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927 - data4 0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621 - data4 0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a - data4 0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f - data4 0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e - data4 0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2 - data4 0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16 - data4 0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5 - data4 0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d - data4 0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad - data4 0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8 - data4 0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c - data4 0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd - data4 0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc - data4 0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34 - data4 0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc - data4 0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163 - data4 0xd731dcca,0xd731dcca, 0x42638510,0x42638510 - data4 0x13972240,0x13972240, 0x84c61120,0x84c61120 - data4 0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8 - data4 0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d - data4 0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3 - data4 0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0 - data4 0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999 - data4 0x119448fa,0x119448fa, 0x47e96422,0x47e96422 - data4 0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a - data4 0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef - data4 0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1 - data4 0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36 - data4 0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28 - data4 0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4 - data4 0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d - data4 0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662 - data4 0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8 - data4 0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5 - data4 0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c - data4 0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3 - data4 0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7 - data4 0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b - data4 0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4 - data4 0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8 - data4 0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e - data4 0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6 - data4 0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce - data4 0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6 - data4 0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331 - data4 0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0 - data4 0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6 - data4 0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815 - data4 0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7 - data4 0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f - data4 0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d - data4 0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df - data4 0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b - data4 0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f - data4 0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d - data4 0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e - data4 0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252 - data4 0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713 - data4 0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a - data4 0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89 - data4 0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935 - data4 0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c - data4 0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f - data4 0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf - data4 0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b - data4 0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86 - data4 0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e - data4 0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f - data4 0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c - data4 0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541 - data4 0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de - data4 0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190 - data4 0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670 - data4 0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742 -// Td4: - data1 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - data1 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - data1 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - data1 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - data1 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - data1 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - data1 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - data1 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - data1 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - data1 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - data1 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - data1 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - data1 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - data1 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - data1 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - data1 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - data1 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - data1 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - data1 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - data1 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - data1 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - data1 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - data1 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - data1 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - data1 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - data1 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - data1 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - data1 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - data1 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - data1 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - data1 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - data1 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td#,2048+256 // HP-UX assembler fails to ".-AES_Td#" diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl deleted file mode 100644 index 537c8d3172..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-mips.pl +++ /dev/null @@ -1,1611 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for MIPS - -# October 2010 -# -# Code uses 1K[+256B] S-box and on single-issue core [such as R5000] -# spends ~68 cycles per byte processed with 128-bit key. This is ~16% -# faster than gcc-generated code, which is not very impressive. But -# recall that compressed S-box requires extra processing, namely -# additional rotations. Rotations are implemented with lwl/lwr pairs, -# which is normally used for loading unaligned data. Another cool -# thing about this module is its endian neutrality, which means that -# it processes data without ever changing byte order... - -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 - -if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 - $REG_S="sd"; - $REG_L="ld"; - $PTR_SLL="dsll"; # incidentally works even on n32 - $SZREG=8; -} else { - $PTR_ADD="add"; - $PTR_SUB="sub"; - $REG_S="sw"; - $REG_L="lw"; - $PTR_SLL="sll"; - $SZREG=4; -} -$pf = ($flavour =~ /nubi/i) ? $t0 : $t2; -# -# -# -###################################################################### - -$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); - -for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } -open STDOUT,">$output"; - -if (!defined($big_endian)) -{ $big_endian=(unpack('L',pack('N',1))==1); } - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -my ($MSB,$LSB)=(0,3); # automatically converted to little-endian - -$code.=<<___; -.text -#ifdef OPENSSL_FIPSCANISTER -# include -#endif - -#if !defined(__vxworks) || defined(__pic__) -.option pic2 -#endif -.set noat -___ - -{{{ -my $FRAMESIZE=16*$SZREG; -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; - -my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7); -my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); -my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); -my ($key0,$cnt)=($gp,$fp); - -# instuction ordering is "stolen" from output from MIPSpro assembler -# invoked with -mips3 -O3 arguments... -$code.=<<___; -.align 5 -.ent _mips_AES_encrypt -_mips_AES_encrypt: - .frame $sp,0,$ra - .set reorder - lw $t0,0($key) - lw $t1,4($key) - lw $t2,8($key) - lw $t3,12($key) - lw $cnt,240($key) - $PTR_ADD $key0,$key,16 - - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - - sub $cnt,1 - _xtr $i0,$s1,16-2 -.Loop_enc: - _xtr $i1,$s2,16-2 - _xtr $i2,$s3,16-2 - _xtr $i3,$s0,16-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t0,3($i0) # Te1[s1>>16] - lwl $t1,3($i1) # Te1[s2>>16] - lwl $t2,3($i2) # Te1[s3>>16] - lwl $t3,3($i3) # Te1[s0>>16] - lwr $t0,2($i0) # Te1[s1>>16] - lwr $t1,2($i1) # Te1[s2>>16] - lwr $t2,2($i2) # Te1[s3>>16] - lwr $t3,2($i3) # Te1[s0>>16] - - _xtr $i0,$s2,8-2 - _xtr $i1,$s3,8-2 - _xtr $i2,$s0,8-2 - _xtr $i3,$s1,8-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t4,2($i0) # Te2[s2>>8] - lwl $t5,2($i1) # Te2[s3>>8] - lwl $t6,2($i2) # Te2[s0>>8] - lwl $t7,2($i3) # Te2[s1>>8] - lwr $t4,1($i0) # Te2[s2>>8] - lwr $t5,1($i1) # Te2[s3>>8] - lwr $t6,1($i2) # Te2[s0>>8] - lwr $t7,1($i3) # Te2[s1>>8] - - _xtr $i0,$s3,0-2 - _xtr $i1,$s0,0-2 - _xtr $i2,$s1,0-2 - _xtr $i3,$s2,0-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t8,1($i0) # Te3[s3] - lwl $t9,1($i1) # Te3[s0] - lwl $t10,1($i2) # Te3[s1] - lwl $t11,1($i3) # Te3[s2] - lwr $t8,0($i0) # Te3[s3] - lwr $t9,0($i1) # Te3[s0] - lwr $t10,0($i2) # Te3[s1] - lwr $t11,0($i3) # Te3[s2] - - _xtr $i0,$s0,24-2 - _xtr $i1,$s1,24-2 - _xtr $i2,$s2,24-2 - _xtr $i3,$s3,24-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - lw $t4,0($i0) # Te0[s0>>24] - lw $t5,0($i1) # Te0[s1>>24] - lw $t6,0($i2) # Te0[s2>>24] - lw $t7,0($i3) # Te0[s3>>24] - - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - - xor $t0,$t8 - xor $t1,$t9 - xor $t2,$t10 - xor $t3,$t11 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - sub $cnt,1 - $PTR_ADD $key0,16 - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - .set noreorder - bnez $cnt,.Loop_enc - _xtr $i0,$s1,16-2 - - .set reorder - _xtr $i1,$s2,16-2 - _xtr $i2,$s3,16-2 - _xtr $i3,$s0,16-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t0,2($i0) # Te4[s1>>16] - lbu $t1,2($i1) # Te4[s2>>16] - lbu $t2,2($i2) # Te4[s3>>16] - lbu $t3,2($i3) # Te4[s0>>16] - - _xtr $i0,$s2,8-2 - _xtr $i1,$s3,8-2 - _xtr $i2,$s0,8-2 - _xtr $i3,$s1,8-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t4,2($i0) # Te4[s2>>8] - lbu $t5,2($i1) # Te4[s3>>8] - lbu $t6,2($i2) # Te4[s0>>8] - lbu $t7,2($i3) # Te4[s1>>8] - - _xtr $i0,$s0,24-2 - _xtr $i1,$s1,24-2 - _xtr $i2,$s2,24-2 - _xtr $i3,$s3,24-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t8,2($i0) # Te4[s0>>24] - lbu $t9,2($i1) # Te4[s1>>24] - lbu $t10,2($i2) # Te4[s2>>24] - lbu $t11,2($i3) # Te4[s3>>24] - - _xtr $i0,$s3,0-2 - _xtr $i1,$s0,0-2 - _xtr $i2,$s1,0-2 - _xtr $i3,$s2,0-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - - _ins $t0,16 - _ins $t1,16 - _ins $t2,16 - _ins $t3,16 - - _ins $t4,8 - _ins $t5,8 - _ins $t6,8 - _ins $t7,8 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t4,2($i0) # Te4[s3] - lbu $t5,2($i1) # Te4[s0] - lbu $t6,2($i2) # Te4[s1] - lbu $t7,2($i3) # Te4[s2] - - _ins $t8,24 - _ins $t9,24 - _ins $t10,24 - _ins $t11,24 - - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - - xor $t0,$t8 - xor $t1,$t9 - xor $t2,$t10 - xor $t3,$t11 - - _ins $t4,0 - _ins $t5,0 - _ins $t6,0 - _ins $t7,0 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - - jr $ra -.end _mips_AES_encrypt - -.align 5 -.globl AES_encrypt -.ent AES_encrypt -AES_encrypt: - .frame $sp,$FRAMESIZE,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder -___ -$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification - .cpload $pf -___ -$code.=<<___; - $PTR_SUB $sp,$FRAMESIZE - $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) - $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) - $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) - $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) - $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification - .cplocal $Tbl - .cpsetup $pf,$zero,AES_encrypt -___ -$code.=<<___; - .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' - - lwl $s0,0+$MSB($inp) - lwl $s1,4+$MSB($inp) - lwl $s2,8+$MSB($inp) - lwl $s3,12+$MSB($inp) - lwr $s0,0+$LSB($inp) - lwr $s1,4+$LSB($inp) - lwr $s2,8+$LSB($inp) - lwr $s3,12+$LSB($inp) - - bal _mips_AES_encrypt - - swr $s0,0+$LSB($out) - swr $s1,4+$LSB($out) - swr $s2,8+$LSB($out) - swr $s3,12+$LSB($out) - swl $s0,0+$MSB($out) - swl $s1,4+$MSB($out) - swl $s2,8+$MSB($out) - swl $s3,12+$MSB($out) - - .set noreorder - $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) - $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) - $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) - $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) - $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE -.end AES_encrypt -___ - -$code.=<<___; -.align 5 -.ent _mips_AES_decrypt -_mips_AES_decrypt: - .frame $sp,0,$ra - .set reorder - lw $t0,0($key) - lw $t1,4($key) - lw $t2,8($key) - lw $t3,12($key) - lw $cnt,240($key) - $PTR_ADD $key0,$key,16 - - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - - sub $cnt,1 - _xtr $i0,$s3,16-2 -.Loop_dec: - _xtr $i1,$s0,16-2 - _xtr $i2,$s1,16-2 - _xtr $i3,$s2,16-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t0,3($i0) # Td1[s3>>16] - lwl $t1,3($i1) # Td1[s0>>16] - lwl $t2,3($i2) # Td1[s1>>16] - lwl $t3,3($i3) # Td1[s2>>16] - lwr $t0,2($i0) # Td1[s3>>16] - lwr $t1,2($i1) # Td1[s0>>16] - lwr $t2,2($i2) # Td1[s1>>16] - lwr $t3,2($i3) # Td1[s2>>16] - - _xtr $i0,$s2,8-2 - _xtr $i1,$s3,8-2 - _xtr $i2,$s0,8-2 - _xtr $i3,$s1,8-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t4,2($i0) # Td2[s2>>8] - lwl $t5,2($i1) # Td2[s3>>8] - lwl $t6,2($i2) # Td2[s0>>8] - lwl $t7,2($i3) # Td2[s1>>8] - lwr $t4,1($i0) # Td2[s2>>8] - lwr $t5,1($i1) # Td2[s3>>8] - lwr $t6,1($i2) # Td2[s0>>8] - lwr $t7,1($i3) # Td2[s1>>8] - - _xtr $i0,$s1,0-2 - _xtr $i1,$s2,0-2 - _xtr $i2,$s3,0-2 - _xtr $i3,$s0,0-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lwl $t8,1($i0) # Td3[s1] - lwl $t9,1($i1) # Td3[s2] - lwl $t10,1($i2) # Td3[s3] - lwl $t11,1($i3) # Td3[s0] - lwr $t8,0($i0) # Td3[s1] - lwr $t9,0($i1) # Td3[s2] - lwr $t10,0($i2) # Td3[s3] - lwr $t11,0($i3) # Td3[s0] - - _xtr $i0,$s0,24-2 - _xtr $i1,$s1,24-2 - _xtr $i2,$s2,24-2 - _xtr $i3,$s3,24-2 - and $i0,0x3fc - and $i1,0x3fc - and $i2,0x3fc - and $i3,0x3fc - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - - lw $t4,0($i0) # Td0[s0>>24] - lw $t5,0($i1) # Td0[s1>>24] - lw $t6,0($i2) # Td0[s2>>24] - lw $t7,0($i3) # Td0[s3>>24] - - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - - xor $t0,$t8 - xor $t1,$t9 - xor $t2,$t10 - xor $t3,$t11 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - sub $cnt,1 - $PTR_ADD $key0,16 - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - .set noreorder - bnez $cnt,.Loop_dec - _xtr $i0,$s3,16-2 - - .set reorder - lw $t4,1024($Tbl) # prefetch Td4 - lw $t5,1024+32($Tbl) - lw $t6,1024+64($Tbl) - lw $t7,1024+96($Tbl) - lw $t8,1024+128($Tbl) - lw $t9,1024+160($Tbl) - lw $t10,1024+192($Tbl) - lw $t11,1024+224($Tbl) - - _xtr $i0,$s3,16 - _xtr $i1,$s0,16 - _xtr $i2,$s1,16 - _xtr $i3,$s2,16 - and $i0,0xff - and $i1,0xff - and $i2,0xff - and $i3,0xff - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t0,1024($i0) # Td4[s3>>16] - lbu $t1,1024($i1) # Td4[s0>>16] - lbu $t2,1024($i2) # Td4[s1>>16] - lbu $t3,1024($i3) # Td4[s2>>16] - - _xtr $i0,$s2,8 - _xtr $i1,$s3,8 - _xtr $i2,$s0,8 - _xtr $i3,$s1,8 - and $i0,0xff - and $i1,0xff - and $i2,0xff - and $i3,0xff - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t4,1024($i0) # Td4[s2>>8] - lbu $t5,1024($i1) # Td4[s3>>8] - lbu $t6,1024($i2) # Td4[s0>>8] - lbu $t7,1024($i3) # Td4[s1>>8] - - _xtr $i0,$s0,24 - _xtr $i1,$s1,24 - _xtr $i2,$s2,24 - _xtr $i3,$s3,24 - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t8,1024($i0) # Td4[s0>>24] - lbu $t9,1024($i1) # Td4[s1>>24] - lbu $t10,1024($i2) # Td4[s2>>24] - lbu $t11,1024($i3) # Td4[s3>>24] - - _xtr $i0,$s1,0 - _xtr $i1,$s2,0 - _xtr $i2,$s3,0 - _xtr $i3,$s0,0 - - _ins $t0,16 - _ins $t1,16 - _ins $t2,16 - _ins $t3,16 - - _ins $t4,8 - _ins $t5,8 - _ins $t6,8 - _ins $t7,8 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $t4,1024($i0) # Td4[s1] - lbu $t5,1024($i1) # Td4[s2] - lbu $t6,1024($i2) # Td4[s3] - lbu $t7,1024($i3) # Td4[s0] - - _ins $t8,24 - _ins $t9,24 - _ins $t10,24 - _ins $t11,24 - - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - - _ins $t4,0 - _ins $t5,0 - _ins $t6,0 - _ins $t7,0 - - - xor $t0,$t8 - xor $t1,$t9 - xor $t2,$t10 - xor $t3,$t11 - - xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - xor $s0,$t0 - xor $s1,$t1 - xor $s2,$t2 - xor $s3,$t3 - - jr $ra -.end _mips_AES_decrypt - -.align 5 -.globl AES_decrypt -.ent AES_decrypt -AES_decrypt: - .frame $sp,$FRAMESIZE,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder -___ -$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification - .cpload $pf -___ -$code.=<<___; - $PTR_SUB $sp,$FRAMESIZE - $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) - $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) - $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) - $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) - $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification - .cplocal $Tbl - .cpsetup $pf,$zero,AES_decrypt -___ -$code.=<<___; - .set reorder - la $Tbl,AES_Td # PIC-ified 'load address' - - lwl $s0,0+$MSB($inp) - lwl $s1,4+$MSB($inp) - lwl $s2,8+$MSB($inp) - lwl $s3,12+$MSB($inp) - lwr $s0,0+$LSB($inp) - lwr $s1,4+$LSB($inp) - lwr $s2,8+$LSB($inp) - lwr $s3,12+$LSB($inp) - - bal _mips_AES_decrypt - - swr $s0,0+$LSB($out) - swr $s1,4+$LSB($out) - swr $s2,8+$LSB($out) - swr $s3,12+$LSB($out) - swl $s0,0+$MSB($out) - swl $s1,4+$MSB($out) - swl $s2,8+$MSB($out) - swl $s3,12+$MSB($out) - - .set noreorder - $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) - $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) - $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) - $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) - $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE -.end AES_decrypt -___ -}}} - -{{{ -my $FRAMESIZE=8*$SZREG; -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; - -my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3); -my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); -my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); -my ($rcon,$cnt)=($gp,$fp); - -$code.=<<___; -.align 5 -.ent _mips_AES_set_encrypt_key -_mips_AES_set_encrypt_key: - .frame $sp,0,$ra - .set noreorder - beqz $inp,.Lekey_done - li $t0,-1 - beqz $key,.Lekey_done - $PTR_ADD $rcon,$Tbl,1024+256 - - .set reorder - lwl $rk0,0+$MSB($inp) # load 128 bits - lwl $rk1,4+$MSB($inp) - lwl $rk2,8+$MSB($inp) - lwl $rk3,12+$MSB($inp) - li $at,128 - lwr $rk0,0+$LSB($inp) - lwr $rk1,4+$LSB($inp) - lwr $rk2,8+$LSB($inp) - lwr $rk3,12+$LSB($inp) - .set noreorder - beq $bits,$at,.L128bits - li $cnt,10 - - .set reorder - lwl $rk4,16+$MSB($inp) # load 192 bits - lwl $rk5,20+$MSB($inp) - li $at,192 - lwr $rk4,16+$LSB($inp) - lwr $rk5,20+$LSB($inp) - .set noreorder - beq $bits,$at,.L192bits - li $cnt,8 - - .set reorder - lwl $rk6,24+$MSB($inp) # load 256 bits - lwl $rk7,28+$MSB($inp) - li $at,256 - lwr $rk6,24+$LSB($inp) - lwr $rk7,28+$LSB($inp) - .set noreorder - beq $bits,$at,.L256bits - li $cnt,7 - - b .Lekey_done - li $t0,-2 - -.align 4 -.L128bits: - .set reorder - srl $i0,$rk3,16 - srl $i1,$rk3,8 - and $i0,0xff - and $i1,0xff - and $i2,$rk3,0xff - srl $i3,$rk3,24 - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) - - sw $rk0,0($key) - sw $rk1,4($key) - sw $rk2,8($key) - sw $rk3,12($key) - sub $cnt,1 - $PTR_ADD $key,16 - - _bias $i0,24 - _bias $i1,16 - _bias $i2,8 - _bias $i3,0 - - xor $rk0,$i0 - lw $i0,0($rcon) - xor $rk0,$i1 - xor $rk0,$i2 - xor $rk0,$i3 - xor $rk0,$i0 - - xor $rk1,$rk0 - xor $rk2,$rk1 - xor $rk3,$rk2 - - .set noreorder - bnez $cnt,.L128bits - $PTR_ADD $rcon,4 - - sw $rk0,0($key) - sw $rk1,4($key) - sw $rk2,8($key) - li $cnt,10 - sw $rk3,12($key) - li $t0,0 - sw $cnt,80($key) - b .Lekey_done - $PTR_SUB $key,10*16 - -.align 4 -.L192bits: - .set reorder - srl $i0,$rk5,16 - srl $i1,$rk5,8 - and $i0,0xff - and $i1,0xff - and $i2,$rk5,0xff - srl $i3,$rk5,24 - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) - - sw $rk0,0($key) - sw $rk1,4($key) - sw $rk2,8($key) - sw $rk3,12($key) - sw $rk4,16($key) - sw $rk5,20($key) - sub $cnt,1 - $PTR_ADD $key,24 - - _bias $i0,24 - _bias $i1,16 - _bias $i2,8 - _bias $i3,0 - - xor $rk0,$i0 - lw $i0,0($rcon) - xor $rk0,$i1 - xor $rk0,$i2 - xor $rk0,$i3 - xor $rk0,$i0 - - xor $rk1,$rk0 - xor $rk2,$rk1 - xor $rk3,$rk2 - xor $rk4,$rk3 - xor $rk5,$rk4 - - .set noreorder - bnez $cnt,.L192bits - $PTR_ADD $rcon,4 - - sw $rk0,0($key) - sw $rk1,4($key) - sw $rk2,8($key) - li $cnt,12 - sw $rk3,12($key) - li $t0,0 - sw $cnt,48($key) - b .Lekey_done - $PTR_SUB $key,12*16 - -.align 4 -.L256bits: - .set reorder - srl $i0,$rk7,16 - srl $i1,$rk7,8 - and $i0,0xff - and $i1,0xff - and $i2,$rk7,0xff - srl $i3,$rk7,24 - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) - - sw $rk0,0($key) - sw $rk1,4($key) - sw $rk2,8($key) - sw $rk3,12($key) - sw $rk4,16($key) - sw $rk5,20($key) - sw $rk6,24($key) - sw $rk7,28($key) - sub $cnt,1 - - _bias $i0,24 - _bias $i1,16 - _bias $i2,8 - _bias $i3,0 - - xor $rk0,$i0 - lw $i0,0($rcon) - xor $rk0,$i1 - xor $rk0,$i2 - xor $rk0,$i3 - xor $rk0,$i0 - - xor $rk1,$rk0 - xor $rk2,$rk1 - xor $rk3,$rk2 - beqz $cnt,.L256bits_done - - srl $i0,$rk3,24 - srl $i1,$rk3,16 - srl $i2,$rk3,8 - and $i3,$rk3,0xff - and $i1,0xff - and $i2,0xff - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) - sll $i0,24 - sll $i1,16 - sll $i2,8 - - xor $rk4,$i0 - xor $rk4,$i1 - xor $rk4,$i2 - xor $rk4,$i3 - - xor $rk5,$rk4 - xor $rk6,$rk5 - xor $rk7,$rk6 - - $PTR_ADD $key,32 - .set noreorder - b .L256bits - $PTR_ADD $rcon,4 - -.L256bits_done: - sw $rk0,32($key) - sw $rk1,36($key) - sw $rk2,40($key) - li $cnt,14 - sw $rk3,44($key) - li $t0,0 - sw $cnt,48($key) - $PTR_SUB $key,12*16 - -.Lekey_done: - jr $ra - nop -.end _mips_AES_set_encrypt_key - -.globl private_AES_set_encrypt_key -.ent private_AES_set_encrypt_key -private_AES_set_encrypt_key: - .frame $sp,$FRAMESIZE,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder -___ -$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification - .cpload $pf -___ -$code.=<<___; - $PTR_SUB $sp,$FRAMESIZE - $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) - $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) - $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) - $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) - $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) -___ -$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification - .cplocal $Tbl - .cpsetup $pf,$zero,private_AES_set_encrypt_key -___ -$code.=<<___; - .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' - - bal _mips_AES_set_encrypt_key - - .set noreorder - move $a0,$t0 - $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) - $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) - $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) - $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) - $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE -.end private_AES_set_encrypt_key -___ - -my ($head,$tail)=($inp,$bits); -my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); -my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); -$code.=<<___; -.align 5 -.globl private_AES_set_decrypt_key -.ent private_AES_set_decrypt_key -private_AES_set_decrypt_key: - .frame $sp,$FRAMESIZE,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder -___ -$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification - .cpload $pf -___ -$code.=<<___; - $PTR_SUB $sp,$FRAMESIZE - $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) - $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) - $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) - $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) - $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) -___ -$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification - .cplocal $Tbl - .cpsetup $pf,$zero,private_AES_set_decrypt_key -___ -$code.=<<___; - .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' - - bal _mips_AES_set_encrypt_key - - bltz $t0,.Ldkey_done - - sll $at,$cnt,4 - $PTR_ADD $head,$key,0 - $PTR_ADD $tail,$key,$at -.align 4 -.Lswap: - lw $rk0,0($head) - lw $rk1,4($head) - lw $rk2,8($head) - lw $rk3,12($head) - lw $rk4,0($tail) - lw $rk5,4($tail) - lw $rk6,8($tail) - lw $rk7,12($tail) - sw $rk0,0($tail) - sw $rk1,4($tail) - sw $rk2,8($tail) - sw $rk3,12($tail) - $PTR_ADD $head,16 - $PTR_SUB $tail,16 - sw $rk4,-16($head) - sw $rk5,-12($head) - sw $rk6,-8($head) - sw $rk7,-4($head) - bne $head,$tail,.Lswap - - lw $tp1,16($key) # modulo-scheduled - lui $x80808080,0x8080 - sub $cnt,1 - or $x80808080,0x8080 - sll $cnt,2 - $PTR_ADD $key,16 - lui $x1b1b1b1b,0x1b1b - nor $x7f7f7f7f,$zero,$x80808080 - or $x1b1b1b1b,0x1b1b -.align 4 -.Lmix: - and $m,$tp1,$x80808080 - and $tp2,$tp1,$x7f7f7f7f - srl $tp4,$m,7 - addu $tp2,$tp2 # tp2<<1 - subu $m,$tp4 - and $m,$x1b1b1b1b - xor $tp2,$m - - and $m,$tp2,$x80808080 - and $tp4,$tp2,$x7f7f7f7f - srl $tp8,$m,7 - addu $tp4,$tp4 # tp4<<1 - subu $m,$tp8 - and $m,$x1b1b1b1b - xor $tp4,$m - - and $m,$tp4,$x80808080 - and $tp8,$tp4,$x7f7f7f7f - srl $tp9,$m,7 - addu $tp8,$tp8 # tp8<<1 - subu $m,$tp9 - and $m,$x1b1b1b1b - xor $tp8,$m - - xor $tp9,$tp8,$tp1 - xor $tpe,$tp8,$tp4 - xor $tpb,$tp9,$tp2 - xor $tpd,$tp9,$tp4 - - _ror $tp1,$tpd,16 - xor $tpe,$tp2 - _ror $tp2,$tpd,-16 - xor $tpe,$tp1 - _ror $tp1,$tp9,8 - xor $tpe,$tp2 - _ror $tp2,$tp9,-24 - xor $tpe,$tp1 - _ror $tp1,$tpb,24 - xor $tpe,$tp2 - _ror $tp2,$tpb,-8 - xor $tpe,$tp1 - lw $tp1,4($key) # modulo-scheduled - xor $tpe,$tp2 - sub $cnt,1 - sw $tpe,0($key) - $PTR_ADD $key,4 - bnez $cnt,.Lmix - - li $t0,0 -.Ldkey_done: - .set noreorder - move $a0,$t0 - $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) - $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) - $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) - $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) - $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE -.end private_AES_set_decrypt_key -___ -}}} - -###################################################################### -# Tables are kept in endian-neutral manner -$code.=<<___; -.rdata -.align 6 -AES_Te: -.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 -.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d -.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd -.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 -.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 -.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d -.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 -.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a -.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d -.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 -.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb -.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b -.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 -.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea -.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 -.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b -.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c -.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a -.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 -.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f -.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 -.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 -.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 -.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f -.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 -.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e -.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 -.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 -.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 -.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d -.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 -.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f -.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e -.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e -.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 -.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb -.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d -.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce -.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e -.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 -.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 -.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c -.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f -.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed -.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 -.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b -.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 -.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a -.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a -.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 -.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 -.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 -.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 -.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 -.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 -.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 -.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe -.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a -.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc -.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 -.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 -.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 -.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a -.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d -.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 -.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f -.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 -.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 -.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 -.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 -.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 -.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 -.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 -.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f -.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e -.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 -.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 -.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c -.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 -.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 -.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 -.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e -.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a -.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 -.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e -.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 -.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 -.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b -.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 -.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 -.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 -.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 -.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa -.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 -.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e -.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 -.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 -.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 -.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 -.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 -.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c -.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 -.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc -.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 -.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 -.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa -.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 -.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 -.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f -.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 -.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 -.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 -.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 -.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 -.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 -.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 -.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 -.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 -.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff -.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a -.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 -.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 -.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 -.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 -.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 -.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 -.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc -.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a - -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 - -.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon -.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 -.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 -.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 -.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 - -.align 6 -AES_Td: -.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 -.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 -.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 -.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 -.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 -.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 -.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 -.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f -.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 -.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 -.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 -.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 -.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 -.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda -.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 -.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 -.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 -.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd -.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 -.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 -.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 -.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 -.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 -.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 -.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 -.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 -.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 -.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a -.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 -.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 -.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 -.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c -.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 -.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 -.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 -.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a -.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 -.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 -.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa -.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 -.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d -.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 -.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 -.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff -.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 -.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 -.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 -.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb -.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 -.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 -.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 -.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e -.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 -.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 -.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 -.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a -.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f -.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e -.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 -.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 -.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 -.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d -.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad -.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 -.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c -.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd -.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc -.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 -.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc -.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 -.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 -.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 -.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 -.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d -.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 -.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 -.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 -.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 -.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a -.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef -.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 -.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 -.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 -.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 -.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d -.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 -.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 -.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 -.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c -.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 -.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 -.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b -.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 -.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 -.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e -.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 -.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce -.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 -.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 -.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 -.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 -.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 -.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 -.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f -.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d -.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf -.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b -.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f -.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d -.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e -.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 -.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 -.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a -.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 -.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 -.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c -.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f -.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf -.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b -.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 -.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e -.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f -.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c -.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 -.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde -.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 -.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 -.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 - -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -___ - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - - # made-up _instructions, _xtr, _ins, _ror and _bias, cope - # with byte order dependencies... - if (/^\s+_/) { - s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/; - - s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/ - sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) - : eval("24-$3"))/e or - s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ - sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) - : eval("24-$3"))/e or - s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/ - sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) - : eval("$3*-1"))/e or - s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ - sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) - : eval("($3-16)&31"))/e; - - s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/ - sprintf("sll\t$1,$2,$3")/e or - s/srl\s+(\$[0-9]+),(\$[0-9]+),0/ - sprintf("and\t$1,$2,0xff")/e or - s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/; - } - - # convert lwl/lwr and swr/swl to little-endian order - if (!$big_endian && /^\s+[sl]w[lr]\s+/) { - s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/ - sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or - s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/ - sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e; - } - - print $_,"\n"; -} - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-parisc.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-parisc.pl deleted file mode 100644 index 714dcfbbe3..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-parisc.pl +++ /dev/null @@ -1,1022 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for PA-RISC. -# -# June 2009. -# -# The module is mechanical transliteration of aes-sparcv9.pl, but with -# a twist: S-boxes are compressed even further down to 1K+256B. On -# PA-7100LC performance is ~40% better than gcc 3.2 generated code and -# is about 33 cycles per byte processed with 128-bit key. Newer CPUs -# perform at 16 cycles per byte. It's not faster than code generated -# by vendor compiler, but recall that it has compressed S-boxes, which -# requires extra processing. -# -# Special thanks to polarhome.com for providing HP-UX account. - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; -} else { - $LEVEL ="1.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; -} - -$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker - # [+ argument transfer] -$inp="%r26"; # arg0 -$out="%r25"; # arg1 -$key="%r24"; # arg2 - -($s0,$s1,$s2,$s3) = ("%r1","%r2","%r3","%r4"); -($t0,$t1,$t2,$t3) = ("%r5","%r6","%r7","%r8"); - -($acc0, $acc1, $acc2, $acc3, $acc4, $acc5, $acc6, $acc7, - $acc8, $acc9,$acc10,$acc11,$acc12,$acc13,$acc14,$acc15) = -("%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16", -"%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r26"); - -$tbl="%r28"; -$rounds="%r29"; - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT AES_encrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR - .ALIGN 64 -AES_encrypt - .PROC - .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) - $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) - $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) - $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) - $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) - $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) - $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) - $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) - - blr %r0,$tbl - ldi 3,$t0 -L\$enc_pic - andcm $tbl,$t0,$tbl - ldo L\$AES_Te-L\$enc_pic($tbl),$tbl - - and $inp,$t0,$t0 - sub $inp,$t0,$inp - ldw 0($inp),$s0 - ldw 4($inp),$s1 - ldw 8($inp),$s2 - comib,= 0,$t0,L\$enc_inp_aligned - ldw 12($inp),$s3 - - sh3addl $t0,%r0,$t0 - subi 32,$t0,$t0 - mtctl $t0,%cr11 - ldw 16($inp),$t1 - vshd $s0,$s1,$s0 - vshd $s1,$s2,$s1 - vshd $s2,$s3,$s2 - vshd $s3,$t1,$s3 - -L\$enc_inp_aligned - bl _parisc_AES_encrypt,%r31 - nop - - extru,<> $out,31,2,%r0 - b L\$enc_out_aligned - nop - - _srm $s0,24,$acc0 - _srm $s0,16,$acc1 - stb $acc0,0($out) - _srm $s0,8,$acc2 - stb $acc1,1($out) - _srm $s1,24,$acc4 - stb $acc2,2($out) - _srm $s1,16,$acc5 - stb $s0,3($out) - _srm $s1,8,$acc6 - stb $acc4,4($out) - _srm $s2,24,$acc0 - stb $acc5,5($out) - _srm $s2,16,$acc1 - stb $acc6,6($out) - _srm $s2,8,$acc2 - stb $s1,7($out) - _srm $s3,24,$acc4 - stb $acc0,8($out) - _srm $s3,16,$acc5 - stb $acc1,9($out) - _srm $s3,8,$acc6 - stb $acc2,10($out) - stb $s2,11($out) - stb $acc4,12($out) - stb $acc5,13($out) - stb $acc6,14($out) - b L\$enc_done - stb $s3,15($out) - -L\$enc_out_aligned - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - -L\$enc_done - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 - $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 - $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 - $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 - $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 - $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 - $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 - $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - - .ALIGN 16 -_parisc_AES_encrypt - .PROC - .CALLINFO MILLICODE - .ENTRY - ldw 240($key),$rounds - ldw 0($key),$t0 - ldw 4($key),$t1 - ldw 8($key),$t2 - _srm $rounds,1,$rounds - xor $t0,$s0,$s0 - ldw 12($key),$t3 - _srm $s0,24,$acc0 - xor $t1,$s1,$s1 - ldw 16($key),$t0 - _srm $s1,16,$acc1 - xor $t2,$s2,$s2 - ldw 20($key),$t1 - xor $t3,$s3,$s3 - ldw 24($key),$t2 - ldw 28($key),$t3 -L\$enc_loop - _srm $s2,8,$acc2 - ldwx,s $acc0($tbl),$acc0 - _srm $s3,0,$acc3 - ldwx,s $acc1($tbl),$acc1 - _srm $s1,24,$acc4 - ldwx,s $acc2($tbl),$acc2 - _srm $s2,16,$acc5 - ldwx,s $acc3($tbl),$acc3 - _srm $s3,8,$acc6 - ldwx,s $acc4($tbl),$acc4 - _srm $s0,0,$acc7 - ldwx,s $acc5($tbl),$acc5 - _srm $s2,24,$acc8 - ldwx,s $acc6($tbl),$acc6 - _srm $s3,16,$acc9 - ldwx,s $acc7($tbl),$acc7 - _srm $s0,8,$acc10 - ldwx,s $acc8($tbl),$acc8 - _srm $s1,0,$acc11 - ldwx,s $acc9($tbl),$acc9 - _srm $s3,24,$acc12 - ldwx,s $acc10($tbl),$acc10 - _srm $s0,16,$acc13 - ldwx,s $acc11($tbl),$acc11 - _srm $s1,8,$acc14 - ldwx,s $acc12($tbl),$acc12 - _srm $s2,0,$acc15 - ldwx,s $acc13($tbl),$acc13 - ldwx,s $acc14($tbl),$acc14 - ldwx,s $acc15($tbl),$acc15 - addib,= -1,$rounds,L\$enc_last - ldo 32($key),$key - - _ror $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ldw 0($key),$s0 - _ror $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ldw 4($key),$s1 - _ror $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ldw 8($key),$s2 - _ror $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ldw 12($key),$s3 - _ror $acc6,16,$acc6 - xor $acc4,$t1,$t1 - _ror $acc7,24,$acc7 - xor $acc5,$t1,$t1 - _ror $acc9,8,$acc9 - xor $acc6,$t1,$t1 - _ror $acc10,16,$acc10 - xor $acc7,$t1,$t1 - _ror $acc11,24,$acc11 - xor $acc8,$t2,$t2 - _ror $acc13,8,$acc13 - xor $acc9,$t2,$t2 - _ror $acc14,16,$acc14 - xor $acc10,$t2,$t2 - _ror $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - _srm $t0,24,$acc0 - xor $acc14,$t3,$t3 - _srm $t1,16,$acc1 - xor $acc15,$t3,$t3 - - _srm $t2,8,$acc2 - ldwx,s $acc0($tbl),$acc0 - _srm $t3,0,$acc3 - ldwx,s $acc1($tbl),$acc1 - _srm $t1,24,$acc4 - ldwx,s $acc2($tbl),$acc2 - _srm $t2,16,$acc5 - ldwx,s $acc3($tbl),$acc3 - _srm $t3,8,$acc6 - ldwx,s $acc4($tbl),$acc4 - _srm $t0,0,$acc7 - ldwx,s $acc5($tbl),$acc5 - _srm $t2,24,$acc8 - ldwx,s $acc6($tbl),$acc6 - _srm $t3,16,$acc9 - ldwx,s $acc7($tbl),$acc7 - _srm $t0,8,$acc10 - ldwx,s $acc8($tbl),$acc8 - _srm $t1,0,$acc11 - ldwx,s $acc9($tbl),$acc9 - _srm $t3,24,$acc12 - ldwx,s $acc10($tbl),$acc10 - _srm $t0,16,$acc13 - ldwx,s $acc11($tbl),$acc11 - _srm $t1,8,$acc14 - ldwx,s $acc12($tbl),$acc12 - _srm $t2,0,$acc15 - ldwx,s $acc13($tbl),$acc13 - _ror $acc1,8,$acc1 - ldwx,s $acc14($tbl),$acc14 - - _ror $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldwx,s $acc15($tbl),$acc15 - _ror $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ldw 16($key),$t0 - _ror $acc5,8,$acc5 - xor $acc2,$s0,$s0 - ldw 20($key),$t1 - _ror $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ldw 24($key),$t2 - _ror $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ldw 28($key),$t3 - _ror $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldw 1024+0($tbl),%r0 ; prefetch te4 - _ror $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldw 1024+32($tbl),%r0 ; prefetch te4 - _ror $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldw 1024+64($tbl),%r0 ; prefetch te4 - _ror $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldw 1024+96($tbl),%r0 ; prefetch te4 - _ror $acc14,16,$acc14 - xor $acc9,$s2,$s2 - ldw 1024+128($tbl),%r0 ; prefetch te4 - _ror $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldw 1024+160($tbl),%r0 ; prefetch te4 - _srm $s0,24,$acc0 - xor $acc11,$s2,$s2 - ldw 1024+192($tbl),%r0 ; prefetch te4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldw 1024+224($tbl),%r0 ; prefetch te4 - _srm $s1,16,$acc1 - xor $acc14,$s3,$s3 - b L\$enc_loop - xor $acc15,$s3,$s3 - - .ALIGN 16 -L\$enc_last - ldo 1024($tbl),$rounds - _ror $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ldw 0($key),$s0 - _ror $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ldw 4($key),$s1 - _ror $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ldw 8($key),$s2 - _ror $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ldw 12($key),$s3 - _ror $acc6,16,$acc6 - xor $acc4,$t1,$t1 - _ror $acc7,24,$acc7 - xor $acc5,$t1,$t1 - _ror $acc9,8,$acc9 - xor $acc6,$t1,$t1 - _ror $acc10,16,$acc10 - xor $acc7,$t1,$t1 - _ror $acc11,24,$acc11 - xor $acc8,$t2,$t2 - _ror $acc13,8,$acc13 - xor $acc9,$t2,$t2 - _ror $acc14,16,$acc14 - xor $acc10,$t2,$t2 - _ror $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - _srm $t0,24,$acc0 - xor $acc14,$t3,$t3 - _srm $t1,16,$acc1 - xor $acc15,$t3,$t3 - - _srm $t2,8,$acc2 - ldbx $acc0($rounds),$acc0 - _srm $t1,24,$acc4 - ldbx $acc1($rounds),$acc1 - _srm $t2,16,$acc5 - _srm $t3,0,$acc3 - ldbx $acc2($rounds),$acc2 - ldbx $acc3($rounds),$acc3 - _srm $t3,8,$acc6 - ldbx $acc4($rounds),$acc4 - _srm $t2,24,$acc8 - ldbx $acc5($rounds),$acc5 - _srm $t3,16,$acc9 - _srm $t0,0,$acc7 - ldbx $acc6($rounds),$acc6 - ldbx $acc7($rounds),$acc7 - _srm $t0,8,$acc10 - ldbx $acc8($rounds),$acc8 - _srm $t3,24,$acc12 - ldbx $acc9($rounds),$acc9 - _srm $t0,16,$acc13 - _srm $t1,0,$acc11 - ldbx $acc10($rounds),$acc10 - _srm $t1,8,$acc14 - ldbx $acc11($rounds),$acc11 - ldbx $acc12($rounds),$acc12 - ldbx $acc13($rounds),$acc13 - _srm $t2,0,$acc15 - ldbx $acc14($rounds),$acc14 - - dep $acc0,7,8,$acc3 - ldbx $acc15($rounds),$acc15 - dep $acc4,7,8,$acc7 - dep $acc1,15,8,$acc3 - dep $acc5,15,8,$acc7 - dep $acc2,23,8,$acc3 - dep $acc6,23,8,$acc7 - xor $acc3,$s0,$s0 - xor $acc7,$s1,$s1 - dep $acc8,7,8,$acc11 - dep $acc12,7,8,$acc15 - dep $acc9,15,8,$acc11 - dep $acc13,15,8,$acc15 - dep $acc10,23,8,$acc11 - dep $acc14,23,8,$acc15 - xor $acc11,$s2,$s2 - - bv (%r31) - .EXIT - xor $acc15,$s3,$s3 - .PROCEND - - .ALIGN 64 -L\$AES_Te - .WORD 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d - .WORD 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 - .WORD 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d - .WORD 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a - .WORD 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 - .WORD 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b - .WORD 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea - .WORD 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b - .WORD 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a - .WORD 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f - .WORD 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 - .WORD 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f - .WORD 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e - .WORD 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 - .WORD 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d - .WORD 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f - .WORD 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e - .WORD 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb - .WORD 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce - .WORD 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 - .WORD 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c - .WORD 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed - .WORD 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b - .WORD 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a - .WORD 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 - .WORD 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 - .WORD 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 - .WORD 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 - .WORD 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a - .WORD 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 - .WORD 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 - .WORD 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d - .WORD 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f - .WORD 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 - .WORD 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 - .WORD 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 - .WORD 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f - .WORD 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 - .WORD 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c - .WORD 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 - .WORD 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e - .WORD 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 - .WORD 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 - .WORD 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b - .WORD 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 - .WORD 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 - .WORD 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 - .WORD 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 - .WORD 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 - .WORD 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 - .WORD 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 - .WORD 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 - .WORD 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa - .WORD 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 - .WORD 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 - .WORD 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 - .WORD 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 - .WORD 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 - .WORD 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 - .WORD 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a - .WORD 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 - .WORD 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 - .WORD 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 - .WORD 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a - .BYTE 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .BYTE 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .BYTE 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .BYTE 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .BYTE 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .BYTE 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .BYTE 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .BYTE 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .BYTE 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .BYTE 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .BYTE 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .BYTE 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .BYTE 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .BYTE 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .BYTE 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .BYTE 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .BYTE 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .BYTE 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .BYTE 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .BYTE 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .BYTE 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .BYTE 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .BYTE 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .BYTE 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .BYTE 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .BYTE 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .BYTE 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .BYTE 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - .BYTE 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .BYTE 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .BYTE 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .BYTE 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -___ - -$code.=<<___; - .EXPORT AES_decrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR - .ALIGN 16 -AES_decrypt - .PROC - .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) - $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) - $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) - $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) - $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) - $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) - $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) - $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) - - blr %r0,$tbl - ldi 3,$t0 -L\$dec_pic - andcm $tbl,$t0,$tbl - ldo L\$AES_Td-L\$dec_pic($tbl),$tbl - - and $inp,$t0,$t0 - sub $inp,$t0,$inp - ldw 0($inp),$s0 - ldw 4($inp),$s1 - ldw 8($inp),$s2 - comib,= 0,$t0,L\$dec_inp_aligned - ldw 12($inp),$s3 - - sh3addl $t0,%r0,$t0 - subi 32,$t0,$t0 - mtctl $t0,%cr11 - ldw 16($inp),$t1 - vshd $s0,$s1,$s0 - vshd $s1,$s2,$s1 - vshd $s2,$s3,$s2 - vshd $s3,$t1,$s3 - -L\$dec_inp_aligned - bl _parisc_AES_decrypt,%r31 - nop - - extru,<> $out,31,2,%r0 - b L\$dec_out_aligned - nop - - _srm $s0,24,$acc0 - _srm $s0,16,$acc1 - stb $acc0,0($out) - _srm $s0,8,$acc2 - stb $acc1,1($out) - _srm $s1,24,$acc4 - stb $acc2,2($out) - _srm $s1,16,$acc5 - stb $s0,3($out) - _srm $s1,8,$acc6 - stb $acc4,4($out) - _srm $s2,24,$acc0 - stb $acc5,5($out) - _srm $s2,16,$acc1 - stb $acc6,6($out) - _srm $s2,8,$acc2 - stb $s1,7($out) - _srm $s3,24,$acc4 - stb $acc0,8($out) - _srm $s3,16,$acc5 - stb $acc1,9($out) - _srm $s3,8,$acc6 - stb $acc2,10($out) - stb $s2,11($out) - stb $acc4,12($out) - stb $acc5,13($out) - stb $acc6,14($out) - b L\$dec_done - stb $s3,15($out) - -L\$dec_out_aligned - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - -L\$dec_done - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 - $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 - $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 - $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 - $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 - $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 - $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 - $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - - .ALIGN 16 -_parisc_AES_decrypt - .PROC - .CALLINFO MILLICODE - .ENTRY - ldw 240($key),$rounds - ldw 0($key),$t0 - ldw 4($key),$t1 - ldw 8($key),$t2 - ldw 12($key),$t3 - _srm $rounds,1,$rounds - xor $t0,$s0,$s0 - ldw 16($key),$t0 - xor $t1,$s1,$s1 - ldw 20($key),$t1 - _srm $s0,24,$acc0 - xor $t2,$s2,$s2 - ldw 24($key),$t2 - xor $t3,$s3,$s3 - ldw 28($key),$t3 - _srm $s3,16,$acc1 -L\$dec_loop - _srm $s2,8,$acc2 - ldwx,s $acc0($tbl),$acc0 - _srm $s1,0,$acc3 - ldwx,s $acc1($tbl),$acc1 - _srm $s1,24,$acc4 - ldwx,s $acc2($tbl),$acc2 - _srm $s0,16,$acc5 - ldwx,s $acc3($tbl),$acc3 - _srm $s3,8,$acc6 - ldwx,s $acc4($tbl),$acc4 - _srm $s2,0,$acc7 - ldwx,s $acc5($tbl),$acc5 - _srm $s2,24,$acc8 - ldwx,s $acc6($tbl),$acc6 - _srm $s1,16,$acc9 - ldwx,s $acc7($tbl),$acc7 - _srm $s0,8,$acc10 - ldwx,s $acc8($tbl),$acc8 - _srm $s3,0,$acc11 - ldwx,s $acc9($tbl),$acc9 - _srm $s3,24,$acc12 - ldwx,s $acc10($tbl),$acc10 - _srm $s2,16,$acc13 - ldwx,s $acc11($tbl),$acc11 - _srm $s1,8,$acc14 - ldwx,s $acc12($tbl),$acc12 - _srm $s0,0,$acc15 - ldwx,s $acc13($tbl),$acc13 - ldwx,s $acc14($tbl),$acc14 - ldwx,s $acc15($tbl),$acc15 - addib,= -1,$rounds,L\$dec_last - ldo 32($key),$key - - _ror $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ldw 0($key),$s0 - _ror $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ldw 4($key),$s1 - _ror $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ldw 8($key),$s2 - _ror $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ldw 12($key),$s3 - _ror $acc6,16,$acc6 - xor $acc4,$t1,$t1 - _ror $acc7,24,$acc7 - xor $acc5,$t1,$t1 - _ror $acc9,8,$acc9 - xor $acc6,$t1,$t1 - _ror $acc10,16,$acc10 - xor $acc7,$t1,$t1 - _ror $acc11,24,$acc11 - xor $acc8,$t2,$t2 - _ror $acc13,8,$acc13 - xor $acc9,$t2,$t2 - _ror $acc14,16,$acc14 - xor $acc10,$t2,$t2 - _ror $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - _srm $t0,24,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 - _srm $t3,16,$acc1 - - _srm $t2,8,$acc2 - ldwx,s $acc0($tbl),$acc0 - _srm $t1,0,$acc3 - ldwx,s $acc1($tbl),$acc1 - _srm $t1,24,$acc4 - ldwx,s $acc2($tbl),$acc2 - _srm $t0,16,$acc5 - ldwx,s $acc3($tbl),$acc3 - _srm $t3,8,$acc6 - ldwx,s $acc4($tbl),$acc4 - _srm $t2,0,$acc7 - ldwx,s $acc5($tbl),$acc5 - _srm $t2,24,$acc8 - ldwx,s $acc6($tbl),$acc6 - _srm $t1,16,$acc9 - ldwx,s $acc7($tbl),$acc7 - _srm $t0,8,$acc10 - ldwx,s $acc8($tbl),$acc8 - _srm $t3,0,$acc11 - ldwx,s $acc9($tbl),$acc9 - _srm $t3,24,$acc12 - ldwx,s $acc10($tbl),$acc10 - _srm $t2,16,$acc13 - ldwx,s $acc11($tbl),$acc11 - _srm $t1,8,$acc14 - ldwx,s $acc12($tbl),$acc12 - _srm $t0,0,$acc15 - ldwx,s $acc13($tbl),$acc13 - _ror $acc1,8,$acc1 - ldwx,s $acc14($tbl),$acc14 - - _ror $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldwx,s $acc15($tbl),$acc15 - _ror $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ldw 16($key),$t0 - _ror $acc5,8,$acc5 - xor $acc2,$s0,$s0 - ldw 20($key),$t1 - _ror $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ldw 24($key),$t2 - _ror $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ldw 28($key),$t3 - _ror $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldw 1024+0($tbl),%r0 ; prefetch td4 - _ror $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldw 1024+32($tbl),%r0 ; prefetch td4 - _ror $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldw 1024+64($tbl),%r0 ; prefetch td4 - _ror $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldw 1024+96($tbl),%r0 ; prefetch td4 - _ror $acc14,16,$acc14 - xor $acc9,$s2,$s2 - ldw 1024+128($tbl),%r0 ; prefetch td4 - _ror $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldw 1024+160($tbl),%r0 ; prefetch td4 - _srm $s0,24,$acc0 - xor $acc11,$s2,$s2 - ldw 1024+192($tbl),%r0 ; prefetch td4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldw 1024+224($tbl),%r0 ; prefetch td4 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - b L\$dec_loop - _srm $s3,16,$acc1 - - .ALIGN 16 -L\$dec_last - ldo 1024($tbl),$rounds - _ror $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ldw 0($key),$s0 - _ror $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ldw 4($key),$s1 - _ror $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ldw 8($key),$s2 - _ror $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ldw 12($key),$s3 - _ror $acc6,16,$acc6 - xor $acc4,$t1,$t1 - _ror $acc7,24,$acc7 - xor $acc5,$t1,$t1 - _ror $acc9,8,$acc9 - xor $acc6,$t1,$t1 - _ror $acc10,16,$acc10 - xor $acc7,$t1,$t1 - _ror $acc11,24,$acc11 - xor $acc8,$t2,$t2 - _ror $acc13,8,$acc13 - xor $acc9,$t2,$t2 - _ror $acc14,16,$acc14 - xor $acc10,$t2,$t2 - _ror $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - _srm $t0,24,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 - _srm $t3,16,$acc1 - - _srm $t2,8,$acc2 - ldbx $acc0($rounds),$acc0 - _srm $t1,24,$acc4 - ldbx $acc1($rounds),$acc1 - _srm $t0,16,$acc5 - _srm $t1,0,$acc3 - ldbx $acc2($rounds),$acc2 - ldbx $acc3($rounds),$acc3 - _srm $t3,8,$acc6 - ldbx $acc4($rounds),$acc4 - _srm $t2,24,$acc8 - ldbx $acc5($rounds),$acc5 - _srm $t1,16,$acc9 - _srm $t2,0,$acc7 - ldbx $acc6($rounds),$acc6 - ldbx $acc7($rounds),$acc7 - _srm $t0,8,$acc10 - ldbx $acc8($rounds),$acc8 - _srm $t3,24,$acc12 - ldbx $acc9($rounds),$acc9 - _srm $t2,16,$acc13 - _srm $t3,0,$acc11 - ldbx $acc10($rounds),$acc10 - _srm $t1,8,$acc14 - ldbx $acc11($rounds),$acc11 - ldbx $acc12($rounds),$acc12 - ldbx $acc13($rounds),$acc13 - _srm $t0,0,$acc15 - ldbx $acc14($rounds),$acc14 - - dep $acc0,7,8,$acc3 - ldbx $acc15($rounds),$acc15 - dep $acc4,7,8,$acc7 - dep $acc1,15,8,$acc3 - dep $acc5,15,8,$acc7 - dep $acc2,23,8,$acc3 - dep $acc6,23,8,$acc7 - xor $acc3,$s0,$s0 - xor $acc7,$s1,$s1 - dep $acc8,7,8,$acc11 - dep $acc12,7,8,$acc15 - dep $acc9,15,8,$acc11 - dep $acc13,15,8,$acc15 - dep $acc10,23,8,$acc11 - dep $acc14,23,8,$acc15 - xor $acc11,$s2,$s2 - - bv (%r31) - .EXIT - xor $acc15,$s3,$s3 - .PROCEND - - .ALIGN 64 -L\$AES_Td - .WORD 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 - .WORD 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 - .WORD 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 - .WORD 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f - .WORD 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 - .WORD 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 - .WORD 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da - .WORD 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 - .WORD 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd - .WORD 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 - .WORD 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 - .WORD 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 - .WORD 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 - .WORD 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a - .WORD 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 - .WORD 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c - .WORD 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 - .WORD 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a - .WORD 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 - .WORD 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 - .WORD 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 - .WORD 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff - .WORD 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 - .WORD 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb - .WORD 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 - .WORD 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e - .WORD 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 - .WORD 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a - .WORD 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e - .WORD 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 - .WORD 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d - .WORD 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 - .WORD 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd - .WORD 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 - .WORD 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 - .WORD 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 - .WORD 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d - .WORD 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 - .WORD 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 - .WORD 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef - .WORD 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 - .WORD 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 - .WORD 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 - .WORD 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 - .WORD 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 - .WORD 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b - .WORD 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 - .WORD 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 - .WORD 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 - .WORD 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 - .WORD 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 - .WORD 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f - .WORD 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df - .WORD 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f - .WORD 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e - .WORD 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 - .WORD 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 - .WORD 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c - .WORD 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf - .WORD 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 - .WORD 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f - .WORD 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 - .WORD 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 - .WORD 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 - .BYTE 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .BYTE 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .BYTE 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .BYTE 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .BYTE 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .BYTE 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .BYTE 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .BYTE 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .BYTE 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .BYTE 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .BYTE 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .BYTE 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .BYTE 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .BYTE 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .BYTE 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .BYTE 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .BYTE 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .BYTE 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .BYTE 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .BYTE 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .BYTE 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .BYTE 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .BYTE 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .BYTE 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .BYTE 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .BYTE 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .BYTE 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .BYTE 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .BYTE 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .BYTE 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .BYTE 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .BYTE 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - .STRINGZ "AES for PA-RISC, CRYPTOGAMS by " -___ - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - - # translate made up instructons: _ror, _srm - s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/ or - - s/_srm(\s+%r[0-9]+),([0-9]+),/ - $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) - : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; - - s/,\*/,/ if ($SIZE_T==4); - s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8); - print $_,"\n"; -} -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-ppc.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-ppc.pl deleted file mode 100644 index 7c52cbe5f9..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-ppc.pl +++ /dev/null @@ -1,1365 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# Needs more work: key setup, CBC routine... -# -# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with -# 128-bit key, which is ~40% better than 64-bit code generated by gcc -# 4.0. But these are not the ones currently used! Their "compact" -# counterparts are, for security reason. ppc_AES_encrypt_compact runs -# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - -# at 1/3 of ppc_AES_decrypt. - -# February 2010 -# -# Rescheduling instructions to favour Power6 pipeline gave 10% -# performance improvement on the platfrom in question (and marginal -# improvement even on others). It should be noted that Power6 fails -# to process byte in 18 cycles, only in 23, because it fails to issue -# 4 load instructions in two cycles, only in 3. As result non-compact -# block subroutines are 25% slower than one would expect. Compact -# functions scale better, because they have pure computational part, -# which scales perfectly with clock frequency. To be specific -# ppc_AES_encrypt_compact operates at 42 cycles per byte, while -# ppc_AES_decrypt_compact - at 55 (in 64-bit build). - -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $LRSAVE =2*$SIZE_T; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $LRSAVE =$SIZE_T; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=32*$SIZE_T; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$sp="r1"; -$toc="r2"; -$inp="r3"; -$out="r4"; -$key="r5"; - -$Tbl0="r3"; -$Tbl1="r6"; -$Tbl2="r7"; -$Tbl3="r2"; - -$s0="r8"; -$s1="r9"; -$s2="r10"; -$s3="r11"; - -$t0="r12"; -$t1="r13"; -$t2="r14"; -$t3="r15"; - -$acc00="r16"; -$acc01="r17"; -$acc02="r18"; -$acc03="r19"; - -$acc04="r20"; -$acc05="r21"; -$acc06="r22"; -$acc07="r23"; - -$acc08="r24"; -$acc09="r25"; -$acc10="r26"; -$acc11="r27"; - -$acc12="r28"; -$acc13="r29"; -$acc14="r30"; -$acc15="r31"; - -# stay away from TLS pointer -if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } -else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } -$mask80=$Tbl2; -$mask1b=$Tbl3; - -$code.=<<___; -.machine "any" -.text - -.align 7 -LAES_Te: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-8` - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - .space `64-9*4` -LAES_Td: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-64-8+2048+256` - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - .space `128-64-9*4` -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - - -.globl .AES_encrypt -.align 7 -.AES_encrypt: - $STU $sp,-$FRAME($sp) - mflr r0 - - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - $PUSH r0,`$FRAME+$LRSAVE`($sp) - - andi. $t0,$inp,3 - andi. $t1,$out,3 - or. $t0,$t0,$t1 - bne Lenc_unaligned - -Lenc_unaligned_ok: - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Te - bl Lppc_AES_encrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - b Lenc_done - -Lenc_unaligned: - subfic $t0,$inp,4096 - subfic $t1,$out,4096 - andi. $t0,$t0,4096-16 - beq Lenc_xpage - andi. $t1,$t1,4096-16 - bne Lenc_unaligned_ok - -Lenc_xpage: - lbz $acc00,0($inp) - lbz $acc01,1($inp) - lbz $acc02,2($inp) - lbz $s0,3($inp) - lbz $acc04,4($inp) - lbz $acc05,5($inp) - lbz $acc06,6($inp) - lbz $s1,7($inp) - lbz $acc08,8($inp) - lbz $acc09,9($inp) - lbz $acc10,10($inp) - insrwi $s0,$acc00,8,0 - lbz $s2,11($inp) - insrwi $s1,$acc04,8,0 - lbz $acc12,12($inp) - insrwi $s0,$acc01,8,8 - lbz $acc13,13($inp) - insrwi $s1,$acc05,8,8 - lbz $acc14,14($inp) - insrwi $s0,$acc02,8,16 - lbz $s3,15($inp) - insrwi $s1,$acc06,8,16 - insrwi $s2,$acc08,8,0 - insrwi $s3,$acc12,8,0 - insrwi $s2,$acc09,8,8 - insrwi $s3,$acc13,8,8 - insrwi $s2,$acc10,8,16 - insrwi $s3,$acc14,8,16 - - bl LAES_Te - bl Lppc_AES_encrypt_compact - - extrwi $acc00,$s0,8,0 - extrwi $acc01,$s0,8,8 - stb $acc00,0($out) - extrwi $acc02,$s0,8,16 - stb $acc01,1($out) - stb $acc02,2($out) - extrwi $acc04,$s1,8,0 - stb $s0,3($out) - extrwi $acc05,$s1,8,8 - stb $acc04,4($out) - extrwi $acc06,$s1,8,16 - stb $acc05,5($out) - stb $acc06,6($out) - extrwi $acc08,$s2,8,0 - stb $s1,7($out) - extrwi $acc09,$s2,8,8 - stb $acc08,8($out) - extrwi $acc10,$s2,8,16 - stb $acc09,9($out) - stb $acc10,10($out) - extrwi $acc12,$s3,8,0 - stb $s2,11($out) - extrwi $acc13,$s3,8,8 - stb $acc12,12($out) - extrwi $acc14,$s3,8,16 - stb $acc13,13($out) - stb $acc14,14($out) - stb $s3,15($out) - -Lenc_done: - $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,18,3,0 - .long 0 - -.align 5 -Lppc_AES_encrypt: - lwz $acc00,240($key) - addi $Tbl1,$Tbl0,3 - lwz $t0,0($key) - addi $Tbl2,$Tbl0,2 - lwz $t1,4($key) - addi $Tbl3,$Tbl0,1 - lwz $t2,8($key) - addi $acc00,$acc00,-1 - lwz $t3,12($key) - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Lenc_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t0,0($key) - rlwinm $acc04,$s1,`32-16+3`,21,28 - lwz $t1,4($key) - rlwinm $acc05,$s2,`32-16+3`,21,28 - lwz $t2,8($key) - rlwinm $acc06,$s3,`32-16+3`,21,28 - lwz $t3,12($key) - rlwinm $acc07,$s0,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - rlwinm $acc08,$s2,`32-8+3`,21,28 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - rlwinm $acc10,$s0,`32-8+3`,21,28 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - rlwinm $acc12,$s3,`0+3`,21,28 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc13,$s0,`0+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - rlwinm $acc14,$s1,`0+3`,21,28 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc15,$s2,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - xor $t0,$t0,$acc00 - lwzx $acc09,$Tbl2,$acc09 - xor $t1,$t1,$acc01 - lwzx $acc10,$Tbl2,$acc10 - xor $t2,$t2,$acc02 - lwzx $acc11,$Tbl2,$acc11 - xor $t3,$t3,$acc03 - lwzx $acc12,$Tbl3,$acc12 - xor $t0,$t0,$acc04 - lwzx $acc13,$Tbl3,$acc13 - xor $t1,$t1,$acc05 - lwzx $acc14,$Tbl3,$acc14 - xor $t2,$t2,$acc06 - lwzx $acc15,$Tbl3,$acc15 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Lenc_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $t0,0($key) - rlwinm $acc00,$s0,`32-24`,24,31 - lwz $t1,4($key) - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t2,8($key) - rlwinm $acc02,$s2,`32-24`,24,31 - lwz $t3,12($key) - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 - rlwinm $acc04,$s1,`32-16`,24,31 - lwz $acc09,`2048+32`($Tbl0) - rlwinm $acc05,$s2,`32-16`,24,31 - lwz $acc10,`2048+64`($Tbl0) - rlwinm $acc06,$s3,`32-16`,24,31 - lwz $acc11,`2048+96`($Tbl0) - rlwinm $acc07,$s0,`32-16`,24,31 - lwz $acc12,`2048+128`($Tbl0) - rlwinm $acc08,$s2,`32-8`,24,31 - lwz $acc13,`2048+160`($Tbl0) - rlwinm $acc09,$s3,`32-8`,24,31 - lwz $acc14,`2048+192`($Tbl0) - rlwinm $acc10,$s0,`32-8`,24,31 - lwz $acc15,`2048+224`($Tbl0) - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc00,$Tbl2,$acc00 - rlwinm $acc12,$s3,`0`,24,31 - lbzx $acc01,$Tbl2,$acc01 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc02,$Tbl2,$acc02 - rlwinm $acc14,$s1,`0`,24,31 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc04,$Tbl2,$acc04 - rlwinm $s0,$acc00,24,0,7 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc06,$Tbl2,$acc06 - rlwinm $s2,$acc02,24,0,7 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc08,$Tbl2,$acc08 - rlwimi $s0,$acc04,16,8,15 - lbzx $acc09,$Tbl2,$acc09 - rlwimi $s1,$acc05,16,8,15 - lbzx $acc10,$Tbl2,$acc10 - rlwimi $s2,$acc06,16,8,15 - lbzx $acc11,$Tbl2,$acc11 - rlwimi $s3,$acc07,16,8,15 - lbzx $acc12,$Tbl2,$acc12 - rlwimi $s0,$acc08,8,16,23 - lbzx $acc13,$Tbl2,$acc13 - rlwimi $s1,$acc09,8,16,23 - lbzx $acc14,$Tbl2,$acc14 - rlwimi $s2,$acc10,8,16,23 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.align 4 -Lppc_AES_encrypt_compact: - lwz $acc00,240($key) - addi $Tbl1,$Tbl0,2048 - lwz $t0,0($key) - lis $mask80,0x8080 - lwz $t1,4($key) - lis $mask1b,0x1b1b - lwz $t2,8($key) - ori $mask80,$mask80,0x8080 - lwz $t3,12($key) - ori $mask1b,$mask1b,0x1b1b - addi $key,$key,16 - mtctr $acc00 -.align 4 -Lenc_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - rlwinm $acc00,$s0,`32-24`,24,31 - xor $s2,$s2,$t2 - rlwinm $acc01,$s1,`32-24`,24,31 - xor $s3,$s3,$t3 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - rlwinm $acc04,$s1,`32-16`,24,31 - rlwinm $acc05,$s2,`32-16`,24,31 - rlwinm $acc06,$s3,`32-16`,24,31 - rlwinm $acc07,$s0,`32-16`,24,31 - lbzx $acc00,$Tbl1,$acc00 - rlwinm $acc08,$s2,`32-8`,24,31 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc02,$Tbl1,$acc02 - rlwinm $acc10,$s0,`32-8`,24,31 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc04,$Tbl1,$acc04 - rlwinm $acc12,$s3,`0`,24,31 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc06,$Tbl1,$acc06 - rlwinm $acc14,$s1,`0`,24,31 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc08,$Tbl1,$acc08 - rlwinm $s0,$acc00,24,0,7 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc10,$Tbl1,$acc10 - rlwinm $s2,$acc02,24,0,7 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc12,$Tbl1,$acc12 - rlwimi $s0,$acc04,16,8,15 - lbzx $acc13,$Tbl1,$acc13 - rlwimi $s1,$acc05,16,8,15 - lbzx $acc14,$Tbl1,$acc14 - rlwimi $s2,$acc06,16,8,15 - lbzx $acc15,$Tbl1,$acc15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - or $s0,$s0,$acc12 - lwz $t1,4($key) - or $s1,$s1,$acc13 - lwz $t2,8($key) - or $s2,$s2,$acc14 - lwz $t3,12($key) - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Lenc_compact_done - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - srwi $acc05,$acc01,7 - andc $acc09,$s1,$mask80 - srwi $acc06,$acc02,7 - andc $acc10,$s2,$mask80 - srwi $acc07,$acc03,7 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - rotlwi $acc12,$s0,16 # ROTATE(r0,16) - xor $acc02,$acc02,$acc10 - rotlwi $acc13,$s1,16 - xor $acc03,$acc03,$acc11 - rotlwi $acc14,$s2,16 - - xor $s0,$s0,$acc00 # r0^r2 - rotlwi $acc15,$s3,16 - xor $s1,$s1,$acc01 - rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) - xor $s2,$s2,$acc02 - rotrwi $s1,$s1,24 - xor $s3,$s3,$acc03 - rotrwi $s2,$s2,24 - xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 - rotrwi $s3,$s3,24 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotlwi $acc08,$acc12,8 # ROTATE(r0,24) - xor $s0,$s0,$acc12 # - rotlwi $acc09,$acc13,8 - xor $s1,$s1,$acc13 - rotlwi $acc10,$acc14,8 - xor $s2,$s2,$acc14 - rotlwi $acc11,$acc15,8 - xor $s3,$s3,$acc15 - xor $s0,$s0,$acc08 # - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Lenc_compact_loop -.align 4 -Lenc_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.globl .AES_decrypt -.align 7 -.AES_decrypt: - $STU $sp,-$FRAME($sp) - mflr r0 - - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - $PUSH r0,`$FRAME+$LRSAVE`($sp) - - andi. $t0,$inp,3 - andi. $t1,$out,3 - or. $t0,$t0,$t1 - bne Ldec_unaligned - -Ldec_unaligned_ok: - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Td - bl Lppc_AES_decrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - b Ldec_done - -Ldec_unaligned: - subfic $t0,$inp,4096 - subfic $t1,$out,4096 - andi. $t0,$t0,4096-16 - beq Ldec_xpage - andi. $t1,$t1,4096-16 - bne Ldec_unaligned_ok - -Ldec_xpage: - lbz $acc00,0($inp) - lbz $acc01,1($inp) - lbz $acc02,2($inp) - lbz $s0,3($inp) - lbz $acc04,4($inp) - lbz $acc05,5($inp) - lbz $acc06,6($inp) - lbz $s1,7($inp) - lbz $acc08,8($inp) - lbz $acc09,9($inp) - lbz $acc10,10($inp) - insrwi $s0,$acc00,8,0 - lbz $s2,11($inp) - insrwi $s1,$acc04,8,0 - lbz $acc12,12($inp) - insrwi $s0,$acc01,8,8 - lbz $acc13,13($inp) - insrwi $s1,$acc05,8,8 - lbz $acc14,14($inp) - insrwi $s0,$acc02,8,16 - lbz $s3,15($inp) - insrwi $s1,$acc06,8,16 - insrwi $s2,$acc08,8,0 - insrwi $s3,$acc12,8,0 - insrwi $s2,$acc09,8,8 - insrwi $s3,$acc13,8,8 - insrwi $s2,$acc10,8,16 - insrwi $s3,$acc14,8,16 - - bl LAES_Td - bl Lppc_AES_decrypt_compact - - extrwi $acc00,$s0,8,0 - extrwi $acc01,$s0,8,8 - stb $acc00,0($out) - extrwi $acc02,$s0,8,16 - stb $acc01,1($out) - stb $acc02,2($out) - extrwi $acc04,$s1,8,0 - stb $s0,3($out) - extrwi $acc05,$s1,8,8 - stb $acc04,4($out) - extrwi $acc06,$s1,8,16 - stb $acc05,5($out) - stb $acc06,6($out) - extrwi $acc08,$s2,8,0 - stb $s1,7($out) - extrwi $acc09,$s2,8,8 - stb $acc08,8($out) - extrwi $acc10,$s2,8,16 - stb $acc09,9($out) - stb $acc10,10($out) - extrwi $acc12,$s3,8,0 - stb $s2,11($out) - extrwi $acc13,$s3,8,8 - stb $acc12,12($out) - extrwi $acc14,$s3,8,16 - stb $acc13,13($out) - stb $acc14,14($out) - stb $s3,15($out) - -Ldec_done: - $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,18,3,0 - .long 0 - -.align 5 -Lppc_AES_decrypt: - lwz $acc00,240($key) - addi $Tbl1,$Tbl0,3 - lwz $t0,0($key) - addi $Tbl2,$Tbl0,2 - lwz $t1,4($key) - addi $Tbl3,$Tbl0,1 - lwz $t2,8($key) - addi $acc00,$acc00,-1 - lwz $t3,12($key) - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Ldec_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t0,0($key) - rlwinm $acc04,$s3,`32-16+3`,21,28 - lwz $t1,4($key) - rlwinm $acc05,$s0,`32-16+3`,21,28 - lwz $t2,8($key) - rlwinm $acc06,$s1,`32-16+3`,21,28 - lwz $t3,12($key) - rlwinm $acc07,$s2,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - rlwinm $acc08,$s2,`32-8+3`,21,28 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - rlwinm $acc10,$s0,`32-8+3`,21,28 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - rlwinm $acc12,$s1,`0+3`,21,28 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc13,$s2,`0+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - rlwinm $acc14,$s3,`0+3`,21,28 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc15,$s0,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - xor $t0,$t0,$acc00 - lwzx $acc09,$Tbl2,$acc09 - xor $t1,$t1,$acc01 - lwzx $acc10,$Tbl2,$acc10 - xor $t2,$t2,$acc02 - lwzx $acc11,$Tbl2,$acc11 - xor $t3,$t3,$acc03 - lwzx $acc12,$Tbl3,$acc12 - xor $t0,$t0,$acc04 - lwzx $acc13,$Tbl3,$acc13 - xor $t1,$t1,$acc05 - lwzx $acc14,$Tbl3,$acc14 - xor $t2,$t2,$acc06 - lwzx $acc15,$Tbl3,$acc15 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Ldec_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $t0,0($key) - rlwinm $acc00,$s0,`32-24`,24,31 - lwz $t1,4($key) - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t2,8($key) - rlwinm $acc02,$s2,`32-24`,24,31 - lwz $t3,12($key) - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 - rlwinm $acc04,$s3,`32-16`,24,31 - lwz $acc09,`2048+32`($Tbl0) - rlwinm $acc05,$s0,`32-16`,24,31 - lwz $acc10,`2048+64`($Tbl0) - lbzx $acc00,$Tbl2,$acc00 - lwz $acc11,`2048+96`($Tbl0) - lbzx $acc01,$Tbl2,$acc01 - lwz $acc12,`2048+128`($Tbl0) - rlwinm $acc06,$s1,`32-16`,24,31 - lwz $acc13,`2048+160`($Tbl0) - rlwinm $acc07,$s2,`32-16`,24,31 - lwz $acc14,`2048+192`($Tbl0) - rlwinm $acc08,$s2,`32-8`,24,31 - lwz $acc15,`2048+224`($Tbl0) - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc02,$Tbl2,$acc02 - rlwinm $acc10,$s0,`32-8`,24,31 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc04,$Tbl2,$acc04 - rlwinm $acc12,$s1,`0`,24,31 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $acc13,$s2,`0`,24,31 - lbzx $acc06,$Tbl2,$acc06 - rlwinm $acc14,$s3,`0`,24,31 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc08,$Tbl2,$acc08 - rlwinm $s0,$acc00,24,0,7 - lbzx $acc09,$Tbl2,$acc09 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc10,$Tbl2,$acc10 - rlwinm $s2,$acc02,24,0,7 - lbzx $acc11,$Tbl2,$acc11 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc12,$Tbl2,$acc12 - rlwimi $s0,$acc04,16,8,15 - lbzx $acc13,$Tbl2,$acc13 - rlwimi $s1,$acc05,16,8,15 - lbzx $acc14,$Tbl2,$acc14 - rlwimi $s2,$acc06,16,8,15 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.align 4 -Lppc_AES_decrypt_compact: - lwz $acc00,240($key) - addi $Tbl1,$Tbl0,2048 - lwz $t0,0($key) - lis $mask80,0x8080 - lwz $t1,4($key) - lis $mask1b,0x1b1b - lwz $t2,8($key) - ori $mask80,$mask80,0x8080 - lwz $t3,12($key) - ori $mask1b,$mask1b,0x1b1b - addi $key,$key,16 -___ -$code.=<<___ if ($SIZE_T==8); - insrdi $mask80,$mask80,32,0 - insrdi $mask1b,$mask1b,32,0 -___ -$code.=<<___; - mtctr $acc00 -.align 4 -Ldec_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - rlwinm $acc00,$s0,`32-24`,24,31 - xor $s2,$s2,$t2 - rlwinm $acc01,$s1,`32-24`,24,31 - xor $s3,$s3,$t3 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - rlwinm $acc04,$s3,`32-16`,24,31 - rlwinm $acc05,$s0,`32-16`,24,31 - rlwinm $acc06,$s1,`32-16`,24,31 - rlwinm $acc07,$s2,`32-16`,24,31 - lbzx $acc00,$Tbl1,$acc00 - rlwinm $acc08,$s2,`32-8`,24,31 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc02,$Tbl1,$acc02 - rlwinm $acc10,$s0,`32-8`,24,31 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc04,$Tbl1,$acc04 - rlwinm $acc12,$s1,`0`,24,31 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc13,$s2,`0`,24,31 - lbzx $acc06,$Tbl1,$acc06 - rlwinm $acc14,$s3,`0`,24,31 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc08,$Tbl1,$acc08 - rlwinm $s0,$acc00,24,0,7 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc10,$Tbl1,$acc10 - rlwinm $s2,$acc02,24,0,7 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc12,$Tbl1,$acc12 - rlwimi $s0,$acc04,16,8,15 - lbzx $acc13,$Tbl1,$acc13 - rlwimi $s1,$acc05,16,8,15 - lbzx $acc14,$Tbl1,$acc14 - rlwimi $s2,$acc06,16,8,15 - lbzx $acc15,$Tbl1,$acc15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - or $s0,$s0,$acc12 - lwz $t1,4($key) - or $s1,$s1,$acc13 - lwz $t2,8($key) - or $s2,$s2,$acc14 - lwz $t3,12($key) - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Ldec_compact_done -___ -$code.=<<___ if ($SIZE_T==8); - # vectorized permutation improves decrypt performance by 10% - insrdi $s0,$s1,32,0 - insrdi $s2,$s3,32,0 - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc02,$s2,$mask80 - srdi $acc04,$acc00,7 # r1>>7 - srdi $acc06,$acc02,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc10,$s2,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc02,$acc02,$acc06 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc10,$acc10,$acc10 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc02,$acc02,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc02,$acc02,$acc10 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc06,$acc02,$mask80 - srdi $acc08,$acc04,7 # r1>>7 - srdi $acc10,$acc06,7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - andc $acc14,$acc02,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc06,$acc06,$acc10 - add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc06,$acc06,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc06,$acc06,$acc14 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc10,$acc06,$mask80 - srdi $acc12,$acc08,7 # r1>>7 - srdi $acc14,$acc10,7 - sub $acc08,$acc08,$acc12 # r1-(r1>>7) - sub $acc10,$acc10,$acc14 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc14,$acc06,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc10,$acc10,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc10,$acc10,$acc14 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc02,$acc02,$s2 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc06,$acc06,$s2 - - extrdi $acc01,$acc00,32,0 - extrdi $acc03,$acc02,32,0 - extrdi $acc05,$acc04,32,0 - extrdi $acc07,$acc06,32,0 - extrdi $acc09,$acc08,32,0 - extrdi $acc11,$acc10,32,0 -___ -$code.=<<___ if ($SIZE_T==4); - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - srwi $acc05,$acc01,7 - andc $acc09,$s1,$mask80 - srwi $acc06,$acc02,7 - andc $acc10,$s2,$mask80 - srwi $acc07,$acc03,7 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc05,$acc01,$mask80 - and $acc06,$acc02,$mask80 - and $acc07,$acc03,$mask80 - srwi $acc08,$acc04,7 # r1>>7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - srwi $acc09,$acc05,7 - andc $acc13,$acc01,$mask80 - srwi $acc10,$acc06,7 - andc $acc14,$acc02,$mask80 - srwi $acc11,$acc07,7 - andc $acc15,$acc03,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc05,$acc05,$acc09 - sub $acc06,$acc06,$acc10 - sub $acc07,$acc07,$acc11 - add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc05,$acc05,$mask1b - and $acc06,$acc06,$mask1b - and $acc07,$acc07,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc05,$acc05,$acc13 - xor $acc06,$acc06,$acc14 - xor $acc07,$acc07,$acc15 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc09,$acc05,$mask80 - srwi $acc12,$acc08,7 # r1>>7 - and $acc10,$acc06,$mask80 - srwi $acc13,$acc09,7 - and $acc11,$acc07,$mask80 - srwi $acc14,$acc10,7 - sub $acc08,$acc08,$acc12 # r1-(r1>>7) - srwi $acc15,$acc11,7 - sub $acc09,$acc09,$acc13 - sub $acc10,$acc10,$acc14 - sub $acc11,$acc11,$acc15 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc13,$acc05,$mask80 - andc $acc14,$acc06,$mask80 - andc $acc15,$acc07,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc09,$acc09,$mask1b - and $acc10,$acc10,$mask1b - and $acc11,$acc11,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc09,$acc09,$acc13 - xor $acc10,$acc10,$acc14 - xor $acc11,$acc11,$acc15 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc01,$acc01,$s1 - xor $acc02,$acc02,$s2 - xor $acc03,$acc03,$s3 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc05,$acc05,$s1 - xor $acc06,$acc06,$s2 - xor $acc07,$acc07,$s3 -___ -$code.=<<___; - rotrwi $s0,$s0,8 # = ROTATE(r0,8) - rotrwi $s1,$s1,8 - xor $s0,$s0,$acc00 # ^= r2^r0 - rotrwi $s2,$s2,8 - xor $s1,$s1,$acc01 - rotrwi $s3,$s3,8 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - xor $acc00,$acc00,$acc08 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - xor $s0,$s0,$acc04 # ^= r4^r0 - rotrwi $acc00,$acc00,24 - xor $s1,$s1,$acc05 - rotrwi $acc01,$acc01,24 - xor $s2,$s2,$acc06 - rotrwi $acc02,$acc02,24 - xor $s3,$s3,$acc07 - rotrwi $acc03,$acc03,24 - xor $acc04,$acc04,$acc08 - xor $acc05,$acc05,$acc09 - xor $acc06,$acc06,$acc10 - xor $acc07,$acc07,$acc11 - xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] - rotrwi $acc04,$acc04,16 - xor $s1,$s1,$acc09 - rotrwi $acc05,$acc05,16 - xor $s2,$s2,$acc10 - rotrwi $acc06,$acc06,16 - xor $s3,$s3,$acc11 - rotrwi $acc07,$acc07,16 - xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) - rotrwi $acc08,$acc08,8 - xor $s1,$s1,$acc01 - rotrwi $acc09,$acc09,8 - xor $s2,$s2,$acc02 - rotrwi $acc10,$acc10,8 - xor $s3,$s3,$acc03 - rotrwi $acc11,$acc11,8 - xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 - xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Ldec_compact_loop -.align 4 -Ldec_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.asciz "AES for PPC, CRYPTOGAMS by " -.align 7 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-s390x.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-s390x.pl deleted file mode 100644 index e75dcd0315..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-s390x.pl +++ /dev/null @@ -1,2237 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for s390x. - -# April 2007. -# -# Software performance improvement over gcc-generated code is ~70% and -# in absolute terms is ~73 cycles per byte processed with 128-bit key. -# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are -# *strictly* in-order execution and issued instruction [in this case -# load value from memory is critical] has to complete before execution -# flow proceeds. S-boxes are compressed to 2KB[+256B]. -# -# As for hardware acceleration support. It's basically a "teaser," as -# it can and should be improved in several ways. Most notably support -# for CBC is not utilized, nor multiple blocks are ever processed. -# Then software key schedule can be postponed till hardware support -# detection... Performance improvement over assembler is reportedly -# ~2.5x, but can reach >8x [naturally on larger chunks] if proper -# support is implemented. - -# May 2007. -# -# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided -# for 128-bit keys, if hardware support is detected. - -# Januray 2009. -# -# Add support for hardware AES192/256 and reschedule instructions to -# minimize/avoid Address Generation Interlock hazard and to favour -# dual-issue z10 pipeline. This gave ~25% improvement on z10 and -# almost 50% on z9. The gain is smaller on z10, because being dual- -# issue z10 makes it improssible to eliminate the interlock condition: -# critial path is not long enough. Yet it spends ~24 cycles per byte -# processed with 128-bit key. -# -# Unlike previous version hardware support detection takes place only -# at the moment of key schedule setup, which is denoted in key->rounds. -# This is done, because deferred key setup can't be made MT-safe, not -# for keys longer than 128 bits. -# -# Add AES_cbc_encrypt, which gives incredible performance improvement, -# it was measured to be ~6.6x. It's less than previously mentioned 8x, -# because software implementation was optimized. - -# May 2010. -# -# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x -# performance improvement over "generic" counter mode routine relying -# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers -# to the fact that exact throughput value depends on current stack -# frame alignment within 4KB page. In worst case you get ~75% of the -# maximum, but *on average* it would be as much as ~98%. Meaning that -# worst case is unlike, it's like hitting ravine on plateau. - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. On z990 it was measured to perform -# 2x better than code generated by gcc 4.3. - -# December 2010. -# -# Add support for z196 "cipher message with counter" instruction. -# Note however that it's disengaged, because it was measured to -# perform ~12% worse than vanilla km-based code... - -# February 2011. -# -# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes -# instructions, which deliver ~70% improvement at 8KB block size over -# vanilla km-based code, 37% - at most like 512-bytes block size. - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$softonly=0; # allow hardware support - -$t0="%r0"; $mask="%r0"; -$t1="%r1"; -$t2="%r2"; $inp="%r2"; -$t3="%r3"; $out="%r3"; $bits="%r3"; -$key="%r4"; -$i1="%r5"; -$i2="%r6"; -$i3="%r7"; -$s0="%r8"; -$s1="%r9"; -$s2="%r10"; -$s3="%r11"; -$tbl="%r12"; -$rounds="%r13"; -$ra="%r14"; -$sp="%r15"; - -$stdframe=16*$SIZE_T+4*8; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code=<<___; -.text - -.type AES_Te,\@object -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -# Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -# rcon[] -.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.align 256 -.size AES_Te,.-AES_Te - -# void AES_encrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_encrypt -.type AES_encrypt,\@function -AES_encrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Lesoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? - br %r14 -.align 64 -.Lesoft: -___ -$code.=<<___; - stm${g} %r3,$ra,3*$SIZE_T($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Te - bras $ra,_s390x_AES_encrypt - - l${g} $out,3*$SIZE_T($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lm${g} %r6,$ra,6*$SIZE_T($sp) - br $ra -.size AES_encrypt,.-AES_encrypt - -.type _s390x_AES_encrypt,\@function -.align 16 -_s390x_AES_encrypt: - st${g} $ra,15*$SIZE_T($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Lenc_loop -.align 16 -.Lenc_loop: - sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - ngr $t1,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - ngr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Te0[s0>>24] - l $t1,1($t1,$tbl) # Te3[s0>>0] - l $t2,2($t2,$tbl) # Te2[s0>>8] - l $t3,3($t3,$tbl) # Te1[s0>>16] - - x $s0,3($i1,$tbl) # Te1[s1>>16] - l $s1,0($s1,$tbl) # Te0[s1>>24] - x $t2,1($i2,$tbl) # Te3[s1>>0] - x $t3,2($i3,$tbl) # Te2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - nr $s2,$mask - ngr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - sllg $t1,$s3,`0+3` # i0 - nr $ra,$mask - la $key,16($key) - ngr $t1,$mask - - x $s0,2($i1,$tbl) # Te2[s2>>8] - x $s1,3($i2,$tbl) # Te1[s2>>16] - l $s2,0($s2,$tbl) # Te0[s2>>24] - x $t3,1($i3,$tbl) # Te3[s2>>0] - - srlg $i3,$s3,`16-3` # i2 - xr $s2,$t2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,1($t1,$tbl) # Te3[s3>>0] - x $s1,2($ra,$tbl) # Te2[s3>>8] - x $s2,3($i3,$tbl) # Te1[s3>>16] - l $s3,0($s3,$tbl) # Te0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Lenc_loop - .align 16 - - sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - ngr $t1,$mask - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - ngr $i2,$mask - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - nr $i3,$mask - - llgc $s0,2($s0,$tbl) # Te4[s0>>24] - llgc $t1,2($t1,$tbl) # Te4[s0>>0] - sll $s0,24 - llgc $t2,2($t2,$tbl) # Te4[s0>>8] - llgc $t3,2($t3,$tbl) # Te4[s0>>16] - sll $t2,8 - sll $t3,16 - - llgc $i1,2($i1,$tbl) # Te4[s1>>16] - llgc $s1,2($s1,$tbl) # Te4[s1>>24] - llgc $i2,2($i2,$tbl) # Te4[s1>>0] - llgc $i3,2($i3,$tbl) # Te4[s1>>8] - sll $i1,16 - sll $s1,24 - sll $i3,8 - or $s0,$i1 - or $s1,$t1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - ngr $i3,$mask - nr $s2,$mask - - sllg $t1,$s3,`0+3` # i0 - srlg $ra,$s3,`8-3` # i1 - ngr $t1,$mask - - llgc $i1,2($i1,$tbl) # Te4[s2>>8] - llgc $i2,2($i2,$tbl) # Te4[s2>>16] - sll $i1,8 - llgc $s2,2($s2,$tbl) # Te4[s2>>24] - llgc $i3,2($i3,$tbl) # Te4[s2>>0] - sll $i2,16 - nr $ra,$mask - sll $s2,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $t3,$i3 - - srlg $i3,$s3,`16-3` # i2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - l $t0,16($key) - l $t2,20($key) - - llgc $i1,2($t1,$tbl) # Te4[s3>>0] - llgc $i2,2($ra,$tbl) # Te4[s3>>8] - llgc $i3,2($i3,$tbl) # Te4[s3>>16] - llgc $s3,2($s3,$tbl) # Te4[s3>>24] - sll $i2,8 - sll $i3,16 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$i3 - or $s3,$t3 - - l${g} $ra,15*$SIZE_T($sp) - xr $s0,$t0 - xr $s1,$t2 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_encrypt,.-_s390x_AES_encrypt -___ - -$code.=<<___; -.type AES_Td,\@object -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -# Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -# void AES_decrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_decrypt -.type AES_decrypt,\@function -AES_decrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Ldsoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? - br %r14 -.align 64 -.Ldsoft: -___ -$code.=<<___; - stm${g} %r3,$ra,3*$SIZE_T($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Td - bras $ra,_s390x_AES_decrypt - - l${g} $out,3*$SIZE_T($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lm${g} %r6,$ra,6*$SIZE_T($sp) - br $ra -.size AES_decrypt,.-AES_decrypt - -.type _s390x_AES_decrypt,\@function -.align 16 -_s390x_AES_decrypt: - st${g} $ra,15*$SIZE_T($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Ldec_loop -.align 16 -.Ldec_loop: - srlg $t1,$s0,`16-3` - srlg $t2,$s0,`8-3` - sllg $t3,$s0,`0+3` - srl $s0,`24-3` - nr $s0,$mask - nr $t1,$mask - nr $t2,$mask - ngr $t3,$mask - - sllg $i1,$s1,`0+3` # i0 - srlg $i2,$s1,`16-3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - ngr $i1,$mask - nr $s1,$mask - nr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Td0[s0>>24] - l $t1,3($t1,$tbl) # Td1[s0>>16] - l $t2,2($t2,$tbl) # Td2[s0>>8] - l $t3,1($t3,$tbl) # Td3[s0>>0] - - x $s0,1($i1,$tbl) # Td3[s1>>0] - l $s1,0($s1,$tbl) # Td0[s1>>24] - x $t2,3($i2,$tbl) # Td1[s1>>16] - x $t3,2($i3,$tbl) # Td2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - sllg $i2,$s2,`0+3` # i1 - srlg $i3,$s2,`16-3` - srl $s2,`24-3` - nr $i1,$mask - ngr $i2,$mask - nr $s2,$mask - nr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - srlg $t1,$s3,`16-3` # i0 - nr $ra,$mask - la $key,16($key) - nr $t1,$mask - - x $s0,2($i1,$tbl) # Td2[s2>>8] - x $s1,1($i2,$tbl) # Td3[s2>>0] - l $s2,0($s2,$tbl) # Td0[s2>>24] - x $t3,3($i3,$tbl) # Td1[s2>>16] - - sllg $i3,$s3,`0+3` # i2 - srl $s3,`24-3` - ngr $i3,$mask - nr $s3,$mask - - xr $s2,$t2 - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,3($t1,$tbl) # Td1[s3>>16] - x $s1,2($ra,$tbl) # Td2[s3>>8] - x $s2,1($i3,$tbl) # Td3[s3>>0] - l $s3,0($s3,$tbl) # Td0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Ldec_loop - .align 16 - - l $t1,`2048+0`($tbl) # prefetch Td4 - l $t2,`2048+64`($tbl) - l $t3,`2048+128`($tbl) - l $i1,`2048+192`($tbl) - llill $mask,0xff - - srlg $i3,$s0,24 # i0 - srlg $t1,$s0,16 - srlg $t2,$s0,8 - nr $s0,$mask # i3 - nr $t1,$mask - - srlg $i1,$s1,24 - nr $t2,$mask - srlg $i2,$s1,16 - srlg $ra,$s1,8 - nr $s1,$mask # i0 - nr $i2,$mask - nr $ra,$mask - - llgc $i3,2048($i3,$tbl) # Td4[s0>>24] - llgc $t1,2048($t1,$tbl) # Td4[s0>>16] - llgc $t2,2048($t2,$tbl) # Td4[s0>>8] - sll $t1,16 - llgc $t3,2048($s0,$tbl) # Td4[s0>>0] - sllg $s0,$i3,24 - sll $t2,8 - - llgc $s1,2048($s1,$tbl) # Td4[s1>>0] - llgc $i1,2048($i1,$tbl) # Td4[s1>>24] - llgc $i2,2048($i2,$tbl) # Td4[s1>>16] - sll $i1,24 - llgc $i3,2048($ra,$tbl) # Td4[s1>>8] - sll $i2,16 - sll $i3,8 - or $s0,$s1 - or $t1,$i1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,8 # i0 - srlg $i2,$s2,24 - srlg $i3,$s2,16 - nr $s2,$mask # i1 - nr $i1,$mask - nr $i3,$mask - llgc $i1,2048($i1,$tbl) # Td4[s2>>8] - llgc $s1,2048($s2,$tbl) # Td4[s2>>0] - llgc $i2,2048($i2,$tbl) # Td4[s2>>24] - llgc $i3,2048($i3,$tbl) # Td4[s2>>16] - sll $i1,8 - sll $i2,24 - or $s0,$i1 - sll $i3,16 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s3,16 # i0 - srlg $i2,$s3,8 # i1 - srlg $i3,$s3,24 - nr $s3,$mask # i2 - nr $i1,$mask - nr $i2,$mask - - l${g} $ra,15*$SIZE_T($sp) - or $s1,$t1 - l $t0,16($key) - l $t1,20($key) - - llgc $i1,2048($i1,$tbl) # Td4[s3>>16] - llgc $i2,2048($i2,$tbl) # Td4[s3>>8] - sll $i1,16 - llgc $s2,2048($s3,$tbl) # Td4[s3>>0] - llgc $s3,2048($i3,$tbl) # Td4[s3>>24] - sll $i2,8 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $s3,$t3 - - xr $s0,$t0 - xr $s1,$t1 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_decrypt,.-_s390x_AES_decrypt -___ - -$code.=<<___; -# void AES_set_encrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,\@function -.align 16 -private_AES_set_encrypt_key: -_s390x_AES_set_encrypt_key: - lghi $t0,0 - cl${g}r $inp,$t0 - je .Lminus1 - cl${g}r $key,$t0 - je .Lminus1 - - lghi $t0,128 - clr $bits,$t0 - je .Lproceed - lghi $t0,192 - clr $bits,$t0 - je .Lproceed - lghi $t0,256 - clr $bits,$t0 - je .Lproceed - lghi %r2,-2 - br %r14 - -.align 16 -.Lproceed: -___ -$code.=<<___ if (!$softonly); - # convert bits to km code, [128,192,256]->[18,19,20] - lhi %r5,-128 - lhi %r0,18 - ar %r5,$bits - srl %r5,6 - ar %r5,%r0 - - larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security assist - jz .Lekey_internal - - lghi %r0,0 # query capability vector - la %r1,16($sp) - .long 0xb92f0042 # kmc %r4,%r2 - - llihh %r1,0x8000 - srlg %r1,%r1,0(%r5) - ng %r1,16($sp) - jz .Lekey_internal - - lmg %r0,%r1,0($inp) # just copy 128 bits... - stmg %r0,%r1,0($key) - lhi %r0,192 - cr $bits,%r0 - jl 1f - lg %r1,16($inp) - stg %r1,16($key) - je 1f - lg %r1,24($inp) - stg %r1,24($key) -1: st $bits,236($key) # save bits [for debugging purposes] - lgr $t0,%r5 - st %r5,240($key) # save km code - lghi %r2,0 - br %r14 -___ -$code.=<<___; -.align 16 -.Lekey_internal: - stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key - - larl $tbl,AES_Te+2048 - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - st $s0,0($key) - st $s1,4($key) - st $s2,8($key) - st $s3,12($key) - lghi $t0,128 - cr $bits,$t0 - jne .Lnot128 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,10 - st $rounds,240($key) - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t2,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L128_loop: - la $t2,0($t2,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8 - icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16 - icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24 - icm $t2,1,0($i3) # Te4[rk[3]>>24] - x $t2,256($t3,$tbl) # rcon[i] - xr $s0,$t2 # rk[4]=rk[0]^... - xr $s1,$s0 # rk[5]=rk[1]^rk[4] - xr $s2,$s1 # rk[6]=rk[2]^rk[5] - xr $s3,$s2 # rk[7]=rk[3]^rk[6] - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - nr $t2,$mask - nr $i1,$mask - srlg $i3,$s3,24 - nr $i2,$mask - - st $s0,16($key) - st $s1,20($key) - st $s2,24($key) - st $s3,28($key) - la $key,16($key) # key+=4 - la $t3,4($t3) # i++ - brct $rounds,.L128_loop - lghi $t0,10 - lghi %r2,0 - lm${g} %r4,%r13,4*$SIZE_T($sp) - br $ra - -.align 16 -.Lnot128: - llgf $t0,16($inp) - llgf $t1,20($inp) - st $t0,16($key) - st $t1,20($key) - lghi $t0,192 - cr $bits,$t0 - jne .Lnot192 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,12 - st $rounds,240($key) - lghi $rounds,8 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L192_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[5]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[6]=rk[0]^... - xr $s1,$s0 # rk[7]=rk[1]^rk[6] - xr $s2,$s1 # rk[8]=rk[2]^rk[7] - xr $s3,$s2 # rk[9]=rk[3]^rk[8] - - st $s0,24($key) - st $s1,28($key) - st $s2,32($key) - st $s3,36($key) - brct $rounds,.L192_continue - lghi $t0,12 - lghi %r2,0 - lm${g} %r4,%r13,4*$SIZE_T($sp) - br $ra - -.align 16 -.L192_continue: - lgr $t1,$s3 - x $t1,16($key) # rk[10]=rk[4]^rk[9] - st $t1,40($key) - x $t1,20($key) # rk[11]=rk[5]^rk[10] - st $t1,44($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,24($key) # key+=6 - la $t3,4($t3) # i++ - j .L192_loop - -.align 16 -.Lnot192: - llgf $t0,24($inp) - llgf $t1,28($inp) - st $t0,24($key) - st $t1,28($key) - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,14 - st $rounds,240($key) - lghi $rounds,7 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L256_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[7]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[8]=rk[0]^... - xr $s1,$s0 # rk[9]=rk[1]^rk[8] - xr $s2,$s1 # rk[10]=rk[2]^rk[9] - xr $s3,$s2 # rk[11]=rk[3]^rk[10] - st $s0,32($key) - st $s1,36($key) - st $s2,40($key) - st $s3,44($key) - brct $rounds,.L256_continue - lghi $t0,14 - lghi %r2,0 - lm${g} %r4,%r13,4*$SIZE_T($sp) - br $ra - -.align 16 -.L256_continue: - lgr $t1,$s3 # temp=rk[11] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - llgc $t1,0($t1) # Te4[rk[11]>>0] - icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8 - icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16 - icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24 - x $t1,16($key) # rk[12]=rk[4]^... - st $t1,48($key) - x $t1,20($key) # rk[13]=rk[5]^rk[12] - st $t1,52($key) - x $t1,24($key) # rk[14]=rk[6]^rk[13] - st $t1,56($key) - x $t1,28($key) # rk[15]=rk[7]^rk[14] - st $t1,60($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,32($key) # key+=8 - la $t3,4($t3) # i++ - j .L256_loop - -.Lminus1: - lghi %r2,-1 - br $ra -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key - -# void AES_set_decrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,\@function -.align 16 -private_AES_set_decrypt_key: - #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to - st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! - bras $ra,_s390x_AES_set_encrypt_key - #l${g} $key,4*$SIZE_T($sp) - l${g} $ra,14*$SIZE_T($sp) - ltgr %r2,%r2 - bnzr $ra -___ -$code.=<<___ if (!$softonly); - #l $t0,240($key) - lhi $t1,16 - cr $t0,$t1 - jl .Lgo - oill $t0,0x80 # set "decrypt" bit - st $t0,240($key) - br $ra -___ -$code.=<<___; -.align 16 -.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) - la $i1,0($key) - sllg $i2,$rounds,4 - la $i2,0($i2,$key) - srl $rounds,1 - lghi $t1,-16 - -.align 16 -.Linv: lmg $s0,$s1,0($i1) - lmg $s2,$s3,0($i2) - stmg $s0,$s1,0($i2) - stmg $s2,$s3,0($i1) - la $i1,16($i1) - la $i2,0($t1,$i2) - brct $rounds,.Linv -___ -$mask80=$i1; -$mask1b=$i2; -$maskfe=$i3; -$code.=<<___; - llgf $rounds,240($key) - aghi $rounds,-1 - sll $rounds,2 # (rounds-1)*4 - llilh $mask80,0x8080 - llilh $mask1b,0x1b1b - llilh $maskfe,0xfefe - oill $mask80,0x8080 - oill $mask1b,0x1b1b - oill $maskfe,0xfefe - -.align 16 -.Lmix: l $s0,16($key) # tp1 - lr $s1,$s0 - ngr $s1,$mask80 - srlg $t1,$s1,7 - slr $s1,$t1 - nr $s1,$mask1b - sllg $t1,$s0,1 - nr $t1,$maskfe - xr $s1,$t1 # tp2 - - lr $s2,$s1 - ngr $s2,$mask80 - srlg $t1,$s2,7 - slr $s2,$t1 - nr $s2,$mask1b - sllg $t1,$s1,1 - nr $t1,$maskfe - xr $s2,$t1 # tp4 - - lr $s3,$s2 - ngr $s3,$mask80 - srlg $t1,$s3,7 - slr $s3,$t1 - nr $s3,$mask1b - sllg $t1,$s2,1 - nr $t1,$maskfe - xr $s3,$t1 # tp8 - - xr $s1,$s0 # tp2^tp1 - xr $s2,$s0 # tp4^tp1 - rll $s0,$s0,24 # = ROTATE(tp1,8) - xr $s2,$s3 # ^=tp8 - xr $s0,$s1 # ^=tp2^tp1 - xr $s1,$s3 # tp2^tp1^tp8 - xr $s0,$s2 # ^=tp4^tp1^tp8 - rll $s1,$s1,8 - rll $s2,$s2,16 - xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24) - rll $s3,$s3,24 - xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16) - xr $s0,$s3 # ^= ROTATE(tp8,8) - - st $s0,16($key) - la $key,4($key) - brct $rounds,.Lmix - - lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! - lghi %r2,0 - br $ra -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key -___ - -######################################################################## -# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivec, const int enc) -{ -my $inp="%r2"; -my $out="%r4"; # length and out are swapped -my $len="%r3"; -my $key="%r5"; -my $ivp="%r6"; - -$code.=<<___; -.globl AES_cbc_encrypt -.type AES_cbc_encrypt,\@function -.align 16 -AES_cbc_encrypt: - xgr %r3,%r4 # flip %r3 and %r4, out and len - xgr %r4,%r3 - xgr %r3,%r4 -___ -$code.=<<___ if (!$softonly); - lhi %r0,16 - cl %r0,240($key) - jh .Lcbc_software - - lg %r0,0($ivp) # copy ivec - lg %r1,8($ivp) - stmg %r0,%r1,16($sp) - lmg %r0,%r1,0($key) # copy key, cover 256 bit - stmg %r0,%r1,32($sp) - lmg %r0,%r1,16($key) - stmg %r0,%r1,48($sp) - l %r0,240($key) # load kmc code - lghi $key,15 # res=len%16, len-=res; - ngr $key,$len - sl${g}r $len,$key - la %r1,16($sp) # parameter block - ivec || key - jz .Lkmc_truncated - .long 0xb92f0042 # kmc %r4,%r2 - brc 1,.-4 # pay attention to "partial completion" - ltr $key,$key - jnz .Lkmc_truncated -.Lkmc_done: - lmg %r0,%r1,16($sp) # copy ivec to caller - stg %r0,0($ivp) - stg %r1,8($ivp) - br $ra -.align 16 -.Lkmc_truncated: - ahi $key,-1 # it's the way it's encoded in mvc - tmll %r0,0x80 - jnz .Lkmc_truncated_dec - lghi %r1,0 - stg %r1,16*$SIZE_T($sp) - stg %r1,16*$SIZE_T+8($sp) - bras %r1,1f - mvc 16*$SIZE_T(1,$sp),0($inp) -1: ex $key,0(%r1) - la %r1,16($sp) # restore parameter block - la $inp,16*$SIZE_T($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - j .Lkmc_done -.align 16 -.Lkmc_truncated_dec: - st${g} $out,4*$SIZE_T($sp) - la $out,16*$SIZE_T($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - l${g} $out,4*$SIZE_T($sp) - bras %r1,2f - mvc 0(1,$out),16*$SIZE_T($sp) -2: ex $key,0(%r1) - j .Lkmc_done -.align 16 -.Lcbc_software: -___ -$code.=<<___; - stm${g} $key,$ra,5*$SIZE_T($sp) - lhi %r0,0 - cl %r0,`$stdframe+$SIZE_T-4`($sp) - je .Lcbc_decrypt - - larl $tbl,AES_Te - - llgf $s0,0($ivp) - llgf $s1,4($ivp) - llgf $s2,8($ivp) - llgf $s3,12($ivp) - - lghi $t0,16 - sl${g}r $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow -.Lcbc_enc_loop: - stm${g} $inp,$out,2*$SIZE_T($sp) - x $s0,0($inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_encrypt - - lm${g} $inp,$key,2*$SIZE_T($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - la $inp,16($inp) - la $out,16($out) - lghi $t0,16 - lt${g}r $len,$len - jz .Lcbc_enc_done - sl${g}r $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow - j .Lcbc_enc_loop -.align 16 -.Lcbc_enc_done: - l${g} $ivp,6*$SIZE_T($sp) - st $s0,0($ivp) - st $s1,4($ivp) - st $s2,8($ivp) - st $s3,12($ivp) - - lm${g} %r7,$ra,7*$SIZE_T($sp) - br $ra - -.align 16 -.Lcbc_enc_tail: - aghi $len,15 - lghi $t0,0 - stg $t0,16*$SIZE_T($sp) - stg $t0,16*$SIZE_T+8($sp) - bras $t1,3f - mvc 16*$SIZE_T(1,$sp),0($inp) -3: ex $len,0($t1) - lghi $len,0 - la $inp,16*$SIZE_T($sp) - j .Lcbc_enc_loop - -.align 16 -.Lcbc_decrypt: - larl $tbl,AES_Td - - lg $t0,0($ivp) - lg $t1,8($ivp) - stmg $t0,$t1,16*$SIZE_T($sp) - -.Lcbc_dec_loop: - stm${g} $inp,$out,2*$SIZE_T($sp) - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_decrypt - - lm${g} $inp,$key,2*$SIZE_T($sp) - sllg $s0,$s0,32 - sllg $s2,$s2,32 - lr $s0,$s1 - lr $s2,$s3 - - lg $t0,0($inp) - lg $t1,8($inp) - xg $s0,16*$SIZE_T($sp) - xg $s2,16*$SIZE_T+8($sp) - lghi $s1,16 - sl${g}r $len,$s1 - brc 4,.Lcbc_dec_tail # if borrow - brc 2,.Lcbc_dec_done # if zero - stg $s0,0($out) - stg $s2,8($out) - stmg $t0,$t1,16*$SIZE_T($sp) - - la $inp,16($inp) - la $out,16($out) - j .Lcbc_dec_loop - -.Lcbc_dec_done: - stg $s0,0($out) - stg $s2,8($out) -.Lcbc_dec_exit: - lm${g} %r6,$ra,6*$SIZE_T($sp) - stmg $t0,$t1,0($ivp) - - br $ra - -.align 16 -.Lcbc_dec_tail: - aghi $len,15 - stg $s0,16*$SIZE_T($sp) - stg $s2,16*$SIZE_T+8($sp) - bras $s1,4f - mvc 0(1,$out),16*$SIZE_T($sp) -4: ex $len,0($s1) - j .Lcbc_dec_exit -.size AES_cbc_encrypt,.-AES_cbc_encrypt -___ -} -######################################################################## -# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, -# size_t blocks, const AES_KEY *key, -# const unsigned char *ivec) -{ -my $inp="%r2"; -my $out="%r4"; # blocks and out are swapped -my $len="%r3"; -my $key="%r5"; my $iv0="%r5"; -my $ivp="%r6"; -my $fp ="%r7"; - -$code.=<<___; -.globl AES_ctr32_encrypt -.type AES_ctr32_encrypt,\@function -.align 16 -AES_ctr32_encrypt: - xgr %r3,%r4 # flip %r3 and %r4, $out and $len - xgr %r4,%r3 - xgr %r3,%r4 - llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Lctr32_software - - stm${g} %r6,$s3,6*$SIZE_T($sp) - - slgr $out,$inp - la %r1,0($key) # %r1 is permanent copy of $key - lg $iv0,0($ivp) # load ivec - lg $ivp,8($ivp) - - # prepare and allocate stack frame at the top of 4K page - # with 1K reserved for eventual signal handling - lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer - lghi $s1,-4096 - algr $s0,$sp - lgr $fp,$sp - ngr $s0,$s1 # align at page boundary - slgr $fp,$s0 # total buffer size - lgr $s2,$sp - lghi $s1,1024+16 # sl[g]fi is extended-immediate facility - slgr $fp,$s1 # deduct reservation to get usable buffer size - # buffer size is at lest 256 and at most 3072+256-16 - - la $sp,1024($s0) # alloca - srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 - st${g} $s2,0($sp) # back-chain - st${g} $fp,$SIZE_T($sp) - - slgr $len,$fp - brc 1,.Lctr32_hw_switch # not zero, no borrow - algr $fp,$len # input is shorter than allocated buffer - lghi $len,0 - st${g} $fp,$SIZE_T($sp) - -.Lctr32_hw_switch: -___ -$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower - larl $s0,OPENSSL_s390xcap_P - lg $s0,8($s0) - tmhh $s0,0x0004 # check for message_security-assist-4 - jz .Lctr32_km_loop - - llgfr $s0,%r0 - lgr $s1,%r1 - lghi %r0,0 - la %r1,16($sp) - .long 0xb92d2042 # kmctr %r4,%r2,%r2 - - llihh %r0,0x8000 # check if kmctr supports the function code - srlg %r0,%r0,0($s0) - ng %r0,16($sp) - lgr %r0,$s0 - lgr %r1,$s1 - jz .Lctr32_km_loop - -####### kmctr code - algr $out,$inp # restore $out - lgr $s1,$len # $s1 undertakes $len - j .Lctr32_kmctr_loop -.align 16 -.Lctr32_kmctr_loop: - la $s2,16($sp) - lgr $s3,$fp -.Lctr32_kmctr_prepare: - stg $iv0,0($s2) - stg $ivp,8($s2) - la $s2,16($s2) - ahi $ivp,1 # 32-bit increment, preserves upper half - brct $s3,.Lctr32_kmctr_prepare - - #la $inp,0($inp) # inp - sllg $len,$fp,4 # len - #la $out,0($out) # out - la $s2,16($sp) # iv - .long 0xb92da042 # kmctr $out,$s2,$inp - brc 1,.-4 # pay attention to "partial completion" - - slgr $s1,$fp - brc 1,.Lctr32_kmctr_loop # not zero, no borrow - algr $fp,$s1 - lghi $s1,0 - brc 4+1,.Lctr32_kmctr_loop # not zero - - l${g} $sp,0($sp) - lm${g} %r6,$s3,6*$SIZE_T($sp) - br $ra -.align 16 -___ -$code.=<<___; -.Lctr32_km_loop: - la $s2,16($sp) - lgr $s3,$fp -.Lctr32_km_prepare: - stg $iv0,0($s2) - stg $ivp,8($s2) - la $s2,16($s2) - ahi $ivp,1 # 32-bit increment, preserves upper half - brct $s3,.Lctr32_km_prepare - - la $s0,16($sp) # inp - sllg $s1,$fp,4 # len - la $s2,16($sp) # out - .long 0xb92e00a8 # km %r10,%r8 - brc 1,.-4 # pay attention to "partial completion" - - la $s2,16($sp) - lgr $s3,$fp - slgr $s2,$inp -.Lctr32_km_xor: - lg $s0,0($inp) - lg $s1,8($inp) - xg $s0,0($s2,$inp) - xg $s1,8($s2,$inp) - stg $s0,0($out,$inp) - stg $s1,8($out,$inp) - la $inp,16($inp) - brct $s3,.Lctr32_km_xor - - slgr $len,$fp - brc 1,.Lctr32_km_loop # not zero, no borrow - algr $fp,$len - lghi $len,0 - brc 4+1,.Lctr32_km_loop # not zero - - l${g} $s0,0($sp) - l${g} $s1,$SIZE_T($sp) - la $s2,16($sp) -.Lctr32_km_zap: - stg $s0,0($s2) - stg $s0,8($s2) - la $s2,16($s2) - brct $s1,.Lctr32_km_zap - - la $sp,0($s0) - lm${g} %r6,$s3,6*$SIZE_T($sp) - br $ra -.align 16 -.Lctr32_software: -___ -$code.=<<___; - stm${g} $key,$ra,5*$SIZE_T($sp) - sl${g}r $inp,$out - larl $tbl,AES_Te - llgf $t1,12($ivp) - -.Lctr32_loop: - stm${g} $inp,$out,2*$SIZE_T($sp) - llgf $s0,0($ivp) - llgf $s1,4($ivp) - llgf $s2,8($ivp) - lgr $s3,$t1 - st $t1,16*$SIZE_T($sp) - lgr %r4,$key - - bras $ra,_s390x_AES_encrypt - - lm${g} $inp,$ivp,2*$SIZE_T($sp) - llgf $t1,16*$SIZE_T($sp) - x $s0,0($inp,$out) - x $s1,4($inp,$out) - x $s2,8($inp,$out) - x $s3,12($inp,$out) - stm $s0,$s3,0($out) - - la $out,16($out) - ahi $t1,1 # 32-bit increment - brct $len,.Lctr32_loop - - lm${g} %r6,$ra,6*$SIZE_T($sp) - br $ra -.size AES_ctr32_encrypt,.-AES_ctr32_encrypt -___ -} - -######################################################################## -# void AES_xts_encrypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -{ -my $inp="%r2"; -my $out="%r4"; # len and out are swapped -my $len="%r3"; -my $key1="%r5"; # $i1 -my $key2="%r6"; # $i2 -my $fp="%r7"; # $i3 -my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... - -$code.=<<___; -.type _s390x_xts_km,\@function -.align 16 -_s390x_xts_km: -___ -$code.=<<___ if(1); - llgfr $s0,%r0 # put aside the function code - lghi $s1,0x7f - nr $s1,%r0 - lghi %r0,0 # query capability vector - la %r1,$tweak-16($sp) - .long 0xb92e0042 # km %r4,%r2 - llihh %r1,0x8000 - srlg %r1,%r1,32($s1) # check for 32+function code - ng %r1,$tweak-16($sp) - lgr %r0,$s0 # restore the function code - la %r1,0($key1) # restore $key1 - jz .Lxts_km_vanilla - - lmg $i2,$i3,$tweak($sp) # put aside the tweak value - algr $out,$inp - - oill %r0,32 # switch to xts function code - aghi $s1,-18 # - sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 - la %r1,$tweak-16($sp) - slgr %r1,$s1 # parameter block position - lmg $s0,$s3,0($key1) # load 256 bits of key material, - stmg $s0,$s3,0(%r1) # and copy it to parameter block. - # yes, it contains junk and overlaps - # with the tweak in 128-bit case. - # it's done to avoid conditional - # branch. - stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value - - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # pay attention to "partial completion" - - lrvg $s0,$tweak+0($sp) # load the last tweak - lrvg $s1,$tweak+8($sp) - stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key - - nill %r0,0xffdf # switch back to original function code - la %r1,0($key1) # restore pointer to $key1 - slgr $out,$inp - - llgc $len,2*$SIZE_T-1($sp) - nill $len,0x0f # $len%=16 - br $ra - -.align 16 -.Lxts_km_vanilla: -___ -$code.=<<___; - # prepare and allocate stack frame at the top of 4K page - # with 1K reserved for eventual signal handling - lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer - lghi $s1,-4096 - algr $s0,$sp - lgr $fp,$sp - ngr $s0,$s1 # align at page boundary - slgr $fp,$s0 # total buffer size - lgr $s2,$sp - lghi $s1,1024+16 # sl[g]fi is extended-immediate facility - slgr $fp,$s1 # deduct reservation to get usable buffer size - # buffer size is at lest 256 and at most 3072+256-16 - - la $sp,1024($s0) # alloca - nill $fp,0xfff0 # round to 16*n - st${g} $s2,0($sp) # back-chain - nill $len,0xfff0 # redundant - st${g} $fp,$SIZE_T($sp) - - slgr $len,$fp - brc 1,.Lxts_km_go # not zero, no borrow - algr $fp,$len # input is shorter than allocated buffer - lghi $len,0 - st${g} $fp,$SIZE_T($sp) - -.Lxts_km_go: - lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian - lrvg $s1,$tweak+8($s2) - - la $s2,16($sp) # vector of ascending tweak values - slgr $s2,$inp - srlg $s3,$fp,4 - j .Lxts_km_start - -.Lxts_km_loop: - la $s2,16($sp) - slgr $s2,$inp - srlg $s3,$fp,4 -.Lxts_km_prepare: - lghi $i1,0x87 - srag $i2,$s1,63 # broadcast upper bit - ngr $i1,$i2 # rem - algr $s0,$s0 - alcgr $s1,$s1 - xgr $s0,$i1 -.Lxts_km_start: - lrvgr $i1,$s0 # flip byte order - lrvgr $i2,$s1 - stg $i1,0($s2,$inp) - stg $i2,8($s2,$inp) - xg $i1,0($inp) - xg $i2,8($inp) - stg $i1,0($out,$inp) - stg $i2,8($out,$inp) - la $inp,16($inp) - brct $s3,.Lxts_km_prepare - - slgr $inp,$fp # rewind $inp - la $s2,0($out,$inp) - lgr $s3,$fp - .long 0xb92e00aa # km $s2,$s2 - brc 1,.-4 # pay attention to "partial completion" - - la $s2,16($sp) - slgr $s2,$inp - srlg $s3,$fp,4 -.Lxts_km_xor: - lg $i1,0($out,$inp) - lg $i2,8($out,$inp) - xg $i1,0($s2,$inp) - xg $i2,8($s2,$inp) - stg $i1,0($out,$inp) - stg $i2,8($out,$inp) - la $inp,16($inp) - brct $s3,.Lxts_km_xor - - slgr $len,$fp - brc 1,.Lxts_km_loop # not zero, no borrow - algr $fp,$len - lghi $len,0 - brc 4+1,.Lxts_km_loop # not zero - - l${g} $i1,0($sp) # back-chain - llgf $fp,`2*$SIZE_T-4`($sp) # bytes used - la $i2,16($sp) - srlg $fp,$fp,4 -.Lxts_km_zap: - stg $i1,0($i2) - stg $i1,8($i2) - la $i2,16($i2) - brct $fp,.Lxts_km_zap - - la $sp,0($i1) - llgc $len,2*$SIZE_T-1($i1) - nill $len,0x0f # $len%=16 - bzr $ra - - # generate one more tweak... - lghi $i1,0x87 - srag $i2,$s1,63 # broadcast upper bit - ngr $i1,$i2 # rem - algr $s0,$s0 - alcgr $s1,$s1 - xgr $s0,$i1 - - ltr $len,$len # clear zero flag - br $ra -.size _s390x_xts_km,.-_s390x_xts_km - -.globl AES_xts_encrypt -.type AES_xts_encrypt,\@function -.align 16 -AES_xts_encrypt: - xgr %r3,%r4 # flip %r3 and %r4, $out and $len - xgr %r4,%r3 - xgr %r3,%r4 -___ -$code.=<<___ if ($SIZE_T==4); - llgfr $len,$len -___ -$code.=<<___; - st${g} $len,1*$SIZE_T($sp) # save copy of $len - srag $len,$len,4 # formally wrong, because it expands - # sign byte, but who can afford asking - # to process more than 2^63-1 bytes? - # I use it, because it sets condition - # code... - bcr 8,$ra # abort if zero (i.e. less than 16) -___ -$code.=<<___ if (!$softonly); - llgf %r0,240($key2) - lhi %r1,16 - clr %r0,%r1 - jl .Lxts_enc_software - - st${g} $ra,5*$SIZE_T($sp) - stm${g} %r6,$s3,6*$SIZE_T($sp) - - sllg $len,$len,4 # $len&=~15 - slgr $out,$inp - - # generate the tweak value - l${g} $s3,$stdframe($sp) # pointer to iv - la $s2,$tweak($sp) - lmg $s0,$s1,0($s3) - lghi $s3,16 - stmg $s0,$s1,0($s2) - la %r1,0($key2) # $key2 is not needed anymore - .long 0xb92e00aa # km $s2,$s2, generate the tweak - brc 1,.-4 # can this happen? - - l %r0,240($key1) - la %r1,0($key1) # $key1 is not needed anymore - bras $ra,_s390x_xts_km - jz .Lxts_enc_km_done - - aghi $inp,-16 # take one step back - la $i3,0($out,$inp) # put aside real $out -.Lxts_enc_km_steal: - llgc $i1,16($inp) - llgc $i2,0($out,$inp) - stc $i1,0($out,$inp) - stc $i2,16($out,$inp) - la $inp,1($inp) - brct $len,.Lxts_enc_km_steal - - la $s2,0($i3) - lghi $s3,16 - lrvgr $i1,$s0 # flip byte order - lrvgr $i2,$s1 - xg $i1,0($s2) - xg $i2,8($s2) - stg $i1,0($s2) - stg $i2,8($s2) - .long 0xb92e00aa # km $s2,$s2 - brc 1,.-4 # can this happen? - lrvgr $i1,$s0 # flip byte order - lrvgr $i2,$s1 - xg $i1,0($i3) - xg $i2,8($i3) - stg $i1,0($i3) - stg $i2,8($i3) - -.Lxts_enc_km_done: - stg $sp,$tweak+0($sp) # wipe tweak - stg $sp,$tweak+8($sp) - l${g} $ra,5*$SIZE_T($sp) - lm${g} %r6,$s3,6*$SIZE_T($sp) - br $ra -.align 16 -.Lxts_enc_software: -___ -$code.=<<___; - stm${g} %r6,$ra,6*$SIZE_T($sp) - - slgr $out,$inp - - l${g} $s3,$stdframe($sp) # ivp - llgf $s0,0($s3) # load iv - llgf $s1,4($s3) - llgf $s2,8($s3) - llgf $s3,12($s3) - stm${g} %r2,%r5,2*$SIZE_T($sp) - la $key,0($key2) - larl $tbl,AES_Te - bras $ra,_s390x_AES_encrypt # generate the tweak - lm${g} %r2,%r5,2*$SIZE_T($sp) - stm $s0,$s3,$tweak($sp) # save the tweak - j .Lxts_enc_enter - -.align 16 -.Lxts_enc_loop: - lrvg $s1,$tweak+0($sp) # load the tweak in little-endian - lrvg $s3,$tweak+8($sp) - lghi %r1,0x87 - srag %r0,$s3,63 # broadcast upper bit - ngr %r1,%r0 # rem - algr $s1,$s1 - alcgr $s3,$s3 - xgr $s1,%r1 - lrvgr $s1,$s1 # flip byte order - lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits - stg $s1,$tweak+0($sp) # save the tweak - llgfr $s1,$s1 - srlg $s2,$s3,32 - stg $s3,$tweak+8($sp) - llgfr $s3,$s3 - la $inp,16($inp) # $inp+=16 -.Lxts_enc_enter: - x $s0,0($inp) # ^=*($inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing - la $key,0($key1) - bras $ra,_s390x_AES_encrypt - lm${g} %r2,%r5,2*$SIZE_T($sp) - x $s0,$tweak+0($sp) # ^=tweak - x $s1,$tweak+4($sp) - x $s2,$tweak+8($sp) - x $s3,$tweak+12($sp) - st $s0,0($out,$inp) - st $s1,4($out,$inp) - st $s2,8($out,$inp) - st $s3,12($out,$inp) - brct${g} $len,.Lxts_enc_loop - - llgc $len,`2*$SIZE_T-1`($sp) - nill $len,0x0f # $len%16 - jz .Lxts_enc_done - - la $i3,0($inp,$out) # put aside real $out -.Lxts_enc_steal: - llgc %r0,16($inp) - llgc %r1,0($out,$inp) - stc %r0,0($out,$inp) - stc %r1,16($out,$inp) - la $inp,1($inp) - brct $len,.Lxts_enc_steal - la $out,0($i3) # restore real $out - - # generate last tweak... - lrvg $s1,$tweak+0($sp) # load the tweak in little-endian - lrvg $s3,$tweak+8($sp) - lghi %r1,0x87 - srag %r0,$s3,63 # broadcast upper bit - ngr %r1,%r0 # rem - algr $s1,$s1 - alcgr $s3,$s3 - xgr $s1,%r1 - lrvgr $s1,$s1 # flip byte order - lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits - stg $s1,$tweak+0($sp) # save the tweak - llgfr $s1,$s1 - srlg $s2,$s3,32 - stg $s3,$tweak+8($sp) - llgfr $s3,$s3 - - x $s0,0($out) # ^=*(inp)|stolen cipther-text - x $s1,4($out) - x $s2,8($out) - x $s3,12($out) - st${g} $out,4*$SIZE_T($sp) - la $key,0($key1) - bras $ra,_s390x_AES_encrypt - l${g} $out,4*$SIZE_T($sp) - x $s0,`$tweak+0`($sp) # ^=tweak - x $s1,`$tweak+4`($sp) - x $s2,`$tweak+8`($sp) - x $s3,`$tweak+12`($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - -.Lxts_enc_done: - stg $sp,$tweak+0($sp) # wipe tweak - stg $sp,$twesk+8($sp) - lm${g} %r6,$ra,6*$SIZE_T($sp) - br $ra -.size AES_xts_encrypt,.-AES_xts_encrypt -___ -# void AES_xts_decrypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -$code.=<<___; -.globl AES_xts_decrypt -.type AES_xts_decrypt,\@function -.align 16 -AES_xts_decrypt: - xgr %r3,%r4 # flip %r3 and %r4, $out and $len - xgr %r4,%r3 - xgr %r3,%r4 -___ -$code.=<<___ if ($SIZE_T==4); - llgfr $len,$len -___ -$code.=<<___; - st${g} $len,1*$SIZE_T($sp) # save copy of $len - aghi $len,-16 - bcr 4,$ra # abort if less than zero. formally - # wrong, because $len is unsigned, - # but who can afford asking to - # process more than 2^63-1 bytes? - tmll $len,0x0f - jnz .Lxts_dec_proceed - aghi $len,16 -.Lxts_dec_proceed: -___ -$code.=<<___ if (!$softonly); - llgf %r0,240($key2) - lhi %r1,16 - clr %r0,%r1 - jl .Lxts_dec_software - - st${g} $ra,5*$SIZE_T($sp) - stm${g} %r6,$s3,6*$SIZE_T($sp) - - nill $len,0xfff0 # $len&=~15 - slgr $out,$inp - - # generate the tweak value - l${g} $s3,$stdframe($sp) # pointer to iv - la $s2,$tweak($sp) - lmg $s0,$s1,0($s3) - lghi $s3,16 - stmg $s0,$s1,0($s2) - la %r1,0($key2) # $key2 is not needed past this point - .long 0xb92e00aa # km $s2,$s2, generate the tweak - brc 1,.-4 # can this happen? - - l %r0,240($key1) - la %r1,0($key1) # $key1 is not needed anymore - - ltgr $len,$len - jz .Lxts_dec_km_short - bras $ra,_s390x_xts_km - jz .Lxts_dec_km_done - - lrvgr $s2,$s0 # make copy in reverse byte order - lrvgr $s3,$s1 - j .Lxts_dec_km_2ndtweak - -.Lxts_dec_km_short: - llgc $len,`2*$SIZE_T-1`($sp) - nill $len,0x0f # $len%=16 - lrvg $s0,$tweak+0($sp) # load the tweak - lrvg $s1,$tweak+8($sp) - lrvgr $s2,$s0 # make copy in reverse byte order - lrvgr $s3,$s1 - -.Lxts_dec_km_2ndtweak: - lghi $i1,0x87 - srag $i2,$s1,63 # broadcast upper bit - ngr $i1,$i2 # rem - algr $s0,$s0 - alcgr $s1,$s1 - xgr $s0,$i1 - lrvgr $i1,$s0 # flip byte order - lrvgr $i2,$s1 - - xg $i1,0($inp) - xg $i2,8($inp) - stg $i1,0($out,$inp) - stg $i2,8($out,$inp) - la $i2,0($out,$inp) - lghi $i3,16 - .long 0xb92e0066 # km $i2,$i2 - brc 1,.-4 # can this happen? - lrvgr $i1,$s0 - lrvgr $i2,$s1 - xg $i1,0($out,$inp) - xg $i2,8($out,$inp) - stg $i1,0($out,$inp) - stg $i2,8($out,$inp) - - la $i3,0($out,$inp) # put aside real $out -.Lxts_dec_km_steal: - llgc $i1,16($inp) - llgc $i2,0($out,$inp) - stc $i1,0($out,$inp) - stc $i2,16($out,$inp) - la $inp,1($inp) - brct $len,.Lxts_dec_km_steal - - lgr $s0,$s2 - lgr $s1,$s3 - xg $s0,0($i3) - xg $s1,8($i3) - stg $s0,0($i3) - stg $s1,8($i3) - la $s0,0($i3) - lghi $s1,16 - .long 0xb92e0088 # km $s0,$s0 - brc 1,.-4 # can this happen? - xg $s2,0($i3) - xg $s3,8($i3) - stg $s2,0($i3) - stg $s3,8($i3) -.Lxts_dec_km_done: - stg $sp,$tweak+0($sp) # wipe tweak - stg $sp,$tweak+8($sp) - l${g} $ra,5*$SIZE_T($sp) - lm${g} %r6,$s3,6*$SIZE_T($sp) - br $ra -.align 16 -.Lxts_dec_software: -___ -$code.=<<___; - stm${g} %r6,$ra,6*$SIZE_T($sp) - - srlg $len,$len,4 - slgr $out,$inp - - l${g} $s3,$stdframe($sp) # ivp - llgf $s0,0($s3) # load iv - llgf $s1,4($s3) - llgf $s2,8($s3) - llgf $s3,12($s3) - stm${g} %r2,%r5,2*$SIZE_T($sp) - la $key,0($key2) - larl $tbl,AES_Te - bras $ra,_s390x_AES_encrypt # generate the tweak - lm${g} %r2,%r5,2*$SIZE_T($sp) - larl $tbl,AES_Td - lt${g}r $len,$len - stm $s0,$s3,$tweak($sp) # save the tweak - jz .Lxts_dec_short - j .Lxts_dec_enter - -.align 16 -.Lxts_dec_loop: - lrvg $s1,$tweak+0($sp) # load the tweak in little-endian - lrvg $s3,$tweak+8($sp) - lghi %r1,0x87 - srag %r0,$s3,63 # broadcast upper bit - ngr %r1,%r0 # rem - algr $s1,$s1 - alcgr $s3,$s3 - xgr $s1,%r1 - lrvgr $s1,$s1 # flip byte order - lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits - stg $s1,$tweak+0($sp) # save the tweak - llgfr $s1,$s1 - srlg $s2,$s3,32 - stg $s3,$tweak+8($sp) - llgfr $s3,$s3 -.Lxts_dec_enter: - x $s0,0($inp) # tweak^=*(inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing - la $key,0($key1) - bras $ra,_s390x_AES_decrypt - lm${g} %r2,%r5,2*$SIZE_T($sp) - x $s0,$tweak+0($sp) # ^=tweak - x $s1,$tweak+4($sp) - x $s2,$tweak+8($sp) - x $s3,$tweak+12($sp) - st $s0,0($out,$inp) - st $s1,4($out,$inp) - st $s2,8($out,$inp) - st $s3,12($out,$inp) - la $inp,16($inp) - brct${g} $len,.Lxts_dec_loop - - llgc $len,`2*$SIZE_T-1`($sp) - nill $len,0x0f # $len%16 - jz .Lxts_dec_done - - # generate pair of tweaks... - lrvg $s1,$tweak+0($sp) # load the tweak in little-endian - lrvg $s3,$tweak+8($sp) - lghi %r1,0x87 - srag %r0,$s3,63 # broadcast upper bit - ngr %r1,%r0 # rem - algr $s1,$s1 - alcgr $s3,$s3 - xgr $s1,%r1 - lrvgr $i2,$s1 # flip byte order - lrvgr $i3,$s3 - stmg $i2,$i3,$tweak($sp) # save the 1st tweak - j .Lxts_dec_2ndtweak - -.align 16 -.Lxts_dec_short: - llgc $len,`2*$SIZE_T-1`($sp) - nill $len,0x0f # $len%16 - lrvg $s1,$tweak+0($sp) # load the tweak in little-endian - lrvg $s3,$tweak+8($sp) -.Lxts_dec_2ndtweak: - lghi %r1,0x87 - srag %r0,$s3,63 # broadcast upper bit - ngr %r1,%r0 # rem - algr $s1,$s1 - alcgr $s3,$s3 - xgr $s1,%r1 - lrvgr $s1,$s1 # flip byte order - lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits - stg $s1,$tweak-16+0($sp) # save the 2nd tweak - llgfr $s1,$s1 - srlg $s2,$s3,32 - stg $s3,$tweak-16+8($sp) - llgfr $s3,$s3 - - x $s0,0($inp) # tweak_the_2nd^=*(inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - stm${g} %r2,%r3,2*$SIZE_T($sp) - la $key,0($key1) - bras $ra,_s390x_AES_decrypt - lm${g} %r2,%r5,2*$SIZE_T($sp) - x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd - x $s1,$tweak-16+4($sp) - x $s2,$tweak-16+8($sp) - x $s3,$tweak-16+12($sp) - st $s0,0($out,$inp) - st $s1,4($out,$inp) - st $s2,8($out,$inp) - st $s3,12($out,$inp) - - la $i3,0($out,$inp) # put aside real $out -.Lxts_dec_steal: - llgc %r0,16($inp) - llgc %r1,0($out,$inp) - stc %r0,0($out,$inp) - stc %r1,16($out,$inp) - la $inp,1($inp) - brct $len,.Lxts_dec_steal - la $out,0($i3) # restore real $out - - lm $s0,$s3,$tweak($sp) # load the 1st tweak - x $s0,0($out) # tweak^=*(inp)|stolen cipher-text - x $s1,4($out) - x $s2,8($out) - x $s3,12($out) - st${g} $out,4*$SIZE_T($sp) - la $key,0($key1) - bras $ra,_s390x_AES_decrypt - l${g} $out,4*$SIZE_T($sp) - x $s0,$tweak+0($sp) # ^=tweak - x $s1,$tweak+4($sp) - x $s2,$tweak+8($sp) - x $s3,$tweak+12($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - stg $sp,$tweak-16+0($sp) # wipe 2nd tweak - stg $sp,$tweak-16+8($sp) -.Lxts_dec_done: - stg $sp,$tweak+0($sp) # wipe tweak - stg $sp,$twesk+8($sp) - lm${g} %r6,$ra,6*$SIZE_T($sp) - br $ra -.size AES_xts_decrypt,.-AES_xts_decrypt -___ -} -$code.=<<___; -.string "AES for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,16,8 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; # force flush diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-sparcv9.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-sparcv9.pl deleted file mode 100755 index 403c4d1290..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-sparcv9.pl +++ /dev/null @@ -1,1182 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# Version 1.1 -# -# The major reason for undertaken effort was to mitigate the hazard of -# cache-timing attack. This is [currently and initially!] addressed in -# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each. -# 2. References to them are scheduled for L2 cache latency, meaning -# that the tables don't have to reside in L1 cache. Once again, this -# is an initial draft and one should expect more countermeasures to -# be implemented... -# -# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last -# round. -# -# Even though performance was not the primary goal [on the contrary, -# extra shifts "induced" by compressed S-box and longer loop epilogue -# "induced" by scheduling for L2 have negative effect on performance], -# the code turned out to run in ~23 cycles per processed byte en-/ -# decrypted with 128-bit key. This is pretty good result for code -# with mentioned qualities and UltraSPARC core. Compared to Sun C -# generated code my encrypt procedure runs just few percents faster, -# while decrypt one - whole 50% faster [yes, Sun C failed to generate -# optimal decrypt procedure]. Compared to GNU C generated code both -# procedures are more than 60% faster:-) - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } -$locals=16; - -$acc0="%l0"; -$acc1="%o0"; -$acc2="%o1"; -$acc3="%o2"; - -$acc4="%l1"; -$acc5="%o3"; -$acc6="%o4"; -$acc7="%o5"; - -$acc8="%l2"; -$acc9="%o7"; -$acc10="%g1"; -$acc11="%g2"; - -$acc12="%l3"; -$acc13="%g3"; -$acc14="%g4"; -$acc15="%g5"; - -$t0="%l4"; -$t1="%l5"; -$t2="%l6"; -$t3="%l7"; - -$s0="%i0"; -$s1="%i1"; -$s2="%i2"; -$s3="%i3"; -$tbl="%i4"; -$key="%i5"; -$rounds="%i7"; # aliases with return address, which is off-loaded to stack - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code.=<<___ if ($bits==64); -.register %g2,#scratch -.register %g3,#scratch -___ -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; - .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -.type AES_Te,#object -.size AES_Te,(.-AES_Te) - -.align 64 -.skip 16 -_sparcv9_AES_encrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+12],$t3 - srl $s0,21,$acc0 - xor $t1,$s1,$s1 - ld [$key+16],$t0 - srl $s1,13,$acc1 ! - xor $t2,$s2,$s2 - ld [$key+20],$t1 - xor $t3,$s3,$s3 - ld [$key+24],$t2 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - nop -.Lenc_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! - srl $s2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s0,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s3,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! - ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Lenc_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - srl $t1,13,$acc1 - xor $acc15,$t3,$t3 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t0,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t3,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! - - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch te4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch te4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch te4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch te4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch te4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch te4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch te4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch te4 - srl $s1,13,$acc1 ! - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Lenc_loop - and $acc0,2040,$acc0 - -.align 32 -.Lenc_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - srl $t1,16,$acc1 ! - xor $acc15,$t3,$t3 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t2,16,$acc5 ! - and $t3,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t3,16,$acc9 - and $t0,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t0,16,$acc13 - and $t1,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t2,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! - xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! - xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_encrypt,#function -.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt) - -.align 32 -.globl AES_encrypt -AES_encrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_enc - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_enc: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl %o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type AES_encrypt,#function -.size AES_encrypt,(.-AES_encrypt) - -___ - -$code.=<<___; -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.type AES_Td,#object -.size AES_Td,(.-AES_Td) - -.align 64 -.skip 16 -_sparcv9_AES_decrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - ld [$key+12],$t3 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+16],$t0 - xor $t1,$s1,$s1 - ld [$key+20],$t1 - srl $s0,21,$acc0 ! - xor $t2,$s2,$s2 - ld [$key+24],$t2 - xor $t3,$s3,$s3 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - srl $s3,13,$acc1 - nop -.Ldec_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! - srl $s0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s2,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s1,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! - ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Ldec_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 - srl $t3,13,$acc1 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t2,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t1,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! - - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch td4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch td4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch td4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch td4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch td4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch td4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch td4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch td4 - and $acc0,2040,$acc0 ! - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Ldec_loop - srl $s3,13,$acc1 - -.align 32 -.Ldec_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 ! - srl $t3,16,$acc1 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t0,16,$acc5 ! - and $t1,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t1,16,$acc9 - and $t2,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t2,16,$acc13 - and $t3,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t0,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! - xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! - xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_decrypt,#function -.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt) - -.align 32 -.globl AES_decrypt -AES_decrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_dec - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_dec: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl %o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type AES_decrypt,#function -.size AES_decrypt,(.-AES_decrypt) -___ - -# fmovs instructions substituting for FP nops were originally added -# to meet specific instruction alignment requirements to maximize ILP. -# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have -# undesired effect, so just omit them and sacrifice some portion of -# percent in performance... -$code =~ s/fmovs.*$//gm; - -print $code; -close STDOUT; # ensure flush diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aes-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/aes-x86_64.pl deleted file mode 100755 index 34cbb5d844..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aes-x86_64.pl +++ /dev/null @@ -1,2819 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# Version 2.1. -# -# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on -# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version -# [you'll notice a lot of resemblance], such as compressed S-boxes -# in little-endian byte order, prefetch of these tables in CBC mode, -# as well as avoiding L1 cache aliasing between stack frame and key -# schedule and already mentioned tables, compressed Td4... -# -# Performance in number of cycles per processed byte for 128-bit key: -# -# ECB encrypt ECB decrypt CBC large chunk -# AMD64 33 41 13.0 -# EM64T 38 59 18.6(*) -# Core 2 30 43 14.5(*) -# -# (*) with hyper-threading off - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$verticalspin=1; # unlike 32-bit version $verticalspin performs - # ~15% better on both AMD and Intel cores -$speed_limit=512; # see aes-586.pl for details - -$code=".text\n"; - -$s0="%eax"; -$s1="%ebx"; -$s2="%ecx"; -$s3="%edx"; -$acc0="%esi"; $mask80="%rsi"; -$acc1="%edi"; $maskfe="%rdi"; -$acc2="%ebp"; $mask1b="%rbp"; -$inp="%r8"; -$out="%r9"; -$t0="%r10d"; -$t1="%r11d"; -$t2="%r12d"; -$rnds="%r13d"; -$sbox="%r14"; -$key="%r15"; - -sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; } -sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/; - $r =~ s/%[er]([sd]i)/%\1l/; - $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } -sub LO() { my $r=shift; $r =~ s/%r([a-z]+)/%e\1/; - $r =~ s/%r([0-9]+)/%r\1d/; $r; } -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } -} -sub data_word() -{ my $i; - my $last=pop(@_); - $code.=".long\t"; - while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; } - $code.=sprintf"0x%08x\n",$last; -} - -sub data_byte() -{ my $i; - my $last=pop(@_); - $code.=".byte\t"; - while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; } - $code.=sprintf"0x%02x\n",$last&0xff; -} - -sub encvert() -{ my $t3="%r8d"; # zaps $inp! - -$code.=<<___; - # favor 3-way issue Opteron pipeline... - movzb `&lo("$s0")`,$acc0 - movzb `&lo("$s1")`,$acc1 - movzb `&lo("$s2")`,$acc2 - mov 0($sbox,$acc0,8),$t0 - mov 0($sbox,$acc1,8),$t1 - mov 0($sbox,$acc2,8),$t2 - - movzb `&hi("$s1")`,$acc0 - movzb `&hi("$s2")`,$acc1 - movzb `&lo("$s3")`,$acc2 - xor 3($sbox,$acc0,8),$t0 - xor 3($sbox,$acc1,8),$t1 - mov 0($sbox,$acc2,8),$t3 - - movzb `&hi("$s3")`,$acc0 - shr \$16,$s2 - movzb `&hi("$s0")`,$acc2 - xor 3($sbox,$acc0,8),$t2 - shr \$16,$s3 - xor 3($sbox,$acc2,8),$t3 - - shr \$16,$s1 - lea 16($key),$key - shr \$16,$s0 - - movzb `&lo("$s2")`,$acc0 - movzb `&lo("$s3")`,$acc1 - movzb `&lo("$s0")`,$acc2 - xor 2($sbox,$acc0,8),$t0 - xor 2($sbox,$acc1,8),$t1 - xor 2($sbox,$acc2,8),$t2 - - movzb `&hi("$s3")`,$acc0 - movzb `&hi("$s0")`,$acc1 - movzb `&lo("$s1")`,$acc2 - xor 1($sbox,$acc0,8),$t0 - xor 1($sbox,$acc1,8),$t1 - xor 2($sbox,$acc2,8),$t3 - - mov 12($key),$s3 - movzb `&hi("$s1")`,$acc1 - movzb `&hi("$s2")`,$acc2 - mov 0($key),$s0 - xor 1($sbox,$acc1,8),$t2 - xor 1($sbox,$acc2,8),$t3 - - mov 4($key),$s1 - mov 8($key),$s2 - xor $t0,$s0 - xor $t1,$s1 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -sub enclastvert() -{ my $t3="%r8d"; # zaps $inp! - -$code.=<<___; - movzb `&lo("$s0")`,$acc0 - movzb `&lo("$s1")`,$acc1 - movzb `&lo("$s2")`,$acc2 - movzb 2($sbox,$acc0,8),$t0 - movzb 2($sbox,$acc1,8),$t1 - movzb 2($sbox,$acc2,8),$t2 - - movzb `&lo("$s3")`,$acc0 - movzb `&hi("$s1")`,$acc1 - movzb `&hi("$s2")`,$acc2 - movzb 2($sbox,$acc0,8),$t3 - mov 0($sbox,$acc1,8),$acc1 #$t0 - mov 0($sbox,$acc2,8),$acc2 #$t1 - - and \$0x0000ff00,$acc1 - and \$0x0000ff00,$acc2 - - xor $acc1,$t0 - xor $acc2,$t1 - shr \$16,$s2 - - movzb `&hi("$s3")`,$acc0 - movzb `&hi("$s0")`,$acc1 - shr \$16,$s3 - mov 0($sbox,$acc0,8),$acc0 #$t2 - mov 0($sbox,$acc1,8),$acc1 #$t3 - - and \$0x0000ff00,$acc0 - and \$0x0000ff00,$acc1 - shr \$16,$s1 - xor $acc0,$t2 - xor $acc1,$t3 - shr \$16,$s0 - - movzb `&lo("$s2")`,$acc0 - movzb `&lo("$s3")`,$acc1 - movzb `&lo("$s0")`,$acc2 - mov 0($sbox,$acc0,8),$acc0 #$t0 - mov 0($sbox,$acc1,8),$acc1 #$t1 - mov 0($sbox,$acc2,8),$acc2 #$t2 - - and \$0x00ff0000,$acc0 - and \$0x00ff0000,$acc1 - and \$0x00ff0000,$acc2 - - xor $acc0,$t0 - xor $acc1,$t1 - xor $acc2,$t2 - - movzb `&lo("$s1")`,$acc0 - movzb `&hi("$s3")`,$acc1 - movzb `&hi("$s0")`,$acc2 - mov 0($sbox,$acc0,8),$acc0 #$t3 - mov 2($sbox,$acc1,8),$acc1 #$t0 - mov 2($sbox,$acc2,8),$acc2 #$t1 - - and \$0x00ff0000,$acc0 - and \$0xff000000,$acc1 - and \$0xff000000,$acc2 - - xor $acc0,$t3 - xor $acc1,$t0 - xor $acc2,$t1 - - movzb `&hi("$s1")`,$acc0 - movzb `&hi("$s2")`,$acc1 - mov 16+12($key),$s3 - mov 2($sbox,$acc0,8),$acc0 #$t2 - mov 2($sbox,$acc1,8),$acc1 #$t3 - mov 16+0($key),$s0 - - and \$0xff000000,$acc0 - and \$0xff000000,$acc1 - - xor $acc0,$t2 - xor $acc1,$t3 - - mov 16+4($key),$s1 - mov 16+8($key),$s2 - xor $t0,$s0 - xor $t1,$s1 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -sub encstep() -{ my ($i,@s) = @_; - my $tmp0=$acc0; - my $tmp1=$acc1; - my $tmp2=$acc2; - my $out=($t0,$t1,$t2,$s[0])[$i]; - - if ($i==3) { - $tmp0=$s[1]; - $tmp1=$s[2]; - $tmp2=$s[3]; - } - $code.=" movzb ".&lo($s[0]).",$out\n"; - $code.=" mov $s[2],$tmp1\n" if ($i!=3); - $code.=" lea 16($key),$key\n" if ($i==0); - - $code.=" movzb ".&hi($s[1]).",$tmp0\n"; - $code.=" mov 0($sbox,$out,8),$out\n"; - - $code.=" shr \$16,$tmp1\n"; - $code.=" mov $s[3],$tmp2\n" if ($i!=3); - $code.=" xor 3($sbox,$tmp0,8),$out\n"; - - $code.=" movzb ".&lo($tmp1).",$tmp1\n"; - $code.=" shr \$24,$tmp2\n"; - $code.=" xor 4*$i($key),$out\n"; - - $code.=" xor 2($sbox,$tmp1,8),$out\n"; - $code.=" xor 1($sbox,$tmp2,8),$out\n"; - - $code.=" mov $t0,$s[1]\n" if ($i==3); - $code.=" mov $t1,$s[2]\n" if ($i==3); - $code.=" mov $t2,$s[3]\n" if ($i==3); - $code.="\n"; -} - -sub enclast() -{ my ($i,@s)=@_; - my $tmp0=$acc0; - my $tmp1=$acc1; - my $tmp2=$acc2; - my $out=($t0,$t1,$t2,$s[0])[$i]; - - if ($i==3) { - $tmp0=$s[1]; - $tmp1=$s[2]; - $tmp2=$s[3]; - } - $code.=" movzb ".&lo($s[0]).",$out\n"; - $code.=" mov $s[2],$tmp1\n" if ($i!=3); - - $code.=" mov 2($sbox,$out,8),$out\n"; - $code.=" shr \$16,$tmp1\n"; - $code.=" mov $s[3],$tmp2\n" if ($i!=3); - - $code.=" and \$0x000000ff,$out\n"; - $code.=" movzb ".&hi($s[1]).",$tmp0\n"; - $code.=" movzb ".&lo($tmp1).",$tmp1\n"; - $code.=" shr \$24,$tmp2\n"; - - $code.=" mov 0($sbox,$tmp0,8),$tmp0\n"; - $code.=" mov 0($sbox,$tmp1,8),$tmp1\n"; - $code.=" mov 2($sbox,$tmp2,8),$tmp2\n"; - - $code.=" and \$0x0000ff00,$tmp0\n"; - $code.=" and \$0x00ff0000,$tmp1\n"; - $code.=" and \$0xff000000,$tmp2\n"; - - $code.=" xor $tmp0,$out\n"; - $code.=" mov $t0,$s[1]\n" if ($i==3); - $code.=" xor $tmp1,$out\n"; - $code.=" mov $t1,$s[2]\n" if ($i==3); - $code.=" xor $tmp2,$out\n"; - $code.=" mov $t2,$s[3]\n" if ($i==3); - $code.="\n"; -} - -$code.=<<___; -.type _x86_64_AES_encrypt,\@abi-omnipotent -.align 16 -_x86_64_AES_encrypt: - xor 0($key),$s0 # xor with key - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - - mov 240($key),$rnds # load key->rounds - sub \$1,$rnds - jmp .Lenc_loop -.align 16 -.Lenc_loop: -___ - if ($verticalspin) { &encvert(); } - else { &encstep(0,$s0,$s1,$s2,$s3); - &encstep(1,$s1,$s2,$s3,$s0); - &encstep(2,$s2,$s3,$s0,$s1); - &encstep(3,$s3,$s0,$s1,$s2); - } -$code.=<<___; - sub \$1,$rnds - jnz .Lenc_loop -___ - if ($verticalspin) { &enclastvert(); } - else { &enclast(0,$s0,$s1,$s2,$s3); - &enclast(1,$s1,$s2,$s3,$s0); - &enclast(2,$s2,$s3,$s0,$s1); - &enclast(3,$s3,$s0,$s1,$s2); - $code.=<<___; - xor 16+0($key),$s0 # xor with key - xor 16+4($key),$s1 - xor 16+8($key),$s2 - xor 16+12($key),$s3 -___ - } -$code.=<<___; - .byte 0xf3,0xc3 # rep ret -.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt -___ - -# it's possible to implement this by shifting tN by 8, filling least -# significant byte with byte load and finally bswap-ing at the end, -# but such partial register load kills Core 2... -sub enccompactvert() -{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d"); - -$code.=<<___; - movzb `&lo("$s0")`,$t0 - movzb `&lo("$s1")`,$t1 - movzb `&lo("$s2")`,$t2 - movzb ($sbox,$t0,1),$t0 - movzb ($sbox,$t1,1),$t1 - movzb ($sbox,$t2,1),$t2 - - movzb `&lo("$s3")`,$t3 - movzb `&hi("$s1")`,$acc0 - movzb `&hi("$s2")`,$acc1 - movzb ($sbox,$t3,1),$t3 - movzb ($sbox,$acc0,1),$t4 #$t0 - movzb ($sbox,$acc1,1),$t5 #$t1 - - movzb `&hi("$s3")`,$acc2 - movzb `&hi("$s0")`,$acc0 - shr \$16,$s2 - movzb ($sbox,$acc2,1),$acc2 #$t2 - movzb ($sbox,$acc0,1),$acc0 #$t3 - shr \$16,$s3 - - movzb `&lo("$s2")`,$acc1 - shl \$8,$t4 - shl \$8,$t5 - movzb ($sbox,$acc1,1),$acc1 #$t0 - xor $t4,$t0 - xor $t5,$t1 - - movzb `&lo("$s3")`,$t4 - shr \$16,$s0 - shr \$16,$s1 - movzb `&lo("$s0")`,$t5 - shl \$8,$acc2 - shl \$8,$acc0 - movzb ($sbox,$t4,1),$t4 #$t1 - movzb ($sbox,$t5,1),$t5 #$t2 - xor $acc2,$t2 - xor $acc0,$t3 - - movzb `&lo("$s1")`,$acc2 - movzb `&hi("$s3")`,$acc0 - shl \$16,$acc1 - movzb ($sbox,$acc2,1),$acc2 #$t3 - movzb ($sbox,$acc0,1),$acc0 #$t0 - xor $acc1,$t0 - - movzb `&hi("$s0")`,$acc1 - shr \$8,$s2 - shr \$8,$s1 - movzb ($sbox,$acc1,1),$acc1 #$t1 - movzb ($sbox,$s2,1),$s3 #$t3 - movzb ($sbox,$s1,1),$s2 #$t2 - shl \$16,$t4 - shl \$16,$t5 - shl \$16,$acc2 - xor $t4,$t1 - xor $t5,$t2 - xor $acc2,$t3 - - shl \$24,$acc0 - shl \$24,$acc1 - shl \$24,$s3 - xor $acc0,$t0 - shl \$24,$s2 - xor $acc1,$t1 - mov $t0,$s0 - mov $t1,$s1 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -sub enctransform_ref() -{ my $sn = shift; - my ($acc,$r2,$tmp)=("%r8d","%r9d","%r13d"); - -$code.=<<___; - mov $sn,$acc - and \$0x80808080,$acc - mov $acc,$tmp - shr \$7,$tmp - lea ($sn,$sn),$r2 - sub $tmp,$acc - and \$0xfefefefe,$r2 - and \$0x1b1b1b1b,$acc - mov $sn,$tmp - xor $acc,$r2 - - xor $r2,$sn - rol \$24,$sn - xor $r2,$sn - ror \$16,$tmp - xor $tmp,$sn - ror \$8,$tmp - xor $tmp,$sn -___ -} - -# unlike decrypt case it does not pay off to parallelize enctransform -sub enctransform() -{ my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d"); - -$code.=<<___; - mov $s0,$acc0 - mov $s1,$acc1 - and \$0x80808080,$acc0 - and \$0x80808080,$acc1 - mov $acc0,$t0 - mov $acc1,$t1 - shr \$7,$t0 - lea ($s0,$s0),$r20 - shr \$7,$t1 - lea ($s1,$s1),$r21 - sub $t0,$acc0 - sub $t1,$acc1 - and \$0xfefefefe,$r20 - and \$0xfefefefe,$r21 - and \$0x1b1b1b1b,$acc0 - and \$0x1b1b1b1b,$acc1 - mov $s0,$t0 - mov $s1,$t1 - xor $acc0,$r20 - xor $acc1,$r21 - - xor $r20,$s0 - xor $r21,$s1 - mov $s2,$acc0 - mov $s3,$acc1 - rol \$24,$s0 - rol \$24,$s1 - and \$0x80808080,$acc0 - and \$0x80808080,$acc1 - xor $r20,$s0 - xor $r21,$s1 - mov $acc0,$t2 - mov $acc1,$t3 - ror \$16,$t0 - ror \$16,$t1 - shr \$7,$t2 - lea ($s2,$s2),$r20 - xor $t0,$s0 - xor $t1,$s1 - shr \$7,$t3 - lea ($s3,$s3),$r21 - ror \$8,$t0 - ror \$8,$t1 - sub $t2,$acc0 - sub $t3,$acc1 - xor $t0,$s0 - xor $t1,$s1 - - and \$0xfefefefe,$r20 - and \$0xfefefefe,$r21 - and \$0x1b1b1b1b,$acc0 - and \$0x1b1b1b1b,$acc1 - mov $s2,$t2 - mov $s3,$t3 - xor $acc0,$r20 - xor $acc1,$r21 - - xor $r20,$s2 - xor $r21,$s3 - rol \$24,$s2 - rol \$24,$s3 - xor $r20,$s2 - xor $r21,$s3 - mov 0($sbox),$acc0 # prefetch Te4 - ror \$16,$t2 - ror \$16,$t3 - mov 64($sbox),$acc1 - xor $t2,$s2 - xor $t3,$s3 - mov 128($sbox),$r20 - ror \$8,$t2 - ror \$8,$t3 - mov 192($sbox),$r21 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -$code.=<<___; -.type _x86_64_AES_encrypt_compact,\@abi-omnipotent -.align 16 -_x86_64_AES_encrypt_compact: - lea 128($sbox),$inp # size optimization - mov 0-128($inp),$acc1 # prefetch Te4 - mov 32-128($inp),$acc2 - mov 64-128($inp),$t0 - mov 96-128($inp),$t1 - mov 128-128($inp),$acc1 - mov 160-128($inp),$acc2 - mov 192-128($inp),$t0 - mov 224-128($inp),$t1 - jmp .Lenc_loop_compact -.align 16 -.Lenc_loop_compact: - xor 0($key),$s0 # xor with key - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - lea 16($key),$key -___ - &enccompactvert(); -$code.=<<___; - cmp 16(%rsp),$key - je .Lenc_compact_done -___ - &enctransform(); -$code.=<<___; - jmp .Lenc_loop_compact -.align 16 -.Lenc_compact_done: - xor 0($key),$s0 - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - .byte 0xf3,0xc3 # rep ret -.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact -___ - -# void AES_encrypt (const void *inp,void *out,const AES_KEY *key); -$code.=<<___; -.globl AES_encrypt -.type AES_encrypt,\@function,3 -.align 16 -.globl asm_AES_encrypt -.hidden asm_AES_encrypt -asm_AES_encrypt: -AES_encrypt: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - # allocate frame "above" key schedule - mov %rsp,%r10 - lea -63(%rdx),%rcx # %rdx is key argument - and \$-64,%rsp - sub %rsp,%rcx - neg %rcx - and \$0x3c0,%rcx - sub %rcx,%rsp - sub \$32,%rsp - - mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer -.Lenc_prologue: - - mov %rdx,$key - mov 240($key),$rnds # load rounds - - mov 0(%rdi),$s0 # load input vector - mov 4(%rdi),$s1 - mov 8(%rdi),$s2 - mov 12(%rdi),$s3 - - shl \$4,$rnds - lea ($key,$rnds),%rbp - mov $key,(%rsp) # key schedule - mov %rbp,8(%rsp) # end of key schedule - - # pick Te4 copy which can't "overlap" with stack frame or key schedule - lea .LAES_Te+2048(%rip),$sbox - lea 768(%rsp),%rbp - sub $sbox,%rbp - and \$0x300,%rbp - lea ($sbox,%rbp),$sbox - - call _x86_64_AES_encrypt_compact - - mov 16(%rsp),$out # restore out - mov 24(%rsp),%rsi # restore saved stack pointer - mov $s0,0($out) # write output vector - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lenc_epilogue: - ret -.size AES_encrypt,.-AES_encrypt -___ - -#------------------------------------------------------------------# - -sub decvert() -{ my $t3="%r8d"; # zaps $inp! - -$code.=<<___; - # favor 3-way issue Opteron pipeline... - movzb `&lo("$s0")`,$acc0 - movzb `&lo("$s1")`,$acc1 - movzb `&lo("$s2")`,$acc2 - mov 0($sbox,$acc0,8),$t0 - mov 0($sbox,$acc1,8),$t1 - mov 0($sbox,$acc2,8),$t2 - - movzb `&hi("$s3")`,$acc0 - movzb `&hi("$s0")`,$acc1 - movzb `&lo("$s3")`,$acc2 - xor 3($sbox,$acc0,8),$t0 - xor 3($sbox,$acc1,8),$t1 - mov 0($sbox,$acc2,8),$t3 - - movzb `&hi("$s1")`,$acc0 - shr \$16,$s0 - movzb `&hi("$s2")`,$acc2 - xor 3($sbox,$acc0,8),$t2 - shr \$16,$s3 - xor 3($sbox,$acc2,8),$t3 - - shr \$16,$s1 - lea 16($key),$key - shr \$16,$s2 - - movzb `&lo("$s2")`,$acc0 - movzb `&lo("$s3")`,$acc1 - movzb `&lo("$s0")`,$acc2 - xor 2($sbox,$acc0,8),$t0 - xor 2($sbox,$acc1,8),$t1 - xor 2($sbox,$acc2,8),$t2 - - movzb `&hi("$s1")`,$acc0 - movzb `&hi("$s2")`,$acc1 - movzb `&lo("$s1")`,$acc2 - xor 1($sbox,$acc0,8),$t0 - xor 1($sbox,$acc1,8),$t1 - xor 2($sbox,$acc2,8),$t3 - - movzb `&hi("$s3")`,$acc0 - mov 12($key),$s3 - movzb `&hi("$s0")`,$acc2 - xor 1($sbox,$acc0,8),$t2 - mov 0($key),$s0 - xor 1($sbox,$acc2,8),$t3 - - xor $t0,$s0 - mov 4($key),$s1 - mov 8($key),$s2 - xor $t2,$s2 - xor $t1,$s1 - xor $t3,$s3 -___ -} - -sub declastvert() -{ my $t3="%r8d"; # zaps $inp! - -$code.=<<___; - lea 2048($sbox),$sbox # size optimization - movzb `&lo("$s0")`,$acc0 - movzb `&lo("$s1")`,$acc1 - movzb `&lo("$s2")`,$acc2 - movzb ($sbox,$acc0,1),$t0 - movzb ($sbox,$acc1,1),$t1 - movzb ($sbox,$acc2,1),$t2 - - movzb `&lo("$s3")`,$acc0 - movzb `&hi("$s3")`,$acc1 - movzb `&hi("$s0")`,$acc2 - movzb ($sbox,$acc0,1),$t3 - movzb ($sbox,$acc1,1),$acc1 #$t0 - movzb ($sbox,$acc2,1),$acc2 #$t1 - - shl \$8,$acc1 - shl \$8,$acc2 - - xor $acc1,$t0 - xor $acc2,$t1 - shr \$16,$s3 - - movzb `&hi("$s1")`,$acc0 - movzb `&hi("$s2")`,$acc1 - shr \$16,$s0 - movzb ($sbox,$acc0,1),$acc0 #$t2 - movzb ($sbox,$acc1,1),$acc1 #$t3 - - shl \$8,$acc0 - shl \$8,$acc1 - shr \$16,$s1 - xor $acc0,$t2 - xor $acc1,$t3 - shr \$16,$s2 - - movzb `&lo("$s2")`,$acc0 - movzb `&lo("$s3")`,$acc1 - movzb `&lo("$s0")`,$acc2 - movzb ($sbox,$acc0,1),$acc0 #$t0 - movzb ($sbox,$acc1,1),$acc1 #$t1 - movzb ($sbox,$acc2,1),$acc2 #$t2 - - shl \$16,$acc0 - shl \$16,$acc1 - shl \$16,$acc2 - - xor $acc0,$t0 - xor $acc1,$t1 - xor $acc2,$t2 - - movzb `&lo("$s1")`,$acc0 - movzb `&hi("$s1")`,$acc1 - movzb `&hi("$s2")`,$acc2 - movzb ($sbox,$acc0,1),$acc0 #$t3 - movzb ($sbox,$acc1,1),$acc1 #$t0 - movzb ($sbox,$acc2,1),$acc2 #$t1 - - shl \$16,$acc0 - shl \$24,$acc1 - shl \$24,$acc2 - - xor $acc0,$t3 - xor $acc1,$t0 - xor $acc2,$t1 - - movzb `&hi("$s3")`,$acc0 - movzb `&hi("$s0")`,$acc1 - mov 16+12($key),$s3 - movzb ($sbox,$acc0,1),$acc0 #$t2 - movzb ($sbox,$acc1,1),$acc1 #$t3 - mov 16+0($key),$s0 - - shl \$24,$acc0 - shl \$24,$acc1 - - xor $acc0,$t2 - xor $acc1,$t3 - - mov 16+4($key),$s1 - mov 16+8($key),$s2 - lea -2048($sbox),$sbox - xor $t0,$s0 - xor $t1,$s1 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -sub decstep() -{ my ($i,@s) = @_; - my $tmp0=$acc0; - my $tmp1=$acc1; - my $tmp2=$acc2; - my $out=($t0,$t1,$t2,$s[0])[$i]; - - $code.=" mov $s[0],$out\n" if ($i!=3); - $tmp1=$s[2] if ($i==3); - $code.=" mov $s[2],$tmp1\n" if ($i!=3); - $code.=" and \$0xFF,$out\n"; - - $code.=" mov 0($sbox,$out,8),$out\n"; - $code.=" shr \$16,$tmp1\n"; - $tmp2=$s[3] if ($i==3); - $code.=" mov $s[3],$tmp2\n" if ($i!=3); - - $tmp0=$s[1] if ($i==3); - $code.=" movzb ".&hi($s[1]).",$tmp0\n"; - $code.=" and \$0xFF,$tmp1\n"; - $code.=" shr \$24,$tmp2\n"; - - $code.=" xor 3($sbox,$tmp0,8),$out\n"; - $code.=" xor 2($sbox,$tmp1,8),$out\n"; - $code.=" xor 1($sbox,$tmp2,8),$out\n"; - - $code.=" mov $t2,$s[1]\n" if ($i==3); - $code.=" mov $t1,$s[2]\n" if ($i==3); - $code.=" mov $t0,$s[3]\n" if ($i==3); - $code.="\n"; -} - -sub declast() -{ my ($i,@s)=@_; - my $tmp0=$acc0; - my $tmp1=$acc1; - my $tmp2=$acc2; - my $out=($t0,$t1,$t2,$s[0])[$i]; - - $code.=" mov $s[0],$out\n" if ($i!=3); - $tmp1=$s[2] if ($i==3); - $code.=" mov $s[2],$tmp1\n" if ($i!=3); - $code.=" and \$0xFF,$out\n"; - - $code.=" movzb 2048($sbox,$out,1),$out\n"; - $code.=" shr \$16,$tmp1\n"; - $tmp2=$s[3] if ($i==3); - $code.=" mov $s[3],$tmp2\n" if ($i!=3); - - $tmp0=$s[1] if ($i==3); - $code.=" movzb ".&hi($s[1]).",$tmp0\n"; - $code.=" and \$0xFF,$tmp1\n"; - $code.=" shr \$24,$tmp2\n"; - - $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n"; - $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n"; - $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n"; - - $code.=" shl \$8,$tmp0\n"; - $code.=" shl \$16,$tmp1\n"; - $code.=" shl \$24,$tmp2\n"; - - $code.=" xor $tmp0,$out\n"; - $code.=" mov $t2,$s[1]\n" if ($i==3); - $code.=" xor $tmp1,$out\n"; - $code.=" mov $t1,$s[2]\n" if ($i==3); - $code.=" xor $tmp2,$out\n"; - $code.=" mov $t0,$s[3]\n" if ($i==3); - $code.="\n"; -} - -$code.=<<___; -.type _x86_64_AES_decrypt,\@abi-omnipotent -.align 16 -_x86_64_AES_decrypt: - xor 0($key),$s0 # xor with key - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - - mov 240($key),$rnds # load key->rounds - sub \$1,$rnds - jmp .Ldec_loop -.align 16 -.Ldec_loop: -___ - if ($verticalspin) { &decvert(); } - else { &decstep(0,$s0,$s3,$s2,$s1); - &decstep(1,$s1,$s0,$s3,$s2); - &decstep(2,$s2,$s1,$s0,$s3); - &decstep(3,$s3,$s2,$s1,$s0); - $code.=<<___; - lea 16($key),$key - xor 0($key),$s0 # xor with key - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 -___ - } -$code.=<<___; - sub \$1,$rnds - jnz .Ldec_loop -___ - if ($verticalspin) { &declastvert(); } - else { &declast(0,$s0,$s3,$s2,$s1); - &declast(1,$s1,$s0,$s3,$s2); - &declast(2,$s2,$s1,$s0,$s3); - &declast(3,$s3,$s2,$s1,$s0); - $code.=<<___; - xor 16+0($key),$s0 # xor with key - xor 16+4($key),$s1 - xor 16+8($key),$s2 - xor 16+12($key),$s3 -___ - } -$code.=<<___; - .byte 0xf3,0xc3 # rep ret -.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt -___ - -sub deccompactvert() -{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d"); - -$code.=<<___; - movzb `&lo("$s0")`,$t0 - movzb `&lo("$s1")`,$t1 - movzb `&lo("$s2")`,$t2 - movzb ($sbox,$t0,1),$t0 - movzb ($sbox,$t1,1),$t1 - movzb ($sbox,$t2,1),$t2 - - movzb `&lo("$s3")`,$t3 - movzb `&hi("$s3")`,$acc0 - movzb `&hi("$s0")`,$acc1 - movzb ($sbox,$t3,1),$t3 - movzb ($sbox,$acc0,1),$t4 #$t0 - movzb ($sbox,$acc1,1),$t5 #$t1 - - movzb `&hi("$s1")`,$acc2 - movzb `&hi("$s2")`,$acc0 - shr \$16,$s2 - movzb ($sbox,$acc2,1),$acc2 #$t2 - movzb ($sbox,$acc0,1),$acc0 #$t3 - shr \$16,$s3 - - movzb `&lo("$s2")`,$acc1 - shl \$8,$t4 - shl \$8,$t5 - movzb ($sbox,$acc1,1),$acc1 #$t0 - xor $t4,$t0 - xor $t5,$t1 - - movzb `&lo("$s3")`,$t4 - shr \$16,$s0 - shr \$16,$s1 - movzb `&lo("$s0")`,$t5 - shl \$8,$acc2 - shl \$8,$acc0 - movzb ($sbox,$t4,1),$t4 #$t1 - movzb ($sbox,$t5,1),$t5 #$t2 - xor $acc2,$t2 - xor $acc0,$t3 - - movzb `&lo("$s1")`,$acc2 - movzb `&hi("$s1")`,$acc0 - shl \$16,$acc1 - movzb ($sbox,$acc2,1),$acc2 #$t3 - movzb ($sbox,$acc0,1),$acc0 #$t0 - xor $acc1,$t0 - - movzb `&hi("$s2")`,$acc1 - shl \$16,$t4 - shl \$16,$t5 - movzb ($sbox,$acc1,1),$s1 #$t1 - xor $t4,$t1 - xor $t5,$t2 - - movzb `&hi("$s3")`,$acc1 - shr \$8,$s0 - shl \$16,$acc2 - movzb ($sbox,$acc1,1),$s2 #$t2 - movzb ($sbox,$s0,1),$s3 #$t3 - xor $acc2,$t3 - - shl \$24,$acc0 - shl \$24,$s1 - shl \$24,$s2 - xor $acc0,$t0 - shl \$24,$s3 - xor $t1,$s1 - mov $t0,$s0 - xor $t2,$s2 - xor $t3,$s3 -___ -} - -# parallelized version! input is pair of 64-bit values: %rax=s1.s0 -# and %rcx=s3.s2, output is four 32-bit values in %eax=s0, %ebx=s1, -# %ecx=s2 and %edx=s3. -sub dectransform() -{ my ($tp10,$tp20,$tp40,$tp80,$acc0)=("%rax","%r8", "%r9", "%r10","%rbx"); - my ($tp18,$tp28,$tp48,$tp88,$acc8)=("%rcx","%r11","%r12","%r13","%rdx"); - my $prefetch = shift; - -$code.=<<___; - mov $tp10,$acc0 - mov $tp18,$acc8 - and $mask80,$acc0 - and $mask80,$acc8 - mov $acc0,$tp40 - mov $acc8,$tp48 - shr \$7,$tp40 - lea ($tp10,$tp10),$tp20 - shr \$7,$tp48 - lea ($tp18,$tp18),$tp28 - sub $tp40,$acc0 - sub $tp48,$acc8 - and $maskfe,$tp20 - and $maskfe,$tp28 - and $mask1b,$acc0 - and $mask1b,$acc8 - xor $tp20,$acc0 - xor $tp28,$acc8 - mov $acc0,$tp20 - mov $acc8,$tp28 - - and $mask80,$acc0 - and $mask80,$acc8 - mov $acc0,$tp80 - mov $acc8,$tp88 - shr \$7,$tp80 - lea ($tp20,$tp20),$tp40 - shr \$7,$tp88 - lea ($tp28,$tp28),$tp48 - sub $tp80,$acc0 - sub $tp88,$acc8 - and $maskfe,$tp40 - and $maskfe,$tp48 - and $mask1b,$acc0 - and $mask1b,$acc8 - xor $tp40,$acc0 - xor $tp48,$acc8 - mov $acc0,$tp40 - mov $acc8,$tp48 - - and $mask80,$acc0 - and $mask80,$acc8 - mov $acc0,$tp80 - mov $acc8,$tp88 - shr \$7,$tp80 - xor $tp10,$tp20 # tp2^=tp1 - shr \$7,$tp88 - xor $tp18,$tp28 # tp2^=tp1 - sub $tp80,$acc0 - sub $tp88,$acc8 - lea ($tp40,$tp40),$tp80 - lea ($tp48,$tp48),$tp88 - xor $tp10,$tp40 # tp4^=tp1 - xor $tp18,$tp48 # tp4^=tp1 - and $maskfe,$tp80 - and $maskfe,$tp88 - and $mask1b,$acc0 - and $mask1b,$acc8 - xor $acc0,$tp80 - xor $acc8,$tp88 - - xor $tp80,$tp10 # tp1^=tp8 - xor $tp88,$tp18 # tp1^=tp8 - xor $tp80,$tp20 # tp2^tp1^=tp8 - xor $tp88,$tp28 # tp2^tp1^=tp8 - mov $tp10,$acc0 - mov $tp18,$acc8 - xor $tp80,$tp40 # tp4^tp1^=tp8 - xor $tp88,$tp48 # tp4^tp1^=tp8 - shr \$32,$acc0 - shr \$32,$acc8 - xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1 - xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1 - rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8) - rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8) - xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 - xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 - - rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8) - rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8) - xor `&LO("$tp80")`,`&LO("$tp10")` - xor `&LO("$tp88")`,`&LO("$tp18")` - shr \$32,$tp80 - shr \$32,$tp88 - xor `&LO("$tp80")`,`&LO("$acc0")` - xor `&LO("$tp88")`,`&LO("$acc8")` - - mov $tp20,$tp80 - mov $tp28,$tp88 - shr \$32,$tp80 - shr \$32,$tp88 - rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24) - rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24) - rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24) - rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24) - xor `&LO("$tp20")`,`&LO("$tp10")` - xor `&LO("$tp28")`,`&LO("$tp18")` - mov $tp40,$tp20 - mov $tp48,$tp28 - xor `&LO("$tp80")`,`&LO("$acc0")` - xor `&LO("$tp88")`,`&LO("$acc8")` - - `"mov 0($sbox),$mask80" if ($prefetch)` - shr \$32,$tp20 - shr \$32,$tp28 - `"mov 64($sbox),$maskfe" if ($prefetch)` - rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16) - rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16) - `"mov 128($sbox),$mask1b" if ($prefetch)` - rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16) - rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16) - `"mov 192($sbox),$tp80" if ($prefetch)` - xor `&LO("$tp40")`,`&LO("$tp10")` - xor `&LO("$tp48")`,`&LO("$tp18")` - `"mov 256($sbox),$tp88" if ($prefetch)` - xor `&LO("$tp20")`,`&LO("$acc0")` - xor `&LO("$tp28")`,`&LO("$acc8")` -___ -} - -$code.=<<___; -.type _x86_64_AES_decrypt_compact,\@abi-omnipotent -.align 16 -_x86_64_AES_decrypt_compact: - lea 128($sbox),$inp # size optimization - mov 0-128($inp),$acc1 # prefetch Td4 - mov 32-128($inp),$acc2 - mov 64-128($inp),$t0 - mov 96-128($inp),$t1 - mov 128-128($inp),$acc1 - mov 160-128($inp),$acc2 - mov 192-128($inp),$t0 - mov 224-128($inp),$t1 - jmp .Ldec_loop_compact - -.align 16 -.Ldec_loop_compact: - xor 0($key),$s0 # xor with key - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - lea 16($key),$key -___ - &deccompactvert(); -$code.=<<___; - cmp 16(%rsp),$key - je .Ldec_compact_done - - mov 256+0($sbox),$mask80 - shl \$32,%rbx - shl \$32,%rdx - mov 256+8($sbox),$maskfe - or %rbx,%rax - or %rdx,%rcx - mov 256+16($sbox),$mask1b -___ - &dectransform(1); -$code.=<<___; - jmp .Ldec_loop_compact -.align 16 -.Ldec_compact_done: - xor 0($key),$s0 - xor 4($key),$s1 - xor 8($key),$s2 - xor 12($key),$s3 - .byte 0xf3,0xc3 # rep ret -.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact -___ - -# void AES_decrypt (const void *inp,void *out,const AES_KEY *key); -$code.=<<___; -.globl AES_decrypt -.type AES_decrypt,\@function,3 -.align 16 -.globl asm_AES_decrypt -.hidden asm_AES_decrypt -asm_AES_decrypt: -AES_decrypt: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - # allocate frame "above" key schedule - mov %rsp,%r10 - lea -63(%rdx),%rcx # %rdx is key argument - and \$-64,%rsp - sub %rsp,%rcx - neg %rcx - and \$0x3c0,%rcx - sub %rcx,%rsp - sub \$32,%rsp - - mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer -.Ldec_prologue: - - mov %rdx,$key - mov 240($key),$rnds # load rounds - - mov 0(%rdi),$s0 # load input vector - mov 4(%rdi),$s1 - mov 8(%rdi),$s2 - mov 12(%rdi),$s3 - - shl \$4,$rnds - lea ($key,$rnds),%rbp - mov $key,(%rsp) # key schedule - mov %rbp,8(%rsp) # end of key schedule - - # pick Td4 copy which can't "overlap" with stack frame or key schedule - lea .LAES_Td+2048(%rip),$sbox - lea 768(%rsp),%rbp - sub $sbox,%rbp - and \$0x300,%rbp - lea ($sbox,%rbp),$sbox - shr \$3,%rbp # recall "magic" constants! - add %rbp,$sbox - - call _x86_64_AES_decrypt_compact - - mov 16(%rsp),$out # restore out - mov 24(%rsp),%rsi # restore saved stack pointer - mov $s0,0($out) # write output vector - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Ldec_epilogue: - ret -.size AES_decrypt,.-AES_decrypt -___ -#------------------------------------------------------------------# - -sub enckey() -{ -$code.=<<___; - movz %dl,%esi # rk[i]>>0 - movzb -128(%rbp,%rsi),%ebx - movz %dh,%esi # rk[i]>>8 - shl \$24,%ebx - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - shr \$16,%edx - movz %dl,%esi # rk[i]>>16 - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - movz %dh,%esi # rk[i]>>24 - shl \$8,%ebx - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - shl \$16,%ebx - xor %ebx,%eax - - xor 1024-128(%rbp,%rcx,4),%eax # rcon -___ -} - -# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, -# AES_KEY *key) -$code.=<<___; -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,\@function,3 -.align 16 -private_AES_set_encrypt_key: - push %rbx - push %rbp - push %r12 # redundant, but allows to share - push %r13 # exception handler... - push %r14 - push %r15 - sub \$8,%rsp -.Lenc_key_prologue: - - call _x86_64_AES_set_encrypt_key - - mov 8(%rsp),%r15 - mov 16(%rsp),%r14 - mov 24(%rsp),%r13 - mov 32(%rsp),%r12 - mov 40(%rsp),%rbp - mov 48(%rsp),%rbx - add \$56,%rsp -.Lenc_key_epilogue: - ret -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key - -.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent -.align 16 -_x86_64_AES_set_encrypt_key: - mov %esi,%ecx # %ecx=bits - mov %rdi,%rsi # %rsi=userKey - mov %rdx,%rdi # %rdi=key - - test \$-1,%rsi - jz .Lbadpointer - test \$-1,%rdi - jz .Lbadpointer - - lea .LAES_Te(%rip),%rbp - lea 2048+128(%rbp),%rbp - - # prefetch Te4 - mov 0-128(%rbp),%eax - mov 32-128(%rbp),%ebx - mov 64-128(%rbp),%r8d - mov 96-128(%rbp),%edx - mov 128-128(%rbp),%eax - mov 160-128(%rbp),%ebx - mov 192-128(%rbp),%r8d - mov 224-128(%rbp),%edx - - cmp \$128,%ecx - je .L10rounds - cmp \$192,%ecx - je .L12rounds - cmp \$256,%ecx - je .L14rounds - mov \$-2,%rax # invalid number of bits - jmp .Lexit - -.L10rounds: - mov 0(%rsi),%rax # copy first 4 dwords - mov 8(%rsi),%rdx - mov %rax,0(%rdi) - mov %rdx,8(%rdi) - - shr \$32,%rdx - xor %ecx,%ecx - jmp .L10shortcut -.align 4 -.L10loop: - mov 0(%rdi),%eax # rk[0] - mov 12(%rdi),%edx # rk[3] -.L10shortcut: -___ - &enckey (); -$code.=<<___; - mov %eax,16(%rdi) # rk[4] - xor 4(%rdi),%eax - mov %eax,20(%rdi) # rk[5] - xor 8(%rdi),%eax - mov %eax,24(%rdi) # rk[6] - xor 12(%rdi),%eax - mov %eax,28(%rdi) # rk[7] - add \$1,%ecx - lea 16(%rdi),%rdi - cmp \$10,%ecx - jl .L10loop - - movl \$10,80(%rdi) # setup number of rounds - xor %rax,%rax - jmp .Lexit - -.L12rounds: - mov 0(%rsi),%rax # copy first 6 dwords - mov 8(%rsi),%rbx - mov 16(%rsi),%rdx - mov %rax,0(%rdi) - mov %rbx,8(%rdi) - mov %rdx,16(%rdi) - - shr \$32,%rdx - xor %ecx,%ecx - jmp .L12shortcut -.align 4 -.L12loop: - mov 0(%rdi),%eax # rk[0] - mov 20(%rdi),%edx # rk[5] -.L12shortcut: -___ - &enckey (); -$code.=<<___; - mov %eax,24(%rdi) # rk[6] - xor 4(%rdi),%eax - mov %eax,28(%rdi) # rk[7] - xor 8(%rdi),%eax - mov %eax,32(%rdi) # rk[8] - xor 12(%rdi),%eax - mov %eax,36(%rdi) # rk[9] - - cmp \$7,%ecx - je .L12break - add \$1,%ecx - - xor 16(%rdi),%eax - mov %eax,40(%rdi) # rk[10] - xor 20(%rdi),%eax - mov %eax,44(%rdi) # rk[11] - - lea 24(%rdi),%rdi - jmp .L12loop -.L12break: - movl \$12,72(%rdi) # setup number of rounds - xor %rax,%rax - jmp .Lexit - -.L14rounds: - mov 0(%rsi),%rax # copy first 8 dwords - mov 8(%rsi),%rbx - mov 16(%rsi),%rcx - mov 24(%rsi),%rdx - mov %rax,0(%rdi) - mov %rbx,8(%rdi) - mov %rcx,16(%rdi) - mov %rdx,24(%rdi) - - shr \$32,%rdx - xor %ecx,%ecx - jmp .L14shortcut -.align 4 -.L14loop: - mov 0(%rdi),%eax # rk[0] - mov 28(%rdi),%edx # rk[4] -.L14shortcut: -___ - &enckey (); -$code.=<<___; - mov %eax,32(%rdi) # rk[8] - xor 4(%rdi),%eax - mov %eax,36(%rdi) # rk[9] - xor 8(%rdi),%eax - mov %eax,40(%rdi) # rk[10] - xor 12(%rdi),%eax - mov %eax,44(%rdi) # rk[11] - - cmp \$6,%ecx - je .L14break - add \$1,%ecx - - mov %eax,%edx - mov 16(%rdi),%eax # rk[4] - movz %dl,%esi # rk[11]>>0 - movzb -128(%rbp,%rsi),%ebx - movz %dh,%esi # rk[11]>>8 - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - shr \$16,%edx - shl \$8,%ebx - movz %dl,%esi # rk[11]>>16 - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - movz %dh,%esi # rk[11]>>24 - shl \$16,%ebx - xor %ebx,%eax - - movzb -128(%rbp,%rsi),%ebx - shl \$24,%ebx - xor %ebx,%eax - - mov %eax,48(%rdi) # rk[12] - xor 20(%rdi),%eax - mov %eax,52(%rdi) # rk[13] - xor 24(%rdi),%eax - mov %eax,56(%rdi) # rk[14] - xor 28(%rdi),%eax - mov %eax,60(%rdi) # rk[15] - - lea 32(%rdi),%rdi - jmp .L14loop -.L14break: - movl \$14,48(%rdi) # setup number of rounds - xor %rax,%rax - jmp .Lexit - -.Lbadpointer: - mov \$-1,%rax -.Lexit: - .byte 0xf3,0xc3 # rep ret -.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key -___ - -sub deckey_ref() -{ my ($i,$ptr,$te,$td) = @_; - my ($tp1,$tp2,$tp4,$tp8,$acc)=("%eax","%ebx","%edi","%edx","%r8d"); -$code.=<<___; - mov $i($ptr),$tp1 - mov $tp1,$acc - and \$0x80808080,$acc - mov $acc,$tp4 - shr \$7,$tp4 - lea 0($tp1,$tp1),$tp2 - sub $tp4,$acc - and \$0xfefefefe,$tp2 - and \$0x1b1b1b1b,$acc - xor $tp2,$acc - mov $acc,$tp2 - - and \$0x80808080,$acc - mov $acc,$tp8 - shr \$7,$tp8 - lea 0($tp2,$tp2),$tp4 - sub $tp8,$acc - and \$0xfefefefe,$tp4 - and \$0x1b1b1b1b,$acc - xor $tp1,$tp2 # tp2^tp1 - xor $tp4,$acc - mov $acc,$tp4 - - and \$0x80808080,$acc - mov $acc,$tp8 - shr \$7,$tp8 - sub $tp8,$acc - lea 0($tp4,$tp4),$tp8 - xor $tp1,$tp4 # tp4^tp1 - and \$0xfefefefe,$tp8 - and \$0x1b1b1b1b,$acc - xor $acc,$tp8 - - xor $tp8,$tp1 # tp1^tp8 - rol \$8,$tp1 # ROTATE(tp1^tp8,8) - xor $tp8,$tp2 # tp2^tp1^tp8 - xor $tp8,$tp4 # tp4^tp1^tp8 - xor $tp2,$tp8 - xor $tp4,$tp8 # tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2 - - xor $tp8,$tp1 - rol \$24,$tp2 # ROTATE(tp2^tp1^tp8,24) - xor $tp2,$tp1 - rol \$16,$tp4 # ROTATE(tp4^tp1^tp8,16) - xor $tp4,$tp1 - - mov $tp1,$i($ptr) -___ -} - -# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, -# AES_KEY *key) -$code.=<<___; -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,\@function,3 -.align 16 -private_AES_set_decrypt_key: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - push %rdx # save key schedule -.Ldec_key_prologue: - - call _x86_64_AES_set_encrypt_key - mov (%rsp),%r8 # restore key schedule - cmp \$0,%eax - jne .Labort - - mov 240(%r8),%r14d # pull number of rounds - xor %rdi,%rdi - lea (%rdi,%r14d,4),%rcx - mov %r8,%rsi - lea (%r8,%rcx,4),%rdi # pointer to last chunk -.align 4 -.Linvert: - mov 0(%rsi),%rax - mov 8(%rsi),%rbx - mov 0(%rdi),%rcx - mov 8(%rdi),%rdx - mov %rax,0(%rdi) - mov %rbx,8(%rdi) - mov %rcx,0(%rsi) - mov %rdx,8(%rsi) - lea 16(%rsi),%rsi - lea -16(%rdi),%rdi - cmp %rsi,%rdi - jne .Linvert - - lea .LAES_Te+2048+1024(%rip),%rax # rcon - - mov 40(%rax),$mask80 - mov 48(%rax),$maskfe - mov 56(%rax),$mask1b - - mov %r8,$key - sub \$1,%r14d -.align 4 -.Lpermute: - lea 16($key),$key - mov 0($key),%rax - mov 8($key),%rcx -___ - &dectransform (); -$code.=<<___; - mov %eax,0($key) - mov %ebx,4($key) - mov %ecx,8($key) - mov %edx,12($key) - sub \$1,%r14d - jnz .Lpermute - - xor %rax,%rax -.Labort: - mov 8(%rsp),%r15 - mov 16(%rsp),%r14 - mov 24(%rsp),%r13 - mov 32(%rsp),%r12 - mov 40(%rsp),%rbp - mov 48(%rsp),%rbx - add \$56,%rsp -.Ldec_key_epilogue: - ret -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key -___ - -# void AES_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivp,const int enc); -{ -# stack frame layout -# -8(%rsp) return address -my $keyp="0(%rsp)"; # one to pass as $key -my $keyend="8(%rsp)"; # &(keyp->rd_key[4*keyp->rounds]) -my $_rsp="16(%rsp)"; # saved %rsp -my $_inp="24(%rsp)"; # copy of 1st parameter, inp -my $_out="32(%rsp)"; # copy of 2nd parameter, out -my $_len="40(%rsp)"; # copy of 3rd parameter, length -my $_key="48(%rsp)"; # copy of 4th parameter, key -my $_ivp="56(%rsp)"; # copy of 5th parameter, ivp -my $ivec="64(%rsp)"; # ivec[16] -my $aes_key="80(%rsp)"; # copy of aes_key -my $mark="80+240(%rsp)"; # copy of aes_key->rounds - -$code.=<<___; -.globl AES_cbc_encrypt -.type AES_cbc_encrypt,\@function,6 -.align 16 -.extern OPENSSL_ia32cap_P -.globl asm_AES_cbc_encrypt -.hidden asm_AES_cbc_encrypt -asm_AES_cbc_encrypt: -AES_cbc_encrypt: - cmp \$0,%rdx # check length - je .Lcbc_epilogue - pushfq - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 -.Lcbc_prologue: - - cld - mov %r9d,%r9d # clear upper half of enc - - lea .LAES_Te(%rip),$sbox - cmp \$0,%r9 - jne .Lcbc_picked_te - lea .LAES_Td(%rip),$sbox -.Lcbc_picked_te: - - mov OPENSSL_ia32cap_P(%rip),%r10d - cmp \$$speed_limit,%rdx - jb .Lcbc_slow_prologue - test \$15,%rdx - jnz .Lcbc_slow_prologue - bt \$28,%r10d - jc .Lcbc_slow_prologue - - # allocate aligned stack frame... - lea -88-248(%rsp),$key - and \$-64,$key - - # ... and make sure it doesn't alias with AES_T[ed] modulo 4096 - mov $sbox,%r10 - lea 2304($sbox),%r11 - mov $key,%r12 - and \$0xFFF,%r10 # s = $sbox&0xfff - and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff - and \$0xFFF,%r12 # p = %rsp&0xfff - - cmp %r11,%r12 # if (p=>e) %rsp =- (p-e); - jb .Lcbc_te_break_out - sub %r11,%r12 - sub %r12,$key - jmp .Lcbc_te_ok -.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz - sub %r10,%r12 - and \$0xFFF,%r12 - add \$320,%r12 - sub %r12,$key -.align 4 -.Lcbc_te_ok: - - xchg %rsp,$key - #add \$8,%rsp # reserve for return address! - mov $key,$_rsp # save %rsp -.Lcbc_fast_body: - mov %rdi,$_inp # save copy of inp - mov %rsi,$_out # save copy of out - mov %rdx,$_len # save copy of len - mov %rcx,$_key # save copy of key - mov %r8,$_ivp # save copy of ivp - movl \$0,$mark # copy of aes_key->rounds = 0; - mov %r8,%rbp # rearrange input arguments - mov %r9,%rbx - mov %rsi,$out - mov %rdi,$inp - mov %rcx,$key - - mov 240($key),%eax # key->rounds - # do we copy key schedule to stack? - mov $key,%r10 - sub $sbox,%r10 - and \$0xfff,%r10 - cmp \$2304,%r10 - jb .Lcbc_do_ecopy - cmp \$4096-248,%r10 - jb .Lcbc_skip_ecopy -.align 4 -.Lcbc_do_ecopy: - mov $key,%rsi - lea $aes_key,%rdi - lea $aes_key,$key - mov \$240/8,%ecx - .long 0x90A548F3 # rep movsq - mov %eax,(%rdi) # copy aes_key->rounds -.Lcbc_skip_ecopy: - mov $key,$keyp # save key pointer - - mov \$18,%ecx -.align 4 -.Lcbc_prefetch_te: - mov 0($sbox),%r10 - mov 32($sbox),%r11 - mov 64($sbox),%r12 - mov 96($sbox),%r13 - lea 128($sbox),$sbox - sub \$1,%ecx - jnz .Lcbc_prefetch_te - lea -2304($sbox),$sbox - - cmp \$0,%rbx - je .LFAST_DECRYPT - -#----------------------------- ENCRYPT -----------------------------# - mov 0(%rbp),$s0 # load iv - mov 4(%rbp),$s1 - mov 8(%rbp),$s2 - mov 12(%rbp),$s3 - -.align 4 -.Lcbc_fast_enc_loop: - xor 0($inp),$s0 - xor 4($inp),$s1 - xor 8($inp),$s2 - xor 12($inp),$s3 - mov $keyp,$key # restore key - mov $inp,$_inp # if ($verticalspin) save inp - - call _x86_64_AES_encrypt - - mov $_inp,$inp # if ($verticalspin) restore inp - mov $_len,%r10 - mov $s0,0($out) - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - lea 16($inp),$inp - lea 16($out),$out - sub \$16,%r10 - test \$-16,%r10 - mov %r10,$_len - jnz .Lcbc_fast_enc_loop - mov $_ivp,%rbp # restore ivp - mov $s0,0(%rbp) # save ivec - mov $s1,4(%rbp) - mov $s2,8(%rbp) - mov $s3,12(%rbp) - - jmp .Lcbc_fast_cleanup - -#----------------------------- DECRYPT -----------------------------# -.align 16 -.LFAST_DECRYPT: - cmp $inp,$out - je .Lcbc_fast_dec_in_place - - mov %rbp,$ivec -.align 4 -.Lcbc_fast_dec_loop: - mov 0($inp),$s0 # read input - mov 4($inp),$s1 - mov 8($inp),$s2 - mov 12($inp),$s3 - mov $keyp,$key # restore key - mov $inp,$_inp # if ($verticalspin) save inp - - call _x86_64_AES_decrypt - - mov $ivec,%rbp # load ivp - mov $_inp,$inp # if ($verticalspin) restore inp - mov $_len,%r10 # load len - xor 0(%rbp),$s0 # xor iv - xor 4(%rbp),$s1 - xor 8(%rbp),$s2 - xor 12(%rbp),$s3 - mov $inp,%rbp # current input, next iv - - sub \$16,%r10 - mov %r10,$_len # update len - mov %rbp,$ivec # update ivp - - mov $s0,0($out) # write output - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - lea 16($inp),$inp - lea 16($out),$out - jnz .Lcbc_fast_dec_loop - mov $_ivp,%r12 # load user ivp - mov 0(%rbp),%r10 # load iv - mov 8(%rbp),%r11 - mov %r10,0(%r12) # copy back to user - mov %r11,8(%r12) - jmp .Lcbc_fast_cleanup - -.align 16 -.Lcbc_fast_dec_in_place: - mov 0(%rbp),%r10 # copy iv to stack - mov 8(%rbp),%r11 - mov %r10,0+$ivec - mov %r11,8+$ivec -.align 4 -.Lcbc_fast_dec_in_place_loop: - mov 0($inp),$s0 # load input - mov 4($inp),$s1 - mov 8($inp),$s2 - mov 12($inp),$s3 - mov $keyp,$key # restore key - mov $inp,$_inp # if ($verticalspin) save inp - - call _x86_64_AES_decrypt - - mov $_inp,$inp # if ($verticalspin) restore inp - mov $_len,%r10 - xor 0+$ivec,$s0 - xor 4+$ivec,$s1 - xor 8+$ivec,$s2 - xor 12+$ivec,$s3 - - mov 0($inp),%r11 # load input - mov 8($inp),%r12 - sub \$16,%r10 - jz .Lcbc_fast_dec_in_place_done - - mov %r11,0+$ivec # copy input to iv - mov %r12,8+$ivec - - mov $s0,0($out) # save output [zaps input] - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - lea 16($inp),$inp - lea 16($out),$out - mov %r10,$_len - jmp .Lcbc_fast_dec_in_place_loop -.Lcbc_fast_dec_in_place_done: - mov $_ivp,%rdi - mov %r11,0(%rdi) # copy iv back to user - mov %r12,8(%rdi) - - mov $s0,0($out) # save output [zaps input] - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - -.align 4 -.Lcbc_fast_cleanup: - cmpl \$0,$mark # was the key schedule copied? - lea $aes_key,%rdi - je .Lcbc_exit - mov \$240/8,%ecx - xor %rax,%rax - .long 0x90AB48F3 # rep stosq - - jmp .Lcbc_exit - -#--------------------------- SLOW ROUTINE ---------------------------# -.align 16 -.Lcbc_slow_prologue: - # allocate aligned stack frame... - lea -88(%rsp),%rbp - and \$-64,%rbp - # ... just "above" key schedule - lea -88-63(%rcx),%r10 - sub %rbp,%r10 - neg %r10 - and \$0x3c0,%r10 - sub %r10,%rbp - - xchg %rsp,%rbp - #add \$8,%rsp # reserve for return address! - mov %rbp,$_rsp # save %rsp -.Lcbc_slow_body: - #mov %rdi,$_inp # save copy of inp - #mov %rsi,$_out # save copy of out - #mov %rdx,$_len # save copy of len - #mov %rcx,$_key # save copy of key - mov %r8,$_ivp # save copy of ivp - mov %r8,%rbp # rearrange input arguments - mov %r9,%rbx - mov %rsi,$out - mov %rdi,$inp - mov %rcx,$key - mov %rdx,%r10 - - mov 240($key),%eax - mov $key,$keyp # save key pointer - shl \$4,%eax - lea ($key,%rax),%rax - mov %rax,$keyend - - # pick Te4 copy which can't "overlap" with stack frame or key scdedule - lea 2048($sbox),$sbox - lea 768-8(%rsp),%rax - sub $sbox,%rax - and \$0x300,%rax - lea ($sbox,%rax),$sbox - - cmp \$0,%rbx - je .LSLOW_DECRYPT - -#--------------------------- SLOW ENCRYPT ---------------------------# - test \$-16,%r10 # check upon length - mov 0(%rbp),$s0 # load iv - mov 4(%rbp),$s1 - mov 8(%rbp),$s2 - mov 12(%rbp),$s3 - jz .Lcbc_slow_enc_tail # short input... - -.align 4 -.Lcbc_slow_enc_loop: - xor 0($inp),$s0 - xor 4($inp),$s1 - xor 8($inp),$s2 - xor 12($inp),$s3 - mov $keyp,$key # restore key - mov $inp,$_inp # save inp - mov $out,$_out # save out - mov %r10,$_len # save len - - call _x86_64_AES_encrypt_compact - - mov $_inp,$inp # restore inp - mov $_out,$out # restore out - mov $_len,%r10 # restore len - mov $s0,0($out) - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - lea 16($inp),$inp - lea 16($out),$out - sub \$16,%r10 - test \$-16,%r10 - jnz .Lcbc_slow_enc_loop - test \$15,%r10 - jnz .Lcbc_slow_enc_tail - mov $_ivp,%rbp # restore ivp - mov $s0,0(%rbp) # save ivec - mov $s1,4(%rbp) - mov $s2,8(%rbp) - mov $s3,12(%rbp) - - jmp .Lcbc_exit - -.align 4 -.Lcbc_slow_enc_tail: - mov %rax,%r11 - mov %rcx,%r12 - mov %r10,%rcx - mov $inp,%rsi - mov $out,%rdi - .long 0x9066A4F3 # rep movsb - mov \$16,%rcx # zero tail - sub %r10,%rcx - xor %rax,%rax - .long 0x9066AAF3 # rep stosb - mov $out,$inp # this is not a mistake! - mov \$16,%r10 # len=16 - mov %r11,%rax - mov %r12,%rcx - jmp .Lcbc_slow_enc_loop # one more spin... -#--------------------------- SLOW DECRYPT ---------------------------# -.align 16 -.LSLOW_DECRYPT: - shr \$3,%rax - add %rax,$sbox # recall "magic" constants! - - mov 0(%rbp),%r11 # copy iv to stack - mov 8(%rbp),%r12 - mov %r11,0+$ivec - mov %r12,8+$ivec - -.align 4 -.Lcbc_slow_dec_loop: - mov 0($inp),$s0 # load input - mov 4($inp),$s1 - mov 8($inp),$s2 - mov 12($inp),$s3 - mov $keyp,$key # restore key - mov $inp,$_inp # save inp - mov $out,$_out # save out - mov %r10,$_len # save len - - call _x86_64_AES_decrypt_compact - - mov $_inp,$inp # restore inp - mov $_out,$out # restore out - mov $_len,%r10 - xor 0+$ivec,$s0 - xor 4+$ivec,$s1 - xor 8+$ivec,$s2 - xor 12+$ivec,$s3 - - mov 0($inp),%r11 # load input - mov 8($inp),%r12 - sub \$16,%r10 - jc .Lcbc_slow_dec_partial - jz .Lcbc_slow_dec_done - - mov %r11,0+$ivec # copy input to iv - mov %r12,8+$ivec - - mov $s0,0($out) # save output [can zap input] - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - lea 16($inp),$inp - lea 16($out),$out - jmp .Lcbc_slow_dec_loop -.Lcbc_slow_dec_done: - mov $_ivp,%rdi - mov %r11,0(%rdi) # copy iv back to user - mov %r12,8(%rdi) - - mov $s0,0($out) # save output [can zap input] - mov $s1,4($out) - mov $s2,8($out) - mov $s3,12($out) - - jmp .Lcbc_exit - -.align 4 -.Lcbc_slow_dec_partial: - mov $_ivp,%rdi - mov %r11,0(%rdi) # copy iv back to user - mov %r12,8(%rdi) - - mov $s0,0+$ivec # save output to stack - mov $s1,4+$ivec - mov $s2,8+$ivec - mov $s3,12+$ivec - - mov $out,%rdi - lea $ivec,%rsi - lea 16(%r10),%rcx - .long 0x9066A4F3 # rep movsb - jmp .Lcbc_exit - -.align 16 -.Lcbc_exit: - mov $_rsp,%rsi - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lcbc_popfq: - popfq -.Lcbc_epilogue: - ret -.size AES_cbc_encrypt,.-AES_cbc_encrypt -___ -} - -$code.=<<___; -.align 64 -.LAES_Te: -___ - &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); - &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); - &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); - &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec); - &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa); - &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb); - &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45); - &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b); - &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c); - &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83); - &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9); - &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a); - &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d); - &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f); - &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df); - &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea); - &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34); - &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b); - &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d); - &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413); - &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1); - &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6); - &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972); - &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85); - &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed); - &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511); - &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe); - &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b); - &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05); - &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1); - &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142); - &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf); - &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3); - &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e); - &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a); - &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6); - &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3); - &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b); - &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428); - &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad); - &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14); - &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8); - &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4); - &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2); - &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda); - &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949); - &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf); - &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810); - &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c); - &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697); - &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e); - &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f); - &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc); - &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c); - &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969); - &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27); - &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122); - &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433); - &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9); - &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5); - &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a); - &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0); - &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e); - &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c); - -#Te4 # four copies of Te4 to choose from to avoid L1 aliasing - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); - - &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5); - &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76); - &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0); - &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0); - &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc); - &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15); - &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a); - &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75); - &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0); - &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84); - &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b); - &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf); - &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85); - &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8); - &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5); - &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2); - &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17); - &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73); - &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88); - &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb); - &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c); - &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79); - &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9); - &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08); - &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6); - &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a); - &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e); - &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e); - &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94); - &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf); - &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68); - &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16); -#rcon: -$code.=<<___; - .long 0x00000001, 0x00000002, 0x00000004, 0x00000008 - .long 0x00000010, 0x00000020, 0x00000040, 0x00000080 - .long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 - .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b -___ -$code.=<<___; -.align 64 -.LAES_Td: -___ - &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); - &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); - &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); - &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5); - &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d); - &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b); - &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295); - &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e); - &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927); - &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d); - &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362); - &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9); - &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52); - &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566); - &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3); - &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed); - &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e); - &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4); - &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4); - &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd); - &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d); - &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060); - &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967); - &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879); - &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000); - &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c); - &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36); - &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624); - &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b); - &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c); - &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12); - &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14); - &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3); - &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b); - &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8); - &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684); - &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7); - &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177); - &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947); - &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322); - &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498); - &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f); - &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54); - &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382); - &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf); - &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb); - &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83); - &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef); - &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029); - &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235); - &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733); - &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117); - &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4); - &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546); - &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb); - &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d); - &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb); - &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a); - &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773); - &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478); - &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2); - &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff); - &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664); - &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0); - -#Td4: # four copies of Td4 to choose from to avoid L1 aliasing - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -$code.=<<___; - .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe - .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 -___ - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -$code.=<<___; - .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe - .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 -___ - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -$code.=<<___; - .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe - .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 -___ - &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); - &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); - &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); - &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); - &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); - &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); - &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); - &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); - &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); - &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); - &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); - &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); - &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); - &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); - &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); - &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); - &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); - &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); - &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); - &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); - &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); - &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); - &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); - &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); - &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); - &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); - &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); - &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); - &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); - &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); - &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); - &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); -$code.=<<___; - .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe - .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 -.asciz "AES for x86_64, CRYPTOGAMS by " -.align 64 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type block_se_handler,\@abi-omnipotent -.align 16 -block_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_block_prologue - - mov 24(%rax),%rax # pull saved real stack pointer - lea 48(%rax),%rax # adjust... - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_block_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - jmp .Lcommon_seh_exit -.size block_se_handler,.-block_se_handler - -.type key_se_handler,\@abi-omnipotent -.align 16 -key_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_key_prologue - - lea 56(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_key_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - jmp .Lcommon_seh_exit -.size key_se_handler,.-key_se_handler - -.type cbc_se_handler,\@abi-omnipotent -.align 16 -cbc_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lcbc_prologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_prologue - jb .Lin_cbc_prologue - - lea .Lcbc_fast_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_fast_body - jb .Lin_cbc_frame_setup - - lea .Lcbc_slow_prologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_slow_prologue - jb .Lin_cbc_body - - lea .Lcbc_slow_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_slow_body - jb .Lin_cbc_frame_setup - -.Lin_cbc_body: - mov 152($context),%rax # pull context->Rsp - - lea .Lcbc_epilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lcbc_epilogue - jae .Lin_cbc_prologue - - lea 8(%rax),%rax - - lea .Lcbc_popfq(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lcbc_popfq - jae .Lin_cbc_prologue - - mov `16-8`(%rax),%rax # biased $_rsp - lea 56(%rax),%rax - -.Lin_cbc_frame_setup: - mov -16(%rax),%rbx - mov -24(%rax),%rbp - mov -32(%rax),%r12 - mov -40(%rax),%r13 - mov -48(%rax),%r14 - mov -56(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_cbc_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - -.Lcommon_seh_exit: - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size cbc_se_handler,.-cbc_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_AES_encrypt - .rva .LSEH_end_AES_encrypt - .rva .LSEH_info_AES_encrypt - - .rva .LSEH_begin_AES_decrypt - .rva .LSEH_end_AES_decrypt - .rva .LSEH_info_AES_decrypt - - .rva .LSEH_begin_private_AES_set_encrypt_key - .rva .LSEH_end_private_AES_set_encrypt_key - .rva .LSEH_info_private_AES_set_encrypt_key - - .rva .LSEH_begin_private_AES_set_decrypt_key - .rva .LSEH_end_private_AES_set_decrypt_key - .rva .LSEH_info_private_AES_set_decrypt_key - - .rva .LSEH_begin_AES_cbc_encrypt - .rva .LSEH_end_AES_cbc_encrypt - .rva .LSEH_info_AES_cbc_encrypt - -.section .xdata -.align 8 -.LSEH_info_AES_encrypt: - .byte 9,0,0,0 - .rva block_se_handler - .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[] -.LSEH_info_AES_decrypt: - .byte 9,0,0,0 - .rva block_se_handler - .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_private_AES_set_encrypt_key: - .byte 9,0,0,0 - .rva key_se_handler - .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[] -.LSEH_info_private_AES_set_decrypt_key: - .byte 9,0,0,0 - .rva key_se_handler - .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[] -.LSEH_info_AES_cbc_encrypt: - .byte 9,0,0,0 - .rva cbc_se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-sha1-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-sha1-x86_64.pl deleted file mode 100644 index 3c8f6c19e7..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-sha1-x86_64.pl +++ /dev/null @@ -1,1250 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# June 2011 -# -# This is AESNI-CBC+SHA1 "stitch" implementation. The idea, as spelled -# in http://download.intel.com/design/intarch/papers/323686.pdf, is -# that since AESNI-CBC encrypt exhibit *very* low instruction-level -# parallelism, interleaving it with another algorithm would allow to -# utilize processor resources better and achieve better performance. -# SHA1 instruction sequences(*) are taken from sha1-x86_64.pl and -# AESNI code is weaved into it. Below are performance numbers in -# cycles per processed byte, less is better, for standalone AESNI-CBC -# encrypt, sum of the latter and standalone SHA1, and "stitched" -# subroutine: -# -# AES-128-CBC +SHA1 stitch gain -# Westmere 3.77[+5.6] 9.37 6.65 +41% -# Sandy Bridge 5.05[+5.2(6.3)] 10.25(11.35) 6.16(7.08) +67%(+60%) -# -# AES-192-CBC -# Westmere 4.51 10.11 6.97 +45% -# Sandy Bridge 6.05 11.25(12.35) 6.34(7.27) +77%(+70%) -# -# AES-256-CBC -# Westmere 5.25 10.85 7.25 +50% -# Sandy Bridge 7.05 12.25(13.35) 7.06(7.70) +74%(+73%) -# -# (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for -# background information. Above numbers in parentheses are SSSE3 -# results collected on AVX-capable CPU, i.e. apply on OSes that -# don't support AVX. -# -# Needless to mention that it makes no sense to implement "stitched" -# *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1 -# fully utilize parallelism, so stitching would not give any gain -# anyway. Well, there might be some, e.g. because of better cache -# locality... For reference, here are performance results for -# standalone AESNI-CBC decrypt: -# -# AES-128-CBC AES-192-CBC AES-256-CBC -# Westmere 1.31 1.55 1.80 -# Sandy Bridge 0.93 1.06 1.22 - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` - =~ /GNU assembler version ([2-9]\.[0-9]+)/ && - $1>=2.19); -$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && - $1>=2.09); -$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && - `ml64 2>&1` =~ /Version ([0-9]+)\./ && - $1>=10); - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -# void aesni_cbc_sha1_enc(const void *inp, -# void *out, -# size_t length, -# const AES_KEY *key, -# unsigned char *iv, -# SHA_CTX *ctx, -# const void *in0); - -$code.=<<___; -.text -.extern OPENSSL_ia32cap_P - -.globl aesni_cbc_sha1_enc -.type aesni_cbc_sha1_enc,\@abi-omnipotent -.align 16 -aesni_cbc_sha1_enc: - # caller should check for SSSE3 and AES-NI bits - mov OPENSSL_ia32cap_P+0(%rip),%r10d - mov OPENSSL_ia32cap_P+4(%rip),%r11d -___ -$code.=<<___ if ($avx); - and \$`1<<28`,%r11d # mask AVX bit - and \$`1<<30`,%r10d # mask "Intel CPU" bit - or %r11d,%r10d - cmp \$`1<<28|1<<30`,%r10d - je aesni_cbc_sha1_enc_avx -___ -$code.=<<___; - jmp aesni_cbc_sha1_enc_ssse3 - ret -.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc -___ - -my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); - -my $Xi=4; -my @X=map("%xmm$_",(4..7,0..3)); -my @Tx=map("%xmm$_",(8..10)); -my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization -my @T=("%esi","%edi"); -my $j=0; my $jj=0; my $r=0; my $sn=0; -my $K_XX_XX="%r11"; -my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13)); -my @rndkey=("%xmm14","%xmm15"); - -sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; - my $arg = pop; - $arg = "\$$arg" if ($arg*1 eq $arg); - $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; -} - -my $_rol=sub { &rol(@_) }; -my $_ror=sub { &ror(@_) }; - -$code.=<<___; -.type aesni_cbc_sha1_enc_ssse3,\@function,6 -.align 16 -aesni_cbc_sha1_enc_ssse3: - mov `($win64?56:8)`(%rsp),$inp # load 7th argument - #shr \$6,$len # debugging artefact - #jz .Lepilogue_ssse3 # debugging artefact - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - lea `-104-($win64?10*16:0)`(%rsp),%rsp - #mov $in0,$inp # debugging artefact - #lea 64(%rsp),$ctx # debugging artefact -___ -$code.=<<___ if ($win64); - movaps %xmm6,96+0(%rsp) - movaps %xmm7,96+16(%rsp) - movaps %xmm8,96+32(%rsp) - movaps %xmm9,96+48(%rsp) - movaps %xmm10,96+64(%rsp) - movaps %xmm11,96+80(%rsp) - movaps %xmm12,96+96(%rsp) - movaps %xmm13,96+112(%rsp) - movaps %xmm14,96+128(%rsp) - movaps %xmm15,96+144(%rsp) -.Lprologue_ssse3: -___ -$code.=<<___; - mov $in0,%r12 # reassign arguments - mov $out,%r13 - mov $len,%r14 - mov $key,%r15 - movdqu ($ivp),$iv # load IV - mov $ivp,88(%rsp) # save $ivp -___ -my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments -my $rounds="${ivp}d"; -$code.=<<___; - shl \$6,$len - sub $in0,$out - mov 240($key),$rounds - add $inp,$len # end of input - - lea K_XX_XX(%rip),$K_XX_XX - mov 0($ctx),$A # load context - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov $B,@T[0] # magic seed - mov 16($ctx),$E - - movdqa 64($K_XX_XX),@X[2] # pbswap mask - movdqa 0($K_XX_XX),@Tx[1] # K_00_19 - movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] - movdqu 16($inp),@X[-3&7] - movdqu 32($inp),@X[-2&7] - movdqu 48($inp),@X[-1&7] - pshufb @X[2],@X[-4&7] # byte swap - add \$64,$inp - pshufb @X[2],@X[-3&7] - pshufb @X[2],@X[-2&7] - pshufb @X[2],@X[-1&7] - paddd @Tx[1],@X[-4&7] # add K_00_19 - paddd @Tx[1],@X[-3&7] - paddd @Tx[1],@X[-2&7] - movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU - psubd @Tx[1],@X[-4&7] # restore X[] - movdqa @X[-3&7],16(%rsp) - psubd @Tx[1],@X[-3&7] - movdqa @X[-2&7],32(%rsp) - psubd @Tx[1],@X[-2&7] - movups ($key),$rndkey0 # $key[0] - movups 16($key),$rndkey[0] # forward reference - jmp .Loop_ssse3 -___ - -my $aesenc=sub { - use integer; - my ($n,$k)=($r/10,$r%10); - if ($k==0) { - $code.=<<___; - movups `16*$n`($in0),$in # load input - xorps $rndkey0,$in -___ - $code.=<<___ if ($n); - movups $iv,`16*($n-1)`($out,$in0) # write output -___ - $code.=<<___; - xorps $in,$iv - aesenc $rndkey[0],$iv - movups `32+16*$k`($key),$rndkey[1] -___ - } elsif ($k==9) { - $sn++; - $code.=<<___; - cmp \$11,$rounds - jb .Laesenclast$sn - movups `32+16*($k+0)`($key),$rndkey[1] - aesenc $rndkey[0],$iv - movups `32+16*($k+1)`($key),$rndkey[0] - aesenc $rndkey[1],$iv - je .Laesenclast$sn - movups `32+16*($k+2)`($key),$rndkey[1] - aesenc $rndkey[0],$iv - movups `32+16*($k+3)`($key),$rndkey[0] - aesenc $rndkey[1],$iv -.Laesenclast$sn: - aesenclast $rndkey[0],$iv - movups 16($key),$rndkey[1] # forward reference -___ - } else { - $code.=<<___; - aesenc $rndkey[0],$iv - movups `32+16*$k`($key),$rndkey[1] -___ - } - $r++; unshift(@rndkey,pop(@rndkey)); -}; - -sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - &movdqa (@X[0],@X[-3&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[0],@X[-1&7]); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &psrldq (@Tx[0],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (@Tx[2],@X[0]); - &movdqa (@Tx[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword - &paddd (@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@Tx[0],31); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[1],@Tx[2]); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@Tx[2],30); - &por (@X[0],@Tx[0]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pslld (@Tx[1],2); - &pxor (@X[0],@Tx[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xupdate_ssse3_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); - eval(shift(@insns)); # body_20_39 - &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... or load next one - &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &movdqa (@Tx[0],@X[0]); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pslld (@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &psrld (@Tx[0],30); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &por (@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &movdqa (@Tx[1],@X[0]) if ($Xi<19); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xuplast_ssse3_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &cmp ($inp,$len); - &je (".Ldone_ssse3"); - - unshift(@Tx,pop(@Tx)); - - &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask - &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 - &movdqu (@X[-4&7],"0($inp)"); # load input - &movdqu (@X[-3&7],"16($inp)"); - &movdqu (@X[-2&7],"32($inp)"); - &movdqu (@X[-1&7],"48($inp)"); - &pshufb (@X[-4&7],@X[2]); # byte swap - &add ($inp,64); - - $Xi=0; -} - -sub Xloop_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &pshufb (@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &paddd (@X[($Xi-4)&7],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - &psubd (@X[($Xi-4)&7],@Tx[1]); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -sub body_00_19 () { - use integer; - my ($k,$n); - my @r=( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer - '&xor ($c,$d);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', - '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); - $n = scalar(@r); - $k = (($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds - @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); - $jj++; - return @r; -} - -sub body_20_39 () { - use integer; - my ($k,$n); - my @r=( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); - $n = scalar(@r); - $k = (($jj+1)*8/20)*20*$n/8; # 8 aesencs per these 20 rounds - @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); - $jj++; - return @r; -} - -sub body_40_59 () { - use integer; - my ($k,$n); - my @r=( - '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); - $n = scalar(@r); - $k=(($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds - @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); - $jj++; - return @r; -} -$code.=<<___; -.align 16 -.Loop_ssse3: -___ - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - $saved_r=$r; @saved_rndkey=@rndkey; - - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - -$code.=<<___; - movups $iv,48($out,$in0) # write output - lea 64($in0),$in0 - - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - add 12($ctx),$D - mov $A,0($ctx) - add 16($ctx),$E - mov @T[0],4($ctx) - mov @T[0],$B # magic seed - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - jmp .Loop_ssse3 - -.align 16 -.Ldone_ssse3: -___ - $jj=$j=$saved_j; @V=@saved_V; - $r=$saved_r; @rndkey=@saved_rndkey; - - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - -$code.=<<___; - movups $iv,48($out,$in0) # write output - mov 88(%rsp),$ivp # restore $ivp - - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - mov $A,0($ctx) - add 12($ctx),$D - mov @T[0],4($ctx) - add 16($ctx),$E - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - movups $iv,($ivp) # write IV -___ -$code.=<<___ if ($win64); - movaps 96+0(%rsp),%xmm6 - movaps 96+16(%rsp),%xmm7 - movaps 96+32(%rsp),%xmm8 - movaps 96+48(%rsp),%xmm9 - movaps 96+64(%rsp),%xmm10 - movaps 96+80(%rsp),%xmm11 - movaps 96+96(%rsp),%xmm12 - movaps 96+112(%rsp),%xmm13 - movaps 96+128(%rsp),%xmm14 - movaps 96+144(%rsp),%xmm15 -___ -$code.=<<___; - lea `104+($win64?10*16:0)`(%rsp),%rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lepilogue_ssse3: - ret -.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 -___ - -$j=$jj=$r=$sn=0; - -if ($avx) { -my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); - -my $Xi=4; -my @X=map("%xmm$_",(4..7,0..3)); -my @Tx=map("%xmm$_",(8..10)); -my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization -my @T=("%esi","%edi"); - -my $_rol=sub { &shld(@_[0],@_) }; -my $_ror=sub { &shrd(@_[0],@_) }; - -$code.=<<___; -.type aesni_cbc_sha1_enc_avx,\@function,6 -.align 16 -aesni_cbc_sha1_enc_avx: - mov `($win64?56:8)`(%rsp),$inp # load 7th argument - #shr \$6,$len # debugging artefact - #jz .Lepilogue_avx # debugging artefact - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - lea `-104-($win64?10*16:0)`(%rsp),%rsp - #mov $in0,$inp # debugging artefact - #lea 64(%rsp),$ctx # debugging artefact -___ -$code.=<<___ if ($win64); - movaps %xmm6,96+0(%rsp) - movaps %xmm7,96+16(%rsp) - movaps %xmm8,96+32(%rsp) - movaps %xmm9,96+48(%rsp) - movaps %xmm10,96+64(%rsp) - movaps %xmm11,96+80(%rsp) - movaps %xmm12,96+96(%rsp) - movaps %xmm13,96+112(%rsp) - movaps %xmm14,96+128(%rsp) - movaps %xmm15,96+144(%rsp) -.Lprologue_avx: -___ -$code.=<<___; - vzeroall - mov $in0,%r12 # reassign arguments - mov $out,%r13 - mov $len,%r14 - mov $key,%r15 - vmovdqu ($ivp),$iv # load IV - mov $ivp,88(%rsp) # save $ivp -___ -my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments -my $rounds="${ivp}d"; -$code.=<<___; - shl \$6,$len - sub $in0,$out - mov 240($key),$rounds - add \$112,$key # size optimization - add $inp,$len # end of input - - lea K_XX_XX(%rip),$K_XX_XX - mov 0($ctx),$A # load context - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov $B,@T[0] # magic seed - mov 16($ctx),$E - - vmovdqa 64($K_XX_XX),@X[2] # pbswap mask - vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 - vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] - vmovdqu 16($inp),@X[-3&7] - vmovdqu 32($inp),@X[-2&7] - vmovdqu 48($inp),@X[-1&7] - vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap - add \$64,$inp - vpshufb @X[2],@X[-3&7],@X[-3&7] - vpshufb @X[2],@X[-2&7],@X[-2&7] - vpshufb @X[2],@X[-1&7],@X[-1&7] - vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 - vpaddd @Tx[1],@X[-3&7],@X[1] - vpaddd @Tx[1],@X[-2&7],@X[2] - vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU - vmovdqa @X[1],16(%rsp) - vmovdqa @X[2],32(%rsp) - vmovups -112($key),$rndkey0 # $key[0] - vmovups 16-112($key),$rndkey[0] # forward reference - jmp .Loop_avx -___ - -my $aesenc=sub { - use integer; - my ($n,$k)=($r/10,$r%10); - if ($k==0) { - $code.=<<___; - vmovups `16*$n`($in0),$in # load input - vxorps $rndkey0,$in,$in -___ - $code.=<<___ if ($n); - vmovups $iv,`16*($n-1)`($out,$in0) # write output -___ - $code.=<<___; - vxorps $in,$iv,$iv - vaesenc $rndkey[0],$iv,$iv - vmovups `32+16*$k-112`($key),$rndkey[1] -___ - } elsif ($k==9) { - $sn++; - $code.=<<___; - cmp \$11,$rounds - jb .Lvaesenclast$sn - vaesenc $rndkey[0],$iv,$iv - vmovups `32+16*($k+0)-112`($key),$rndkey[1] - vaesenc $rndkey[1],$iv,$iv - vmovups `32+16*($k+1)-112`($key),$rndkey[0] - je .Lvaesenclast$sn - vaesenc $rndkey[0],$iv,$iv - vmovups `32+16*($k+2)-112`($key),$rndkey[1] - vaesenc $rndkey[1],$iv,$iv - vmovups `32+16*($k+3)-112`($key),$rndkey[0] -.Lvaesenclast$sn: - vaesenclast $rndkey[0],$iv,$iv - vmovups 16-112($key),$rndkey[1] # forward reference -___ - } else { - $code.=<<___; - vaesenc $rndkey[0],$iv,$iv - vmovups `32+16*$k-112`($key),$rndkey[1] -___ - } - $r++; unshift(@rndkey,pop(@rndkey)); -}; - -sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@Tx[0],@X[0],31); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword - &vpaddd (@X[0],@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@Tx[1],@Tx[2],30); - &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslld (@Tx[2],@Tx[2],2); - &vpxor (@X[0],@X[0],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xupdate_avx_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... or load next one - &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpsrld (@Tx[0],@X[0],30); - &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpslld (@X[0],@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xuplast_avx_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &cmp ($inp,$len); - &je (".Ldone_avx"); - - unshift(@Tx,pop(@Tx)); - - &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask - &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 - &vmovdqu(@X[-4&7],"0($inp)"); # load input - &vmovdqu(@X[-3&7],"16($inp)"); - &vmovdqu(@X[-2&7],"32($inp)"); - &vmovdqu(@X[-1&7],"48($inp)"); - &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap - &add ($inp,64); - - $Xi=0; -} - -sub Xloop_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -$code.=<<___; -.align 16 -.Loop_avx: -___ - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_32_79(\&body_00_19); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_20_39); - &Xuplast_avx_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - $saved_r=$r; @saved_rndkey=@rndkey; - - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - -$code.=<<___; - vmovups $iv,48($out,$in0) # write output - lea 64($in0),$in0 - - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - add 12($ctx),$D - mov $A,0($ctx) - add 16($ctx),$E - mov @T[0],4($ctx) - mov @T[0],$B # magic seed - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - jmp .Loop_avx - -.align 16 -.Ldone_avx: -___ - $jj=$j=$saved_j; @V=@saved_V; - $r=$saved_r; @rndkey=@saved_rndkey; - - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - -$code.=<<___; - vmovups $iv,48($out,$in0) # write output - mov 88(%rsp),$ivp # restore $ivp - - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - mov $A,0($ctx) - add 12($ctx),$D - mov @T[0],4($ctx) - add 16($ctx),$E - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - vmovups $iv,($ivp) # write IV - vzeroall -___ -$code.=<<___ if ($win64); - movaps 96+0(%rsp),%xmm6 - movaps 96+16(%rsp),%xmm7 - movaps 96+32(%rsp),%xmm8 - movaps 96+48(%rsp),%xmm9 - movaps 96+64(%rsp),%xmm10 - movaps 96+80(%rsp),%xmm11 - movaps 96+96(%rsp),%xmm12 - movaps 96+112(%rsp),%xmm13 - movaps 96+128(%rsp),%xmm14 - movaps 96+144(%rsp),%xmm15 -___ -$code.=<<___; - lea `104+($win64?10*16:0)`(%rsp),%rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lepilogue_avx: - ret -.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx -___ -} -$code.=<<___; -.align 64 -K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask - -.asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by " -.align 64 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type ssse3_handler,\@abi-omnipotent -.align 16 -ssse3_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - lea 96(%rax),%rsi - lea 512($context),%rdi # &context.Xmm6 - mov \$20,%ecx - .long 0xa548f3fc # cld; rep movsq - lea `104+10*16`(%rax),%rax # adjust stack pointer - - mov 0(%rax),%r15 - mov 8(%rax),%r14 - mov 16(%rax),%r13 - mov 24(%rax),%r12 - mov 32(%rax),%rbp - mov 40(%rax),%rbx - lea 48(%rax),%rax - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lcommon_seh_tail: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size ssse3_handler,.-ssse3_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3 - .rva .LSEH_end_aesni_cbc_sha1_enc_ssse3 - .rva .LSEH_info_aesni_cbc_sha1_enc_ssse3 -___ -$code.=<<___ if ($avx); - .rva .LSEH_begin_aesni_cbc_sha1_enc_avx - .rva .LSEH_end_aesni_cbc_sha1_enc_avx - .rva .LSEH_info_aesni_cbc_sha1_enc_avx -___ -$code.=<<___; -.section .xdata -.align 8 -.LSEH_info_aesni_cbc_sha1_enc_ssse3: - .byte 9,0,0,0 - .rva ssse3_handler - .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[] -___ -$code.=<<___ if ($avx); -.LSEH_info_aesni_cbc_sha1_enc_avx: - .byte 9,0,0,0 - .rva ssse3_handler - .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] -___ -} - -#################################################################### -sub rex { - local *opcode=shift; - my ($dst,$src)=@_; - my $rex=0; - - $rex|=0x04 if($dst>=8); - $rex|=0x01 if($src>=8); - push @opcode,$rex|0x40 if($rex); -} - -sub aesni { - my $line=shift; - my @opcode=(0x66); - - if ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { - my %opcodelet = ( - "aesenc" => 0xdc, "aesenclast" => 0xdd - ); - return undef if (!defined($opcodelet{$1})); - rex(\@opcode,$3,$2); - push @opcode,0x0f,0x38,$opcodelet{$1}; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M - return ".byte\t".join(',',@opcode); - } - return $line; -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl deleted file mode 100644 index 8c1d0b5bed..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86.pl +++ /dev/null @@ -1,2189 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for Intel AES-NI extension. In -# OpenSSL context it's used with Intel engine, but can also be used as -# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for -# details]. -# -# Performance. -# -# To start with see corresponding paragraph in aesni-x86_64.pl... -# Instead of filling table similar to one found there I've chosen to -# summarize *comparison* results for raw ECB, CTR and CBC benchmarks. -# The simplified table below represents 32-bit performance relative -# to 64-bit one in every given point. Ratios vary for different -# encryption modes, therefore interval values. -# -# 16-byte 64-byte 256-byte 1-KB 8-KB -# 53-67% 67-84% 91-94% 95-98% 97-99.5% -# -# Lower ratios for smaller block sizes are perfectly understandable, -# because function call overhead is higher in 32-bit mode. Largest -# 8-KB block performance is virtually same: 32-bit code is less than -# 1% slower for ECB, CBC and CCM, and ~3% slower otherwise. - -# January 2011 -# -# See aesni-x86_64.pl for details. Unlike x86_64 version this module -# interleaves at most 6 aes[enc|dec] instructions, because there are -# not enough registers for 8x interleave [which should be optimal for -# Sandy Bridge]. Actually, performance results for 6x interleave -# factor presented in aesni-x86_64.pl (except for CTR) are for this -# module. - -# April 2011 -# -# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing -# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09. - -$PREFIX="aesni"; # if $PREFIX is set to "AES", the script - # generates drop-in replacement for - # crypto/aes/asm/aes-586.pl:-) -$inline=1; # inline _aesni_[en|de]crypt - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -if ($PREFIX eq "aesni") { $movekey=*movups; } -else { $movekey=*movups; } - -$len="eax"; -$rounds="ecx"; -$key="edx"; -$inp="esi"; -$out="edi"; -$rounds_="ebx"; # backup copy for $rounds -$key_="ebp"; # backup copy for $key - -$rndkey0="xmm0"; -$rndkey1="xmm1"; -$inout0="xmm2"; -$inout1="xmm3"; -$inout2="xmm4"; -$inout3="xmm5"; $in1="xmm5"; -$inout4="xmm6"; $in0="xmm6"; -$inout5="xmm7"; $ivec="xmm7"; - -# AESNI extension -sub aeskeygenassist -{ my($dst,$src,$imm)=@_; - if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) - { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); } -} -sub aescommon -{ my($opcodelet,$dst,$src)=@_; - if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) - { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);} -} -sub aesimc { aescommon(0xdb,@_); } -sub aesenc { aescommon(0xdc,@_); } -sub aesenclast { aescommon(0xdd,@_); } -sub aesdec { aescommon(0xde,@_); } -sub aesdeclast { aescommon(0xdf,@_); } - -# Inline version of internal aesni_[en|de]crypt1 -{ my $sn; -sub aesni_inline_generate1 -{ my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout)); - $sn++; - - &$movekey ($rndkey0,&QWP(0,$key)); - &$movekey ($rndkey1,&QWP(16,$key)); - &xorps ($ivec,$rndkey0) if (defined($ivec)); - &lea ($key,&DWP(32,$key)); - &xorps ($inout,$ivec) if (defined($ivec)); - &xorps ($inout,$rndkey0) if (!defined($ivec)); - &set_label("${p}1_loop_$sn"); - eval"&aes${p} ($inout,$rndkey1)"; - &dec ($rounds); - &$movekey ($rndkey1,&QWP(0,$key)); - &lea ($key,&DWP(16,$key)); - &jnz (&label("${p}1_loop_$sn")); - eval"&aes${p}last ($inout,$rndkey1)"; -}} - -sub aesni_generate1 # fully unrolled loop -{ my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout)); - - &function_begin_B("_aesni_${p}rypt1"); - &movups ($rndkey0,&QWP(0,$key)); - &$movekey ($rndkey1,&QWP(0x10,$key)); - &xorps ($inout,$rndkey0); - &$movekey ($rndkey0,&QWP(0x20,$key)); - &lea ($key,&DWP(0x30,$key)); - &cmp ($rounds,11); - &jb (&label("${p}128")); - &lea ($key,&DWP(0x20,$key)); - &je (&label("${p}192")); - &lea ($key,&DWP(0x20,$key)); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(-0x40,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(-0x30,$key)); - &set_label("${p}192"); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(-0x20,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(-0x10,$key)); - &set_label("${p}128"); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(0,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0x10,$key)); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(0x20,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0x30,$key)); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(0x40,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0x50,$key)); - eval"&aes${p} ($inout,$rndkey1)"; - &$movekey ($rndkey1,&QWP(0x60,$key)); - eval"&aes${p} ($inout,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0x70,$key)); - eval"&aes${p} ($inout,$rndkey1)"; - eval"&aes${p}last ($inout,$rndkey0)"; - &ret(); - &function_end_B("_aesni_${p}rypt1"); -} - -# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); -&aesni_generate1("enc") if (!$inline); -&function_begin_B("${PREFIX}_encrypt"); - &mov ("eax",&wparam(0)); - &mov ($key,&wparam(2)); - &movups ($inout0,&QWP(0,"eax")); - &mov ($rounds,&DWP(240,$key)); - &mov ("eax",&wparam(1)); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &movups (&QWP(0,"eax"),$inout0); - &ret (); -&function_end_B("${PREFIX}_encrypt"); - -# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key); -&aesni_generate1("dec") if(!$inline); -&function_begin_B("${PREFIX}_decrypt"); - &mov ("eax",&wparam(0)); - &mov ($key,&wparam(2)); - &movups ($inout0,&QWP(0,"eax")); - &mov ($rounds,&DWP(240,$key)); - &mov ("eax",&wparam(1)); - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &movups (&QWP(0,"eax"),$inout0); - &ret (); -&function_end_B("${PREFIX}_decrypt"); - -# _aesni_[en|de]cryptN are private interfaces, N denotes interleave -# factor. Why 3x subroutine were originally used in loops? Even though -# aes[enc|dec] latency was originally 6, it could be scheduled only -# every *2nd* cycle. Thus 3x interleave was the one providing optimal -# utilization, i.e. when subroutine's throughput is virtually same as -# of non-interleaved subroutine [for number of input blocks up to 3]. -# This is why it makes no sense to implement 2x subroutine. -# aes[enc|dec] latency in next processor generation is 8, but the -# instructions can be scheduled every cycle. Optimal interleave for -# new processor is therefore 8x, but it's unfeasible to accommodate it -# in XMM registers addreassable in 32-bit mode and therefore 6x is -# used instead... - -sub aesni_generate3 -{ my $p=shift; - - &function_begin_B("_aesni_${p}rypt3"); - &$movekey ($rndkey0,&QWP(0,$key)); - &shr ($rounds,1); - &$movekey ($rndkey1,&QWP(16,$key)); - &lea ($key,&DWP(32,$key)); - &xorps ($inout0,$rndkey0); - &pxor ($inout1,$rndkey0); - &pxor ($inout2,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); - - &set_label("${p}3_loop"); - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); - eval"&aes${p} ($inout2,$rndkey1)"; - &$movekey ($rndkey1,&QWP(16,$key)); - eval"&aes${p} ($inout0,$rndkey0)"; - eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); - eval"&aes${p} ($inout2,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); - &jnz (&label("${p}3_loop")); - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - eval"&aes${p} ($inout2,$rndkey1)"; - eval"&aes${p}last ($inout0,$rndkey0)"; - eval"&aes${p}last ($inout1,$rndkey0)"; - eval"&aes${p}last ($inout2,$rndkey0)"; - &ret(); - &function_end_B("_aesni_${p}rypt3"); -} - -# 4x interleave is implemented to improve small block performance, -# most notably [and naturally] 4 block by ~30%. One can argue that one -# should have implemented 5x as well, but improvement would be <20%, -# so it's not worth it... -sub aesni_generate4 -{ my $p=shift; - - &function_begin_B("_aesni_${p}rypt4"); - &$movekey ($rndkey0,&QWP(0,$key)); - &$movekey ($rndkey1,&QWP(16,$key)); - &shr ($rounds,1); - &lea ($key,&DWP(32,$key)); - &xorps ($inout0,$rndkey0); - &pxor ($inout1,$rndkey0); - &pxor ($inout2,$rndkey0); - &pxor ($inout3,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); - - &set_label("${p}4_loop"); - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); - eval"&aes${p} ($inout2,$rndkey1)"; - eval"&aes${p} ($inout3,$rndkey1)"; - &$movekey ($rndkey1,&QWP(16,$key)); - eval"&aes${p} ($inout0,$rndkey0)"; - eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); - eval"&aes${p} ($inout2,$rndkey0)"; - eval"&aes${p} ($inout3,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); - &jnz (&label("${p}4_loop")); - - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - eval"&aes${p} ($inout2,$rndkey1)"; - eval"&aes${p} ($inout3,$rndkey1)"; - eval"&aes${p}last ($inout0,$rndkey0)"; - eval"&aes${p}last ($inout1,$rndkey0)"; - eval"&aes${p}last ($inout2,$rndkey0)"; - eval"&aes${p}last ($inout3,$rndkey0)"; - &ret(); - &function_end_B("_aesni_${p}rypt4"); -} - -sub aesni_generate6 -{ my $p=shift; - - &function_begin_B("_aesni_${p}rypt6"); - &static_label("_aesni_${p}rypt6_enter"); - &$movekey ($rndkey0,&QWP(0,$key)); - &shr ($rounds,1); - &$movekey ($rndkey1,&QWP(16,$key)); - &lea ($key,&DWP(32,$key)); - &xorps ($inout0,$rndkey0); - &pxor ($inout1,$rndkey0); # pxor does better here - eval"&aes${p} ($inout0,$rndkey1)"; - &pxor ($inout2,$rndkey0); - eval"&aes${p} ($inout1,$rndkey1)"; - &pxor ($inout3,$rndkey0); - &dec ($rounds); - eval"&aes${p} ($inout2,$rndkey1)"; - &pxor ($inout4,$rndkey0); - eval"&aes${p} ($inout3,$rndkey1)"; - &pxor ($inout5,$rndkey0); - eval"&aes${p} ($inout4,$rndkey1)"; - &$movekey ($rndkey0,&QWP(0,$key)); - eval"&aes${p} ($inout5,$rndkey1)"; - &jmp (&label("_aesni_${p}rypt6_enter")); - - &set_label("${p}6_loop",16); - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); - eval"&aes${p} ($inout2,$rndkey1)"; - eval"&aes${p} ($inout3,$rndkey1)"; - eval"&aes${p} ($inout4,$rndkey1)"; - eval"&aes${p} ($inout5,$rndkey1)"; - &set_label("_aesni_${p}rypt6_enter",16); - &$movekey ($rndkey1,&QWP(16,$key)); - eval"&aes${p} ($inout0,$rndkey0)"; - eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); - eval"&aes${p} ($inout2,$rndkey0)"; - eval"&aes${p} ($inout3,$rndkey0)"; - eval"&aes${p} ($inout4,$rndkey0)"; - eval"&aes${p} ($inout5,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); - &jnz (&label("${p}6_loop")); - - eval"&aes${p} ($inout0,$rndkey1)"; - eval"&aes${p} ($inout1,$rndkey1)"; - eval"&aes${p} ($inout2,$rndkey1)"; - eval"&aes${p} ($inout3,$rndkey1)"; - eval"&aes${p} ($inout4,$rndkey1)"; - eval"&aes${p} ($inout5,$rndkey1)"; - eval"&aes${p}last ($inout0,$rndkey0)"; - eval"&aes${p}last ($inout1,$rndkey0)"; - eval"&aes${p}last ($inout2,$rndkey0)"; - eval"&aes${p}last ($inout3,$rndkey0)"; - eval"&aes${p}last ($inout4,$rndkey0)"; - eval"&aes${p}last ($inout5,$rndkey0)"; - &ret(); - &function_end_B("_aesni_${p}rypt6"); -} -&aesni_generate3("enc") if ($PREFIX eq "aesni"); -&aesni_generate3("dec"); -&aesni_generate4("enc") if ($PREFIX eq "aesni"); -&aesni_generate4("dec"); -&aesni_generate6("enc") if ($PREFIX eq "aesni"); -&aesni_generate6("dec"); - -if ($PREFIX eq "aesni") { -###################################################################### -# void aesni_ecb_encrypt (const void *in, void *out, -# size_t length, const AES_KEY *key, -# int enc); -&function_begin("aesni_ecb_encrypt"); - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); - &mov ($rounds_,&wparam(4)); - &and ($len,-16); - &jz (&label("ecb_ret")); - &mov ($rounds,&DWP(240,$key)); - &test ($rounds_,$rounds_); - &jz (&label("ecb_decrypt")); - - &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds - &cmp ($len,0x60); - &jb (&label("ecb_enc_tail")); - - &movdqu ($inout0,&QWP(0,$inp)); - &movdqu ($inout1,&QWP(0x10,$inp)); - &movdqu ($inout2,&QWP(0x20,$inp)); - &movdqu ($inout3,&QWP(0x30,$inp)); - &movdqu ($inout4,&QWP(0x40,$inp)); - &movdqu ($inout5,&QWP(0x50,$inp)); - &lea ($inp,&DWP(0x60,$inp)); - &sub ($len,0x60); - &jmp (&label("ecb_enc_loop6_enter")); - -&set_label("ecb_enc_loop6",16); - &movups (&QWP(0,$out),$inout0); - &movdqu ($inout0,&QWP(0,$inp)); - &movups (&QWP(0x10,$out),$inout1); - &movdqu ($inout1,&QWP(0x10,$inp)); - &movups (&QWP(0x20,$out),$inout2); - &movdqu ($inout2,&QWP(0x20,$inp)); - &movups (&QWP(0x30,$out),$inout3); - &movdqu ($inout3,&QWP(0x30,$inp)); - &movups (&QWP(0x40,$out),$inout4); - &movdqu ($inout4,&QWP(0x40,$inp)); - &movups (&QWP(0x50,$out),$inout5); - &lea ($out,&DWP(0x60,$out)); - &movdqu ($inout5,&QWP(0x50,$inp)); - &lea ($inp,&DWP(0x60,$inp)); -&set_label("ecb_enc_loop6_enter"); - - &call ("_aesni_encrypt6"); - - &mov ($key,$key_); # restore $key - &mov ($rounds,$rounds_); # restore $rounds - &sub ($len,0x60); - &jnc (&label("ecb_enc_loop6")); - - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &movups (&QWP(0x40,$out),$inout4); - &movups (&QWP(0x50,$out),$inout5); - &lea ($out,&DWP(0x60,$out)); - &add ($len,0x60); - &jz (&label("ecb_ret")); - -&set_label("ecb_enc_tail"); - &movups ($inout0,&QWP(0,$inp)); - &cmp ($len,0x20); - &jb (&label("ecb_enc_one")); - &movups ($inout1,&QWP(0x10,$inp)); - &je (&label("ecb_enc_two")); - &movups ($inout2,&QWP(0x20,$inp)); - &cmp ($len,0x40); - &jb (&label("ecb_enc_three")); - &movups ($inout3,&QWP(0x30,$inp)); - &je (&label("ecb_enc_four")); - &movups ($inout4,&QWP(0x40,$inp)); - &xorps ($inout5,$inout5); - &call ("_aesni_encrypt6"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &movups (&QWP(0x40,$out),$inout4); - jmp (&label("ecb_ret")); - -&set_label("ecb_enc_one",16); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &movups (&QWP(0,$out),$inout0); - &jmp (&label("ecb_ret")); - -&set_label("ecb_enc_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_encrypt3"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &jmp (&label("ecb_ret")); - -&set_label("ecb_enc_three",16); - &call ("_aesni_encrypt3"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &jmp (&label("ecb_ret")); - -&set_label("ecb_enc_four",16); - &call ("_aesni_encrypt4"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &jmp (&label("ecb_ret")); -###################################################################### -&set_label("ecb_decrypt",16); - &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds - &cmp ($len,0x60); - &jb (&label("ecb_dec_tail")); - - &movdqu ($inout0,&QWP(0,$inp)); - &movdqu ($inout1,&QWP(0x10,$inp)); - &movdqu ($inout2,&QWP(0x20,$inp)); - &movdqu ($inout3,&QWP(0x30,$inp)); - &movdqu ($inout4,&QWP(0x40,$inp)); - &movdqu ($inout5,&QWP(0x50,$inp)); - &lea ($inp,&DWP(0x60,$inp)); - &sub ($len,0x60); - &jmp (&label("ecb_dec_loop6_enter")); - -&set_label("ecb_dec_loop6",16); - &movups (&QWP(0,$out),$inout0); - &movdqu ($inout0,&QWP(0,$inp)); - &movups (&QWP(0x10,$out),$inout1); - &movdqu ($inout1,&QWP(0x10,$inp)); - &movups (&QWP(0x20,$out),$inout2); - &movdqu ($inout2,&QWP(0x20,$inp)); - &movups (&QWP(0x30,$out),$inout3); - &movdqu ($inout3,&QWP(0x30,$inp)); - &movups (&QWP(0x40,$out),$inout4); - &movdqu ($inout4,&QWP(0x40,$inp)); - &movups (&QWP(0x50,$out),$inout5); - &lea ($out,&DWP(0x60,$out)); - &movdqu ($inout5,&QWP(0x50,$inp)); - &lea ($inp,&DWP(0x60,$inp)); -&set_label("ecb_dec_loop6_enter"); - - &call ("_aesni_decrypt6"); - - &mov ($key,$key_); # restore $key - &mov ($rounds,$rounds_); # restore $rounds - &sub ($len,0x60); - &jnc (&label("ecb_dec_loop6")); - - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &movups (&QWP(0x40,$out),$inout4); - &movups (&QWP(0x50,$out),$inout5); - &lea ($out,&DWP(0x60,$out)); - &add ($len,0x60); - &jz (&label("ecb_ret")); - -&set_label("ecb_dec_tail"); - &movups ($inout0,&QWP(0,$inp)); - &cmp ($len,0x20); - &jb (&label("ecb_dec_one")); - &movups ($inout1,&QWP(0x10,$inp)); - &je (&label("ecb_dec_two")); - &movups ($inout2,&QWP(0x20,$inp)); - &cmp ($len,0x40); - &jb (&label("ecb_dec_three")); - &movups ($inout3,&QWP(0x30,$inp)); - &je (&label("ecb_dec_four")); - &movups ($inout4,&QWP(0x40,$inp)); - &xorps ($inout5,$inout5); - &call ("_aesni_decrypt6"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &movups (&QWP(0x40,$out),$inout4); - &jmp (&label("ecb_ret")); - -&set_label("ecb_dec_one",16); - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &movups (&QWP(0,$out),$inout0); - &jmp (&label("ecb_ret")); - -&set_label("ecb_dec_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_decrypt3"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &jmp (&label("ecb_ret")); - -&set_label("ecb_dec_three",16); - &call ("_aesni_decrypt3"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &jmp (&label("ecb_ret")); - -&set_label("ecb_dec_four",16); - &call ("_aesni_decrypt4"); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - -&set_label("ecb_ret"); -&function_end("aesni_ecb_encrypt"); - -###################################################################### -# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, -# size_t blocks, const AES_KEY *key, -# const char *ivec,char *cmac); -# -# Handles only complete blocks, operates on 64-bit counter and -# does not update *ivec! Nor does it finalize CMAC value -# (see engine/eng_aesni.c for details) -# -{ my $cmac=$inout1; -&function_begin("aesni_ccm64_encrypt_blocks"); - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); - &mov ($rounds_,&wparam(4)); - &mov ($rounds,&wparam(5)); - &mov ($key_,"esp"); - &sub ("esp",60); - &and ("esp",-16); # align stack - &mov (&DWP(48,"esp"),$key_); - - &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec - &movdqu ($cmac,&QWP(0,$rounds)); # load cmac - &mov ($rounds,&DWP(240,$key)); - - # compose byte-swap control mask for pshufb on stack - &mov (&DWP(0,"esp"),0x0c0d0e0f); - &mov (&DWP(4,"esp"),0x08090a0b); - &mov (&DWP(8,"esp"),0x04050607); - &mov (&DWP(12,"esp"),0x00010203); - - # compose counter increment vector on stack - &mov ($rounds_,1); - &xor ($key_,$key_); - &mov (&DWP(16,"esp"),$rounds_); - &mov (&DWP(20,"esp"),$key_); - &mov (&DWP(24,"esp"),$key_); - &mov (&DWP(28,"esp"),$key_); - - &shr ($rounds,1); - &lea ($key_,&DWP(0,$key)); - &movdqa ($inout3,&QWP(0,"esp")); - &movdqa ($inout0,$ivec); - &mov ($rounds_,$rounds); - &pshufb ($ivec,$inout3); - -&set_label("ccm64_enc_outer"); - &$movekey ($rndkey0,&QWP(0,$key_)); - &mov ($rounds,$rounds_); - &movups ($in0,&QWP(0,$inp)); - - &xorps ($inout0,$rndkey0); - &$movekey ($rndkey1,&QWP(16,$key_)); - &xorps ($rndkey0,$in0); - &lea ($key,&DWP(32,$key_)); - &xorps ($cmac,$rndkey0); # cmac^=inp - &$movekey ($rndkey0,&QWP(0,$key)); - -&set_label("ccm64_enc2_loop"); - &aesenc ($inout0,$rndkey1); - &dec ($rounds); - &aesenc ($cmac,$rndkey1); - &$movekey ($rndkey1,&QWP(16,$key)); - &aesenc ($inout0,$rndkey0); - &lea ($key,&DWP(32,$key)); - &aesenc ($cmac,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); - &jnz (&label("ccm64_enc2_loop")); - &aesenc ($inout0,$rndkey1); - &aesenc ($cmac,$rndkey1); - &paddq ($ivec,&QWP(16,"esp")); - &aesenclast ($inout0,$rndkey0); - &aesenclast ($cmac,$rndkey0); - - &dec ($len); - &lea ($inp,&DWP(16,$inp)); - &xorps ($in0,$inout0); # inp^=E(ivec) - &movdqa ($inout0,$ivec); - &movups (&QWP(0,$out),$in0); # save output - &lea ($out,&DWP(16,$out)); - &pshufb ($inout0,$inout3); - &jnz (&label("ccm64_enc_outer")); - - &mov ("esp",&DWP(48,"esp")); - &mov ($out,&wparam(5)); - &movups (&QWP(0,$out),$cmac); -&function_end("aesni_ccm64_encrypt_blocks"); - -&function_begin("aesni_ccm64_decrypt_blocks"); - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); - &mov ($rounds_,&wparam(4)); - &mov ($rounds,&wparam(5)); - &mov ($key_,"esp"); - &sub ("esp",60); - &and ("esp",-16); # align stack - &mov (&DWP(48,"esp"),$key_); - - &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec - &movdqu ($cmac,&QWP(0,$rounds)); # load cmac - &mov ($rounds,&DWP(240,$key)); - - # compose byte-swap control mask for pshufb on stack - &mov (&DWP(0,"esp"),0x0c0d0e0f); - &mov (&DWP(4,"esp"),0x08090a0b); - &mov (&DWP(8,"esp"),0x04050607); - &mov (&DWP(12,"esp"),0x00010203); - - # compose counter increment vector on stack - &mov ($rounds_,1); - &xor ($key_,$key_); - &mov (&DWP(16,"esp"),$rounds_); - &mov (&DWP(20,"esp"),$key_); - &mov (&DWP(24,"esp"),$key_); - &mov (&DWP(28,"esp"),$key_); - - &movdqa ($inout3,&QWP(0,"esp")); # bswap mask - &movdqa ($inout0,$ivec); - - &mov ($key_,$key); - &mov ($rounds_,$rounds); - - &pshufb ($ivec,$inout3); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &movups ($in0,&QWP(0,$inp)); # load inp - &paddq ($ivec,&QWP(16,"esp")); - &lea ($inp,&QWP(16,$inp)); - &jmp (&label("ccm64_dec_outer")); - -&set_label("ccm64_dec_outer",16); - &xorps ($in0,$inout0); # inp ^= E(ivec) - &movdqa ($inout0,$ivec); - &mov ($rounds,$rounds_); - &movups (&QWP(0,$out),$in0); # save output - &lea ($out,&DWP(16,$out)); - &pshufb ($inout0,$inout3); - - &sub ($len,1); - &jz (&label("ccm64_dec_break")); - - &$movekey ($rndkey0,&QWP(0,$key_)); - &shr ($rounds,1); - &$movekey ($rndkey1,&QWP(16,$key_)); - &xorps ($in0,$rndkey0); - &lea ($key,&DWP(32,$key_)); - &xorps ($inout0,$rndkey0); - &xorps ($cmac,$in0); # cmac^=out - &$movekey ($rndkey0,&QWP(0,$key)); - -&set_label("ccm64_dec2_loop"); - &aesenc ($inout0,$rndkey1); - &dec ($rounds); - &aesenc ($cmac,$rndkey1); - &$movekey ($rndkey1,&QWP(16,$key)); - &aesenc ($inout0,$rndkey0); - &lea ($key,&DWP(32,$key)); - &aesenc ($cmac,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); - &jnz (&label("ccm64_dec2_loop")); - &movups ($in0,&QWP(0,$inp)); # load inp - &paddq ($ivec,&QWP(16,"esp")); - &aesenc ($inout0,$rndkey1); - &aesenc ($cmac,$rndkey1); - &lea ($inp,&QWP(16,$inp)); - &aesenclast ($inout0,$rndkey0); - &aesenclast ($cmac,$rndkey0); - &jmp (&label("ccm64_dec_outer")); - -&set_label("ccm64_dec_break",16); - &mov ($key,$key_); - if ($inline) - { &aesni_inline_generate1("enc",$cmac,$in0); } - else - { &call ("_aesni_encrypt1",$cmac); } - - &mov ("esp",&DWP(48,"esp")); - &mov ($out,&wparam(5)); - &movups (&QWP(0,$out),$cmac); -&function_end("aesni_ccm64_decrypt_blocks"); -} - -###################################################################### -# void aesni_ctr32_encrypt_blocks (const void *in, void *out, -# size_t blocks, const AES_KEY *key, -# const char *ivec); -# -# Handles only complete blocks, operates on 32-bit counter and -# does not update *ivec! (see engine/eng_aesni.c for details) -# -# stack layout: -# 0 pshufb mask -# 16 vector addend: 0,6,6,6 -# 32 counter-less ivec -# 48 1st triplet of counter vector -# 64 2nd triplet of counter vector -# 80 saved %esp - -&function_begin("aesni_ctr32_encrypt_blocks"); - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); - &mov ($rounds_,&wparam(4)); - &mov ($key_,"esp"); - &sub ("esp",88); - &and ("esp",-16); # align stack - &mov (&DWP(80,"esp"),$key_); - - &cmp ($len,1); - &je (&label("ctr32_one_shortcut")); - - &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec - - # compose byte-swap control mask for pshufb on stack - &mov (&DWP(0,"esp"),0x0c0d0e0f); - &mov (&DWP(4,"esp"),0x08090a0b); - &mov (&DWP(8,"esp"),0x04050607); - &mov (&DWP(12,"esp"),0x00010203); - - # compose counter increment vector on stack - &mov ($rounds,6); - &xor ($key_,$key_); - &mov (&DWP(16,"esp"),$rounds); - &mov (&DWP(20,"esp"),$rounds); - &mov (&DWP(24,"esp"),$rounds); - &mov (&DWP(28,"esp"),$key_); - - &pextrd ($rounds_,$inout5,3); # pull 32-bit counter - &pinsrd ($inout5,$key_,3); # wipe 32-bit counter - - &mov ($rounds,&DWP(240,$key)); # key->rounds - - # compose 2 vectors of 3x32-bit counters - &bswap ($rounds_); - &pxor ($rndkey1,$rndkey1); - &pxor ($rndkey0,$rndkey0); - &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask - &pinsrd ($rndkey1,$rounds_,0); - &lea ($key_,&DWP(3,$rounds_)); - &pinsrd ($rndkey0,$key_,0); - &inc ($rounds_); - &pinsrd ($rndkey1,$rounds_,1); - &inc ($key_); - &pinsrd ($rndkey0,$key_,1); - &inc ($rounds_); - &pinsrd ($rndkey1,$rounds_,2); - &inc ($key_); - &pinsrd ($rndkey0,$key_,2); - &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet - &pshufb ($rndkey1,$inout0); # byte swap - &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet - &pshufb ($rndkey0,$inout0); # byte swap - - &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword - &pshufd ($inout1,$rndkey1,2<<6); - &cmp ($len,6); - &jb (&label("ctr32_tail")); - &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec - &shr ($rounds,1); - &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds - &sub ($len,6); - &jmp (&label("ctr32_loop6")); - -&set_label("ctr32_loop6",16); - &pshufd ($inout2,$rndkey1,1<<6); - &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec - &pshufd ($inout3,$rndkey0,3<<6); - &por ($inout0,$rndkey1); # merge counter-less ivec - &pshufd ($inout4,$rndkey0,2<<6); - &por ($inout1,$rndkey1); - &pshufd ($inout5,$rndkey0,1<<6); - &por ($inout2,$rndkey1); - &por ($inout3,$rndkey1); - &por ($inout4,$rndkey1); - &por ($inout5,$rndkey1); - - # inlining _aesni_encrypt6's prologue gives ~4% improvement... - &$movekey ($rndkey0,&QWP(0,$key_)); - &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); - &dec ($rounds); - &pxor ($inout0,$rndkey0); - &pxor ($inout1,$rndkey0); - &aesenc ($inout0,$rndkey1); - &pxor ($inout2,$rndkey0); - &aesenc ($inout1,$rndkey1); - &pxor ($inout3,$rndkey0); - &aesenc ($inout2,$rndkey1); - &pxor ($inout4,$rndkey0); - &aesenc ($inout3,$rndkey1); - &pxor ($inout5,$rndkey0); - &aesenc ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); - &aesenc ($inout5,$rndkey1); - - &call (&label("_aesni_encrypt6_enter")); - - &movups ($rndkey1,&QWP(0,$inp)); - &movups ($rndkey0,&QWP(0x10,$inp)); - &xorps ($inout0,$rndkey1); - &movups ($rndkey1,&QWP(0x20,$inp)); - &xorps ($inout1,$rndkey0); - &movups (&QWP(0,$out),$inout0); - &movdqa ($rndkey0,&QWP(16,"esp")); # load increment - &xorps ($inout2,$rndkey1); - &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - - &paddd ($rndkey1,$rndkey0); # 1st triplet increment - &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment - &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask - - &movups ($inout1,&QWP(0x30,$inp)); - &movups ($inout2,&QWP(0x40,$inp)); - &xorps ($inout3,$inout1); - &movups ($inout1,&QWP(0x50,$inp)); - &lea ($inp,&DWP(0x60,$inp)); - &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet - &pshufb ($rndkey1,$inout0); # byte swap - &xorps ($inout4,$inout2); - &movups (&QWP(0x30,$out),$inout3); - &xorps ($inout5,$inout1); - &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet - &pshufb ($rndkey0,$inout0); # byte swap - &movups (&QWP(0x40,$out),$inout4); - &pshufd ($inout0,$rndkey1,3<<6); - &movups (&QWP(0x50,$out),$inout5); - &lea ($out,&DWP(0x60,$out)); - - &mov ($rounds,$rounds_); - &pshufd ($inout1,$rndkey1,2<<6); - &sub ($len,6); - &jnc (&label("ctr32_loop6")); - - &add ($len,6); - &jz (&label("ctr32_ret")); - &mov ($key,$key_); - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds - &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec - -&set_label("ctr32_tail"); - &por ($inout0,$inout5); - &cmp ($len,2); - &jb (&label("ctr32_one")); - - &pshufd ($inout2,$rndkey1,1<<6); - &por ($inout1,$inout5); - &je (&label("ctr32_two")); - - &pshufd ($inout3,$rndkey0,3<<6); - &por ($inout2,$inout5); - &cmp ($len,4); - &jb (&label("ctr32_three")); - - &pshufd ($inout4,$rndkey0,2<<6); - &por ($inout3,$inout5); - &je (&label("ctr32_four")); - - &por ($inout4,$inout5); - &call ("_aesni_encrypt6"); - &movups ($rndkey1,&QWP(0,$inp)); - &movups ($rndkey0,&QWP(0x10,$inp)); - &xorps ($inout0,$rndkey1); - &movups ($rndkey1,&QWP(0x20,$inp)); - &xorps ($inout1,$rndkey0); - &movups ($rndkey0,&QWP(0x30,$inp)); - &xorps ($inout2,$rndkey1); - &movups ($rndkey1,&QWP(0x40,$inp)); - &xorps ($inout3,$rndkey0); - &movups (&QWP(0,$out),$inout0); - &xorps ($inout4,$rndkey1); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &movups (&QWP(0x40,$out),$inout4); - &jmp (&label("ctr32_ret")); - -&set_label("ctr32_one_shortcut",16); - &movups ($inout0,&QWP(0,$rounds_)); # load ivec - &mov ($rounds,&DWP(240,$key)); - -&set_label("ctr32_one"); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &movups ($in0,&QWP(0,$inp)); - &xorps ($in0,$inout0); - &movups (&QWP(0,$out),$in0); - &jmp (&label("ctr32_ret")); - -&set_label("ctr32_two",16); - &call ("_aesni_encrypt3"); - &movups ($inout3,&QWP(0,$inp)); - &movups ($inout4,&QWP(0x10,$inp)); - &xorps ($inout0,$inout3); - &xorps ($inout1,$inout4); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &jmp (&label("ctr32_ret")); - -&set_label("ctr32_three",16); - &call ("_aesni_encrypt3"); - &movups ($inout3,&QWP(0,$inp)); - &movups ($inout4,&QWP(0x10,$inp)); - &xorps ($inout0,$inout3); - &movups ($inout5,&QWP(0x20,$inp)); - &xorps ($inout1,$inout4); - &movups (&QWP(0,$out),$inout0); - &xorps ($inout2,$inout5); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &jmp (&label("ctr32_ret")); - -&set_label("ctr32_four",16); - &call ("_aesni_encrypt4"); - &movups ($inout4,&QWP(0,$inp)); - &movups ($inout5,&QWP(0x10,$inp)); - &movups ($rndkey1,&QWP(0x20,$inp)); - &xorps ($inout0,$inout4); - &movups ($rndkey0,&QWP(0x30,$inp)); - &xorps ($inout1,$inout5); - &movups (&QWP(0,$out),$inout0); - &xorps ($inout2,$rndkey1); - &movups (&QWP(0x10,$out),$inout1); - &xorps ($inout3,$rndkey0); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - -&set_label("ctr32_ret"); - &mov ("esp",&DWP(80,"esp")); -&function_end("aesni_ctr32_encrypt_blocks"); - -###################################################################### -# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2 -# const unsigned char iv[16]); -# -{ my ($tweak,$twtmp,$twres,$twmask)=($rndkey1,$rndkey0,$inout0,$inout1); - -&function_begin("aesni_xts_encrypt"); - &mov ($key,&wparam(4)); # key2 - &mov ($inp,&wparam(5)); # clear-text tweak - - &mov ($rounds,&DWP(240,$key)); # key2->rounds - &movups ($inout0,&QWP(0,$inp)); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); # key1 - - &mov ($key_,"esp"); - &sub ("esp",16*7+8); - &mov ($rounds,&DWP(240,$key)); # key1->rounds - &and ("esp",-16); # align stack - - &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant - &mov (&DWP(16*6+4,"esp"),0); - &mov (&DWP(16*6+8,"esp"),1); - &mov (&DWP(16*6+12,"esp"),0); - &mov (&DWP(16*7+0,"esp"),$len); # save original $len - &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp - - &movdqa ($tweak,$inout0); - &pxor ($twtmp,$twtmp); - &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - - &and ($len,-16); - &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds - &sub ($len,16*6); - &jc (&label("xts_enc_short")); - - &shr ($rounds,1); - &mov ($rounds_,$rounds); - &jmp (&label("xts_enc_loop6")); - -&set_label("xts_enc_loop6",16); - for ($i=0;$i<4;$i++) { - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa (&QWP(16*$i,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd ($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - } - &pshufd ($inout5,$twtmp,0x13); - &movdqa (&QWP(16*$i++,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &$movekey ($rndkey0,&QWP(0,$key_)); - &pand ($inout5,$twmask); # isolate carry and residue - &movups ($inout0,&QWP(0,$inp)); # load input - &pxor ($inout5,$tweak); - - # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] - &movdqu ($inout1,&QWP(16*1,$inp)); - &xorps ($inout0,$rndkey0); # input^=rndkey[0] - &movdqu ($inout2,&QWP(16*2,$inp)); - &pxor ($inout1,$rndkey0); - &movdqu ($inout3,&QWP(16*3,$inp)); - &pxor ($inout2,$rndkey0); - &movdqu ($inout4,&QWP(16*4,$inp)); - &pxor ($inout3,$rndkey0); - &movdqu ($rndkey1,&QWP(16*5,$inp)); - &pxor ($inout4,$rndkey0); - &lea ($inp,&DWP(16*6,$inp)); - &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak - &pxor ($inout5,$rndkey1); - - &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); - &pxor ($inout1,&QWP(16*1,"esp")); - &aesenc ($inout0,$rndkey1); - &pxor ($inout2,&QWP(16*2,"esp")); - &aesenc ($inout1,$rndkey1); - &pxor ($inout3,&QWP(16*3,"esp")); - &dec ($rounds); - &aesenc ($inout2,$rndkey1); - &pxor ($inout4,&QWP(16*4,"esp")); - &aesenc ($inout3,$rndkey1); - &pxor ($inout5,$rndkey0); - &aesenc ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); - &aesenc ($inout5,$rndkey1); - &call (&label("_aesni_encrypt6_enter")); - - &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak - &pxor ($twtmp,$twtmp); - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &pcmpgtd ($twtmp,$tweak); # broadcast upper bits - &xorps ($inout1,&QWP(16*1,"esp")); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout2,&QWP(16*2,"esp")); - &movups (&QWP(16*1,$out),$inout1); - &xorps ($inout3,&QWP(16*3,"esp")); - &movups (&QWP(16*2,$out),$inout2); - &xorps ($inout4,&QWP(16*4,"esp")); - &movups (&QWP(16*3,$out),$inout3); - &xorps ($inout5,$tweak); - &movups (&QWP(16*4,$out),$inout4); - &pshufd ($twres,$twtmp,0x13); - &movups (&QWP(16*5,$out),$inout5); - &lea ($out,&DWP(16*6,$out)); - &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 - - &pxor ($twtmp,$twtmp); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov ($rounds,$rounds_); # restore $rounds - &pxor ($tweak,$twres); - - &sub ($len,16*6); - &jnc (&label("xts_enc_loop6")); - - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds - &mov ($key,$key_); # restore $key - &mov ($rounds_,$rounds); - -&set_label("xts_enc_short"); - &add ($len,16*6); - &jz (&label("xts_enc_done6x")); - - &movdqa ($inout3,$tweak); # put aside previous tweak - &cmp ($len,0x20); - &jb (&label("xts_enc_one")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &je (&label("xts_enc_two")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa ($inout4,$tweak); # put aside previous tweak - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &cmp ($len,0x40); - &jb (&label("xts_enc_three")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa ($inout5,$tweak); # put aside previous tweak - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &movdqa (&QWP(16*0,"esp"),$inout3); - &movdqa (&QWP(16*1,"esp"),$inout4); - &je (&label("xts_enc_four")); - - &movdqa (&QWP(16*2,"esp"),$inout5); - &pshufd ($inout5,$twtmp,0x13); - &movdqa (&QWP(16*3,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($inout0,1); - &pand ($inout5,$twmask); # isolate carry and residue - &pxor ($inout5,$tweak); - - &movdqu ($inout0,&QWP(16*0,$inp)); # load input - &movdqu ($inout1,&QWP(16*1,$inp)); - &movdqu ($inout2,&QWP(16*2,$inp)); - &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movdqu ($inout3,&QWP(16*3,$inp)); - &pxor ($inout1,&QWP(16*1,"esp")); - &movdqu ($inout4,&QWP(16*4,$inp)); - &pxor ($inout2,&QWP(16*2,"esp")); - &lea ($inp,&DWP(16*5,$inp)); - &pxor ($inout3,&QWP(16*3,"esp")); - &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak - &pxor ($inout4,$inout5); - - &call ("_aesni_encrypt6"); - - &movaps ($tweak,&QWP(16*4,"esp")); # last tweak - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,&QWP(16*2,"esp")); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout3,&QWP(16*3,"esp")); - &movups (&QWP(16*1,$out),$inout1); - &xorps ($inout4,$tweak); - &movups (&QWP(16*2,$out),$inout2); - &movups (&QWP(16*3,$out),$inout3); - &movups (&QWP(16*4,$out),$inout4); - &lea ($out,&DWP(16*5,$out)); - &jmp (&label("xts_enc_done")); - -&set_label("xts_enc_one",16); - &movups ($inout0,&QWP(16*0,$inp)); # load input - &lea ($inp,&DWP(16*1,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &xorps ($inout0,$inout3); # output^=tweak - &movups (&QWP(16*0,$out),$inout0); # write output - &lea ($out,&DWP(16*1,$out)); - - &movdqa ($tweak,$inout3); # last tweak - &jmp (&label("xts_enc_done")); - -&set_label("xts_enc_two",16); - &movaps ($inout4,$tweak); # put aside last tweak - - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &lea ($inp,&DWP(16*2,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - &xorps ($inout1,$inout4); - &xorps ($inout2,$inout2); - - &call ("_aesni_encrypt3"); - - &xorps ($inout0,$inout3); # output^=tweak - &xorps ($inout1,$inout4); - &movups (&QWP(16*0,$out),$inout0); # write output - &movups (&QWP(16*1,$out),$inout1); - &lea ($out,&DWP(16*2,$out)); - - &movdqa ($tweak,$inout4); # last tweak - &jmp (&label("xts_enc_done")); - -&set_label("xts_enc_three",16); - &movaps ($inout5,$tweak); # put aside last tweak - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &movups ($inout2,&QWP(16*2,$inp)); - &lea ($inp,&DWP(16*3,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - &xorps ($inout1,$inout4); - &xorps ($inout2,$inout5); - - &call ("_aesni_encrypt3"); - - &xorps ($inout0,$inout3); # output^=tweak - &xorps ($inout1,$inout4); - &xorps ($inout2,$inout5); - &movups (&QWP(16*0,$out),$inout0); # write output - &movups (&QWP(16*1,$out),$inout1); - &movups (&QWP(16*2,$out),$inout2); - &lea ($out,&DWP(16*3,$out)); - - &movdqa ($tweak,$inout5); # last tweak - &jmp (&label("xts_enc_done")); - -&set_label("xts_enc_four",16); - &movaps ($inout4,$tweak); # put aside last tweak - - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &movups ($inout2,&QWP(16*2,$inp)); - &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movups ($inout3,&QWP(16*3,$inp)); - &lea ($inp,&DWP(16*4,$inp)); - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,$inout5); - &xorps ($inout3,$inout4); - - &call ("_aesni_encrypt4"); - - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,$inout5); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout3,$inout4); - &movups (&QWP(16*1,$out),$inout1); - &movups (&QWP(16*2,$out),$inout2); - &movups (&QWP(16*3,$out),$inout3); - &lea ($out,&DWP(16*4,$out)); - - &movdqa ($tweak,$inout4); # last tweak - &jmp (&label("xts_enc_done")); - -&set_label("xts_enc_done6x",16); # $tweak is pre-calculated - &mov ($len,&DWP(16*7+0,"esp")); # restore original $len - &and ($len,15); - &jz (&label("xts_enc_ret")); - &movdqa ($inout3,$tweak); - &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 - &jmp (&label("xts_enc_steal")); - -&set_label("xts_enc_done",16); - &mov ($len,&DWP(16*7+0,"esp")); # restore original $len - &pxor ($twtmp,$twtmp); - &and ($len,15); - &jz (&label("xts_enc_ret")); - - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 - &pshufd ($inout3,$twtmp,0x13); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue - &pxor ($inout3,$tweak); - -&set_label("xts_enc_steal"); - &movz ($rounds,&BP(0,$inp)); - &movz ($key,&BP(-16,$out)); - &lea ($inp,&DWP(1,$inp)); - &mov (&BP(-16,$out),&LB($rounds)); - &mov (&BP(0,$out),&LB($key)); - &lea ($out,&DWP(1,$out)); - &sub ($len,1); - &jnz (&label("xts_enc_steal")); - - &sub ($out,&DWP(16*7+0,"esp")); # rewind $out - &mov ($key,$key_); # restore $key - &mov ($rounds,$rounds_); # restore $rounds - - &movups ($inout0,&QWP(-16,$out)); # load input - &xorps ($inout0,$inout3); # input^=tweak - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - &xorps ($inout0,$inout3); # output^=tweak - &movups (&QWP(-16,$out),$inout0); # write output - -&set_label("xts_enc_ret"); - &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp -&function_end("aesni_xts_encrypt"); - -&function_begin("aesni_xts_decrypt"); - &mov ($key,&wparam(4)); # key2 - &mov ($inp,&wparam(5)); # clear-text tweak - - &mov ($rounds,&DWP(240,$key)); # key2->rounds - &movups ($inout0,&QWP(0,$inp)); - if ($inline) - { &aesni_inline_generate1("enc"); } - else - { &call ("_aesni_encrypt1"); } - - &mov ($inp,&wparam(0)); - &mov ($out,&wparam(1)); - &mov ($len,&wparam(2)); - &mov ($key,&wparam(3)); # key1 - - &mov ($key_,"esp"); - &sub ("esp",16*7+8); - &and ("esp",-16); # align stack - - &xor ($rounds_,$rounds_); # if(len%16) len-=16; - &test ($len,15); - &setnz (&LB($rounds_)); - &shl ($rounds_,4); - &sub ($len,$rounds_); - - &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant - &mov (&DWP(16*6+4,"esp"),0); - &mov (&DWP(16*6+8,"esp"),1); - &mov (&DWP(16*6+12,"esp"),0); - &mov (&DWP(16*7+0,"esp"),$len); # save original $len - &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp - - &mov ($rounds,&DWP(240,$key)); # key1->rounds - &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds - - &movdqa ($tweak,$inout0); - &pxor ($twtmp,$twtmp); - &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - - &and ($len,-16); - &sub ($len,16*6); - &jc (&label("xts_dec_short")); - - &shr ($rounds,1); - &mov ($rounds_,$rounds); - &jmp (&label("xts_dec_loop6")); - -&set_label("xts_dec_loop6",16); - for ($i=0;$i<4;$i++) { - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa (&QWP(16*$i,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd ($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - } - &pshufd ($inout5,$twtmp,0x13); - &movdqa (&QWP(16*$i++,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &$movekey ($rndkey0,&QWP(0,$key_)); - &pand ($inout5,$twmask); # isolate carry and residue - &movups ($inout0,&QWP(0,$inp)); # load input - &pxor ($inout5,$tweak); - - # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] - &movdqu ($inout1,&QWP(16*1,$inp)); - &xorps ($inout0,$rndkey0); # input^=rndkey[0] - &movdqu ($inout2,&QWP(16*2,$inp)); - &pxor ($inout1,$rndkey0); - &movdqu ($inout3,&QWP(16*3,$inp)); - &pxor ($inout2,$rndkey0); - &movdqu ($inout4,&QWP(16*4,$inp)); - &pxor ($inout3,$rndkey0); - &movdqu ($rndkey1,&QWP(16*5,$inp)); - &pxor ($inout4,$rndkey0); - &lea ($inp,&DWP(16*6,$inp)); - &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak - &pxor ($inout5,$rndkey1); - - &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); - &pxor ($inout1,&QWP(16*1,"esp")); - &aesdec ($inout0,$rndkey1); - &pxor ($inout2,&QWP(16*2,"esp")); - &aesdec ($inout1,$rndkey1); - &pxor ($inout3,&QWP(16*3,"esp")); - &dec ($rounds); - &aesdec ($inout2,$rndkey1); - &pxor ($inout4,&QWP(16*4,"esp")); - &aesdec ($inout3,$rndkey1); - &pxor ($inout5,$rndkey0); - &aesdec ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); - &aesdec ($inout5,$rndkey1); - &call (&label("_aesni_decrypt6_enter")); - - &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak - &pxor ($twtmp,$twtmp); - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &pcmpgtd ($twtmp,$tweak); # broadcast upper bits - &xorps ($inout1,&QWP(16*1,"esp")); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout2,&QWP(16*2,"esp")); - &movups (&QWP(16*1,$out),$inout1); - &xorps ($inout3,&QWP(16*3,"esp")); - &movups (&QWP(16*2,$out),$inout2); - &xorps ($inout4,&QWP(16*4,"esp")); - &movups (&QWP(16*3,$out),$inout3); - &xorps ($inout5,$tweak); - &movups (&QWP(16*4,$out),$inout4); - &pshufd ($twres,$twtmp,0x13); - &movups (&QWP(16*5,$out),$inout5); - &lea ($out,&DWP(16*6,$out)); - &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 - - &pxor ($twtmp,$twtmp); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov ($rounds,$rounds_); # restore $rounds - &pxor ($tweak,$twres); - - &sub ($len,16*6); - &jnc (&label("xts_dec_loop6")); - - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds - &mov ($key,$key_); # restore $key - &mov ($rounds_,$rounds); - -&set_label("xts_dec_short"); - &add ($len,16*6); - &jz (&label("xts_dec_done6x")); - - &movdqa ($inout3,$tweak); # put aside previous tweak - &cmp ($len,0x20); - &jb (&label("xts_dec_one")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &je (&label("xts_dec_two")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa ($inout4,$tweak); # put aside previous tweak - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &cmp ($len,0x40); - &jb (&label("xts_dec_three")); - - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa ($inout5,$tweak); # put aside previous tweak - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - &movdqa (&QWP(16*0,"esp"),$inout3); - &movdqa (&QWP(16*1,"esp"),$inout4); - &je (&label("xts_dec_four")); - - &movdqa (&QWP(16*2,"esp"),$inout5); - &pshufd ($inout5,$twtmp,0x13); - &movdqa (&QWP(16*3,"esp"),$tweak); - &paddq ($tweak,$tweak); # &psllq($inout0,1); - &pand ($inout5,$twmask); # isolate carry and residue - &pxor ($inout5,$tweak); - - &movdqu ($inout0,&QWP(16*0,$inp)); # load input - &movdqu ($inout1,&QWP(16*1,$inp)); - &movdqu ($inout2,&QWP(16*2,$inp)); - &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movdqu ($inout3,&QWP(16*3,$inp)); - &pxor ($inout1,&QWP(16*1,"esp")); - &movdqu ($inout4,&QWP(16*4,$inp)); - &pxor ($inout2,&QWP(16*2,"esp")); - &lea ($inp,&DWP(16*5,$inp)); - &pxor ($inout3,&QWP(16*3,"esp")); - &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak - &pxor ($inout4,$inout5); - - &call ("_aesni_decrypt6"); - - &movaps ($tweak,&QWP(16*4,"esp")); # last tweak - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,&QWP(16*2,"esp")); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout3,&QWP(16*3,"esp")); - &movups (&QWP(16*1,$out),$inout1); - &xorps ($inout4,$tweak); - &movups (&QWP(16*2,$out),$inout2); - &movups (&QWP(16*3,$out),$inout3); - &movups (&QWP(16*4,$out),$inout4); - &lea ($out,&DWP(16*5,$out)); - &jmp (&label("xts_dec_done")); - -&set_label("xts_dec_one",16); - &movups ($inout0,&QWP(16*0,$inp)); # load input - &lea ($inp,&DWP(16*1,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &xorps ($inout0,$inout3); # output^=tweak - &movups (&QWP(16*0,$out),$inout0); # write output - &lea ($out,&DWP(16*1,$out)); - - &movdqa ($tweak,$inout3); # last tweak - &jmp (&label("xts_dec_done")); - -&set_label("xts_dec_two",16); - &movaps ($inout4,$tweak); # put aside last tweak - - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &lea ($inp,&DWP(16*2,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - &xorps ($inout1,$inout4); - - &call ("_aesni_decrypt3"); - - &xorps ($inout0,$inout3); # output^=tweak - &xorps ($inout1,$inout4); - &movups (&QWP(16*0,$out),$inout0); # write output - &movups (&QWP(16*1,$out),$inout1); - &lea ($out,&DWP(16*2,$out)); - - &movdqa ($tweak,$inout4); # last tweak - &jmp (&label("xts_dec_done")); - -&set_label("xts_dec_three",16); - &movaps ($inout5,$tweak); # put aside last tweak - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &movups ($inout2,&QWP(16*2,$inp)); - &lea ($inp,&DWP(16*3,$inp)); - &xorps ($inout0,$inout3); # input^=tweak - &xorps ($inout1,$inout4); - &xorps ($inout2,$inout5); - - &call ("_aesni_decrypt3"); - - &xorps ($inout0,$inout3); # output^=tweak - &xorps ($inout1,$inout4); - &xorps ($inout2,$inout5); - &movups (&QWP(16*0,$out),$inout0); # write output - &movups (&QWP(16*1,$out),$inout1); - &movups (&QWP(16*2,$out),$inout2); - &lea ($out,&DWP(16*3,$out)); - - &movdqa ($tweak,$inout5); # last tweak - &jmp (&label("xts_dec_done")); - -&set_label("xts_dec_four",16); - &movaps ($inout4,$tweak); # put aside last tweak - - &movups ($inout0,&QWP(16*0,$inp)); # load input - &movups ($inout1,&QWP(16*1,$inp)); - &movups ($inout2,&QWP(16*2,$inp)); - &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak - &movups ($inout3,&QWP(16*3,$inp)); - &lea ($inp,&DWP(16*4,$inp)); - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,$inout5); - &xorps ($inout3,$inout4); - - &call ("_aesni_decrypt4"); - - &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak - &xorps ($inout1,&QWP(16*1,"esp")); - &xorps ($inout2,$inout5); - &movups (&QWP(16*0,$out),$inout0); # write output - &xorps ($inout3,$inout4); - &movups (&QWP(16*1,$out),$inout1); - &movups (&QWP(16*2,$out),$inout2); - &movups (&QWP(16*3,$out),$inout3); - &lea ($out,&DWP(16*4,$out)); - - &movdqa ($tweak,$inout4); # last tweak - &jmp (&label("xts_dec_done")); - -&set_label("xts_dec_done6x",16); # $tweak is pre-calculated - &mov ($len,&DWP(16*7+0,"esp")); # restore original $len - &and ($len,15); - &jz (&label("xts_dec_ret")); - &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 - &jmp (&label("xts_dec_only_one_more")); - -&set_label("xts_dec_done",16); - &mov ($len,&DWP(16*7+0,"esp")); # restore original $len - &pxor ($twtmp,$twtmp); - &and ($len,15); - &jz (&label("xts_dec_ret")); - - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 - &pshufd ($twres,$twtmp,0x13); - &pxor ($twtmp,$twtmp); - &movdqa ($twmask,&QWP(16*6,"esp")); - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($twres,$twmask); # isolate carry and residue - &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &pxor ($tweak,$twres); - -&set_label("xts_dec_only_one_more"); - &pshufd ($inout3,$twtmp,0x13); - &movdqa ($inout4,$tweak); # put aside previous tweak - &paddq ($tweak,$tweak); # &psllq($tweak,1); - &pand ($inout3,$twmask); # isolate carry and residue - &pxor ($inout3,$tweak); - - &mov ($key,$key_); # restore $key - &mov ($rounds,$rounds_); # restore $rounds - - &movups ($inout0,&QWP(0,$inp)); # load input - &xorps ($inout0,$inout3); # input^=tweak - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &xorps ($inout0,$inout3); # output^=tweak - &movups (&QWP(0,$out),$inout0); # write output - -&set_label("xts_dec_steal"); - &movz ($rounds,&BP(16,$inp)); - &movz ($key,&BP(0,$out)); - &lea ($inp,&DWP(1,$inp)); - &mov (&BP(0,$out),&LB($rounds)); - &mov (&BP(16,$out),&LB($key)); - &lea ($out,&DWP(1,$out)); - &sub ($len,1); - &jnz (&label("xts_dec_steal")); - - &sub ($out,&DWP(16*7+0,"esp")); # rewind $out - &mov ($key,$key_); # restore $key - &mov ($rounds,$rounds_); # restore $rounds - - &movups ($inout0,&QWP(0,$out)); # load input - &xorps ($inout0,$inout4); # input^=tweak - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &xorps ($inout0,$inout4); # output^=tweak - &movups (&QWP(0,$out),$inout0); # write output - -&set_label("xts_dec_ret"); - &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp -&function_end("aesni_xts_decrypt"); -} -} - -###################################################################### -# void $PREFIX_cbc_encrypt (const void *inp, void *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivp,const int enc); -&function_begin("${PREFIX}_cbc_encrypt"); - &mov ($inp,&wparam(0)); - &mov ($rounds_,"esp"); - &mov ($out,&wparam(1)); - &sub ($rounds_,24); - &mov ($len,&wparam(2)); - &and ($rounds_,-16); - &mov ($key,&wparam(3)); - &mov ($key_,&wparam(4)); - &test ($len,$len); - &jz (&label("cbc_abort")); - - &cmp (&wparam(5),0); - &xchg ($rounds_,"esp"); # alloca - &movups ($ivec,&QWP(0,$key_)); # load IV - &mov ($rounds,&DWP(240,$key)); - &mov ($key_,$key); # backup $key - &mov (&DWP(16,"esp"),$rounds_); # save original %esp - &mov ($rounds_,$rounds); # backup $rounds - &je (&label("cbc_decrypt")); - - &movaps ($inout0,$ivec); - &cmp ($len,16); - &jb (&label("cbc_enc_tail")); - &sub ($len,16); - &jmp (&label("cbc_enc_loop")); - -&set_label("cbc_enc_loop",16); - &movups ($ivec,&QWP(0,$inp)); # input actually - &lea ($inp,&DWP(16,$inp)); - if ($inline) - { &aesni_inline_generate1("enc",$inout0,$ivec); } - else - { &xorps($inout0,$ivec); &call("_aesni_encrypt1"); } - &mov ($rounds,$rounds_); # restore $rounds - &mov ($key,$key_); # restore $key - &movups (&QWP(0,$out),$inout0); # store output - &lea ($out,&DWP(16,$out)); - &sub ($len,16); - &jnc (&label("cbc_enc_loop")); - &add ($len,16); - &jnz (&label("cbc_enc_tail")); - &movaps ($ivec,$inout0); - &jmp (&label("cbc_ret")); - -&set_label("cbc_enc_tail"); - &mov ("ecx",$len); # zaps $rounds - &data_word(0xA4F3F689); # rep movsb - &mov ("ecx",16); # zero tail - &sub ("ecx",$len); - &xor ("eax","eax"); # zaps $len - &data_word(0xAAF3F689); # rep stosb - &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block - &mov ($rounds,$rounds_); # restore $rounds - &mov ($inp,$out); # $inp and $out are the same - &mov ($key,$key_); # restore $key - &jmp (&label("cbc_enc_loop")); -###################################################################### -&set_label("cbc_decrypt",16); - &cmp ($len,0x50); - &jbe (&label("cbc_dec_tail")); - &movaps (&QWP(0,"esp"),$ivec); # save IV - &sub ($len,0x50); - &jmp (&label("cbc_dec_loop6_enter")); - -&set_label("cbc_dec_loop6",16); - &movaps (&QWP(0,"esp"),$rndkey0); # save IV - &movups (&QWP(0,$out),$inout5); - &lea ($out,&DWP(0x10,$out)); -&set_label("cbc_dec_loop6_enter"); - &movdqu ($inout0,&QWP(0,$inp)); - &movdqu ($inout1,&QWP(0x10,$inp)); - &movdqu ($inout2,&QWP(0x20,$inp)); - &movdqu ($inout3,&QWP(0x30,$inp)); - &movdqu ($inout4,&QWP(0x40,$inp)); - &movdqu ($inout5,&QWP(0x50,$inp)); - - &call ("_aesni_decrypt6"); - - &movups ($rndkey1,&QWP(0,$inp)); - &movups ($rndkey0,&QWP(0x10,$inp)); - &xorps ($inout0,&QWP(0,"esp")); # ^=IV - &xorps ($inout1,$rndkey1); - &movups ($rndkey1,&QWP(0x20,$inp)); - &xorps ($inout2,$rndkey0); - &movups ($rndkey0,&QWP(0x30,$inp)); - &xorps ($inout3,$rndkey1); - &movups ($rndkey1,&QWP(0x40,$inp)); - &xorps ($inout4,$rndkey0); - &movups ($rndkey0,&QWP(0x50,$inp)); # IV - &xorps ($inout5,$rndkey1); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &lea ($inp,&DWP(0x60,$inp)); - &movups (&QWP(0x20,$out),$inout2); - &mov ($rounds,$rounds_) # restore $rounds - &movups (&QWP(0x30,$out),$inout3); - &mov ($key,$key_); # restore $key - &movups (&QWP(0x40,$out),$inout4); - &lea ($out,&DWP(0x50,$out)); - &sub ($len,0x60); - &ja (&label("cbc_dec_loop6")); - - &movaps ($inout0,$inout5); - &movaps ($ivec,$rndkey0); - &add ($len,0x50); - &jle (&label("cbc_dec_tail_collected")); - &movups (&QWP(0,$out),$inout0); - &lea ($out,&DWP(0x10,$out)); -&set_label("cbc_dec_tail"); - &movups ($inout0,&QWP(0,$inp)); - &movaps ($in0,$inout0); - &cmp ($len,0x10); - &jbe (&label("cbc_dec_one")); - - &movups ($inout1,&QWP(0x10,$inp)); - &movaps ($in1,$inout1); - &cmp ($len,0x20); - &jbe (&label("cbc_dec_two")); - - &movups ($inout2,&QWP(0x20,$inp)); - &cmp ($len,0x30); - &jbe (&label("cbc_dec_three")); - - &movups ($inout3,&QWP(0x30,$inp)); - &cmp ($len,0x40); - &jbe (&label("cbc_dec_four")); - - &movups ($inout4,&QWP(0x40,$inp)); - &movaps (&QWP(0,"esp"),$ivec); # save IV - &movups ($inout0,&QWP(0,$inp)); - &xorps ($inout5,$inout5); - &call ("_aesni_decrypt6"); - &movups ($rndkey1,&QWP(0,$inp)); - &movups ($rndkey0,&QWP(0x10,$inp)); - &xorps ($inout0,&QWP(0,"esp")); # ^= IV - &xorps ($inout1,$rndkey1); - &movups ($rndkey1,&QWP(0x20,$inp)); - &xorps ($inout2,$rndkey0); - &movups ($rndkey0,&QWP(0x30,$inp)); - &xorps ($inout3,$rndkey1); - &movups ($ivec,&QWP(0x40,$inp)); # IV - &xorps ($inout4,$rndkey0); - &movups (&QWP(0,$out),$inout0); - &movups (&QWP(0x10,$out),$inout1); - &movups (&QWP(0x20,$out),$inout2); - &movups (&QWP(0x30,$out),$inout3); - &lea ($out,&DWP(0x40,$out)); - &movaps ($inout0,$inout4); - &sub ($len,0x50); - &jmp (&label("cbc_dec_tail_collected")); - -&set_label("cbc_dec_one",16); - if ($inline) - { &aesni_inline_generate1("dec"); } - else - { &call ("_aesni_decrypt1"); } - &xorps ($inout0,$ivec); - &movaps ($ivec,$in0); - &sub ($len,0x10); - &jmp (&label("cbc_dec_tail_collected")); - -&set_label("cbc_dec_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_decrypt3"); - &xorps ($inout0,$ivec); - &xorps ($inout1,$in0); - &movups (&QWP(0,$out),$inout0); - &movaps ($inout0,$inout1); - &lea ($out,&DWP(0x10,$out)); - &movaps ($ivec,$in1); - &sub ($len,0x20); - &jmp (&label("cbc_dec_tail_collected")); - -&set_label("cbc_dec_three",16); - &call ("_aesni_decrypt3"); - &xorps ($inout0,$ivec); - &xorps ($inout1,$in0); - &xorps ($inout2,$in1); - &movups (&QWP(0,$out),$inout0); - &movaps ($inout0,$inout2); - &movups (&QWP(0x10,$out),$inout1); - &lea ($out,&DWP(0x20,$out)); - &movups ($ivec,&QWP(0x20,$inp)); - &sub ($len,0x30); - &jmp (&label("cbc_dec_tail_collected")); - -&set_label("cbc_dec_four",16); - &call ("_aesni_decrypt4"); - &movups ($rndkey1,&QWP(0x10,$inp)); - &movups ($rndkey0,&QWP(0x20,$inp)); - &xorps ($inout0,$ivec); - &movups ($ivec,&QWP(0x30,$inp)); - &xorps ($inout1,$in0); - &movups (&QWP(0,$out),$inout0); - &xorps ($inout2,$rndkey1); - &movups (&QWP(0x10,$out),$inout1); - &xorps ($inout3,$rndkey0); - &movups (&QWP(0x20,$out),$inout2); - &lea ($out,&DWP(0x30,$out)); - &movaps ($inout0,$inout3); - &sub ($len,0x40); - -&set_label("cbc_dec_tail_collected"); - &and ($len,15); - &jnz (&label("cbc_dec_tail_partial")); - &movups (&QWP(0,$out),$inout0); - &jmp (&label("cbc_ret")); - -&set_label("cbc_dec_tail_partial",16); - &movaps (&QWP(0,"esp"),$inout0); - &mov ("ecx",16); - &mov ($inp,"esp"); - &sub ("ecx",$len); - &data_word(0xA4F3F689); # rep movsb - -&set_label("cbc_ret"); - &mov ("esp",&DWP(16,"esp")); # pull original %esp - &mov ($key_,&wparam(4)); - &movups (&QWP(0,$key_),$ivec); # output IV -&set_label("cbc_abort"); -&function_end("${PREFIX}_cbc_encrypt"); - -###################################################################### -# Mechanical port from aesni-x86_64.pl. -# -# _aesni_set_encrypt_key is private interface, -# input: -# "eax" const unsigned char *userKey -# $rounds int bits -# $key AES_KEY *key -# output: -# "eax" return code -# $round rounds - -&function_begin_B("_aesni_set_encrypt_key"); - &test ("eax","eax"); - &jz (&label("bad_pointer")); - &test ($key,$key); - &jz (&label("bad_pointer")); - - &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey - &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 - &lea ($key,&DWP(16,$key)); - &cmp ($rounds,256); - &je (&label("14rounds")); - &cmp ($rounds,192); - &je (&label("12rounds")); - &cmp ($rounds,128); - &jne (&label("bad_keybits")); - -&set_label("10rounds",16); - &mov ($rounds,9); - &$movekey (&QWP(-16,$key),"xmm0"); # round 0 - &aeskeygenassist("xmm1","xmm0",0x01); # round 1 - &call (&label("key_128_cold")); - &aeskeygenassist("xmm1","xmm0",0x2); # round 2 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x04); # round 3 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x08); # round 4 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x10); # round 5 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x20); # round 6 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x40); # round 7 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x80); # round 8 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x1b); # round 9 - &call (&label("key_128")); - &aeskeygenassist("xmm1","xmm0",0x36); # round 10 - &call (&label("key_128")); - &$movekey (&QWP(0,$key),"xmm0"); - &mov (&DWP(80,$key),$rounds); - &xor ("eax","eax"); - &ret(); - -&set_label("key_128",16); - &$movekey (&QWP(0,$key),"xmm0"); - &lea ($key,&DWP(16,$key)); -&set_label("key_128_cold"); - &shufps ("xmm4","xmm0",0b00010000); - &xorps ("xmm0","xmm4"); - &shufps ("xmm4","xmm0",0b10001100); - &xorps ("xmm0","xmm4"); - &shufps ("xmm1","xmm1",0b11111111); # critical path - &xorps ("xmm0","xmm1"); - &ret(); - -&set_label("12rounds",16); - &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey - &mov ($rounds,11); - &$movekey (&QWP(-16,$key),"xmm0") # round 0 - &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 - &call (&label("key_192a_cold")); - &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 - &call (&label("key_192b")); - &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5 - &call (&label("key_192a")); - &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6 - &call (&label("key_192b")); - &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8 - &call (&label("key_192a")); - &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9 - &call (&label("key_192b")); - &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11 - &call (&label("key_192a")); - &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12 - &call (&label("key_192b")); - &$movekey (&QWP(0,$key),"xmm0"); - &mov (&DWP(48,$key),$rounds); - &xor ("eax","eax"); - &ret(); - -&set_label("key_192a",16); - &$movekey (&QWP(0,$key),"xmm0"); - &lea ($key,&DWP(16,$key)); -&set_label("key_192a_cold",16); - &movaps ("xmm5","xmm2"); -&set_label("key_192b_warm"); - &shufps ("xmm4","xmm0",0b00010000); - &movdqa ("xmm3","xmm2"); - &xorps ("xmm0","xmm4"); - &shufps ("xmm4","xmm0",0b10001100); - &pslldq ("xmm3",4); - &xorps ("xmm0","xmm4"); - &pshufd ("xmm1","xmm1",0b01010101); # critical path - &pxor ("xmm2","xmm3"); - &pxor ("xmm0","xmm1"); - &pshufd ("xmm3","xmm0",0b11111111); - &pxor ("xmm2","xmm3"); - &ret(); - -&set_label("key_192b",16); - &movaps ("xmm3","xmm0"); - &shufps ("xmm5","xmm0",0b01000100); - &$movekey (&QWP(0,$key),"xmm5"); - &shufps ("xmm3","xmm2",0b01001110); - &$movekey (&QWP(16,$key),"xmm3"); - &lea ($key,&DWP(32,$key)); - &jmp (&label("key_192b_warm")); - -&set_label("14rounds",16); - &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey - &mov ($rounds,13); - &lea ($key,&DWP(16,$key)); - &$movekey (&QWP(-32,$key),"xmm0"); # round 0 - &$movekey (&QWP(-16,$key),"xmm2"); # round 1 - &aeskeygenassist("xmm1","xmm2",0x01); # round 2 - &call (&label("key_256a_cold")); - &aeskeygenassist("xmm1","xmm0",0x01); # round 3 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x02); # round 4 - &call (&label("key_256a")); - &aeskeygenassist("xmm1","xmm0",0x02); # round 5 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x04); # round 6 - &call (&label("key_256a")); - &aeskeygenassist("xmm1","xmm0",0x04); # round 7 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x08); # round 8 - &call (&label("key_256a")); - &aeskeygenassist("xmm1","xmm0",0x08); # round 9 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x10); # round 10 - &call (&label("key_256a")); - &aeskeygenassist("xmm1","xmm0",0x10); # round 11 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x20); # round 12 - &call (&label("key_256a")); - &aeskeygenassist("xmm1","xmm0",0x20); # round 13 - &call (&label("key_256b")); - &aeskeygenassist("xmm1","xmm2",0x40); # round 14 - &call (&label("key_256a")); - &$movekey (&QWP(0,$key),"xmm0"); - &mov (&DWP(16,$key),$rounds); - &xor ("eax","eax"); - &ret(); - -&set_label("key_256a",16); - &$movekey (&QWP(0,$key),"xmm2"); - &lea ($key,&DWP(16,$key)); -&set_label("key_256a_cold"); - &shufps ("xmm4","xmm0",0b00010000); - &xorps ("xmm0","xmm4"); - &shufps ("xmm4","xmm0",0b10001100); - &xorps ("xmm0","xmm4"); - &shufps ("xmm1","xmm1",0b11111111); # critical path - &xorps ("xmm0","xmm1"); - &ret(); - -&set_label("key_256b",16); - &$movekey (&QWP(0,$key),"xmm0"); - &lea ($key,&DWP(16,$key)); - - &shufps ("xmm4","xmm2",0b00010000); - &xorps ("xmm2","xmm4"); - &shufps ("xmm4","xmm2",0b10001100); - &xorps ("xmm2","xmm4"); - &shufps ("xmm1","xmm1",0b10101010); # critical path - &xorps ("xmm2","xmm1"); - &ret(); - -&set_label("bad_pointer",4); - &mov ("eax",-1); - &ret (); -&set_label("bad_keybits",4); - &mov ("eax",-2); - &ret (); -&function_end_B("_aesni_set_encrypt_key"); - -# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits, -# AES_KEY *key) -&function_begin_B("${PREFIX}_set_encrypt_key"); - &mov ("eax",&wparam(0)); - &mov ($rounds,&wparam(1)); - &mov ($key,&wparam(2)); - &call ("_aesni_set_encrypt_key"); - &ret (); -&function_end_B("${PREFIX}_set_encrypt_key"); - -# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits, -# AES_KEY *key) -&function_begin_B("${PREFIX}_set_decrypt_key"); - &mov ("eax",&wparam(0)); - &mov ($rounds,&wparam(1)); - &mov ($key,&wparam(2)); - &call ("_aesni_set_encrypt_key"); - &mov ($key,&wparam(2)); - &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key - &test ("eax","eax"); - &jnz (&label("dec_key_ret")); - &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule - - &$movekey ("xmm0",&QWP(0,$key)); # just swap - &$movekey ("xmm1",&QWP(0,"eax")); - &$movekey (&QWP(0,"eax"),"xmm0"); - &$movekey (&QWP(0,$key),"xmm1"); - &lea ($key,&DWP(16,$key)); - &lea ("eax",&DWP(-16,"eax")); - -&set_label("dec_key_inverse"); - &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse - &$movekey ("xmm1",&QWP(0,"eax")); - &aesimc ("xmm0","xmm0"); - &aesimc ("xmm1","xmm1"); - &lea ($key,&DWP(16,$key)); - &lea ("eax",&DWP(-16,"eax")); - &$movekey (&QWP(16,"eax"),"xmm0"); - &$movekey (&QWP(-16,$key),"xmm1"); - &cmp ("eax",$key); - &ja (&label("dec_key_inverse")); - - &$movekey ("xmm0",&QWP(0,$key)); # inverse middle - &aesimc ("xmm0","xmm0"); - &$movekey (&QWP(0,$key),"xmm0"); - - &xor ("eax","eax"); # return success -&set_label("dec_key_ret"); - &ret (); -&function_end_B("${PREFIX}_set_decrypt_key"); -&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl deleted file mode 100644 index c9270dfddc..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/aesni-x86_64.pl +++ /dev/null @@ -1,3071 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for Intel AES-NI extension. In -# OpenSSL context it's used with Intel engine, but can also be used as -# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for -# details]. -# -# Performance. -# -# Given aes(enc|dec) instructions' latency asymptotic performance for -# non-parallelizable modes such as CBC encrypt is 3.75 cycles per byte -# processed with 128-bit key. And given their throughput asymptotic -# performance for parallelizable modes is 1.25 cycles per byte. Being -# asymptotic limit it's not something you commonly achieve in reality, -# but how close does one get? Below are results collected for -# different modes and block sized. Pairs of numbers are for en-/ -# decryption. -# -# 16-byte 64-byte 256-byte 1-KB 8-KB -# ECB 4.25/4.25 1.38/1.38 1.28/1.28 1.26/1.26 1.26/1.26 -# CTR 5.42/5.42 1.92/1.92 1.44/1.44 1.28/1.28 1.26/1.26 -# CBC 4.38/4.43 4.15/1.43 4.07/1.32 4.07/1.29 4.06/1.28 -# CCM 5.66/9.42 4.42/5.41 4.16/4.40 4.09/4.15 4.06/4.07 -# OFB 5.42/5.42 4.64/4.64 4.44/4.44 4.39/4.39 4.38/4.38 -# CFB 5.73/5.85 5.56/5.62 5.48/5.56 5.47/5.55 5.47/5.55 -# -# ECB, CTR, CBC and CCM results are free from EVP overhead. This means -# that otherwise used 'openssl speed -evp aes-128-??? -engine aesni -# [-decrypt]' will exhibit 10-15% worse results for smaller blocks. -# The results were collected with specially crafted speed.c benchmark -# in order to compare them with results reported in "Intel Advanced -# Encryption Standard (AES) New Instruction Set" White Paper Revision -# 3.0 dated May 2010. All above results are consistently better. This -# module also provides better performance for block sizes smaller than -# 128 bytes in points *not* represented in the above table. -# -# Looking at the results for 8-KB buffer. -# -# CFB and OFB results are far from the limit, because implementation -# uses "generic" CRYPTO_[c|o]fb128_encrypt interfaces relying on -# single-block aesni_encrypt, which is not the most optimal way to go. -# CBC encrypt result is unexpectedly high and there is no documented -# explanation for it. Seemingly there is a small penalty for feeding -# the result back to AES unit the way it's done in CBC mode. There is -# nothing one can do and the result appears optimal. CCM result is -# identical to CBC, because CBC-MAC is essentially CBC encrypt without -# saving output. CCM CTR "stays invisible," because it's neatly -# interleaved wih CBC-MAC. This provides ~30% improvement over -# "straghtforward" CCM implementation with CTR and CBC-MAC performed -# disjointly. Parallelizable modes practically achieve the theoretical -# limit. -# -# Looking at how results vary with buffer size. -# -# Curves are practically saturated at 1-KB buffer size. In most cases -# "256-byte" performance is >95%, and "64-byte" is ~90% of "8-KB" one. -# CTR curve doesn't follow this pattern and is "slowest" changing one -# with "256-byte" result being 87% of "8-KB." This is because overhead -# in CTR mode is most computationally intensive. Small-block CCM -# decrypt is slower than encrypt, because first CTR and last CBC-MAC -# iterations can't be interleaved. -# -# Results for 192- and 256-bit keys. -# -# EVP-free results were observed to scale perfectly with number of -# rounds for larger block sizes, i.e. 192-bit result being 10/12 times -# lower and 256-bit one - 10/14. Well, in CBC encrypt case differences -# are a tad smaller, because the above mentioned penalty biases all -# results by same constant value. In similar way function call -# overhead affects small-block performance, as well as OFB and CFB -# results. Differences are not large, most common coefficients are -# 10/11.7 and 10/13.4 (as opposite to 10/12.0 and 10/14.0), but one -# observe even 10/11.2 and 10/12.4 (CTR, OFB, CFB)... - -# January 2011 -# -# While Westmere processor features 6 cycles latency for aes[enc|dec] -# instructions, which can be scheduled every second cycle, Sandy -# Bridge spends 8 cycles per instruction, but it can schedule them -# every cycle. This means that code targeting Westmere would perform -# suboptimally on Sandy Bridge. Therefore this update. -# -# In addition, non-parallelizable CBC encrypt (as well as CCM) is -# optimized. Relative improvement might appear modest, 8% on Westmere, -# but in absolute terms it's 3.77 cycles per byte encrypted with -# 128-bit key on Westmere, and 5.07 - on Sandy Bridge. These numbers -# should be compared to asymptotic limits of 3.75 for Westmere and -# 5.00 for Sandy Bridge. Actually, the fact that they get this close -# to asymptotic limits is quite amazing. Indeed, the limit is -# calculated as latency times number of rounds, 10 for 128-bit key, -# and divided by 16, the number of bytes in block, or in other words -# it accounts *solely* for aesenc instructions. But there are extra -# instructions, and numbers so close to the asymptotic limits mean -# that it's as if it takes as little as *one* additional cycle to -# execute all of them. How is it possible? It is possible thanks to -# out-of-order execution logic, which manages to overlap post- -# processing of previous block, things like saving the output, with -# actual encryption of current block, as well as pre-processing of -# current block, things like fetching input and xor-ing it with -# 0-round element of the key schedule, with actual encryption of -# previous block. Keep this in mind... -# -# For parallelizable modes, such as ECB, CBC decrypt, CTR, higher -# performance is achieved by interleaving instructions working on -# independent blocks. In which case asymptotic limit for such modes -# can be obtained by dividing above mentioned numbers by AES -# instructions' interleave factor. Westmere can execute at most 3 -# instructions at a time, meaning that optimal interleave factor is 3, -# and that's where the "magic" number of 1.25 come from. "Optimal -# interleave factor" means that increase of interleave factor does -# not improve performance. The formula has proven to reflect reality -# pretty well on Westmere... Sandy Bridge on the other hand can -# execute up to 8 AES instructions at a time, so how does varying -# interleave factor affect the performance? Here is table for ECB -# (numbers are cycles per byte processed with 128-bit key): -# -# instruction interleave factor 3x 6x 8x -# theoretical asymptotic limit 1.67 0.83 0.625 -# measured performance for 8KB block 1.05 0.86 0.84 -# -# "as if" interleave factor 4.7x 5.8x 6.0x -# -# Further data for other parallelizable modes: -# -# CBC decrypt 1.16 0.93 0.93 -# CTR 1.14 0.91 n/a -# -# Well, given 3x column it's probably inappropriate to call the limit -# asymptotic, if it can be surpassed, isn't it? What happens there? -# Rewind to CBC paragraph for the answer. Yes, out-of-order execution -# magic is responsible for this. Processor overlaps not only the -# additional instructions with AES ones, but even AES instuctions -# processing adjacent triplets of independent blocks. In the 6x case -# additional instructions still claim disproportionally small amount -# of additional cycles, but in 8x case number of instructions must be -# a tad too high for out-of-order logic to cope with, and AES unit -# remains underutilized... As you can see 8x interleave is hardly -# justifiable, so there no need to feel bad that 32-bit aesni-x86.pl -# utilizies 6x interleave because of limited register bank capacity. -# -# Higher interleave factors do have negative impact on Westmere -# performance. While for ECB mode it's negligible ~1.5%, other -# parallelizables perform ~5% worse, which is outweighed by ~25% -# improvement on Sandy Bridge. To balance regression on Westmere -# CTR mode was implemented with 6x aesenc interleave factor. - -# April 2011 -# -# Add aesni_xts_[en|de]crypt. Westmere spends 1.33 cycles processing -# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like -# in CTR mode AES instruction interleave factor was chosen to be 6x. - -$PREFIX="aesni"; # if $PREFIX is set to "AES", the script - # generates drop-in replacement for - # crypto/aes/asm/aes-x86_64.pl:-) - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$movkey = $PREFIX eq "aesni" ? "movups" : "movups"; -@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order - -$code=".text\n"; - -$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! -# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... -$inp="%rdi"; -$out="%rsi"; -$len="%rdx"; -$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! -$ivp="%r8"; # cbc, ctr, ... - -$rnds_="%r10d"; # backup copy for $rounds -$key_="%r11"; # backup copy for $key - -# %xmm register layout -$rndkey0="%xmm0"; $rndkey1="%xmm1"; -$inout0="%xmm2"; $inout1="%xmm3"; -$inout2="%xmm4"; $inout3="%xmm5"; -$inout4="%xmm6"; $inout5="%xmm7"; -$inout6="%xmm8"; $inout7="%xmm9"; - -$in2="%xmm6"; $in1="%xmm7"; # used in CBC decrypt, CTR, ... -$in0="%xmm8"; $iv="%xmm9"; - -# Inline version of internal aesni_[en|de]crypt1. -# -# Why folded loop? Because aes[enc|dec] is slow enough to accommodate -# cycles which take care of loop variables... -{ my $sn; -sub aesni_generate1 { -my ($p,$key,$rounds,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout)); -++$sn; -$code.=<<___; - $movkey ($key),$rndkey0 - $movkey 16($key),$rndkey1 -___ -$code.=<<___ if (defined($ivec)); - xorps $rndkey0,$ivec - lea 32($key),$key - xorps $ivec,$inout -___ -$code.=<<___ if (!defined($ivec)); - lea 32($key),$key - xorps $rndkey0,$inout -___ -$code.=<<___; -.Loop_${p}1_$sn: - aes${p} $rndkey1,$inout - dec $rounds - $movkey ($key),$rndkey1 - lea 16($key),$key - jnz .Loop_${p}1_$sn # loop body is 16 bytes - aes${p}last $rndkey1,$inout -___ -}} -# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); -# -{ my ($inp,$out,$key) = @_4args; - -$code.=<<___; -.globl ${PREFIX}_encrypt -.type ${PREFIX}_encrypt,\@abi-omnipotent -.align 16 -${PREFIX}_encrypt: - movups ($inp),$inout0 # load input - mov 240($key),$rounds # key->rounds -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - movups $inout0,($out) # output - ret -.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt - -.globl ${PREFIX}_decrypt -.type ${PREFIX}_decrypt,\@abi-omnipotent -.align 16 -${PREFIX}_decrypt: - movups ($inp),$inout0 # load input - mov 240($key),$rounds # key->rounds -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - movups $inout0,($out) # output - ret -.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt -___ -} - -# _aesni_[en|de]cryptN are private interfaces, N denotes interleave -# factor. Why 3x subroutine were originally used in loops? Even though -# aes[enc|dec] latency was originally 6, it could be scheduled only -# every *2nd* cycle. Thus 3x interleave was the one providing optimal -# utilization, i.e. when subroutine's throughput is virtually same as -# of non-interleaved subroutine [for number of input blocks up to 3]. -# This is why it makes no sense to implement 2x subroutine. -# aes[enc|dec] latency in next processor generation is 8, but the -# instructions can be scheduled every cycle. Optimal interleave for -# new processor is therefore 8x... -sub aesni_generate3 { -my $dir=shift; -# As already mentioned it takes in $key and $rounds, which are *not* -# preserved. $inout[0-2] is cipher/clear text... -$code.=<<___; -.type _aesni_${dir}rypt3,\@abi-omnipotent -.align 16 -_aesni_${dir}rypt3: - $movkey ($key),$rndkey0 - shr \$1,$rounds - $movkey 16($key),$rndkey1 - lea 32($key),$key - xorps $rndkey0,$inout0 - xorps $rndkey0,$inout1 - xorps $rndkey0,$inout2 - $movkey ($key),$rndkey0 - -.L${dir}_loop3: - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - dec $rounds - aes${dir} $rndkey1,$inout2 - $movkey 16($key),$rndkey1 - aes${dir} $rndkey0,$inout0 - aes${dir} $rndkey0,$inout1 - lea 32($key),$key - aes${dir} $rndkey0,$inout2 - $movkey ($key),$rndkey0 - jnz .L${dir}_loop3 - - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - aes${dir} $rndkey1,$inout2 - aes${dir}last $rndkey0,$inout0 - aes${dir}last $rndkey0,$inout1 - aes${dir}last $rndkey0,$inout2 - ret -.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 -___ -} -# 4x interleave is implemented to improve small block performance, -# most notably [and naturally] 4 block by ~30%. One can argue that one -# should have implemented 5x as well, but improvement would be <20%, -# so it's not worth it... -sub aesni_generate4 { -my $dir=shift; -# As already mentioned it takes in $key and $rounds, which are *not* -# preserved. $inout[0-3] is cipher/clear text... -$code.=<<___; -.type _aesni_${dir}rypt4,\@abi-omnipotent -.align 16 -_aesni_${dir}rypt4: - $movkey ($key),$rndkey0 - shr \$1,$rounds - $movkey 16($key),$rndkey1 - lea 32($key),$key - xorps $rndkey0,$inout0 - xorps $rndkey0,$inout1 - xorps $rndkey0,$inout2 - xorps $rndkey0,$inout3 - $movkey ($key),$rndkey0 - -.L${dir}_loop4: - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - dec $rounds - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - $movkey 16($key),$rndkey1 - aes${dir} $rndkey0,$inout0 - aes${dir} $rndkey0,$inout1 - lea 32($key),$key - aes${dir} $rndkey0,$inout2 - aes${dir} $rndkey0,$inout3 - $movkey ($key),$rndkey0 - jnz .L${dir}_loop4 - - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir}last $rndkey0,$inout0 - aes${dir}last $rndkey0,$inout1 - aes${dir}last $rndkey0,$inout2 - aes${dir}last $rndkey0,$inout3 - ret -.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 -___ -} -sub aesni_generate6 { -my $dir=shift; -# As already mentioned it takes in $key and $rounds, which are *not* -# preserved. $inout[0-5] is cipher/clear text... -$code.=<<___; -.type _aesni_${dir}rypt6,\@abi-omnipotent -.align 16 -_aesni_${dir}rypt6: - $movkey ($key),$rndkey0 - shr \$1,$rounds - $movkey 16($key),$rndkey1 - lea 32($key),$key - xorps $rndkey0,$inout0 - pxor $rndkey0,$inout1 - aes${dir} $rndkey1,$inout0 - pxor $rndkey0,$inout2 - aes${dir} $rndkey1,$inout1 - pxor $rndkey0,$inout3 - aes${dir} $rndkey1,$inout2 - pxor $rndkey0,$inout4 - aes${dir} $rndkey1,$inout3 - pxor $rndkey0,$inout5 - dec $rounds - aes${dir} $rndkey1,$inout4 - $movkey ($key),$rndkey0 - aes${dir} $rndkey1,$inout5 - jmp .L${dir}_loop6_enter -.align 16 -.L${dir}_loop6: - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - dec $rounds - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 -.L${dir}_loop6_enter: # happens to be 16-byte aligned - $movkey 16($key),$rndkey1 - aes${dir} $rndkey0,$inout0 - aes${dir} $rndkey0,$inout1 - lea 32($key),$key - aes${dir} $rndkey0,$inout2 - aes${dir} $rndkey0,$inout3 - aes${dir} $rndkey0,$inout4 - aes${dir} $rndkey0,$inout5 - $movkey ($key),$rndkey0 - jnz .L${dir}_loop6 - - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 - aes${dir}last $rndkey0,$inout0 - aes${dir}last $rndkey0,$inout1 - aes${dir}last $rndkey0,$inout2 - aes${dir}last $rndkey0,$inout3 - aes${dir}last $rndkey0,$inout4 - aes${dir}last $rndkey0,$inout5 - ret -.size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6 -___ -} -sub aesni_generate8 { -my $dir=shift; -# As already mentioned it takes in $key and $rounds, which are *not* -# preserved. $inout[0-7] is cipher/clear text... -$code.=<<___; -.type _aesni_${dir}rypt8,\@abi-omnipotent -.align 16 -_aesni_${dir}rypt8: - $movkey ($key),$rndkey0 - shr \$1,$rounds - $movkey 16($key),$rndkey1 - lea 32($key),$key - xorps $rndkey0,$inout0 - xorps $rndkey0,$inout1 - aes${dir} $rndkey1,$inout0 - pxor $rndkey0,$inout2 - aes${dir} $rndkey1,$inout1 - pxor $rndkey0,$inout3 - aes${dir} $rndkey1,$inout2 - pxor $rndkey0,$inout4 - aes${dir} $rndkey1,$inout3 - pxor $rndkey0,$inout5 - dec $rounds - aes${dir} $rndkey1,$inout4 - pxor $rndkey0,$inout6 - aes${dir} $rndkey1,$inout5 - pxor $rndkey0,$inout7 - $movkey ($key),$rndkey0 - aes${dir} $rndkey1,$inout6 - aes${dir} $rndkey1,$inout7 - $movkey 16($key),$rndkey1 - jmp .L${dir}_loop8_enter -.align 16 -.L${dir}_loop8: - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - dec $rounds - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 - aes${dir} $rndkey1,$inout6 - aes${dir} $rndkey1,$inout7 - $movkey 16($key),$rndkey1 -.L${dir}_loop8_enter: # happens to be 16-byte aligned - aes${dir} $rndkey0,$inout0 - aes${dir} $rndkey0,$inout1 - lea 32($key),$key - aes${dir} $rndkey0,$inout2 - aes${dir} $rndkey0,$inout3 - aes${dir} $rndkey0,$inout4 - aes${dir} $rndkey0,$inout5 - aes${dir} $rndkey0,$inout6 - aes${dir} $rndkey0,$inout7 - $movkey ($key),$rndkey0 - jnz .L${dir}_loop8 - - aes${dir} $rndkey1,$inout0 - aes${dir} $rndkey1,$inout1 - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 - aes${dir} $rndkey1,$inout6 - aes${dir} $rndkey1,$inout7 - aes${dir}last $rndkey0,$inout0 - aes${dir}last $rndkey0,$inout1 - aes${dir}last $rndkey0,$inout2 - aes${dir}last $rndkey0,$inout3 - aes${dir}last $rndkey0,$inout4 - aes${dir}last $rndkey0,$inout5 - aes${dir}last $rndkey0,$inout6 - aes${dir}last $rndkey0,$inout7 - ret -.size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 -___ -} -&aesni_generate3("enc") if ($PREFIX eq "aesni"); -&aesni_generate3("dec"); -&aesni_generate4("enc") if ($PREFIX eq "aesni"); -&aesni_generate4("dec"); -&aesni_generate6("enc") if ($PREFIX eq "aesni"); -&aesni_generate6("dec"); -&aesni_generate8("enc") if ($PREFIX eq "aesni"); -&aesni_generate8("dec"); - -if ($PREFIX eq "aesni") { -######################################################################## -# void aesni_ecb_encrypt (const void *in, void *out, -# size_t length, const AES_KEY *key, -# int enc); -$code.=<<___; -.globl aesni_ecb_encrypt -.type aesni_ecb_encrypt,\@function,5 -.align 16 -aesni_ecb_encrypt: -___ -$code.=<<___ if ($win64); - lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) -.Lecb_enc_body: -___ -$code.=<<___; - and \$-16,$len - jz .Lecb_ret - - mov 240($key),$rounds # key->rounds - $movkey ($key),$rndkey0 - mov $key,$key_ # backup $key - mov $rounds,$rnds_ # backup $rounds - test %r8d,%r8d # 5th argument - jz .Lecb_decrypt -#--------------------------- ECB ENCRYPT ------------------------------# - cmp \$0x80,$len - jb .Lecb_enc_tail - - movdqu ($inp),$inout0 - movdqu 0x10($inp),$inout1 - movdqu 0x20($inp),$inout2 - movdqu 0x30($inp),$inout3 - movdqu 0x40($inp),$inout4 - movdqu 0x50($inp),$inout5 - movdqu 0x60($inp),$inout6 - movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp - sub \$0x80,$len - jmp .Lecb_enc_loop8_enter -.align 16 -.Lecb_enc_loop8: - movups $inout0,($out) - mov $key_,$key # restore $key - movdqu ($inp),$inout0 - mov $rnds_,$rounds # restore $rounds - movups $inout1,0x10($out) - movdqu 0x10($inp),$inout1 - movups $inout2,0x20($out) - movdqu 0x20($inp),$inout2 - movups $inout3,0x30($out) - movdqu 0x30($inp),$inout3 - movups $inout4,0x40($out) - movdqu 0x40($inp),$inout4 - movups $inout5,0x50($out) - movdqu 0x50($inp),$inout5 - movups $inout6,0x60($out) - movdqu 0x60($inp),$inout6 - movups $inout7,0x70($out) - lea 0x80($out),$out - movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp -.Lecb_enc_loop8_enter: - - call _aesni_encrypt8 - - sub \$0x80,$len - jnc .Lecb_enc_loop8 - - movups $inout0,($out) - mov $key_,$key # restore $key - movups $inout1,0x10($out) - mov $rnds_,$rounds # restore $rounds - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - movups $inout6,0x60($out) - movups $inout7,0x70($out) - lea 0x80($out),$out - add \$0x80,$len - jz .Lecb_ret - -.Lecb_enc_tail: - movups ($inp),$inout0 - cmp \$0x20,$len - jb .Lecb_enc_one - movups 0x10($inp),$inout1 - je .Lecb_enc_two - movups 0x20($inp),$inout2 - cmp \$0x40,$len - jb .Lecb_enc_three - movups 0x30($inp),$inout3 - je .Lecb_enc_four - movups 0x40($inp),$inout4 - cmp \$0x60,$len - jb .Lecb_enc_five - movups 0x50($inp),$inout5 - je .Lecb_enc_six - movdqu 0x60($inp),$inout6 - call _aesni_encrypt8 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - movups $inout6,0x60($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_one: -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - movups $inout0,($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_two: - xorps $inout2,$inout2 - call _aesni_encrypt3 - movups $inout0,($out) - movups $inout1,0x10($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_three: - call _aesni_encrypt3 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_four: - call _aesni_encrypt4 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_five: - xorps $inout5,$inout5 - call _aesni_encrypt6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - jmp .Lecb_ret -.align 16 -.Lecb_enc_six: - call _aesni_encrypt6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - jmp .Lecb_ret - #--------------------------- ECB DECRYPT ------------------------------# -.align 16 -.Lecb_decrypt: - cmp \$0x80,$len - jb .Lecb_dec_tail - - movdqu ($inp),$inout0 - movdqu 0x10($inp),$inout1 - movdqu 0x20($inp),$inout2 - movdqu 0x30($inp),$inout3 - movdqu 0x40($inp),$inout4 - movdqu 0x50($inp),$inout5 - movdqu 0x60($inp),$inout6 - movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp - sub \$0x80,$len - jmp .Lecb_dec_loop8_enter -.align 16 -.Lecb_dec_loop8: - movups $inout0,($out) - mov $key_,$key # restore $key - movdqu ($inp),$inout0 - mov $rnds_,$rounds # restore $rounds - movups $inout1,0x10($out) - movdqu 0x10($inp),$inout1 - movups $inout2,0x20($out) - movdqu 0x20($inp),$inout2 - movups $inout3,0x30($out) - movdqu 0x30($inp),$inout3 - movups $inout4,0x40($out) - movdqu 0x40($inp),$inout4 - movups $inout5,0x50($out) - movdqu 0x50($inp),$inout5 - movups $inout6,0x60($out) - movdqu 0x60($inp),$inout6 - movups $inout7,0x70($out) - lea 0x80($out),$out - movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp -.Lecb_dec_loop8_enter: - - call _aesni_decrypt8 - - $movkey ($key_),$rndkey0 - sub \$0x80,$len - jnc .Lecb_dec_loop8 - - movups $inout0,($out) - mov $key_,$key # restore $key - movups $inout1,0x10($out) - mov $rnds_,$rounds # restore $rounds - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - movups $inout6,0x60($out) - movups $inout7,0x70($out) - lea 0x80($out),$out - add \$0x80,$len - jz .Lecb_ret - -.Lecb_dec_tail: - movups ($inp),$inout0 - cmp \$0x20,$len - jb .Lecb_dec_one - movups 0x10($inp),$inout1 - je .Lecb_dec_two - movups 0x20($inp),$inout2 - cmp \$0x40,$len - jb .Lecb_dec_three - movups 0x30($inp),$inout3 - je .Lecb_dec_four - movups 0x40($inp),$inout4 - cmp \$0x60,$len - jb .Lecb_dec_five - movups 0x50($inp),$inout5 - je .Lecb_dec_six - movups 0x60($inp),$inout6 - $movkey ($key),$rndkey0 - call _aesni_decrypt8 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - movups $inout6,0x60($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_one: -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - movups $inout0,($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_two: - xorps $inout2,$inout2 - call _aesni_decrypt3 - movups $inout0,($out) - movups $inout1,0x10($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_three: - call _aesni_decrypt3 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_four: - call _aesni_decrypt4 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_five: - xorps $inout5,$inout5 - call _aesni_decrypt6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - jmp .Lecb_ret -.align 16 -.Lecb_dec_six: - call _aesni_decrypt6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - -.Lecb_ret: -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - lea 0x58(%rsp),%rsp -.Lecb_enc_ret: -___ -$code.=<<___; - ret -.size aesni_ecb_encrypt,.-aesni_ecb_encrypt -___ - -{ -###################################################################### -# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, -# size_t blocks, const AES_KEY *key, -# const char *ivec,char *cmac); -# -# Handles only complete blocks, operates on 64-bit counter and -# does not update *ivec! Nor does it finalize CMAC value -# (see engine/eng_aesni.c for details) -# -{ -my $cmac="%r9"; # 6th argument - -my $increment="%xmm6"; -my $bswap_mask="%xmm7"; - -$code.=<<___; -.globl aesni_ccm64_encrypt_blocks -.type aesni_ccm64_encrypt_blocks,\@function,6 -.align 16 -aesni_ccm64_encrypt_blocks: -___ -$code.=<<___ if ($win64); - lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) -.Lccm64_enc_body: -___ -$code.=<<___; - mov 240($key),$rounds # key->rounds - movdqu ($ivp),$iv - movdqa .Lincrement64(%rip),$increment - movdqa .Lbswap_mask(%rip),$bswap_mask - - shr \$1,$rounds - lea 0($key),$key_ - movdqu ($cmac),$inout1 - movdqa $iv,$inout0 - mov $rounds,$rnds_ - pshufb $bswap_mask,$iv - jmp .Lccm64_enc_outer -.align 16 -.Lccm64_enc_outer: - $movkey ($key_),$rndkey0 - mov $rnds_,$rounds - movups ($inp),$in0 # load inp - - xorps $rndkey0,$inout0 # counter - $movkey 16($key_),$rndkey1 - xorps $in0,$rndkey0 - lea 32($key_),$key - xorps $rndkey0,$inout1 # cmac^=inp - $movkey ($key),$rndkey0 - -.Lccm64_enc2_loop: - aesenc $rndkey1,$inout0 - dec $rounds - aesenc $rndkey1,$inout1 - $movkey 16($key),$rndkey1 - aesenc $rndkey0,$inout0 - lea 32($key),$key - aesenc $rndkey0,$inout1 - $movkey 0($key),$rndkey0 - jnz .Lccm64_enc2_loop - aesenc $rndkey1,$inout0 - aesenc $rndkey1,$inout1 - paddq $increment,$iv - aesenclast $rndkey0,$inout0 - aesenclast $rndkey0,$inout1 - - dec $len - lea 16($inp),$inp - xorps $inout0,$in0 # inp ^= E(iv) - movdqa $iv,$inout0 - movups $in0,($out) # save output - lea 16($out),$out - pshufb $bswap_mask,$inout0 - jnz .Lccm64_enc_outer - - movups $inout1,($cmac) -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - lea 0x58(%rsp),%rsp -.Lccm64_enc_ret: -___ -$code.=<<___; - ret -.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks -___ -###################################################################### -$code.=<<___; -.globl aesni_ccm64_decrypt_blocks -.type aesni_ccm64_decrypt_blocks,\@function,6 -.align 16 -aesni_ccm64_decrypt_blocks: -___ -$code.=<<___ if ($win64); - lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) -.Lccm64_dec_body: -___ -$code.=<<___; - mov 240($key),$rounds # key->rounds - movups ($ivp),$iv - movdqu ($cmac),$inout1 - movdqa .Lincrement64(%rip),$increment - movdqa .Lbswap_mask(%rip),$bswap_mask - - movaps $iv,$inout0 - mov $rounds,$rnds_ - mov $key,$key_ - pshufb $bswap_mask,$iv -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - movups ($inp),$in0 # load inp - paddq $increment,$iv - lea 16($inp),$inp - jmp .Lccm64_dec_outer -.align 16 -.Lccm64_dec_outer: - xorps $inout0,$in0 # inp ^= E(iv) - movdqa $iv,$inout0 - mov $rnds_,$rounds - movups $in0,($out) # save output - lea 16($out),$out - pshufb $bswap_mask,$inout0 - - sub \$1,$len - jz .Lccm64_dec_break - - $movkey ($key_),$rndkey0 - shr \$1,$rounds - $movkey 16($key_),$rndkey1 - xorps $rndkey0,$in0 - lea 32($key_),$key - xorps $rndkey0,$inout0 - xorps $in0,$inout1 # cmac^=out - $movkey ($key),$rndkey0 - -.Lccm64_dec2_loop: - aesenc $rndkey1,$inout0 - dec $rounds - aesenc $rndkey1,$inout1 - $movkey 16($key),$rndkey1 - aesenc $rndkey0,$inout0 - lea 32($key),$key - aesenc $rndkey0,$inout1 - $movkey 0($key),$rndkey0 - jnz .Lccm64_dec2_loop - movups ($inp),$in0 # load inp - paddq $increment,$iv - aesenc $rndkey1,$inout0 - aesenc $rndkey1,$inout1 - lea 16($inp),$inp - aesenclast $rndkey0,$inout0 - aesenclast $rndkey0,$inout1 - jmp .Lccm64_dec_outer - -.align 16 -.Lccm64_dec_break: - #xorps $in0,$inout1 # cmac^=out -___ - &aesni_generate1("enc",$key_,$rounds,$inout1,$in0); -$code.=<<___; - movups $inout1,($cmac) -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - lea 0x58(%rsp),%rsp -.Lccm64_dec_ret: -___ -$code.=<<___; - ret -.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks -___ -} -###################################################################### -# void aesni_ctr32_encrypt_blocks (const void *in, void *out, -# size_t blocks, const AES_KEY *key, -# const char *ivec); -# -# Handles only complete blocks, operates on 32-bit counter and -# does not update *ivec! (see engine/eng_aesni.c for details) -# -{ -my $reserved = $win64?0:-0x28; -my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); -my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); -my $bswap_mask="%xmm15"; - -$code.=<<___; -.globl aesni_ctr32_encrypt_blocks -.type aesni_ctr32_encrypt_blocks,\@function,5 -.align 16 -aesni_ctr32_encrypt_blocks: -___ -$code.=<<___ if ($win64); - lea -0xc8(%rsp),%rsp - movaps %xmm6,0x20(%rsp) - movaps %xmm7,0x30(%rsp) - movaps %xmm8,0x40(%rsp) - movaps %xmm9,0x50(%rsp) - movaps %xmm10,0x60(%rsp) - movaps %xmm11,0x70(%rsp) - movaps %xmm12,0x80(%rsp) - movaps %xmm13,0x90(%rsp) - movaps %xmm14,0xa0(%rsp) - movaps %xmm15,0xb0(%rsp) -.Lctr32_body: -___ -$code.=<<___; - cmp \$1,$len - je .Lctr32_one_shortcut - - movdqu ($ivp),$ivec - movdqa .Lbswap_mask(%rip),$bswap_mask - xor $rounds,$rounds - pextrd \$3,$ivec,$rnds_ # pull 32-bit counter - pinsrd \$3,$rounds,$ivec # wipe 32-bit counter - - mov 240($key),$rounds # key->rounds - bswap $rnds_ - pxor $iv0,$iv0 # vector of 3 32-bit counters - pxor $iv1,$iv1 # vector of 3 32-bit counters - pinsrd \$0,$rnds_,$iv0 - lea 3($rnds_),$key_ - pinsrd \$0,$key_,$iv1 - inc $rnds_ - pinsrd \$1,$rnds_,$iv0 - inc $key_ - pinsrd \$1,$key_,$iv1 - inc $rnds_ - pinsrd \$2,$rnds_,$iv0 - inc $key_ - pinsrd \$2,$key_,$iv1 - movdqa $iv0,$reserved(%rsp) - pshufb $bswap_mask,$iv0 - movdqa $iv1,`$reserved+0x10`(%rsp) - pshufb $bswap_mask,$iv1 - - pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword - pshufd \$`2<<6`,$iv0,$inout1 - pshufd \$`1<<6`,$iv0,$inout2 - cmp \$6,$len - jb .Lctr32_tail - shr \$1,$rounds - mov $key,$key_ # backup $key - mov $rounds,$rnds_ # backup $rounds - sub \$6,$len - jmp .Lctr32_loop6 - -.align 16 -.Lctr32_loop6: - pshufd \$`3<<6`,$iv1,$inout3 - por $ivec,$inout0 # merge counter-less ivec - $movkey ($key_),$rndkey0 - pshufd \$`2<<6`,$iv1,$inout4 - por $ivec,$inout1 - $movkey 16($key_),$rndkey1 - pshufd \$`1<<6`,$iv1,$inout5 - por $ivec,$inout2 - por $ivec,$inout3 - xorps $rndkey0,$inout0 - por $ivec,$inout4 - por $ivec,$inout5 - - # inline _aesni_encrypt6 and interleave last rounds - # with own code... - - pxor $rndkey0,$inout1 - aesenc $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 - aesenc $rndkey1,$inout1 - movdqa .Lincrement32(%rip),$iv1 - pxor $rndkey0,$inout3 - aesenc $rndkey1,$inout2 - movdqa $reserved(%rsp),$iv0 - pxor $rndkey0,$inout4 - aesenc $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds - aesenc $rndkey1,$inout4 - aesenc $rndkey1,$inout5 - jmp .Lctr32_enc_loop6_enter -.align 16 -.Lctr32_enc_loop6: - aesenc $rndkey1,$inout0 - aesenc $rndkey1,$inout1 - dec $rounds - aesenc $rndkey1,$inout2 - aesenc $rndkey1,$inout3 - aesenc $rndkey1,$inout4 - aesenc $rndkey1,$inout5 -.Lctr32_enc_loop6_enter: - $movkey 16($key),$rndkey1 - aesenc $rndkey0,$inout0 - aesenc $rndkey0,$inout1 - lea 32($key),$key - aesenc $rndkey0,$inout2 - aesenc $rndkey0,$inout3 - aesenc $rndkey0,$inout4 - aesenc $rndkey0,$inout5 - $movkey ($key),$rndkey0 - jnz .Lctr32_enc_loop6 - - aesenc $rndkey1,$inout0 - paddd $iv1,$iv0 # increment counter vector - aesenc $rndkey1,$inout1 - paddd `$reserved+0x10`(%rsp),$iv1 - aesenc $rndkey1,$inout2 - movdqa $iv0,$reserved(%rsp) # save counter vector - aesenc $rndkey1,$inout3 - movdqa $iv1,`$reserved+0x10`(%rsp) - aesenc $rndkey1,$inout4 - pshufb $bswap_mask,$iv0 # byte swap - aesenc $rndkey1,$inout5 - pshufb $bswap_mask,$iv1 - - aesenclast $rndkey0,$inout0 - movups ($inp),$in0 # load input - aesenclast $rndkey0,$inout1 - movups 0x10($inp),$in1 - aesenclast $rndkey0,$inout2 - movups 0x20($inp),$in2 - aesenclast $rndkey0,$inout3 - movups 0x30($inp),$in3 - aesenclast $rndkey0,$inout4 - movups 0x40($inp),$rndkey1 - aesenclast $rndkey0,$inout5 - movups 0x50($inp),$rndkey0 - lea 0x60($inp),$inp - - xorps $inout0,$in0 # xor - pshufd \$`3<<6`,$iv0,$inout0 - xorps $inout1,$in1 - pshufd \$`2<<6`,$iv0,$inout1 - movups $in0,($out) # store output - xorps $inout2,$in2 - pshufd \$`1<<6`,$iv0,$inout2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - xorps $inout4,$rndkey1 - movups $in3,0x30($out) - xorps $inout5,$rndkey0 - movups $rndkey1,0x40($out) - movups $rndkey0,0x50($out) - lea 0x60($out),$out - mov $rnds_,$rounds - sub \$6,$len - jnc .Lctr32_loop6 - - add \$6,$len - jz .Lctr32_done - mov $key_,$key # restore $key - lea 1($rounds,$rounds),$rounds # restore original value - -.Lctr32_tail: - por $ivec,$inout0 - movups ($inp),$in0 - cmp \$2,$len - jb .Lctr32_one - - por $ivec,$inout1 - movups 0x10($inp),$in1 - je .Lctr32_two - - pshufd \$`3<<6`,$iv1,$inout3 - por $ivec,$inout2 - movups 0x20($inp),$in2 - cmp \$4,$len - jb .Lctr32_three - - pshufd \$`2<<6`,$iv1,$inout4 - por $ivec,$inout3 - movups 0x30($inp),$in3 - je .Lctr32_four - - por $ivec,$inout4 - xorps $inout5,$inout5 - - call _aesni_encrypt6 - - movups 0x40($inp),$rndkey1 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - xorps $inout4,$rndkey1 - movups $in3,0x30($out) - movups $rndkey1,0x40($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_one_shortcut: - movups ($ivp),$inout0 - movups ($inp),$in0 - mov 240($key),$rounds # key->rounds -.Lctr32_one: -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - xorps $inout0,$in0 - movups $in0,($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_two: - xorps $inout2,$inout2 - call _aesni_encrypt3 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - movups $in1,0x10($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_three: - call _aesni_encrypt3 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - movups $in2,0x20($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_four: - call _aesni_encrypt4 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - movups $in3,0x30($out) - -.Lctr32_done: -___ -$code.=<<___ if ($win64); - movaps 0x20(%rsp),%xmm6 - movaps 0x30(%rsp),%xmm7 - movaps 0x40(%rsp),%xmm8 - movaps 0x50(%rsp),%xmm9 - movaps 0x60(%rsp),%xmm10 - movaps 0x70(%rsp),%xmm11 - movaps 0x80(%rsp),%xmm12 - movaps 0x90(%rsp),%xmm13 - movaps 0xa0(%rsp),%xmm14 - movaps 0xb0(%rsp),%xmm15 - lea 0xc8(%rsp),%rsp -.Lctr32_ret: -___ -$code.=<<___; - ret -.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks -___ -} - -###################################################################### -# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2 -# const unsigned char iv[16]); -# -{ -my @tweak=map("%xmm$_",(10..15)); -my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); -my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); -my $frame_size = 0x68 + ($win64?160:0); - -$code.=<<___; -.globl aesni_xts_encrypt -.type aesni_xts_encrypt,\@function,6 -.align 16 -aesni_xts_encrypt: - lea -$frame_size(%rsp),%rsp -___ -$code.=<<___ if ($win64); - movaps %xmm6,0x60(%rsp) - movaps %xmm7,0x70(%rsp) - movaps %xmm8,0x80(%rsp) - movaps %xmm9,0x90(%rsp) - movaps %xmm10,0xa0(%rsp) - movaps %xmm11,0xb0(%rsp) - movaps %xmm12,0xc0(%rsp) - movaps %xmm13,0xd0(%rsp) - movaps %xmm14,0xe0(%rsp) - movaps %xmm15,0xf0(%rsp) -.Lxts_enc_body: -___ -$code.=<<___; - movups ($ivp),@tweak[5] # load clear-text tweak - mov 240(%r8),$rounds # key2->rounds - mov 240($key),$rnds_ # key1->rounds -___ - # generate the tweak - &aesni_generate1("enc",$key2,$rounds,@tweak[5]); -$code.=<<___; - mov $key,$key_ # backup $key - mov $rnds_,$rounds # backup $rounds - mov $len,$len_ # backup $len - and \$-16,$len - - movdqa .Lxts_magic(%rip),$twmask - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp # broadcast upper bits -___ - for ($i=0;$i<4;$i++) { - $code.=<<___; - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[$i] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - pand $twmask,$twres # isolate carry and residue - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - pxor $twres,@tweak[5] -___ - } -$code.=<<___; - sub \$16*6,$len - jc .Lxts_enc_short - - shr \$1,$rounds - sub \$1,$rounds - mov $rounds,$rnds_ - jmp .Lxts_enc_grandloop - -.align 16 -.Lxts_enc_grandloop: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu `16*0`($inp),$inout0 # load input - pand $twmask,$twres # isolate carry and residue - movdqu `16*1`($inp),$inout1 - pxor $twres,@tweak[5] - - movdqu `16*2`($inp),$inout2 - pxor @tweak[0],$inout0 # input^=tweak - movdqu `16*3`($inp),$inout3 - pxor @tweak[1],$inout1 - movdqu `16*4`($inp),$inout4 - pxor @tweak[2],$inout2 - movdqu `16*5`($inp),$inout5 - lea `16*6`($inp),$inp - pxor @tweak[3],$inout3 - $movkey ($key_),$rndkey0 - pxor @tweak[4],$inout4 - pxor @tweak[5],$inout5 - - # inline _aesni_encrypt6 and interleave first and last rounds - # with own code... - $movkey 16($key_),$rndkey1 - pxor $rndkey0,$inout0 - pxor $rndkey0,$inout1 - movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks - aesenc $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 - movdqa @tweak[1],`16*1`(%rsp) - aesenc $rndkey1,$inout1 - pxor $rndkey0,$inout3 - movdqa @tweak[2],`16*2`(%rsp) - aesenc $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqa @tweak[3],`16*3`(%rsp) - aesenc $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds - movdqa @tweak[4],`16*4`(%rsp) - aesenc $rndkey1,$inout4 - movdqa @tweak[5],`16*5`(%rsp) - aesenc $rndkey1,$inout5 - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp - jmp .Lxts_enc_loop6_enter - -.align 16 -.Lxts_enc_loop6: - aesenc $rndkey1,$inout0 - aesenc $rndkey1,$inout1 - dec $rounds - aesenc $rndkey1,$inout2 - aesenc $rndkey1,$inout3 - aesenc $rndkey1,$inout4 - aesenc $rndkey1,$inout5 -.Lxts_enc_loop6_enter: - $movkey 16($key),$rndkey1 - aesenc $rndkey0,$inout0 - aesenc $rndkey0,$inout1 - lea 32($key),$key - aesenc $rndkey0,$inout2 - aesenc $rndkey0,$inout3 - aesenc $rndkey0,$inout4 - aesenc $rndkey0,$inout5 - $movkey ($key),$rndkey0 - jnz .Lxts_enc_loop6 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesenc $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue - aesenc $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcast upper bits - aesenc $rndkey1,$inout2 - pxor $twres,@tweak[5] - aesenc $rndkey1,$inout3 - aesenc $rndkey1,$inout4 - aesenc $rndkey1,$inout5 - $movkey 16($key),$rndkey1 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[0] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesenc $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesenc $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesenc $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesenc $rndkey0,$inout3 - aesenc $rndkey0,$inout4 - aesenc $rndkey0,$inout5 - $movkey 32($key),$rndkey0 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[1] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesenc $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue - aesenc $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesenc $rndkey1,$inout2 - pxor $twres,@tweak[5] - aesenc $rndkey1,$inout3 - aesenc $rndkey1,$inout4 - aesenc $rndkey1,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[2] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesenclast $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesenclast $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesenclast $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesenclast $rndkey0,$inout3 - aesenclast $rndkey0,$inout4 - aesenclast $rndkey0,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[3] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - xorps `16*0`(%rsp),$inout0 # output^=tweak - pand $twmask,$twres # isolate carry and residue - xorps `16*1`(%rsp),$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - pxor $twres,@tweak[5] - - xorps `16*2`(%rsp),$inout2 - movups $inout0,`16*0`($out) # write output - xorps `16*3`(%rsp),$inout3 - movups $inout1,`16*1`($out) - xorps `16*4`(%rsp),$inout4 - movups $inout2,`16*2`($out) - xorps `16*5`(%rsp),$inout5 - movups $inout3,`16*3`($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,`16*4`($out) - movups $inout5,`16*5`($out) - lea `16*6`($out),$out - sub \$16*6,$len - jnc .Lxts_enc_grandloop - - lea 3($rounds,$rounds),$rounds # restore original value - mov $key_,$key # restore $key - mov $rounds,$rnds_ # backup $rounds - -.Lxts_enc_short: - add \$16*6,$len - jz .Lxts_enc_done - - cmp \$0x20,$len - jb .Lxts_enc_one - je .Lxts_enc_two - - cmp \$0x40,$len - jb .Lxts_enc_three - je .Lxts_enc_four - - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movdqu 16*1($inp),$inout1 - pxor $twres,@tweak[5] - - movdqu 16*2($inp),$inout2 - pxor @tweak[0],$inout0 - movdqu 16*3($inp),$inout3 - pxor @tweak[1],$inout1 - movdqu 16*4($inp),$inout4 - lea 16*5($inp),$inp - pxor @tweak[2],$inout2 - pxor @tweak[3],$inout3 - pxor @tweak[4],$inout4 - - call _aesni_encrypt6 - - xorps @tweak[0],$inout0 - movdqa @tweak[5],@tweak[0] - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - movdqu $inout0,($out) - xorps @tweak[3],$inout3 - movdqu $inout1,16*1($out) - xorps @tweak[4],$inout4 - movdqu $inout2,16*2($out) - movdqu $inout3,16*3($out) - movdqu $inout4,16*4($out) - lea 16*5($out),$out - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_one: - movups ($inp),$inout0 - lea 16*1($inp),$inp - xorps @tweak[0],$inout0 -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - xorps @tweak[0],$inout0 - movdqa @tweak[1],@tweak[0] - movups $inout0,($out) - lea 16*1($out),$out - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_two: - movups ($inp),$inout0 - movups 16($inp),$inout1 - lea 32($inp),$inp - xorps @tweak[0],$inout0 - xorps @tweak[1],$inout1 - - call _aesni_encrypt3 - - xorps @tweak[0],$inout0 - movdqa @tweak[2],@tweak[0] - xorps @tweak[1],$inout1 - movups $inout0,($out) - movups $inout1,16*1($out) - lea 16*2($out),$out - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_three: - movups ($inp),$inout0 - movups 16*1($inp),$inout1 - movups 16*2($inp),$inout2 - lea 16*3($inp),$inp - xorps @tweak[0],$inout0 - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - - call _aesni_encrypt3 - - xorps @tweak[0],$inout0 - movdqa @tweak[3],@tweak[0] - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - movups $inout0,($out) - movups $inout1,16*1($out) - movups $inout2,16*2($out) - lea 16*3($out),$out - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_four: - movups ($inp),$inout0 - movups 16*1($inp),$inout1 - movups 16*2($inp),$inout2 - xorps @tweak[0],$inout0 - movups 16*3($inp),$inout3 - lea 16*4($inp),$inp - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - xorps @tweak[3],$inout3 - - call _aesni_encrypt4 - - xorps @tweak[0],$inout0 - movdqa @tweak[5],@tweak[0] - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - movups $inout0,($out) - xorps @tweak[3],$inout3 - movups $inout1,16*1($out) - movups $inout2,16*2($out) - movups $inout3,16*3($out) - lea 16*4($out),$out - jmp .Lxts_enc_done - -.align 16 -.Lxts_enc_done: - and \$15,$len_ - jz .Lxts_enc_ret - mov $len_,$len - -.Lxts_enc_steal: - movzb ($inp),%eax # borrow $rounds ... - movzb -16($out),%ecx # ... and $key - lea 1($inp),$inp - mov %al,-16($out) - mov %cl,0($out) - lea 1($out),$out - sub \$1,$len - jnz .Lxts_enc_steal - - sub $len_,$out # rewind $out - mov $key_,$key # restore $key - mov $rnds_,$rounds # restore $rounds - - movups -16($out),$inout0 - xorps @tweak[0],$inout0 -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - xorps @tweak[0],$inout0 - movups $inout0,-16($out) - -.Lxts_enc_ret: -___ -$code.=<<___ if ($win64); - movaps 0x60(%rsp),%xmm6 - movaps 0x70(%rsp),%xmm7 - movaps 0x80(%rsp),%xmm8 - movaps 0x90(%rsp),%xmm9 - movaps 0xa0(%rsp),%xmm10 - movaps 0xb0(%rsp),%xmm11 - movaps 0xc0(%rsp),%xmm12 - movaps 0xd0(%rsp),%xmm13 - movaps 0xe0(%rsp),%xmm14 - movaps 0xf0(%rsp),%xmm15 -___ -$code.=<<___; - lea $frame_size(%rsp),%rsp -.Lxts_enc_epilogue: - ret -.size aesni_xts_encrypt,.-aesni_xts_encrypt -___ - -$code.=<<___; -.globl aesni_xts_decrypt -.type aesni_xts_decrypt,\@function,6 -.align 16 -aesni_xts_decrypt: - lea -$frame_size(%rsp),%rsp -___ -$code.=<<___ if ($win64); - movaps %xmm6,0x60(%rsp) - movaps %xmm7,0x70(%rsp) - movaps %xmm8,0x80(%rsp) - movaps %xmm9,0x90(%rsp) - movaps %xmm10,0xa0(%rsp) - movaps %xmm11,0xb0(%rsp) - movaps %xmm12,0xc0(%rsp) - movaps %xmm13,0xd0(%rsp) - movaps %xmm14,0xe0(%rsp) - movaps %xmm15,0xf0(%rsp) -.Lxts_dec_body: -___ -$code.=<<___; - movups ($ivp),@tweak[5] # load clear-text tweak - mov 240($key2),$rounds # key2->rounds - mov 240($key),$rnds_ # key1->rounds -___ - # generate the tweak - &aesni_generate1("enc",$key2,$rounds,@tweak[5]); -$code.=<<___; - xor %eax,%eax # if ($len%16) len-=16; - test \$15,$len - setnz %al - shl \$4,%rax - sub %rax,$len - - mov $key,$key_ # backup $key - mov $rnds_,$rounds # backup $rounds - mov $len,$len_ # backup $len - and \$-16,$len - - movdqa .Lxts_magic(%rip),$twmask - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp # broadcast upper bits -___ - for ($i=0;$i<4;$i++) { - $code.=<<___; - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[$i] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - pand $twmask,$twres # isolate carry and residue - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - pxor $twres,@tweak[5] -___ - } -$code.=<<___; - sub \$16*6,$len - jc .Lxts_dec_short - - shr \$1,$rounds - sub \$1,$rounds - mov $rounds,$rnds_ - jmp .Lxts_dec_grandloop - -.align 16 -.Lxts_dec_grandloop: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu `16*0`($inp),$inout0 # load input - pand $twmask,$twres # isolate carry and residue - movdqu `16*1`($inp),$inout1 - pxor $twres,@tweak[5] - - movdqu `16*2`($inp),$inout2 - pxor @tweak[0],$inout0 # input^=tweak - movdqu `16*3`($inp),$inout3 - pxor @tweak[1],$inout1 - movdqu `16*4`($inp),$inout4 - pxor @tweak[2],$inout2 - movdqu `16*5`($inp),$inout5 - lea `16*6`($inp),$inp - pxor @tweak[3],$inout3 - $movkey ($key_),$rndkey0 - pxor @tweak[4],$inout4 - pxor @tweak[5],$inout5 - - # inline _aesni_decrypt6 and interleave first and last rounds - # with own code... - $movkey 16($key_),$rndkey1 - pxor $rndkey0,$inout0 - pxor $rndkey0,$inout1 - movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks - aesdec $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 - movdqa @tweak[1],`16*1`(%rsp) - aesdec $rndkey1,$inout1 - pxor $rndkey0,$inout3 - movdqa @tweak[2],`16*2`(%rsp) - aesdec $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqa @tweak[3],`16*3`(%rsp) - aesdec $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds - movdqa @tweak[4],`16*4`(%rsp) - aesdec $rndkey1,$inout4 - movdqa @tweak[5],`16*5`(%rsp) - aesdec $rndkey1,$inout5 - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp - jmp .Lxts_dec_loop6_enter - -.align 16 -.Lxts_dec_loop6: - aesdec $rndkey1,$inout0 - aesdec $rndkey1,$inout1 - dec $rounds - aesdec $rndkey1,$inout2 - aesdec $rndkey1,$inout3 - aesdec $rndkey1,$inout4 - aesdec $rndkey1,$inout5 -.Lxts_dec_loop6_enter: - $movkey 16($key),$rndkey1 - aesdec $rndkey0,$inout0 - aesdec $rndkey0,$inout1 - lea 32($key),$key - aesdec $rndkey0,$inout2 - aesdec $rndkey0,$inout3 - aesdec $rndkey0,$inout4 - aesdec $rndkey0,$inout5 - $movkey ($key),$rndkey0 - jnz .Lxts_dec_loop6 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesdec $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue - aesdec $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcast upper bits - aesdec $rndkey1,$inout2 - pxor $twres,@tweak[5] - aesdec $rndkey1,$inout3 - aesdec $rndkey1,$inout4 - aesdec $rndkey1,$inout5 - $movkey 16($key),$rndkey1 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[0] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesdec $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesdec $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesdec $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesdec $rndkey0,$inout3 - aesdec $rndkey0,$inout4 - aesdec $rndkey0,$inout5 - $movkey 32($key),$rndkey0 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[1] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesdec $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue - aesdec $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesdec $rndkey1,$inout2 - pxor $twres,@tweak[5] - aesdec $rndkey1,$inout3 - aesdec $rndkey1,$inout4 - aesdec $rndkey1,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[2] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesdeclast $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesdeclast $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesdeclast $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesdeclast $rndkey0,$inout3 - aesdeclast $rndkey0,$inout4 - aesdeclast $rndkey0,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[3] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - xorps `16*0`(%rsp),$inout0 # output^=tweak - pand $twmask,$twres # isolate carry and residue - xorps `16*1`(%rsp),$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - pxor $twres,@tweak[5] - - xorps `16*2`(%rsp),$inout2 - movups $inout0,`16*0`($out) # write output - xorps `16*3`(%rsp),$inout3 - movups $inout1,`16*1`($out) - xorps `16*4`(%rsp),$inout4 - movups $inout2,`16*2`($out) - xorps `16*5`(%rsp),$inout5 - movups $inout3,`16*3`($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,`16*4`($out) - movups $inout5,`16*5`($out) - lea `16*6`($out),$out - sub \$16*6,$len - jnc .Lxts_dec_grandloop - - lea 3($rounds,$rounds),$rounds # restore original value - mov $key_,$key # restore $key - mov $rounds,$rnds_ # backup $rounds - -.Lxts_dec_short: - add \$16*6,$len - jz .Lxts_dec_done - - cmp \$0x20,$len - jb .Lxts_dec_one - je .Lxts_dec_two - - cmp \$0x40,$len - jb .Lxts_dec_three - je .Lxts_dec_four - - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movdqu 16*1($inp),$inout1 - pxor $twres,@tweak[5] - - movdqu 16*2($inp),$inout2 - pxor @tweak[0],$inout0 - movdqu 16*3($inp),$inout3 - pxor @tweak[1],$inout1 - movdqu 16*4($inp),$inout4 - lea 16*5($inp),$inp - pxor @tweak[2],$inout2 - pxor @tweak[3],$inout3 - pxor @tweak[4],$inout4 - - call _aesni_decrypt6 - - xorps @tweak[0],$inout0 - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - movdqu $inout0,($out) - xorps @tweak[3],$inout3 - movdqu $inout1,16*1($out) - xorps @tweak[4],$inout4 - movdqu $inout2,16*2($out) - pxor $twtmp,$twtmp - movdqu $inout3,16*3($out) - pcmpgtd @tweak[5],$twtmp - movdqu $inout4,16*4($out) - lea 16*5($out),$out - pshufd \$0x13,$twtmp,@tweak[1] # $twres - and \$15,$len_ - jz .Lxts_dec_ret - - movdqa @tweak[5],@tweak[0] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - pand $twmask,@tweak[1] # isolate carry and residue - pxor @tweak[5],@tweak[1] - jmp .Lxts_dec_done2 - -.align 16 -.Lxts_dec_one: - movups ($inp),$inout0 - lea 16*1($inp),$inp - xorps @tweak[0],$inout0 -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - xorps @tweak[0],$inout0 - movdqa @tweak[1],@tweak[0] - movups $inout0,($out) - movdqa @tweak[2],@tweak[1] - lea 16*1($out),$out - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_two: - movups ($inp),$inout0 - movups 16($inp),$inout1 - lea 32($inp),$inp - xorps @tweak[0],$inout0 - xorps @tweak[1],$inout1 - - call _aesni_decrypt3 - - xorps @tweak[0],$inout0 - movdqa @tweak[2],@tweak[0] - xorps @tweak[1],$inout1 - movdqa @tweak[3],@tweak[1] - movups $inout0,($out) - movups $inout1,16*1($out) - lea 16*2($out),$out - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_three: - movups ($inp),$inout0 - movups 16*1($inp),$inout1 - movups 16*2($inp),$inout2 - lea 16*3($inp),$inp - xorps @tweak[0],$inout0 - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - - call _aesni_decrypt3 - - xorps @tweak[0],$inout0 - movdqa @tweak[3],@tweak[0] - xorps @tweak[1],$inout1 - movdqa @tweak[5],@tweak[1] - xorps @tweak[2],$inout2 - movups $inout0,($out) - movups $inout1,16*1($out) - movups $inout2,16*2($out) - lea 16*3($out),$out - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_four: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movups ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movups 16*1($inp),$inout1 - pxor $twres,@tweak[5] - - movups 16*2($inp),$inout2 - xorps @tweak[0],$inout0 - movups 16*3($inp),$inout3 - lea 16*4($inp),$inp - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - xorps @tweak[3],$inout3 - - call _aesni_decrypt4 - - xorps @tweak[0],$inout0 - movdqa @tweak[4],@tweak[0] - xorps @tweak[1],$inout1 - movdqa @tweak[5],@tweak[1] - xorps @tweak[2],$inout2 - movups $inout0,($out) - xorps @tweak[3],$inout3 - movups $inout1,16*1($out) - movups $inout2,16*2($out) - movups $inout3,16*3($out) - lea 16*4($out),$out - jmp .Lxts_dec_done - -.align 16 -.Lxts_dec_done: - and \$15,$len_ - jz .Lxts_dec_ret -.Lxts_dec_done2: - mov $len_,$len - mov $key_,$key # restore $key - mov $rnds_,$rounds # restore $rounds - - movups ($inp),$inout0 - xorps @tweak[1],$inout0 -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - xorps @tweak[1],$inout0 - movups $inout0,($out) - -.Lxts_dec_steal: - movzb 16($inp),%eax # borrow $rounds ... - movzb ($out),%ecx # ... and $key - lea 1($inp),$inp - mov %al,($out) - mov %cl,16($out) - lea 1($out),$out - sub \$1,$len - jnz .Lxts_dec_steal - - sub $len_,$out # rewind $out - mov $key_,$key # restore $key - mov $rnds_,$rounds # restore $rounds - - movups ($out),$inout0 - xorps @tweak[0],$inout0 -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - xorps @tweak[0],$inout0 - movups $inout0,($out) - -.Lxts_dec_ret: -___ -$code.=<<___ if ($win64); - movaps 0x60(%rsp),%xmm6 - movaps 0x70(%rsp),%xmm7 - movaps 0x80(%rsp),%xmm8 - movaps 0x90(%rsp),%xmm9 - movaps 0xa0(%rsp),%xmm10 - movaps 0xb0(%rsp),%xmm11 - movaps 0xc0(%rsp),%xmm12 - movaps 0xd0(%rsp),%xmm13 - movaps 0xe0(%rsp),%xmm14 - movaps 0xf0(%rsp),%xmm15 -___ -$code.=<<___; - lea $frame_size(%rsp),%rsp -.Lxts_dec_epilogue: - ret -.size aesni_xts_decrypt,.-aesni_xts_decrypt -___ -} }} - -######################################################################## -# void $PREFIX_cbc_encrypt (const void *inp, void *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivp,const int enc); -{ -my $reserved = $win64?0x40:-0x18; # used in decrypt -$code.=<<___; -.globl ${PREFIX}_cbc_encrypt -.type ${PREFIX}_cbc_encrypt,\@function,6 -.align 16 -${PREFIX}_cbc_encrypt: - test $len,$len # check length - jz .Lcbc_ret - - mov 240($key),$rnds_ # key->rounds - mov $key,$key_ # backup $key - test %r9d,%r9d # 6th argument - jz .Lcbc_decrypt -#--------------------------- CBC ENCRYPT ------------------------------# - movups ($ivp),$inout0 # load iv as initial state - mov $rnds_,$rounds - cmp \$16,$len - jb .Lcbc_enc_tail - sub \$16,$len - jmp .Lcbc_enc_loop -.align 16 -.Lcbc_enc_loop: - movups ($inp),$inout1 # load input - lea 16($inp),$inp - #xorps $inout1,$inout0 -___ - &aesni_generate1("enc",$key,$rounds,$inout0,$inout1); -$code.=<<___; - mov $rnds_,$rounds # restore $rounds - mov $key_,$key # restore $key - movups $inout0,0($out) # store output - lea 16($out),$out - sub \$16,$len - jnc .Lcbc_enc_loop - add \$16,$len - jnz .Lcbc_enc_tail - movups $inout0,($ivp) - jmp .Lcbc_ret - -.Lcbc_enc_tail: - mov $len,%rcx # zaps $key - xchg $inp,$out # $inp is %rsi and $out is %rdi now - .long 0x9066A4F3 # rep movsb - mov \$16,%ecx # zero tail - sub $len,%rcx - xor %eax,%eax - .long 0x9066AAF3 # rep stosb - lea -16(%rdi),%rdi # rewind $out by 1 block - mov $rnds_,$rounds # restore $rounds - mov %rdi,%rsi # $inp and $out are the same - mov $key_,$key # restore $key - xor $len,$len # len=16 - jmp .Lcbc_enc_loop # one more spin - #--------------------------- CBC DECRYPT ------------------------------# -.align 16 -.Lcbc_decrypt: -___ -$code.=<<___ if ($win64); - lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) -.Lcbc_decrypt_body: -___ -$code.=<<___; - movups ($ivp),$iv - mov $rnds_,$rounds - cmp \$0x70,$len - jbe .Lcbc_dec_tail - shr \$1,$rnds_ - sub \$0x70,$len - mov $rnds_,$rounds - movaps $iv,$reserved(%rsp) - jmp .Lcbc_dec_loop8_enter -.align 16 -.Lcbc_dec_loop8: - movaps $rndkey0,$reserved(%rsp) # save IV - movups $inout7,($out) - lea 0x10($out),$out -.Lcbc_dec_loop8_enter: - $movkey ($key),$rndkey0 - movups ($inp),$inout0 # load input - movups 0x10($inp),$inout1 - $movkey 16($key),$rndkey1 - - lea 32($key),$key - movdqu 0x20($inp),$inout2 - xorps $rndkey0,$inout0 - movdqu 0x30($inp),$inout3 - xorps $rndkey0,$inout1 - movdqu 0x40($inp),$inout4 - aesdec $rndkey1,$inout0 - pxor $rndkey0,$inout2 - movdqu 0x50($inp),$inout5 - aesdec $rndkey1,$inout1 - pxor $rndkey0,$inout3 - movdqu 0x60($inp),$inout6 - aesdec $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqu 0x70($inp),$inout7 - aesdec $rndkey1,$inout3 - pxor $rndkey0,$inout5 - dec $rounds - aesdec $rndkey1,$inout4 - pxor $rndkey0,$inout6 - aesdec $rndkey1,$inout5 - pxor $rndkey0,$inout7 - $movkey ($key),$rndkey0 - aesdec $rndkey1,$inout6 - aesdec $rndkey1,$inout7 - $movkey 16($key),$rndkey1 - - call .Ldec_loop8_enter - - movups ($inp),$rndkey1 # re-load input - movups 0x10($inp),$rndkey0 - xorps $reserved(%rsp),$inout0 # ^= IV - xorps $rndkey1,$inout1 - movups 0x20($inp),$rndkey1 - xorps $rndkey0,$inout2 - movups 0x30($inp),$rndkey0 - xorps $rndkey1,$inout3 - movups 0x40($inp),$rndkey1 - xorps $rndkey0,$inout4 - movups 0x50($inp),$rndkey0 - xorps $rndkey1,$inout5 - movups 0x60($inp),$rndkey1 - xorps $rndkey0,$inout6 - movups 0x70($inp),$rndkey0 # IV - xorps $rndkey1,$inout7 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,0x40($out) - mov $key_,$key # restore $key - movups $inout5,0x50($out) - lea 0x80($inp),$inp - movups $inout6,0x60($out) - lea 0x70($out),$out - sub \$0x80,$len - ja .Lcbc_dec_loop8 - - movaps $inout7,$inout0 - movaps $rndkey0,$iv - add \$0x70,$len - jle .Lcbc_dec_tail_collected - movups $inout0,($out) - lea 1($rnds_,$rnds_),$rounds - lea 0x10($out),$out -.Lcbc_dec_tail: - movups ($inp),$inout0 - movaps $inout0,$in0 - cmp \$0x10,$len - jbe .Lcbc_dec_one - - movups 0x10($inp),$inout1 - movaps $inout1,$in1 - cmp \$0x20,$len - jbe .Lcbc_dec_two - - movups 0x20($inp),$inout2 - movaps $inout2,$in2 - cmp \$0x30,$len - jbe .Lcbc_dec_three - - movups 0x30($inp),$inout3 - cmp \$0x40,$len - jbe .Lcbc_dec_four - - movups 0x40($inp),$inout4 - cmp \$0x50,$len - jbe .Lcbc_dec_five - - movups 0x50($inp),$inout5 - cmp \$0x60,$len - jbe .Lcbc_dec_six - - movups 0x60($inp),$inout6 - movaps $iv,$reserved(%rsp) # save IV - call _aesni_decrypt8 - movups ($inp),$rndkey1 - movups 0x10($inp),$rndkey0 - xorps $reserved(%rsp),$inout0 # ^= IV - xorps $rndkey1,$inout1 - movups 0x20($inp),$rndkey1 - xorps $rndkey0,$inout2 - movups 0x30($inp),$rndkey0 - xorps $rndkey1,$inout3 - movups 0x40($inp),$rndkey1 - xorps $rndkey0,$inout4 - movups 0x50($inp),$rndkey0 - xorps $rndkey1,$inout5 - movups 0x60($inp),$iv # IV - xorps $rndkey0,$inout6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - lea 0x60($out),$out - movaps $inout6,$inout0 - sub \$0x70,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_one: -___ - &aesni_generate1("dec",$key,$rounds); -$code.=<<___; - xorps $iv,$inout0 - movaps $in0,$iv - sub \$0x10,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_two: - xorps $inout2,$inout2 - call _aesni_decrypt3 - xorps $iv,$inout0 - xorps $in0,$inout1 - movups $inout0,($out) - movaps $in1,$iv - movaps $inout1,$inout0 - lea 0x10($out),$out - sub \$0x20,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_three: - call _aesni_decrypt3 - xorps $iv,$inout0 - xorps $in0,$inout1 - movups $inout0,($out) - xorps $in1,$inout2 - movups $inout1,0x10($out) - movaps $in2,$iv - movaps $inout2,$inout0 - lea 0x20($out),$out - sub \$0x30,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_four: - call _aesni_decrypt4 - xorps $iv,$inout0 - movups 0x30($inp),$iv - xorps $in0,$inout1 - movups $inout0,($out) - xorps $in1,$inout2 - movups $inout1,0x10($out) - xorps $in2,$inout3 - movups $inout2,0x20($out) - movaps $inout3,$inout0 - lea 0x30($out),$out - sub \$0x40,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_five: - xorps $inout5,$inout5 - call _aesni_decrypt6 - movups 0x10($inp),$rndkey1 - movups 0x20($inp),$rndkey0 - xorps $iv,$inout0 - xorps $in0,$inout1 - xorps $rndkey1,$inout2 - movups 0x30($inp),$rndkey1 - xorps $rndkey0,$inout3 - movups 0x40($inp),$iv - xorps $rndkey1,$inout4 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - lea 0x40($out),$out - movaps $inout4,$inout0 - sub \$0x50,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_six: - call _aesni_decrypt6 - movups 0x10($inp),$rndkey1 - movups 0x20($inp),$rndkey0 - xorps $iv,$inout0 - xorps $in0,$inout1 - xorps $rndkey1,$inout2 - movups 0x30($inp),$rndkey1 - xorps $rndkey0,$inout3 - movups 0x40($inp),$rndkey0 - xorps $rndkey1,$inout4 - movups 0x50($inp),$iv - xorps $rndkey0,$inout5 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - lea 0x50($out),$out - movaps $inout5,$inout0 - sub \$0x60,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_tail_collected: - and \$15,$len - movups $iv,($ivp) - jnz .Lcbc_dec_tail_partial - movups $inout0,($out) - jmp .Lcbc_dec_ret -.align 16 -.Lcbc_dec_tail_partial: - movaps $inout0,$reserved(%rsp) - mov \$16,%rcx - mov $out,%rdi - sub $len,%rcx - lea $reserved(%rsp),%rsi - .long 0x9066A4F3 # rep movsb - -.Lcbc_dec_ret: -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - lea 0x58(%rsp),%rsp -___ -$code.=<<___; -.Lcbc_ret: - ret -.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt -___ -} -# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, -# int bits, AES_KEY *key) -{ my ($inp,$bits,$key) = @_4args; - $bits =~ s/%r/%e/; - -$code.=<<___; -.globl ${PREFIX}_set_decrypt_key -.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent -.align 16 -${PREFIX}_set_decrypt_key: - .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 - call __aesni_set_encrypt_key - shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key - test %eax,%eax - jnz .Ldec_key_ret - lea 16($key,$bits),$inp # points at the end of key schedule - - $movkey ($key),%xmm0 # just swap - $movkey ($inp),%xmm1 - $movkey %xmm0,($inp) - $movkey %xmm1,($key) - lea 16($key),$key - lea -16($inp),$inp - -.Ldec_key_inverse: - $movkey ($key),%xmm0 # swap and inverse - $movkey ($inp),%xmm1 - aesimc %xmm0,%xmm0 - aesimc %xmm1,%xmm1 - lea 16($key),$key - lea -16($inp),$inp - $movkey %xmm0,16($inp) - $movkey %xmm1,-16($key) - cmp $key,$inp - ja .Ldec_key_inverse - - $movkey ($key),%xmm0 # inverse middle - aesimc %xmm0,%xmm0 - $movkey %xmm0,($inp) -.Ldec_key_ret: - add \$8,%rsp - ret -.LSEH_end_set_decrypt_key: -.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key -___ - -# This is based on submission by -# -# Huang Ying -# Vinodh Gopal -# Kahraman Akdemir -# -# Agressively optimized in respect to aeskeygenassist's critical path -# and is contained in %xmm0-5 to meet Win64 ABI requirement. -# -$code.=<<___; -.globl ${PREFIX}_set_encrypt_key -.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent -.align 16 -${PREFIX}_set_encrypt_key: -__aesni_set_encrypt_key: - .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 - mov \$-1,%rax - test $inp,$inp - jz .Lenc_key_ret - test $key,$key - jz .Lenc_key_ret - - movups ($inp),%xmm0 # pull first 128 bits of *userKey - xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0 - lea 16($key),%rax - cmp \$256,$bits - je .L14rounds - cmp \$192,$bits - je .L12rounds - cmp \$128,$bits - jne .Lbad_keybits - -.L10rounds: - mov \$9,$bits # 10 rounds for 128-bit key - $movkey %xmm0,($key) # round 0 - aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 - call .Lkey_expansion_128_cold - aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 - call .Lkey_expansion_128 - aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 - call .Lkey_expansion_128 - aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 - call .Lkey_expansion_128 - aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 - call .Lkey_expansion_128 - aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 - call .Lkey_expansion_128 - aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 - call .Lkey_expansion_128 - aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 - call .Lkey_expansion_128 - aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 - call .Lkey_expansion_128 - aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10 - call .Lkey_expansion_128 - $movkey %xmm0,(%rax) - mov $bits,80(%rax) # 240(%rdx) - xor %eax,%eax - jmp .Lenc_key_ret - -.align 16 -.L12rounds: - movq 16($inp),%xmm2 # remaining 1/3 of *userKey - mov \$11,$bits # 12 rounds for 192 - $movkey %xmm0,($key) # round 0 - aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 - call .Lkey_expansion_192a_cold - aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 - call .Lkey_expansion_192b - aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 - call .Lkey_expansion_192a - aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 - call .Lkey_expansion_192b - aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 - call .Lkey_expansion_192a - aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 - call .Lkey_expansion_192b - aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 - call .Lkey_expansion_192a - aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 - call .Lkey_expansion_192b - $movkey %xmm0,(%rax) - mov $bits,48(%rax) # 240(%rdx) - xor %rax, %rax - jmp .Lenc_key_ret - -.align 16 -.L14rounds: - movups 16($inp),%xmm2 # remaning half of *userKey - mov \$13,$bits # 14 rounds for 256 - lea 16(%rax),%rax - $movkey %xmm0,($key) # round 0 - $movkey %xmm2,16($key) # round 1 - aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 - call .Lkey_expansion_256a_cold - aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 - call .Lkey_expansion_256b - aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 - call .Lkey_expansion_256a - aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 - call .Lkey_expansion_256b - aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 - call .Lkey_expansion_256a - aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 - call .Lkey_expansion_256b - aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 - call .Lkey_expansion_256a - aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 - call .Lkey_expansion_256b - aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 - call .Lkey_expansion_256a - aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 - call .Lkey_expansion_256b - aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 - call .Lkey_expansion_256a - aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 - call .Lkey_expansion_256b - aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 - call .Lkey_expansion_256a - $movkey %xmm0,(%rax) - mov $bits,16(%rax) # 240(%rdx) - xor %rax,%rax - jmp .Lenc_key_ret - -.align 16 -.Lbad_keybits: - mov \$-2,%rax -.Lenc_key_ret: - add \$8,%rsp - ret -.LSEH_end_set_encrypt_key: - -.align 16 -.Lkey_expansion_128: - $movkey %xmm0,(%rax) - lea 16(%rax),%rax -.Lkey_expansion_128_cold: - shufps \$0b00010000,%xmm0,%xmm4 - xorps %xmm4, %xmm0 - shufps \$0b10001100,%xmm0,%xmm4 - xorps %xmm4, %xmm0 - shufps \$0b11111111,%xmm1,%xmm1 # critical path - xorps %xmm1,%xmm0 - ret - -.align 16 -.Lkey_expansion_192a: - $movkey %xmm0,(%rax) - lea 16(%rax),%rax -.Lkey_expansion_192a_cold: - movaps %xmm2, %xmm5 -.Lkey_expansion_192b_warm: - shufps \$0b00010000,%xmm0,%xmm4 - movdqa %xmm2,%xmm3 - xorps %xmm4,%xmm0 - shufps \$0b10001100,%xmm0,%xmm4 - pslldq \$4,%xmm3 - xorps %xmm4,%xmm0 - pshufd \$0b01010101,%xmm1,%xmm1 # critical path - pxor %xmm3,%xmm2 - pxor %xmm1,%xmm0 - pshufd \$0b11111111,%xmm0,%xmm3 - pxor %xmm3,%xmm2 - ret - -.align 16 -.Lkey_expansion_192b: - movaps %xmm0,%xmm3 - shufps \$0b01000100,%xmm0,%xmm5 - $movkey %xmm5,(%rax) - shufps \$0b01001110,%xmm2,%xmm3 - $movkey %xmm3,16(%rax) - lea 32(%rax),%rax - jmp .Lkey_expansion_192b_warm - -.align 16 -.Lkey_expansion_256a: - $movkey %xmm2,(%rax) - lea 16(%rax),%rax -.Lkey_expansion_256a_cold: - shufps \$0b00010000,%xmm0,%xmm4 - xorps %xmm4,%xmm0 - shufps \$0b10001100,%xmm0,%xmm4 - xorps %xmm4,%xmm0 - shufps \$0b11111111,%xmm1,%xmm1 # critical path - xorps %xmm1,%xmm0 - ret - -.align 16 -.Lkey_expansion_256b: - $movkey %xmm0,(%rax) - lea 16(%rax),%rax - - shufps \$0b00010000,%xmm2,%xmm4 - xorps %xmm4,%xmm2 - shufps \$0b10001100,%xmm2,%xmm4 - xorps %xmm4,%xmm2 - shufps \$0b10101010,%xmm1,%xmm1 # critical path - xorps %xmm1,%xmm2 - ret -.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key -.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key -___ -} - -$code.=<<___; -.align 64 -.Lbswap_mask: - .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 -.Lincrement32: - .long 6,6,6,0 -.Lincrement64: - .long 1,0,0,0 -.Lxts_magic: - .long 0x87,0,1,0 - -.asciz "AES for Intel AES-NI, CRYPTOGAMS by " -.align 64 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -___ -$code.=<<___ if ($PREFIX eq "aesni"); -.type ecb_ccm64_se_handler,\@abi-omnipotent -.align 16 -ecb_ccm64_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - lea 0(%rax),%rsi # %xmm save area - lea 512($context),%rdi # &context.Xmm6 - mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0x58(%rax),%rax # adjust stack pointer - - jmp .Lcommon_seh_tail -.size ecb_ccm64_se_handler,.-ecb_ccm64_se_handler - -.type ctr32_se_handler,\@abi-omnipotent -.align 16 -ctr32_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lctr32_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<"prologue" label - jb .Lcommon_seh_tail - - mov 152($context),%rax # pull context->Rsp - - lea .Lctr32_ret(%rip),%r10 - cmp %r10,%rbx - jae .Lcommon_seh_tail - - lea 0x20(%rax),%rsi # %xmm save area - lea 512($context),%rdi # &context.Xmm6 - mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0xc8(%rax),%rax # adjust stack pointer - - jmp .Lcommon_seh_tail -.size ctr32_se_handler,.-ctr32_se_handler - -.type xts_se_handler,\@abi-omnipotent -.align 16 -xts_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue lable - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - lea 0x60(%rax),%rsi # %xmm save area - lea 512($context),%rdi # & context.Xmm6 - mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0x68+160(%rax),%rax # adjust stack pointer - - jmp .Lcommon_seh_tail -.size xts_se_handler,.-xts_se_handler -___ -$code.=<<___; -.type cbc_se_handler,\@abi-omnipotent -.align 16 -cbc_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 152($context),%rax # pull context->Rsp - mov 248($context),%rbx # pull context->Rip - - lea .Lcbc_decrypt(%rip),%r10 - cmp %r10,%rbx # context->Rip<"prologue" label - jb .Lcommon_seh_tail - - lea .Lcbc_decrypt_body(%rip),%r10 - cmp %r10,%rbx # context->RipRip>="epilogue" label - jae .Lcommon_seh_tail - - lea 0(%rax),%rsi # top of stack - lea 512($context),%rdi # &context.Xmm6 - mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0x58(%rax),%rax # adjust stack pointer - jmp .Lcommon_seh_tail - -.Lrestore_cbc_rax: - mov 120($context),%rax - -.Lcommon_seh_tail: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size cbc_se_handler,.-cbc_se_handler - -.section .pdata -.align 4 -___ -$code.=<<___ if ($PREFIX eq "aesni"); - .rva .LSEH_begin_aesni_ecb_encrypt - .rva .LSEH_end_aesni_ecb_encrypt - .rva .LSEH_info_ecb - - .rva .LSEH_begin_aesni_ccm64_encrypt_blocks - .rva .LSEH_end_aesni_ccm64_encrypt_blocks - .rva .LSEH_info_ccm64_enc - - .rva .LSEH_begin_aesni_ccm64_decrypt_blocks - .rva .LSEH_end_aesni_ccm64_decrypt_blocks - .rva .LSEH_info_ccm64_dec - - .rva .LSEH_begin_aesni_ctr32_encrypt_blocks - .rva .LSEH_end_aesni_ctr32_encrypt_blocks - .rva .LSEH_info_ctr32 - - .rva .LSEH_begin_aesni_xts_encrypt - .rva .LSEH_end_aesni_xts_encrypt - .rva .LSEH_info_xts_enc - - .rva .LSEH_begin_aesni_xts_decrypt - .rva .LSEH_end_aesni_xts_decrypt - .rva .LSEH_info_xts_dec -___ -$code.=<<___; - .rva .LSEH_begin_${PREFIX}_cbc_encrypt - .rva .LSEH_end_${PREFIX}_cbc_encrypt - .rva .LSEH_info_cbc - - .rva ${PREFIX}_set_decrypt_key - .rva .LSEH_end_set_decrypt_key - .rva .LSEH_info_key - - .rva ${PREFIX}_set_encrypt_key - .rva .LSEH_end_set_encrypt_key - .rva .LSEH_info_key -.section .xdata -.align 8 -___ -$code.=<<___ if ($PREFIX eq "aesni"); -.LSEH_info_ecb: - .byte 9,0,0,0 - .rva ecb_ccm64_se_handler - .rva .Lecb_enc_body,.Lecb_enc_ret # HandlerData[] -.LSEH_info_ccm64_enc: - .byte 9,0,0,0 - .rva ecb_ccm64_se_handler - .rva .Lccm64_enc_body,.Lccm64_enc_ret # HandlerData[] -.LSEH_info_ccm64_dec: - .byte 9,0,0,0 - .rva ecb_ccm64_se_handler - .rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[] -.LSEH_info_ctr32: - .byte 9,0,0,0 - .rva ctr32_se_handler -.LSEH_info_xts_enc: - .byte 9,0,0,0 - .rva xts_se_handler - .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[] -.LSEH_info_xts_dec: - .byte 9,0,0,0 - .rva xts_se_handler - .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[] -___ -$code.=<<___; -.LSEH_info_cbc: - .byte 9,0,0,0 - .rva cbc_se_handler -.LSEH_info_key: - .byte 0x01,0x04,0x01,0x00 - .byte 0x04,0x02,0x00,0x00 # sub rsp,8 -___ -} - -sub rex { - local *opcode=shift; - my ($dst,$src)=@_; - my $rex=0; - - $rex|=0x04 if($dst>=8); - $rex|=0x01 if($src>=8); - push @opcode,$rex|0x40 if($rex); -} - -sub aesni { - my $line=shift; - my @opcode=(0x66); - - if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { - rex(\@opcode,$4,$3); - push @opcode,0x0f,0x3a,0xdf; - push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M - my $c=$2; - push @opcode,$c=~/^0/?oct($c):$c; - return ".byte\t".join(',',@opcode); - } - elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { - my %opcodelet = ( - "aesimc" => 0xdb, - "aesenc" => 0xdc, "aesenclast" => 0xdd, - "aesdec" => 0xde, "aesdeclast" => 0xdf - ); - return undef if (!defined($opcodelet{$1})); - rex(\@opcode,$3,$2); - push @opcode,0x0f,0x38,$opcodelet{$1}; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M - return ".byte\t".join(',',@opcode); - } - return $line; -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/bsaes-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/bsaes-x86_64.pl deleted file mode 100644 index 41b90f0844..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/bsaes-x86_64.pl +++ /dev/null @@ -1,3108 +0,0 @@ -#!/usr/bin/env perl - -################################################################### -### AES-128 [originally in CTR mode] ### -### bitsliced implementation for Intel Core 2 processors ### -### requires support of SSE extensions up to SSSE3 ### -### Author: Emilia Käsper and Peter Schwabe ### -### Date: 2009-03-19 ### -### Public domain ### -### ### -### See http://homes.esat.kuleuven.be/~ekasper/#software for ### -### further information. ### -################################################################### -# -# September 2011. -# -# Started as transliteration to "perlasm" the original code has -# undergone following changes: -# -# - code was made position-independent; -# - rounds were folded into a loop resulting in >5x size reduction -# from 12.5KB to 2.2KB; -# - above was possibile thanks to mixcolumns() modification that -# allowed to feed its output back to aesenc[last], this was -# achieved at cost of two additional inter-registers moves; -# - some instruction reordering and interleaving; -# - this module doesn't implement key setup subroutine, instead it -# relies on conversion of "conventional" key schedule as returned -# by AES_set_encrypt_key (see discussion below); -# - first and last round keys are treated differently, which allowed -# to skip one shiftrows(), reduce bit-sliced key schedule and -# speed-up conversion by 22%; -# - support for 192- and 256-bit keys was added; -# -# Resulting performance in CPU cycles spent to encrypt one byte out -# of 4096-byte buffer with 128-bit key is: -# -# Emilia's this(*) difference -# -# Core 2 9.30 8.69 +7% -# Nehalem(**) 7.63 6.98 +9% -# Atom 17.1 17.4 -2%(***) -# -# (*) Comparison is not completely fair, because "this" is ECB, -# i.e. no extra processing such as counter values calculation -# and xor-ing input as in Emilia's CTR implementation is -# performed. However, the CTR calculations stand for not more -# than 1% of total time, so comparison is *rather* fair. -# -# (**) Results were collected on Westmere, which is considered to -# be equivalent to Nehalem for this code. -# -# (***) Slowdown on Atom is rather strange per se, because original -# implementation has a number of 9+-bytes instructions, which -# are bad for Atom front-end, and which I eliminated completely. -# In attempt to address deterioration sbox() was tested in FP -# SIMD "domain" (movaps instead of movdqa, xorps instead of -# pxor, etc.). While it resulted in nominal 4% improvement on -# Atom, it hurted Westmere by more than 2x factor. -# -# As for key schedule conversion subroutine. Interface to OpenSSL -# relies on per-invocation on-the-fly conversion. This naturally -# has impact on performance, especially for short inputs. Conversion -# time in CPU cycles and its ratio to CPU cycles spent in 8x block -# function is: -# -# conversion conversion/8x block -# Core 2 240 0.22 -# Nehalem 180 0.20 -# Atom 430 0.19 -# -# The ratio values mean that 128-byte blocks will be processed -# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%, -# etc. Then keep in mind that input sizes not divisible by 128 are -# *effectively* slower, especially shortest ones, e.g. consecutive -# 144-byte blocks are processed 44% slower than one would expect, -# 272 - 29%, 400 - 22%, etc. Yet, despite all these "shortcomings" -# it's still faster than ["hyper-threading-safe" code path in] -# aes-x86_64.pl on all lengths above 64 bytes... -# -# October 2011. -# -# Add decryption procedure. Performance in CPU cycles spent to decrypt -# one byte out of 4096-byte buffer with 128-bit key is: -# -# Core 2 9.83 -# Nehalem 7.74 -# Atom 19.0 -# -# November 2011. -# -# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is -# suboptimal, but XTS is meant to be used with larger blocks... -# -# - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); -my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) -my $ecb=0; # suppress unreferenced ECB subroutines, spare some space... - -{ -my ($key,$rounds,$const)=("%rax","%r10d","%r11"); - -sub Sbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InBasisChange (@b); - &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); - &OutBasisChange (@b[7,1,4,2,6,5,0,3]); -} - -sub InBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb -my @b=@_[0..7]; -$code.=<<___; - pxor @b[6], @b[5] - pxor @b[1], @b[2] - pxor @b[0], @b[3] - pxor @b[2], @b[6] - pxor @b[0], @b[5] - - pxor @b[3], @b[6] - pxor @b[7], @b[3] - pxor @b[5], @b[7] - pxor @b[4], @b[3] - pxor @b[5], @b[4] - pxor @b[1], @b[3] - - pxor @b[7], @b[2] - pxor @b[5], @b[1] -___ -} - -sub OutBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb -my @b=@_[0..7]; -$code.=<<___; - pxor @b[6], @b[0] - pxor @b[4], @b[1] - pxor @b[0], @b[2] - pxor @b[6], @b[4] - pxor @b[1], @b[6] - - pxor @b[5], @b[1] - pxor @b[3], @b[5] - pxor @b[7], @b[3] - pxor @b[5], @b[7] - pxor @b[5], @b[2] - - pxor @b[7], @b[4] -___ -} - -sub InvSbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InvInBasisChange (@b); - &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); - &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); -} - -sub InvInBasisChange { # OutBasisChange in reverse -my @b=@_[5,1,2,6,3,7,0,4]; -$code.=<<___ - pxor @b[7], @b[4] - - pxor @b[5], @b[7] - pxor @b[5], @b[2] - pxor @b[7], @b[3] - pxor @b[3], @b[5] - pxor @b[5], @b[1] - - pxor @b[1], @b[6] - pxor @b[0], @b[2] - pxor @b[6], @b[4] - pxor @b[6], @b[0] - pxor @b[4], @b[1] -___ -} - -sub InvOutBasisChange { # InBasisChange in reverse -my @b=@_[2,5,7,3,6,1,0,4]; -$code.=<<___; - pxor @b[5], @b[1] - pxor @b[7], @b[2] - - pxor @b[1], @b[3] - pxor @b[5], @b[4] - pxor @b[5], @b[7] - pxor @b[4], @b[3] - pxor @b[0], @b[5] - pxor @b[7], @b[3] - pxor @b[2], @b[6] - pxor @b[1], @b[2] - pxor @b[3], @b[6] - - pxor @b[0], @b[3] - pxor @b[6], @b[5] -___ -} - -sub Mul_GF4 { -#;************************************************************* -#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * -#;************************************************************* -my ($x0,$x1,$y0,$y1,$t0)=@_; -$code.=<<___; - movdqa $y0, $t0 - pxor $y1, $t0 - pand $x0, $t0 - pxor $x1, $x0 - pand $y0, $x1 - pand $y1, $x0 - pxor $x1, $x0 - pxor $t0, $x1 -___ -} - -sub Mul_GF4_N { # not used, see next subroutine -# multiply and scale by N -my ($x0,$x1,$y0,$y1,$t0)=@_; -$code.=<<___; - movdqa $y0, $t0 - pxor $y1, $t0 - pand $x0, $t0 - pxor $x1, $x0 - pand $y0, $x1 - pand $y1, $x0 - pxor $x0, $x1 - pxor $t0, $x0 -___ -} - -sub Mul_GF4_N_GF4 { -# interleaved Mul_GF4_N and Mul_GF4 -my ($x0,$x1,$y0,$y1,$t0, - $x2,$x3,$y2,$y3,$t1)=@_; -$code.=<<___; - movdqa $y0, $t0 - movdqa $y2, $t1 - pxor $y1, $t0 - pxor $y3, $t1 - pand $x0, $t0 - pand $x2, $t1 - pxor $x1, $x0 - pxor $x3, $x2 - pand $y0, $x1 - pand $y2, $x3 - pand $y1, $x0 - pand $y3, $x2 - pxor $x0, $x1 - pxor $x3, $x2 - pxor $t0, $x0 - pxor $t1, $x3 -___ -} -sub Mul_GF16_2 { -my @x=@_[0..7]; -my @y=@_[8..11]; -my @t=@_[12..15]; -$code.=<<___; - movdqa @x[0], @t[0] - movdqa @x[1], @t[1] -___ - &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]); -$code.=<<___; - pxor @x[2], @t[0] - pxor @x[3], @t[1] - pxor @y[2], @y[0] - pxor @y[3], @y[1] -___ - Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[2], @x[3], @y[2], @y[3], @t[2]); -$code.=<<___; - pxor @t[0], @x[0] - pxor @t[0], @x[2] - pxor @t[1], @x[1] - pxor @t[1], @x[3] - - movdqa @x[4], @t[0] - movdqa @x[5], @t[1] - pxor @x[6], @t[0] - pxor @x[7], @t[1] -___ - &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[6], @x[7], @y[2], @y[3], @t[2]); -$code.=<<___; - pxor @y[2], @y[0] - pxor @y[3], @y[1] -___ - &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]); -$code.=<<___; - pxor @t[0], @x[4] - pxor @t[0], @x[6] - pxor @t[1], @x[5] - pxor @t[1], @x[7] -___ -} -sub Inv_GF256 { -#;******************************************************************** -#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * -#;******************************************************************** -my @x=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; -# direct optimizations from hardware -$code.=<<___; - movdqa @x[4], @t[3] - movdqa @x[5], @t[2] - movdqa @x[1], @t[1] - movdqa @x[7], @s[1] - movdqa @x[0], @s[0] - - pxor @x[6], @t[3] - pxor @x[7], @t[2] - pxor @x[3], @t[1] - movdqa @t[3], @s[2] - pxor @x[6], @s[1] - movdqa @t[2], @t[0] - pxor @x[2], @s[0] - movdqa @t[3], @s[3] - - por @t[1], @t[2] - por @s[0], @t[3] - pxor @t[0], @s[3] - pand @s[0], @s[2] - pxor @t[1], @s[0] - pand @t[1], @t[0] - pand @s[0], @s[3] - movdqa @x[3], @s[0] - pxor @x[2], @s[0] - pand @s[0], @s[1] - pxor @s[1], @t[3] - pxor @s[1], @t[2] - movdqa @x[4], @s[1] - movdqa @x[1], @s[0] - pxor @x[5], @s[1] - pxor @x[0], @s[0] - movdqa @s[1], @t[1] - pand @s[0], @s[1] - por @s[0], @t[1] - pxor @s[1], @t[0] - pxor @s[3], @t[3] - pxor @s[2], @t[2] - pxor @s[3], @t[1] - movdqa @x[7], @s[0] - pxor @s[2], @t[0] - movdqa @x[6], @s[1] - pxor @s[2], @t[1] - movdqa @x[5], @s[2] - pand @x[3], @s[0] - movdqa @x[4], @s[3] - pand @x[2], @s[1] - pand @x[1], @s[2] - por @x[0], @s[3] - pxor @s[0], @t[3] - pxor @s[1], @t[2] - pxor @s[2], @t[1] - pxor @s[3], @t[0] - - #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - - # new smaller inversion - - movdqa @t[3], @s[0] - pand @t[1], @t[3] - pxor @t[2], @s[0] - - movdqa @t[0], @s[2] - movdqa @s[0], @s[3] - pxor @t[3], @s[2] - pand @s[2], @s[3] - - movdqa @t[1], @s[1] - pxor @t[2], @s[3] - pxor @t[0], @s[1] - - pxor @t[2], @t[3] - - pand @t[3], @s[1] - - movdqa @s[2], @t[2] - pxor @t[0], @s[1] - - pxor @s[1], @t[2] - pxor @s[1], @t[1] - - pand @t[0], @t[2] - - pxor @t[2], @s[2] - pxor @t[2], @t[1] - - pand @s[3], @s[2] - - pxor @s[0], @s[2] -___ -# output in s3, s2, s1, t1 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); - -### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb -} - -# AES linear components - -sub ShiftRows { -my @x=@_[0..7]; -my $mask=pop; -$code.=<<___; - pxor 0x00($key),@x[0] - pxor 0x10($key),@x[1] - pshufb $mask,@x[0] - pxor 0x20($key),@x[2] - pshufb $mask,@x[1] - pxor 0x30($key),@x[3] - pshufb $mask,@x[2] - pxor 0x40($key),@x[4] - pshufb $mask,@x[3] - pxor 0x50($key),@x[5] - pshufb $mask,@x[4] - pxor 0x60($key),@x[6] - pshufb $mask,@x[5] - pxor 0x70($key),@x[7] - pshufb $mask,@x[6] - lea 0x80($key),$key - pshufb $mask,@x[7] -___ -} - -sub MixColumns { -# modified to emit output in order suitable for feeding back to aesenc[last] -my @x=@_[0..7]; -my @t=@_[8..15]; -my $inv=@_[16]; # optional -$code.=<<___; - pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 - pshufd \$0x93, @x[1], @t[1] - pxor @t[0], @x[0] # x0 ^ (x0 <<< 32) - pshufd \$0x93, @x[2], @t[2] - pxor @t[1], @x[1] - pshufd \$0x93, @x[3], @t[3] - pxor @t[2], @x[2] - pshufd \$0x93, @x[4], @t[4] - pxor @t[3], @x[3] - pshufd \$0x93, @x[5], @t[5] - pxor @t[4], @x[4] - pshufd \$0x93, @x[6], @t[6] - pxor @t[5], @x[5] - pshufd \$0x93, @x[7], @t[7] - pxor @t[6], @x[6] - pxor @t[7], @x[7] - - pxor @x[0], @t[1] - pxor @x[7], @t[0] - pxor @x[7], @t[1] - pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64) - pxor @x[1], @t[2] - pshufd \$0x4E, @x[1], @x[1] - pxor @x[4], @t[5] - pxor @t[0], @x[0] - pxor @x[5], @t[6] - pxor @t[1], @x[1] - pxor @x[3], @t[4] - pshufd \$0x4E, @x[4], @t[0] - pxor @x[6], @t[7] - pshufd \$0x4E, @x[5], @t[1] - pxor @x[2], @t[3] - pshufd \$0x4E, @x[3], @x[4] - pxor @x[7], @t[3] - pshufd \$0x4E, @x[7], @x[5] - pxor @x[7], @t[4] - pshufd \$0x4E, @x[6], @x[3] - pxor @t[4], @t[0] - pshufd \$0x4E, @x[2], @x[6] - pxor @t[5], @t[1] -___ -$code.=<<___ if (!$inv); - pxor @t[3], @x[4] - pxor @t[7], @x[5] - pxor @t[6], @x[3] - movdqa @t[0], @x[2] - pxor @t[2], @x[6] - movdqa @t[1], @x[7] -___ -$code.=<<___ if ($inv); - pxor @x[4], @t[3] - pxor @t[7], @x[5] - pxor @x[3], @t[6] - movdqa @t[0], @x[3] - pxor @t[2], @x[6] - movdqa @t[6], @x[2] - movdqa @t[1], @x[7] - movdqa @x[6], @x[4] - movdqa @t[3], @x[6] -___ -} - -sub InvMixColumns_orig { -my @x=@_[0..7]; -my @t=@_[8..15]; - -$code.=<<___; - # multiplication by 0x0e - pshufd \$0x93, @x[7], @t[7] - movdqa @x[2], @t[2] - pxor @x[5], @x[7] # 7 5 - pxor @x[5], @x[2] # 2 5 - pshufd \$0x93, @x[0], @t[0] - movdqa @x[5], @t[5] - pxor @x[0], @x[5] # 5 0 [1] - pxor @x[1], @x[0] # 0 1 - pshufd \$0x93, @x[1], @t[1] - pxor @x[2], @x[1] # 1 25 - pxor @x[6], @x[0] # 01 6 [2] - pxor @x[3], @x[1] # 125 3 [4] - pshufd \$0x93, @x[3], @t[3] - pxor @x[0], @x[2] # 25 016 [3] - pxor @x[7], @x[3] # 3 75 - pxor @x[6], @x[7] # 75 6 [0] - pshufd \$0x93, @x[6], @t[6] - movdqa @x[4], @t[4] - pxor @x[4], @x[6] # 6 4 - pxor @x[3], @x[4] # 4 375 [6] - pxor @x[7], @x[3] # 375 756=36 - pxor @t[5], @x[6] # 64 5 [7] - pxor @t[2], @x[3] # 36 2 - pxor @t[4], @x[3] # 362 4 [5] - pshufd \$0x93, @t[5], @t[5] -___ - my @y = @x[7,5,0,2,1,3,4,6]; -$code.=<<___; - # multiplication by 0x0b - pxor @y[0], @y[1] - pxor @t[0], @y[0] - pxor @t[1], @y[1] - pshufd \$0x93, @t[2], @t[2] - pxor @t[5], @y[0] - pxor @t[6], @y[1] - pxor @t[7], @y[0] - pshufd \$0x93, @t[4], @t[4] - pxor @t[6], @t[7] # clobber t[7] - pxor @y[0], @y[1] - - pxor @t[0], @y[3] - pshufd \$0x93, @t[0], @t[0] - pxor @t[1], @y[2] - pxor @t[1], @y[4] - pxor @t[2], @y[2] - pshufd \$0x93, @t[1], @t[1] - pxor @t[2], @y[3] - pxor @t[2], @y[5] - pxor @t[7], @y[2] - pshufd \$0x93, @t[2], @t[2] - pxor @t[3], @y[3] - pxor @t[3], @y[6] - pxor @t[3], @y[4] - pshufd \$0x93, @t[3], @t[3] - pxor @t[4], @y[7] - pxor @t[4], @y[5] - pxor @t[7], @y[7] - pxor @t[5], @y[3] - pxor @t[4], @y[4] - pxor @t[5], @t[7] # clobber t[7] even more - - pxor @t[7], @y[5] - pshufd \$0x93, @t[4], @t[4] - pxor @t[7], @y[6] - pxor @t[7], @y[4] - - pxor @t[5], @t[7] - pshufd \$0x93, @t[5], @t[5] - pxor @t[6], @t[7] # restore t[7] - - # multiplication by 0x0d - pxor @y[7], @y[4] - pxor @t[4], @y[7] - pshufd \$0x93, @t[6], @t[6] - pxor @t[0], @y[2] - pxor @t[5], @y[7] - pxor @t[2], @y[2] - pshufd \$0x93, @t[7], @t[7] - - pxor @y[1], @y[3] - pxor @t[1], @y[1] - pxor @t[0], @y[0] - pxor @t[0], @y[3] - pxor @t[5], @y[1] - pxor @t[5], @y[0] - pxor @t[7], @y[1] - pshufd \$0x93, @t[0], @t[0] - pxor @t[6], @y[0] - pxor @y[1], @y[3] - pxor @t[1], @y[4] - pshufd \$0x93, @t[1], @t[1] - - pxor @t[7], @y[7] - pxor @t[2], @y[4] - pxor @t[2], @y[5] - pshufd \$0x93, @t[2], @t[2] - pxor @t[6], @y[2] - pxor @t[3], @t[6] # clobber t[6] - pxor @y[7], @y[4] - pxor @t[6], @y[3] - - pxor @t[6], @y[6] - pxor @t[5], @y[5] - pxor @t[4], @y[6] - pshufd \$0x93, @t[4], @t[4] - pxor @t[6], @y[5] - pxor @t[7], @y[6] - pxor @t[3], @t[6] # restore t[6] - - pshufd \$0x93, @t[5], @t[5] - pshufd \$0x93, @t[6], @t[6] - pshufd \$0x93, @t[7], @t[7] - pshufd \$0x93, @t[3], @t[3] - - # multiplication by 0x09 - pxor @y[1], @y[4] - pxor @y[1], @t[1] # t[1]=y[1] - pxor @t[5], @t[0] # clobber t[0] - pxor @t[5], @t[1] - pxor @t[0], @y[3] - pxor @y[0], @t[0] # t[0]=y[0] - pxor @t[6], @t[1] - pxor @t[7], @t[6] # clobber t[6] - pxor @t[1], @y[4] - pxor @t[4], @y[7] - pxor @y[4], @t[4] # t[4]=y[4] - pxor @t[3], @y[6] - pxor @y[3], @t[3] # t[3]=y[3] - pxor @t[2], @y[5] - pxor @y[2], @t[2] # t[2]=y[2] - pxor @t[7], @t[3] - pxor @y[5], @t[5] # t[5]=y[5] - pxor @t[6], @t[2] - pxor @t[6], @t[5] - pxor @y[6], @t[6] # t[6]=y[6] - pxor @y[7], @t[7] # t[7]=y[7] - - movdqa @t[0],@XMM[0] - movdqa @t[1],@XMM[1] - movdqa @t[2],@XMM[2] - movdqa @t[3],@XMM[3] - movdqa @t[4],@XMM[4] - movdqa @t[5],@XMM[5] - movdqa @t[6],@XMM[6] - movdqa @t[7],@XMM[7] -___ -} - -sub InvMixColumns { -my @x=@_[0..7]; -my @t=@_[8..15]; - -# Thanks to Jussi Kivilinna for providing pointer to -# -# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | -# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | -# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | -# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | - -$code.=<<___; - # multiplication by 0x05-0x00-0x04-0x00 - pshufd \$0x4E, @x[0], @t[0] - pshufd \$0x4E, @x[6], @t[6] - pxor @x[0], @t[0] - pshufd \$0x4E, @x[7], @t[7] - pxor @x[6], @t[6] - pshufd \$0x4E, @x[1], @t[1] - pxor @x[7], @t[7] - pshufd \$0x4E, @x[2], @t[2] - pxor @x[1], @t[1] - pshufd \$0x4E, @x[3], @t[3] - pxor @x[2], @t[2] - pxor @t[6], @x[0] - pxor @t[6], @x[1] - pshufd \$0x4E, @x[4], @t[4] - pxor @x[3], @t[3] - pxor @t[0], @x[2] - pxor @t[1], @x[3] - pshufd \$0x4E, @x[5], @t[5] - pxor @x[4], @t[4] - pxor @t[7], @x[1] - pxor @t[2], @x[4] - pxor @x[5], @t[5] - - pxor @t[7], @x[2] - pxor @t[6], @x[3] - pxor @t[6], @x[4] - pxor @t[3], @x[5] - pxor @t[4], @x[6] - pxor @t[7], @x[4] - pxor @t[7], @x[5] - pxor @t[5], @x[7] -___ - &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 -} - -sub aesenc { # not used -my @b=@_[0..7]; -my @t=@_[8..15]; -$code.=<<___; - movdqa 0x30($const),@t[0] # .LSR -___ - &ShiftRows (@b,@t[0]); - &Sbox (@b,@t); - &MixColumns (@b[0,1,4,6,3,7,2,5],@t); -} - -sub aesenclast { # not used -my @b=@_[0..7]; -my @t=@_[8..15]; -$code.=<<___; - movdqa 0x40($const),@t[0] # .LSRM0 -___ - &ShiftRows (@b,@t[0]); - &Sbox (@b,@t); -$code.=<<___ - pxor 0x00($key),@b[0] - pxor 0x10($key),@b[1] - pxor 0x20($key),@b[4] - pxor 0x30($key),@b[6] - pxor 0x40($key),@b[3] - pxor 0x50($key),@b[7] - pxor 0x60($key),@b[2] - pxor 0x70($key),@b[5] -___ -} - -sub swapmove { -my ($a,$b,$n,$mask,$t)=@_; -$code.=<<___; - movdqa $b,$t - psrlq \$$n,$b - pxor $a,$b - pand $mask,$b - pxor $b,$a - psllq \$$n,$b - pxor $t,$b -___ -} -sub swapmove2x { -my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; -$code.=<<___; - movdqa $b0,$t0 - psrlq \$$n,$b0 - movdqa $b1,$t1 - psrlq \$$n,$b1 - pxor $a0,$b0 - pxor $a1,$b1 - pand $mask,$b0 - pand $mask,$b1 - pxor $b0,$a0 - psllq \$$n,$b0 - pxor $b1,$a1 - psllq \$$n,$b1 - pxor $t0,$b0 - pxor $t1,$b1 -___ -} - -sub bitslice { -my @x=reverse(@_[0..7]); -my ($t0,$t1,$t2,$t3)=@_[8..11]; -$code.=<<___; - movdqa 0x00($const),$t0 # .LBS0 - movdqa 0x10($const),$t1 # .LBS1 -___ - &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); - &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -$code.=<<___; - movdqa 0x20($const),$t0 # .LBS2 -___ - &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); - &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - - &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); - &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); -} - -$code.=<<___; -.text - -.extern asm_AES_encrypt -.extern asm_AES_decrypt - -.type _bsaes_encrypt8,\@abi-omnipotent -.align 64 -_bsaes_encrypt8: - lea .LBS0(%rip), $const # constants table - - movdqa ($key), @XMM[9] # round 0 key - lea 0x10($key), $key - movdqa 0x50($const), @XMM[8] # .LM0SR - pxor @XMM[9], @XMM[0] # xor with round0 key - pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] - pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] - pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] - pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] - pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] - pxor @XMM[9], @XMM[6] - pshufb @XMM[8], @XMM[5] - pxor @XMM[9], @XMM[7] - pshufb @XMM[8], @XMM[6] - pshufb @XMM[8], @XMM[7] -_bsaes_encrypt8_bitslice: -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - dec $rounds - jmp .Lenc_sbox -.align 16 -.Lenc_loop: -___ - &ShiftRows (@XMM[0..7, 8]); -$code.=".Lenc_sbox:\n"; - &Sbox (@XMM[0..7, 8..15]); -$code.=<<___; - dec $rounds - jl .Lenc_done -___ - &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); -$code.=<<___; - movdqa 0x30($const), @XMM[8] # .LSR - jnz .Lenc_loop - movdqa 0x40($const), @XMM[8] # .LSRM0 - jmp .Lenc_loop -.align 16 -.Lenc_done: -___ - # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb - &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); -$code.=<<___; - movdqa ($key), @XMM[8] # last round key - pxor @XMM[8], @XMM[4] - pxor @XMM[8], @XMM[6] - pxor @XMM[8], @XMM[3] - pxor @XMM[8], @XMM[7] - pxor @XMM[8], @XMM[2] - pxor @XMM[8], @XMM[5] - pxor @XMM[8], @XMM[0] - pxor @XMM[8], @XMM[1] - ret -.size _bsaes_encrypt8,.-_bsaes_encrypt8 - -.type _bsaes_decrypt8,\@abi-omnipotent -.align 64 -_bsaes_decrypt8: - lea .LBS0(%rip), $const # constants table - - movdqa ($key), @XMM[9] # round 0 key - lea 0x10($key), $key - movdqa -0x30($const), @XMM[8] # .LM0ISR - pxor @XMM[9], @XMM[0] # xor with round0 key - pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] - pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] - pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] - pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] - pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] - pxor @XMM[9], @XMM[6] - pshufb @XMM[8], @XMM[5] - pxor @XMM[9], @XMM[7] - pshufb @XMM[8], @XMM[6] - pshufb @XMM[8], @XMM[7] -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - dec $rounds - jmp .Ldec_sbox -.align 16 -.Ldec_loop: -___ - &ShiftRows (@XMM[0..7, 8]); -$code.=".Ldec_sbox:\n"; - &InvSbox (@XMM[0..7, 8..15]); -$code.=<<___; - dec $rounds - jl .Ldec_done -___ - &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); -$code.=<<___; - movdqa -0x10($const), @XMM[8] # .LISR - jnz .Ldec_loop - movdqa -0x20($const), @XMM[8] # .LISRM0 - jmp .Ldec_loop -.align 16 -.Ldec_done: -___ - &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); -$code.=<<___; - movdqa ($key), @XMM[8] # last round key - pxor @XMM[8], @XMM[6] - pxor @XMM[8], @XMM[4] - pxor @XMM[8], @XMM[2] - pxor @XMM[8], @XMM[7] - pxor @XMM[8], @XMM[3] - pxor @XMM[8], @XMM[5] - pxor @XMM[8], @XMM[0] - pxor @XMM[8], @XMM[1] - ret -.size _bsaes_decrypt8,.-_bsaes_decrypt8 -___ -} -{ -my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11"); - -sub bitslice_key { -my @x=reverse(@_[0..7]); -my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; - - &swapmove (@x[0,1],1,$bs0,$t2,$t3); -$code.=<<___; - #&swapmove(@x[2,3],1,$t0,$t2,$t3); - movdqa @x[0], @x[2] - movdqa @x[1], @x[3] -___ - #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); - - &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); -$code.=<<___; - #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - movdqa @x[0], @x[4] - movdqa @x[2], @x[6] - movdqa @x[1], @x[5] - movdqa @x[3], @x[7] -___ - &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); - &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); -} - -$code.=<<___; -.type _bsaes_key_convert,\@abi-omnipotent -.align 16 -_bsaes_key_convert: - lea .Lmasks(%rip), $const - movdqu ($inp), %xmm7 # load round 0 key - lea 0x10($inp), $inp - movdqa 0x00($const), %xmm0 # 0x01... - movdqa 0x10($const), %xmm1 # 0x02... - movdqa 0x20($const), %xmm2 # 0x04... - movdqa 0x30($const), %xmm3 # 0x08... - movdqa 0x40($const), %xmm4 # .LM0 - pcmpeqd %xmm5, %xmm5 # .LNOT - - movdqu ($inp), %xmm6 # load round 1 key - movdqa %xmm7, ($out) # save round 0 key - lea 0x10($out), $out - dec $rounds - jmp .Lkey_loop -.align 16 -.Lkey_loop: - pshufb %xmm4, %xmm6 # .LM0 - - movdqa %xmm0, %xmm8 - movdqa %xmm1, %xmm9 - - pand %xmm6, %xmm8 - pand %xmm6, %xmm9 - movdqa %xmm2, %xmm10 - pcmpeqb %xmm0, %xmm8 - psllq \$4, %xmm0 # 0x10... - movdqa %xmm3, %xmm11 - pcmpeqb %xmm1, %xmm9 - psllq \$4, %xmm1 # 0x20... - - pand %xmm6, %xmm10 - pand %xmm6, %xmm11 - movdqa %xmm0, %xmm12 - pcmpeqb %xmm2, %xmm10 - psllq \$4, %xmm2 # 0x40... - movdqa %xmm1, %xmm13 - pcmpeqb %xmm3, %xmm11 - psllq \$4, %xmm3 # 0x80... - - movdqa %xmm2, %xmm14 - movdqa %xmm3, %xmm15 - pxor %xmm5, %xmm8 # "pnot" - pxor %xmm5, %xmm9 - - pand %xmm6, %xmm12 - pand %xmm6, %xmm13 - movdqa %xmm8, 0x00($out) # write bit-sliced round key - pcmpeqb %xmm0, %xmm12 - psrlq \$4, %xmm0 # 0x01... - movdqa %xmm9, 0x10($out) - pcmpeqb %xmm1, %xmm13 - psrlq \$4, %xmm1 # 0x02... - lea 0x10($inp), $inp - - pand %xmm6, %xmm14 - pand %xmm6, %xmm15 - movdqa %xmm10, 0x20($out) - pcmpeqb %xmm2, %xmm14 - psrlq \$4, %xmm2 # 0x04... - movdqa %xmm11, 0x30($out) - pcmpeqb %xmm3, %xmm15 - psrlq \$4, %xmm3 # 0x08... - movdqu ($inp), %xmm6 # load next round key - - pxor %xmm5, %xmm13 # "pnot" - pxor %xmm5, %xmm14 - movdqa %xmm12, 0x40($out) - movdqa %xmm13, 0x50($out) - movdqa %xmm14, 0x60($out) - movdqa %xmm15, 0x70($out) - lea 0x80($out),$out - dec $rounds - jnz .Lkey_loop - - movdqa 0x50($const), %xmm7 # .L63 - #movdqa %xmm6, ($out) # don't save last round key - ret -.size _bsaes_key_convert,.-_bsaes_key_convert -___ -} - -if (0 && !$win64) { # following four functions are unsupported interface - # used for benchmarking... -$code.=<<___; -.globl bsaes_enc_key_convert -.type bsaes_enc_key_convert,\@function,2 -.align 16 -bsaes_enc_key_convert: - mov 240($inp),%r10d # pass rounds - mov $inp,%rcx # pass key - mov $out,%rax # pass key schedule - call _bsaes_key_convert - pxor %xmm6,%xmm7 # fix up last round key - movdqa %xmm7,(%rax) # save last round key - ret -.size bsaes_enc_key_convert,.-bsaes_enc_key_convert - -.globl bsaes_encrypt_128 -.type bsaes_encrypt_128,\@function,4 -.align 16 -bsaes_encrypt_128: -.Lenc128_loop: - movdqu 0x00($inp), @XMM[0] # load input - movdqu 0x10($inp), @XMM[1] - movdqu 0x20($inp), @XMM[2] - movdqu 0x30($inp), @XMM[3] - movdqu 0x40($inp), @XMM[4] - movdqu 0x50($inp), @XMM[5] - movdqu 0x60($inp), @XMM[6] - movdqu 0x70($inp), @XMM[7] - mov $key, %rax # pass the $key - lea 0x80($inp), $inp - mov \$10,%r10d - - call _bsaes_encrypt8 - - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[2], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - sub \$0x80,$len - ja .Lenc128_loop - ret -.size bsaes_encrypt_128,.-bsaes_encrypt_128 - -.globl bsaes_dec_key_convert -.type bsaes_dec_key_convert,\@function,2 -.align 16 -bsaes_dec_key_convert: - mov 240($inp),%r10d # pass rounds - mov $inp,%rcx # pass key - mov $out,%rax # pass key schedule - call _bsaes_key_convert - pxor ($out),%xmm7 # fix up round 0 key - movdqa %xmm6,(%rax) # save last round key - movdqa %xmm7,($out) - ret -.size bsaes_dec_key_convert,.-bsaes_dec_key_convert - -.globl bsaes_decrypt_128 -.type bsaes_decrypt_128,\@function,4 -.align 16 -bsaes_decrypt_128: -.Ldec128_loop: - movdqu 0x00($inp), @XMM[0] # load input - movdqu 0x10($inp), @XMM[1] - movdqu 0x20($inp), @XMM[2] - movdqu 0x30($inp), @XMM[3] - movdqu 0x40($inp), @XMM[4] - movdqu 0x50($inp), @XMM[5] - movdqu 0x60($inp), @XMM[6] - movdqu 0x70($inp), @XMM[7] - mov $key, %rax # pass the $key - lea 0x80($inp), $inp - mov \$10,%r10d - - call _bsaes_decrypt8 - - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - sub \$0x80,$len - ja .Ldec128_loop - ret -.size bsaes_decrypt_128,.-bsaes_decrypt_128 -___ -} -{ -###################################################################### -# -# OpenSSL interface -# -my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d") - : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d"); -my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15"); - -if ($ecb) { -$code.=<<___; -.globl bsaes_ecb_encrypt_blocks -.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent -.align 16 -bsaes_ecb_encrypt_blocks: - mov %rsp, %rax -.Lecb_enc_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp),%rsp -___ -$code.=<<___ if ($win64); - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lecb_enc_body: -___ -$code.=<<___; - mov %rsp,%rbp # backup %rsp - mov 240($arg4),%eax # rounds - mov $arg1,$inp # backup arguments - mov $arg2,$out - mov $arg3,$len - mov $arg4,$key - cmp \$8,$arg3 - jb .Lecb_enc_short - - mov %eax,%ebx # backup rounds - shl \$7,%rax # 128 bytes per inner round key - sub \$`128-32`,%rax # size of bit-sliced key schedule - sub %rax,%rsp - mov %rsp,%rax # pass key schedule - mov $key,%rcx # pass key - mov %ebx,%r10d # pass rounds - call _bsaes_key_convert - pxor %xmm6,%xmm7 # fix up last round key - movdqa %xmm7,(%rax) # save last round key - - sub \$8,$len -.Lecb_enc_loop: - movdqu 0x00($inp), @XMM[0] # load input - movdqu 0x10($inp), @XMM[1] - movdqu 0x20($inp), @XMM[2] - movdqu 0x30($inp), @XMM[3] - movdqu 0x40($inp), @XMM[4] - movdqu 0x50($inp), @XMM[5] - mov %rsp, %rax # pass key schedule - movdqu 0x60($inp), @XMM[6] - mov %ebx,%r10d # pass rounds - movdqu 0x70($inp), @XMM[7] - lea 0x80($inp), $inp - - call _bsaes_encrypt8 - - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[2], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - sub \$8,$len - jnc .Lecb_enc_loop - - add \$8,$len - jz .Lecb_enc_done - - movdqu 0x00($inp), @XMM[0] # load input - mov %rsp, %rax # pass key schedule - mov %ebx,%r10d # pass rounds - cmp \$2,$len - jb .Lecb_enc_one - movdqu 0x10($inp), @XMM[1] - je .Lecb_enc_two - movdqu 0x20($inp), @XMM[2] - cmp \$4,$len - jb .Lecb_enc_three - movdqu 0x30($inp), @XMM[3] - je .Lecb_enc_four - movdqu 0x40($inp), @XMM[4] - cmp \$6,$len - jb .Lecb_enc_five - movdqu 0x50($inp), @XMM[5] - je .Lecb_enc_six - movdqu 0x60($inp), @XMM[6] - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[2], 0x60($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_six: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - movdqu @XMM[7], 0x50($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_five: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_four: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_three: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_two: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_one: - call _bsaes_encrypt8 - movdqu @XMM[0], 0x00($out) # write output - jmp .Lecb_enc_done -.align 16 -.Lecb_enc_short: - lea ($inp), $arg1 - lea ($out), $arg2 - lea ($key), $arg3 - call asm_AES_encrypt - lea 16($inp), $inp - lea 16($out), $out - dec $len - jnz .Lecb_enc_short - -.Lecb_enc_done: - lea (%rsp),%rax - pxor %xmm0, %xmm0 -.Lecb_enc_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - jb .Lecb_enc_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lecb_enc_epilogue: - ret -.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks - -.globl bsaes_ecb_decrypt_blocks -.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent -.align 16 -bsaes_ecb_decrypt_blocks: - mov %rsp, %rax -.Lecb_dec_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp),%rsp -___ -$code.=<<___ if ($win64); - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lecb_dec_body: -___ -$code.=<<___; - mov %rsp,%rbp # backup %rsp - mov 240($arg4),%eax # rounds - mov $arg1,$inp # backup arguments - mov $arg2,$out - mov $arg3,$len - mov $arg4,$key - cmp \$8,$arg3 - jb .Lecb_dec_short - - mov %eax,%ebx # backup rounds - shl \$7,%rax # 128 bytes per inner round key - sub \$`128-32`,%rax # size of bit-sliced key schedule - sub %rax,%rsp - mov %rsp,%rax # pass key schedule - mov $key,%rcx # pass key - mov %ebx,%r10d # pass rounds - call _bsaes_key_convert - pxor (%rsp),%xmm7 # fix up 0 round key - movdqa %xmm6,(%rax) # save last round key - movdqa %xmm7,(%rsp) - - sub \$8,$len -.Lecb_dec_loop: - movdqu 0x00($inp), @XMM[0] # load input - movdqu 0x10($inp), @XMM[1] - movdqu 0x20($inp), @XMM[2] - movdqu 0x30($inp), @XMM[3] - movdqu 0x40($inp), @XMM[4] - movdqu 0x50($inp), @XMM[5] - mov %rsp, %rax # pass key schedule - movdqu 0x60($inp), @XMM[6] - mov %ebx,%r10d # pass rounds - movdqu 0x70($inp), @XMM[7] - lea 0x80($inp), $inp - - call _bsaes_decrypt8 - - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - sub \$8,$len - jnc .Lecb_dec_loop - - add \$8,$len - jz .Lecb_dec_done - - movdqu 0x00($inp), @XMM[0] # load input - mov %rsp, %rax # pass key schedule - mov %ebx,%r10d # pass rounds - cmp \$2,$len - jb .Lecb_dec_one - movdqu 0x10($inp), @XMM[1] - je .Lecb_dec_two - movdqu 0x20($inp), @XMM[2] - cmp \$4,$len - jb .Lecb_dec_three - movdqu 0x30($inp), @XMM[3] - je .Lecb_dec_four - movdqu 0x40($inp), @XMM[4] - cmp \$6,$len - jb .Lecb_dec_five - movdqu 0x50($inp), @XMM[5] - je .Lecb_dec_six - movdqu 0x60($inp), @XMM[6] - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_six: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_five: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_four: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_three: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_two: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_one: - call _bsaes_decrypt8 - movdqu @XMM[0], 0x00($out) # write output - jmp .Lecb_dec_done -.align 16 -.Lecb_dec_short: - lea ($inp), $arg1 - lea ($out), $arg2 - lea ($key), $arg3 - call asm_AES_decrypt - lea 16($inp), $inp - lea 16($out), $out - dec $len - jnz .Lecb_dec_short - -.Lecb_dec_done: - lea (%rsp),%rax - pxor %xmm0, %xmm0 -.Lecb_dec_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - jb .Lecb_dec_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lecb_dec_epilogue: - ret -.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks -___ -} -$code.=<<___; -.extern asm_AES_cbc_encrypt -.globl bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,\@abi-omnipotent -.align 16 -bsaes_cbc_encrypt: -___ -$code.=<<___ if ($win64); - mov 48(%rsp),$arg6 # pull direction flag -___ -$code.=<<___; - cmp \$0,$arg6 - jne asm_AES_cbc_encrypt - cmp \$128,$arg3 - jb asm_AES_cbc_encrypt - - mov %rsp, %rax -.Lcbc_dec_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp), %rsp -___ -$code.=<<___ if ($win64); - mov 0xa0(%rsp),$arg5 # pull ivp - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lcbc_dec_body: -___ -$code.=<<___; - mov %rsp, %rbp # backup %rsp - mov 240($arg4), %eax # rounds - mov $arg1, $inp # backup arguments - mov $arg2, $out - mov $arg3, $len - mov $arg4, $key - mov $arg5, %rbx - shr \$4, $len # bytes to blocks - - mov %eax, %edx # rounds - shl \$7, %rax # 128 bytes per inner round key - sub \$`128-32`, %rax # size of bit-sliced key schedule - sub %rax, %rsp - - mov %rsp, %rax # pass key schedule - mov $key, %rcx # pass key - mov %edx, %r10d # pass rounds - call _bsaes_key_convert - pxor (%rsp),%xmm7 # fix up 0 round key - movdqa %xmm6,(%rax) # save last round key - movdqa %xmm7,(%rsp) - - movdqu (%rbx), @XMM[15] # load IV - sub \$8,$len -.Lcbc_dec_loop: - movdqu 0x00($inp), @XMM[0] # load input - movdqu 0x10($inp), @XMM[1] - movdqu 0x20($inp), @XMM[2] - movdqu 0x30($inp), @XMM[3] - movdqu 0x40($inp), @XMM[4] - movdqu 0x50($inp), @XMM[5] - mov %rsp, %rax # pass key schedule - movdqu 0x60($inp), @XMM[6] - mov %edx,%r10d # pass rounds - movdqu 0x70($inp), @XMM[7] - movdqa @XMM[15], 0x20(%rbp) # put aside IV - - call _bsaes_decrypt8 - - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[10] - pxor @XMM[9], @XMM[6] - movdqu 0x30($inp), @XMM[11] - pxor @XMM[10], @XMM[4] - movdqu 0x40($inp), @XMM[12] - pxor @XMM[11], @XMM[2] - movdqu 0x50($inp), @XMM[13] - pxor @XMM[12], @XMM[7] - movdqu 0x60($inp), @XMM[14] - pxor @XMM[13], @XMM[3] - movdqu 0x70($inp), @XMM[15] # IV - pxor @XMM[14], @XMM[5] - movdqu @XMM[0], 0x00($out) # write output - lea 0x80($inp), $inp - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - sub \$8,$len - jnc .Lcbc_dec_loop - - add \$8,$len - jz .Lcbc_dec_done - - movdqu 0x00($inp), @XMM[0] # load input - mov %rsp, %rax # pass key schedule - mov %edx, %r10d # pass rounds - cmp \$2,$len - jb .Lcbc_dec_one - movdqu 0x10($inp), @XMM[1] - je .Lcbc_dec_two - movdqu 0x20($inp), @XMM[2] - cmp \$4,$len - jb .Lcbc_dec_three - movdqu 0x30($inp), @XMM[3] - je .Lcbc_dec_four - movdqu 0x40($inp), @XMM[4] - cmp \$6,$len - jb .Lcbc_dec_five - movdqu 0x50($inp), @XMM[5] - je .Lcbc_dec_six - movdqu 0x60($inp), @XMM[6] - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[10] - pxor @XMM[9], @XMM[6] - movdqu 0x30($inp), @XMM[11] - pxor @XMM[10], @XMM[4] - movdqu 0x40($inp), @XMM[12] - pxor @XMM[11], @XMM[2] - movdqu 0x50($inp), @XMM[13] - pxor @XMM[12], @XMM[7] - movdqu 0x60($inp), @XMM[15] # IV - pxor @XMM[13], @XMM[3] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_six: - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[10] - pxor @XMM[9], @XMM[6] - movdqu 0x30($inp), @XMM[11] - pxor @XMM[10], @XMM[4] - movdqu 0x40($inp), @XMM[12] - pxor @XMM[11], @XMM[2] - movdqu 0x50($inp), @XMM[15] # IV - pxor @XMM[12], @XMM[7] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_five: - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[10] - pxor @XMM[9], @XMM[6] - movdqu 0x30($inp), @XMM[11] - pxor @XMM[10], @XMM[4] - movdqu 0x40($inp), @XMM[15] # IV - pxor @XMM[11], @XMM[2] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_four: - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[10] - pxor @XMM[9], @XMM[6] - movdqu 0x30($inp), @XMM[15] # IV - pxor @XMM[10], @XMM[4] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_three: - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[9] - pxor @XMM[8], @XMM[1] - movdqu 0x20($inp), @XMM[15] # IV - pxor @XMM[9], @XMM[6] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_two: - movdqa @XMM[15], 0x20(%rbp) # put aside IV - call _bsaes_decrypt8 - pxor 0x20(%rbp), @XMM[0] # ^= IV - movdqu 0x00($inp), @XMM[8] # re-load input - movdqu 0x10($inp), @XMM[15] # IV - pxor @XMM[8], @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - jmp .Lcbc_dec_done -.align 16 -.Lcbc_dec_one: - lea ($inp), $arg1 - lea 0x20(%rbp), $arg2 # buffer output - lea ($key), $arg3 - call asm_AES_decrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[15] # ^= IV - movdqu @XMM[15], ($out) # write output - movdqa @XMM[0], @XMM[15] # IV - -.Lcbc_dec_done: - movdqu @XMM[15], (%rbx) # return IV - lea (%rsp), %rax - pxor %xmm0, %xmm0 -.Lcbc_dec_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - ja .Lcbc_dec_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lcbc_dec_epilogue: - ret -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt - -.globl bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent -.align 16 -bsaes_ctr32_encrypt_blocks: - mov %rsp, %rax -.Lctr_enc_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp), %rsp -___ -$code.=<<___ if ($win64); - mov 0xa0(%rsp),$arg5 # pull ivp - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lctr_enc_body: -___ -$code.=<<___; - mov %rsp, %rbp # backup %rsp - movdqu ($arg5), %xmm0 # load counter - mov 240($arg4), %eax # rounds - mov $arg1, $inp # backup arguments - mov $arg2, $out - mov $arg3, $len - mov $arg4, $key - movdqa %xmm0, 0x20(%rbp) # copy counter - cmp \$8, $arg3 - jb .Lctr_enc_short - - mov %eax, %ebx # rounds - shl \$7, %rax # 128 bytes per inner round key - sub \$`128-32`, %rax # size of bit-sliced key schedule - sub %rax, %rsp - - mov %rsp, %rax # pass key schedule - mov $key, %rcx # pass key - mov %ebx, %r10d # pass rounds - call _bsaes_key_convert - pxor %xmm6,%xmm7 # fix up last round key - movdqa %xmm7,(%rax) # save last round key - - movdqa (%rsp), @XMM[9] # load round0 key - lea .LADD1(%rip), %r11 - movdqa 0x20(%rbp), @XMM[0] # counter copy - movdqa -0x20(%r11), @XMM[8] # .LSWPUP - pshufb @XMM[8], @XMM[9] # byte swap upper part - pshufb @XMM[8], @XMM[0] - movdqa @XMM[9], (%rsp) # save adjusted round0 key - jmp .Lctr_enc_loop -.align 16 -.Lctr_enc_loop: - movdqa @XMM[0], 0x20(%rbp) # save counter - movdqa @XMM[0], @XMM[1] # prepare 8 counter values - movdqa @XMM[0], @XMM[2] - paddd 0x00(%r11), @XMM[1] # .LADD1 - movdqa @XMM[0], @XMM[3] - paddd 0x10(%r11), @XMM[2] # .LADD2 - movdqa @XMM[0], @XMM[4] - paddd 0x20(%r11), @XMM[3] # .LADD3 - movdqa @XMM[0], @XMM[5] - paddd 0x30(%r11), @XMM[4] # .LADD4 - movdqa @XMM[0], @XMM[6] - paddd 0x40(%r11), @XMM[5] # .LADD5 - movdqa @XMM[0], @XMM[7] - paddd 0x50(%r11), @XMM[6] # .LADD6 - paddd 0x60(%r11), @XMM[7] # .LADD7 - - # Borrow prologue from _bsaes_encrypt8 to use the opportunity - # to flip byte order in 32-bit counter - movdqa (%rsp), @XMM[9] # round 0 key - lea 0x10(%rsp), %rax # pass key schedule - movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR - pxor @XMM[9], @XMM[0] # xor with round0 key - pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] - pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] - pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] - pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] - pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] - pxor @XMM[9], @XMM[6] - pshufb @XMM[8], @XMM[5] - pxor @XMM[9], @XMM[7] - pshufb @XMM[8], @XMM[6] - lea .LBS0(%rip), %r11 # constants table - pshufb @XMM[8], @XMM[7] - mov %ebx,%r10d # pass rounds - - call _bsaes_encrypt8_bitslice - - sub \$8,$len - jc .Lctr_enc_loop_done - - movdqu 0x00($inp), @XMM[8] # load input - movdqu 0x10($inp), @XMM[9] - movdqu 0x20($inp), @XMM[10] - movdqu 0x30($inp), @XMM[11] - movdqu 0x40($inp), @XMM[12] - movdqu 0x50($inp), @XMM[13] - movdqu 0x60($inp), @XMM[14] - movdqu 0x70($inp), @XMM[15] - lea 0x80($inp),$inp - pxor @XMM[0], @XMM[8] - movdqa 0x20(%rbp), @XMM[0] # load counter - pxor @XMM[9], @XMM[1] - movdqu @XMM[8], 0x00($out) # write output - pxor @XMM[10], @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor @XMM[11], @XMM[6] - movdqu @XMM[4], 0x20($out) - pxor @XMM[12], @XMM[3] - movdqu @XMM[6], 0x30($out) - pxor @XMM[13], @XMM[7] - movdqu @XMM[3], 0x40($out) - pxor @XMM[14], @XMM[2] - movdqu @XMM[7], 0x50($out) - pxor @XMM[15], @XMM[5] - movdqu @XMM[2], 0x60($out) - lea .LADD1(%rip), %r11 - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - paddd 0x70(%r11), @XMM[0] # .LADD8 - jnz .Lctr_enc_loop - - jmp .Lctr_enc_done -.align 16 -.Lctr_enc_loop_done: - add \$8, $len - movdqu 0x00($inp), @XMM[8] # load input - pxor @XMM[8], @XMM[0] - movdqu @XMM[0], 0x00($out) # write output - cmp \$2,$len - jb .Lctr_enc_done - movdqu 0x10($inp), @XMM[9] - pxor @XMM[9], @XMM[1] - movdqu @XMM[1], 0x10($out) - je .Lctr_enc_done - movdqu 0x20($inp), @XMM[10] - pxor @XMM[10], @XMM[4] - movdqu @XMM[4], 0x20($out) - cmp \$4,$len - jb .Lctr_enc_done - movdqu 0x30($inp), @XMM[11] - pxor @XMM[11], @XMM[6] - movdqu @XMM[6], 0x30($out) - je .Lctr_enc_done - movdqu 0x40($inp), @XMM[12] - pxor @XMM[12], @XMM[3] - movdqu @XMM[3], 0x40($out) - cmp \$6,$len - jb .Lctr_enc_done - movdqu 0x50($inp), @XMM[13] - pxor @XMM[13], @XMM[7] - movdqu @XMM[7], 0x50($out) - je .Lctr_enc_done - movdqu 0x60($inp), @XMM[14] - pxor @XMM[14], @XMM[2] - movdqu @XMM[2], 0x60($out) - jmp .Lctr_enc_done - -.align 16 -.Lctr_enc_short: - lea 0x20(%rbp), $arg1 - lea 0x30(%rbp), $arg2 - lea ($key), $arg3 - call asm_AES_encrypt - movdqu ($inp), @XMM[1] - lea 16($inp), $inp - mov 0x2c(%rbp), %eax # load 32-bit counter - bswap %eax - pxor 0x30(%rbp), @XMM[1] - inc %eax # increment - movdqu @XMM[1], ($out) - bswap %eax - lea 16($out), $out - mov %eax, 0x2c(%rsp) # save 32-bit counter - dec $len - jnz .Lctr_enc_short - -.Lctr_enc_done: - lea (%rsp), %rax - pxor %xmm0, %xmm0 -.Lctr_enc_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - ja .Lctr_enc_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lctr_enc_epilogue: - ret -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -___ -###################################################################### -# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -my ($twmask,$twres,$twtmp)=@XMM[13..15]; -$arg6=~s/d$//; - -$code.=<<___; -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,\@abi-omnipotent -.align 16 -bsaes_xts_encrypt: - mov %rsp, %rax -.Lxts_enc_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp), %rsp -___ -$code.=<<___ if ($win64); - mov 0xa0(%rsp),$arg5 # pull key2 - mov 0xa8(%rsp),$arg6 # pull ivp - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lxts_enc_body: -___ -$code.=<<___; - mov %rsp, %rbp # backup %rsp - mov $arg1, $inp # backup arguments - mov $arg2, $out - mov $arg3, $len - mov $arg4, $key - - lea ($arg6), $arg1 - lea 0x20(%rbp), $arg2 - lea ($arg5), $arg3 - call asm_AES_encrypt # generate initial tweak - - mov 240($key), %eax # rounds - mov $len, %rbx # backup $len - - mov %eax, %edx # rounds - shl \$7, %rax # 128 bytes per inner round key - sub \$`128-32`, %rax # size of bit-sliced key schedule - sub %rax, %rsp - - mov %rsp, %rax # pass key schedule - mov $key, %rcx # pass key - mov %edx, %r10d # pass rounds - call _bsaes_key_convert - pxor %xmm6, %xmm7 # fix up last round key - movdqa %xmm7, (%rax) # save last round key - - and \$-16, $len - sub \$0x80, %rsp # place for tweak[8] - movdqa 0x20(%rbp), @XMM[7] # initial tweak - - pxor $twtmp, $twtmp - movdqa .Lxts_magic(%rip), $twmask - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - - sub \$0x80, $len - jc .Lxts_enc_short - jmp .Lxts_enc_loop - -.align 16 -.Lxts_enc_loop: -___ - for ($i=0;$i<7;$i++) { - $code.=<<___; - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - movdqa @XMM[7], @XMM[$i] - movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] -___ - $code.=<<___ if ($i>=1); - movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] -___ - $code.=<<___ if ($i>=2); - pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] -___ - } -$code.=<<___; - movdqu 0x60($inp), @XMM[8+6] - pxor @XMM[8+5], @XMM[5] - movdqu 0x70($inp), @XMM[8+7] - lea 0x80($inp), $inp - movdqa @XMM[7], 0x70(%rsp) - pxor @XMM[8+6], @XMM[6] - lea 0x80(%rsp), %rax # pass key schedule - pxor @XMM[8+7], @XMM[7] - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[6] - movdqu @XMM[4], 0x20($out) - pxor 0x40(%rsp), @XMM[3] - movdqu @XMM[6], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[3], 0x40($out) - pxor 0x60(%rsp), @XMM[2] - movdqu @XMM[7], 0x50($out) - pxor 0x70(%rsp), @XMM[5] - movdqu @XMM[2], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - - movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak - pxor $twtmp, $twtmp - movdqa .Lxts_magic(%rip), $twmask - pcmpgtd @XMM[7], $twtmp - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] - - sub \$0x80,$len - jnc .Lxts_enc_loop - -.Lxts_enc_short: - add \$0x80, $len - jz .Lxts_enc_done -___ - for ($i=0;$i<7;$i++) { - $code.=<<___; - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - movdqa @XMM[7], @XMM[$i] - movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] -___ - $code.=<<___ if ($i>=1); - movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] - cmp \$`0x10*$i`,$len - je .Lxts_enc_$i -___ - $code.=<<___ if ($i>=2); - pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] -___ - } -$code.=<<___; - movdqu 0x60($inp), @XMM[8+6] - pxor @XMM[8+5], @XMM[5] - movdqa @XMM[7], 0x70(%rsp) - lea 0x70($inp), $inp - pxor @XMM[8+6], @XMM[6] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[6] - movdqu @XMM[4], 0x20($out) - pxor 0x40(%rsp), @XMM[3] - movdqu @XMM[6], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[3], 0x40($out) - pxor 0x60(%rsp), @XMM[2] - movdqu @XMM[7], 0x50($out) - movdqu @XMM[2], 0x60($out) - lea 0x70($out), $out - - movdqa 0x70(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_6: - pxor @XMM[8+4], @XMM[4] - lea 0x60($inp), $inp - pxor @XMM[8+5], @XMM[5] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[6] - movdqu @XMM[4], 0x20($out) - pxor 0x40(%rsp), @XMM[3] - movdqu @XMM[6], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[3], 0x40($out) - movdqu @XMM[7], 0x50($out) - lea 0x60($out), $out - - movdqa 0x60(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_5: - pxor @XMM[8+3], @XMM[3] - lea 0x50($inp), $inp - pxor @XMM[8+4], @XMM[4] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[6] - movdqu @XMM[4], 0x20($out) - pxor 0x40(%rsp), @XMM[3] - movdqu @XMM[6], 0x30($out) - movdqu @XMM[3], 0x40($out) - lea 0x50($out), $out - - movdqa 0x50(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_4: - pxor @XMM[8+2], @XMM[2] - lea 0x40($inp), $inp - pxor @XMM[8+3], @XMM[3] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[6] - movdqu @XMM[4], 0x20($out) - movdqu @XMM[6], 0x30($out) - lea 0x40($out), $out - - movdqa 0x40(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_3: - pxor @XMM[8+1], @XMM[1] - lea 0x30($inp), $inp - pxor @XMM[8+2], @XMM[2] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[4] - movdqu @XMM[1], 0x10($out) - movdqu @XMM[4], 0x20($out) - lea 0x30($out), $out - - movdqa 0x30(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_2: - pxor @XMM[8+0], @XMM[0] - lea 0x20($inp), $inp - pxor @XMM[8+1], @XMM[1] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_encrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - lea 0x20($out), $out - - movdqa 0x20(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_enc_done -.align 16 -.Lxts_enc_1: - pxor @XMM[0], @XMM[8] - lea 0x10($inp), $inp - movdqa @XMM[8], 0x20(%rbp) - lea 0x20(%rbp), $arg1 - lea 0x20(%rbp), $arg2 - lea ($key), $arg3 - call asm_AES_encrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[0] # ^= tweak[] - #pxor @XMM[8], @XMM[0] - #lea 0x80(%rsp), %rax # pass key schedule - #mov %edx, %r10d # pass rounds - #call _bsaes_encrypt8 - #pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - movdqu @XMM[0], 0x00($out) # write output - lea 0x10($out), $out - - movdqa 0x10(%rsp), @XMM[7] # next iteration tweak - -.Lxts_enc_done: - and \$15, %ebx - jz .Lxts_enc_ret - mov $out, %rdx - -.Lxts_enc_steal: - movzb ($inp), %eax - movzb -16(%rdx), %ecx - lea 1($inp), $inp - mov %al, -16(%rdx) - mov %cl, 0(%rdx) - lea 1(%rdx), %rdx - sub \$1,%ebx - jnz .Lxts_enc_steal - - movdqu -16($out), @XMM[0] - lea 0x20(%rbp), $arg1 - pxor @XMM[7], @XMM[0] - lea 0x20(%rbp), $arg2 - movdqa @XMM[0], 0x20(%rbp) - lea ($key), $arg3 - call asm_AES_encrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[7] - movdqu @XMM[7], -16($out) - -.Lxts_enc_ret: - lea (%rsp), %rax - pxor %xmm0, %xmm0 -.Lxts_enc_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - ja .Lxts_enc_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lxts_enc_epilogue: - ret -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,\@abi-omnipotent -.align 16 -bsaes_xts_decrypt: - mov %rsp, %rax -.Lxts_dec_prologue: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - lea -0x48(%rsp), %rsp -___ -$code.=<<___ if ($win64); - mov 0xa0(%rsp),$arg5 # pull key2 - mov 0xa8(%rsp),$arg6 # pull ivp - lea -0xa0(%rsp), %rsp - movaps %xmm6, 0x40(%rsp) - movaps %xmm7, 0x50(%rsp) - movaps %xmm8, 0x60(%rsp) - movaps %xmm9, 0x70(%rsp) - movaps %xmm10, 0x80(%rsp) - movaps %xmm11, 0x90(%rsp) - movaps %xmm12, 0xa0(%rsp) - movaps %xmm13, 0xb0(%rsp) - movaps %xmm14, 0xc0(%rsp) - movaps %xmm15, 0xd0(%rsp) -.Lxts_dec_body: -___ -$code.=<<___; - mov %rsp, %rbp # backup %rsp - mov $arg1, $inp # backup arguments - mov $arg2, $out - mov $arg3, $len - mov $arg4, $key - - lea ($arg6), $arg1 - lea 0x20(%rbp), $arg2 - lea ($arg5), $arg3 - call asm_AES_encrypt # generate initial tweak - - mov 240($key), %eax # rounds - mov $len, %rbx # backup $len - - mov %eax, %edx # rounds - shl \$7, %rax # 128 bytes per inner round key - sub \$`128-32`, %rax # size of bit-sliced key schedule - sub %rax, %rsp - - mov %rsp, %rax # pass key schedule - mov $key, %rcx # pass key - mov %edx, %r10d # pass rounds - call _bsaes_key_convert - pxor (%rsp), %xmm7 # fix up round 0 key - movdqa %xmm6, (%rax) # save last round key - movdqa %xmm7, (%rsp) - - xor %eax, %eax # if ($len%16) len-=16; - and \$-16, $len - test \$15, %ebx - setnz %al - shl \$4, %rax - sub %rax, $len - - sub \$0x80, %rsp # place for tweak[8] - movdqa 0x20(%rbp), @XMM[7] # initial tweak - - pxor $twtmp, $twtmp - movdqa .Lxts_magic(%rip), $twmask - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - - sub \$0x80, $len - jc .Lxts_dec_short - jmp .Lxts_dec_loop - -.align 16 -.Lxts_dec_loop: -___ - for ($i=0;$i<7;$i++) { - $code.=<<___; - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - movdqa @XMM[7], @XMM[$i] - movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] -___ - $code.=<<___ if ($i>=1); - movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] -___ - $code.=<<___ if ($i>=2); - pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] -___ - } -$code.=<<___; - movdqu 0x60($inp), @XMM[8+6] - pxor @XMM[8+5], @XMM[5] - movdqu 0x70($inp), @XMM[8+7] - lea 0x80($inp), $inp - movdqa @XMM[7], 0x70(%rsp) - pxor @XMM[8+6], @XMM[6] - lea 0x80(%rsp), %rax # pass key schedule - pxor @XMM[8+7], @XMM[7] - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[4] - movdqu @XMM[6], 0x20($out) - pxor 0x40(%rsp), @XMM[2] - movdqu @XMM[4], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[2], 0x40($out) - pxor 0x60(%rsp), @XMM[3] - movdqu @XMM[7], 0x50($out) - pxor 0x70(%rsp), @XMM[5] - movdqu @XMM[3], 0x60($out) - movdqu @XMM[5], 0x70($out) - lea 0x80($out), $out - - movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak - pxor $twtmp, $twtmp - movdqa .Lxts_magic(%rip), $twmask - pcmpgtd @XMM[7], $twtmp - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] - - sub \$0x80,$len - jnc .Lxts_dec_loop - -.Lxts_dec_short: - add \$0x80, $len - jz .Lxts_dec_done -___ - for ($i=0;$i<7;$i++) { - $code.=<<___; - pshufd \$0x13, $twtmp, $twres - pxor $twtmp, $twtmp - movdqa @XMM[7], @XMM[$i] - movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i] - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - pcmpgtd @XMM[7], $twtmp # broadcast upper bits - pxor $twres, @XMM[7] -___ - $code.=<<___ if ($i>=1); - movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1] - cmp \$`0x10*$i`,$len - je .Lxts_dec_$i -___ - $code.=<<___ if ($i>=2); - pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[] -___ - } -$code.=<<___; - movdqu 0x60($inp), @XMM[8+6] - pxor @XMM[8+5], @XMM[5] - movdqa @XMM[7], 0x70(%rsp) - lea 0x70($inp), $inp - pxor @XMM[8+6], @XMM[6] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[4] - movdqu @XMM[6], 0x20($out) - pxor 0x40(%rsp), @XMM[2] - movdqu @XMM[4], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[2], 0x40($out) - pxor 0x60(%rsp), @XMM[3] - movdqu @XMM[7], 0x50($out) - movdqu @XMM[3], 0x60($out) - lea 0x70($out), $out - - movdqa 0x70(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_6: - pxor @XMM[8+4], @XMM[4] - lea 0x60($inp), $inp - pxor @XMM[8+5], @XMM[5] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[4] - movdqu @XMM[6], 0x20($out) - pxor 0x40(%rsp), @XMM[2] - movdqu @XMM[4], 0x30($out) - pxor 0x50(%rsp), @XMM[7] - movdqu @XMM[2], 0x40($out) - movdqu @XMM[7], 0x50($out) - lea 0x60($out), $out - - movdqa 0x60(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_5: - pxor @XMM[8+3], @XMM[3] - lea 0x50($inp), $inp - pxor @XMM[8+4], @XMM[4] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[4] - movdqu @XMM[6], 0x20($out) - pxor 0x40(%rsp), @XMM[2] - movdqu @XMM[4], 0x30($out) - movdqu @XMM[2], 0x40($out) - lea 0x50($out), $out - - movdqa 0x50(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_4: - pxor @XMM[8+2], @XMM[2] - lea 0x40($inp), $inp - pxor @XMM[8+3], @XMM[3] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - pxor 0x30(%rsp), @XMM[4] - movdqu @XMM[6], 0x20($out) - movdqu @XMM[4], 0x30($out) - lea 0x40($out), $out - - movdqa 0x40(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_3: - pxor @XMM[8+1], @XMM[1] - lea 0x30($inp), $inp - pxor @XMM[8+2], @XMM[2] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - pxor 0x20(%rsp), @XMM[6] - movdqu @XMM[1], 0x10($out) - movdqu @XMM[6], 0x20($out) - lea 0x30($out), $out - - movdqa 0x30(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_2: - pxor @XMM[8+0], @XMM[0] - lea 0x20($inp), $inp - pxor @XMM[8+1], @XMM[1] - lea 0x80(%rsp), %rax # pass key schedule - mov %edx, %r10d # pass rounds - - call _bsaes_decrypt8 - - pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - pxor 0x10(%rsp), @XMM[1] - movdqu @XMM[0], 0x00($out) # write output - movdqu @XMM[1], 0x10($out) - lea 0x20($out), $out - - movdqa 0x20(%rsp), @XMM[7] # next iteration tweak - jmp .Lxts_dec_done -.align 16 -.Lxts_dec_1: - pxor @XMM[0], @XMM[8] - lea 0x10($inp), $inp - movdqa @XMM[8], 0x20(%rbp) - lea 0x20(%rbp), $arg1 - lea 0x20(%rbp), $arg2 - lea ($key), $arg3 - call asm_AES_decrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[0] # ^= tweak[] - #pxor @XMM[8], @XMM[0] - #lea 0x80(%rsp), %rax # pass key schedule - #mov %edx, %r10d # pass rounds - #call _bsaes_decrypt8 - #pxor 0x00(%rsp), @XMM[0] # ^= tweak[] - movdqu @XMM[0], 0x00($out) # write output - lea 0x10($out), $out - - movdqa 0x10(%rsp), @XMM[7] # next iteration tweak - -.Lxts_dec_done: - and \$15, %ebx - jz .Lxts_dec_ret - - pxor $twtmp, $twtmp - movdqa .Lxts_magic(%rip), $twmask - pcmpgtd @XMM[7], $twtmp - pshufd \$0x13, $twtmp, $twres - movdqa @XMM[7], @XMM[6] - paddq @XMM[7], @XMM[7] # psllq 1,$tweak - pand $twmask, $twres # isolate carry and residue - movdqu ($inp), @XMM[0] - pxor $twres, @XMM[7] - - lea 0x20(%rbp), $arg1 - pxor @XMM[7], @XMM[0] - lea 0x20(%rbp), $arg2 - movdqa @XMM[0], 0x20(%rbp) - lea ($key), $arg3 - call asm_AES_decrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[7] - mov $out, %rdx - movdqu @XMM[7], ($out) - -.Lxts_dec_steal: - movzb 16($inp), %eax - movzb (%rdx), %ecx - lea 1($inp), $inp - mov %al, (%rdx) - mov %cl, 16(%rdx) - lea 1(%rdx), %rdx - sub \$1,%ebx - jnz .Lxts_dec_steal - - movdqu ($out), @XMM[0] - lea 0x20(%rbp), $arg1 - pxor @XMM[6], @XMM[0] - lea 0x20(%rbp), $arg2 - movdqa @XMM[0], 0x20(%rbp) - lea ($key), $arg3 - call asm_AES_decrypt # doesn't touch %xmm - pxor 0x20(%rbp), @XMM[6] - movdqu @XMM[6], ($out) - -.Lxts_dec_ret: - lea (%rsp), %rax - pxor %xmm0, %xmm0 -.Lxts_dec_bzero: # wipe key schedule [if any] - movdqa %xmm0, 0x00(%rax) - movdqa %xmm0, 0x10(%rax) - lea 0x20(%rax), %rax - cmp %rax, %rbp - ja .Lxts_dec_bzero - - lea (%rbp),%rsp # restore %rsp -___ -$code.=<<___ if ($win64); - movaps 0x40(%rbp), %xmm6 - movaps 0x50(%rbp), %xmm7 - movaps 0x60(%rbp), %xmm8 - movaps 0x70(%rbp), %xmm9 - movaps 0x80(%rbp), %xmm10 - movaps 0x90(%rbp), %xmm11 - movaps 0xa0(%rbp), %xmm12 - movaps 0xb0(%rbp), %xmm13 - movaps 0xc0(%rbp), %xmm14 - movaps 0xd0(%rbp), %xmm15 - lea 0xa0(%rbp), %rsp -___ -$code.=<<___; - mov 0x48(%rsp), %r15 - mov 0x50(%rsp), %r14 - mov 0x58(%rsp), %r13 - mov 0x60(%rsp), %r12 - mov 0x68(%rsp), %rbx - mov 0x70(%rsp), %rax - lea 0x78(%rsp), %rsp - mov %rax, %rbp -.Lxts_dec_epilogue: - ret -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -___ -} -$code.=<<___; -.type _bsaes_const,\@object -.align 64 -_bsaes_const: -.LM0ISR: # InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LBS0: # bit-slice constants - .quad 0x5555555555555555, 0x5555555555555555 -.LBS1: - .quad 0x3333333333333333, 0x3333333333333333 -.LBS2: - .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f -.LSR: # shiftrows constants - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0SR: - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSWPUP: # byte-swap upper dword - .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 -.LSWPUPM0SR: - .quad 0x0a0d02060c03070b, 0x0004080f05090e01 -.LADD1: # counter increment constants - .quad 0x0000000000000000, 0x0000000100000000 -.LADD2: - .quad 0x0000000000000000, 0x0000000200000000 -.LADD3: - .quad 0x0000000000000000, 0x0000000300000000 -.LADD4: - .quad 0x0000000000000000, 0x0000000400000000 -.LADD5: - .quad 0x0000000000000000, 0x0000000500000000 -.LADD6: - .quad 0x0000000000000000, 0x0000000600000000 -.LADD7: - .quad 0x0000000000000000, 0x0000000700000000 -.LADD8: - .quad 0x0000000000000000, 0x0000000800000000 -.Lxts_magic: - .long 0x87,0,1,0 -.Lmasks: - .quad 0x0101010101010101, 0x0101010101010101 - .quad 0x0202020202020202, 0x0202020202020202 - .quad 0x0404040404040404, 0x0404040404040404 - .quad 0x0808080808080808, 0x0808080808080808 -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.L63: - .quad 0x6363636363636363, 0x6363636363636363 -.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov" -.align 64 -.size _bsaes_const,.-_bsaes_const -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - mov 160($context),%rax # pull context->Rbp - - lea 0x40(%rax),%rsi # %xmm save area - lea 512($context),%rdi # &context.Xmm6 - mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0xa0(%rax),%rax # adjust stack pointer - - mov 0x70(%rax),%rbp - mov 0x68(%rax),%rbx - mov 0x60(%rax),%r12 - mov 0x58(%rax),%r13 - mov 0x50(%rax),%r14 - mov 0x48(%rax),%r15 - lea 0x78(%rax),%rax # adjust stack pointer - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov %rax,152($context) # restore context->Rsp - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 -___ -$code.=<<___ if ($ecb); - .rva .Lecb_enc_prologue - .rva .Lecb_enc_epilogue - .rva .Lecb_enc_info - - .rva .Lecb_dec_prologue - .rva .Lecb_dec_epilogue - .rva .Lecb_dec_info -___ -$code.=<<___; - .rva .Lcbc_dec_prologue - .rva .Lcbc_dec_epilogue - .rva .Lcbc_dec_info - - .rva .Lctr_enc_prologue - .rva .Lctr_enc_epilogue - .rva .Lctr_enc_info - - .rva .Lxts_enc_prologue - .rva .Lxts_enc_epilogue - .rva .Lxts_enc_info - - .rva .Lxts_dec_prologue - .rva .Lxts_dec_epilogue - .rva .Lxts_dec_info - -.section .xdata -.align 8 -___ -$code.=<<___ if ($ecb); -.Lecb_enc_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[] -.Lecb_dec_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[] -___ -$code.=<<___; -.Lcbc_dec_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[] -.Lctr_enc_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[] -.Lxts_enc_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[] -.Lxts_dec_info: - .byte 9,0,0,0 - .rva se_handler - .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[] -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86.pl b/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86.pl deleted file mode 100644 index 1533e2c304..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86.pl +++ /dev/null @@ -1,903 +0,0 @@ -#!/usr/bin/env perl - -###################################################################### -## Constant-time SSSE3 AES core implementation. -## version 0.1 -## -## By Mike Hamburg (Stanford University), 2009 -## Public domain. -## -## For details see http://shiftleft.org/papers/vector_aes/ and -## http://crypto.stanford.edu/vpaes/. - -###################################################################### -# September 2011. -# -# Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for -# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt -# doesn't handle partial vectors (doesn't have to if called from -# EVP only). "Drop-in" implies that this module doesn't share key -# schedule structure with the original nor does it make assumption -# about its alignment... -# -# Performance summary. aes-586.pl column lists large-block CBC -# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per -# byte processed with 128-bit key, and vpaes-x86.pl column - [also -# large-block CBC] encrypt/decrypt. -# -# aes-586.pl vpaes-x86.pl -# -# Core 2(**) 29.1/42.3/18.3 22.0/25.6(***) -# Nehalem 27.9/40.4/18.1 10.3/12.0 -# Atom 102./119./60.1 64.5/85.3(***) -# -# (*) "Hyper-threading" in the context refers rather to cache shared -# among multiple cores, than to specifically Intel HTT. As vast -# majority of contemporary cores share cache, slower code path -# is common place. In other words "with-hyper-threading-off" -# results are presented mostly for reference purposes. -# -# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. -# -# (***) Less impressive improvement on Core 2 and Atom is due to slow -# pshufb, yet it's respectable +32%/65% improvement on Core 2 -# and +58%/40% on Atom (as implied, over "hyper-threading-safe" -# code path). -# -# - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); - -$PREFIX="vpaes"; - -my ($round, $base, $magic, $key, $const, $inp, $out)= - ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); - -&static_label("_vpaes_consts"); -&static_label("_vpaes_schedule_low_round"); - -&set_label("_vpaes_consts",64); -$k_inv=-0x30; # inv, inva - &data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309); - &data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C); - -$k_s0F=-0x10; # s0F - &data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F); - -$k_ipt=0x00; # input transform (lo, hi) - &data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090); - &data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC); - -$k_sb1=0x20; # sb1u, sb1t - &data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E); - &data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1); -$k_sb2=0x40; # sb2u, sb2t - &data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955); - &data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8); -$k_sbo=0x60; # sbou, sbot - &data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A); - &data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1); - -$k_mc_forward=0x80; # mc_forward - &data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D); - &data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201); - &data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605); - &data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09); - -$k_mc_backward=0xc0; # mc_backward - &data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F); - &data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B); - &data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407); - &data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003); - -$k_sr=0x100; # sr - &data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C); - &data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C); - &data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C); - &data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C); - -$k_rcon=0x140; # rcon - &data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808); - -$k_s63=0x150; # s63: all equal to 0x63 transformed - &data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B); - -$k_opt=0x160; # output transform - &data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121); - &data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1); - -$k_deskew=0x180; # deskew tables: inverts the sbox's "skew" - &data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A); - &data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB); -## -## Decryption stuff -## Key schedule constants -## -$k_dksd=0x1a0; # decryption key schedule: invskew x*D - &data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4); - &data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA); -$k_dksb=0x1c0; # decryption key schedule: invskew x*B - &data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386); - &data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F); -$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63 - &data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C); - &data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A); -$k_dks9=0x200; # decryption key schedule: invskew x*9 - &data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334); - &data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC); - -## -## Decryption stuff -## Round function constants -## -$k_dipt=0x220; # decryption input transform - &data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E); - &data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772); - -$k_dsb9=0x240; # decryption sbox output *9*u, *9*t - &data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50); - &data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E); -$k_dsbd=0x260; # decryption sbox output *D*u, *D*t - &data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13); - &data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D); -$k_dsbb=0x280; # decryption sbox output *B*u, *B*t - &data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6); - &data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E); -$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t - &data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004); - &data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B); -$k_dsbo=0x2c0; # decryption sbox final output - &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); - &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); -&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"); -&align (64); - -&function_begin_B("_vpaes_preheat"); - &add ($const,&DWP(0,"esp")); - &movdqa ("xmm7",&QWP($k_inv,$const)); - &movdqa ("xmm6",&QWP($k_s0F,$const)); - &ret (); -&function_end_B("_vpaes_preheat"); - -## -## _aes_encrypt_core -## -## AES-encrypt %xmm0. -## -## Inputs: -## %xmm0 = input -## %xmm6-%xmm7 as in _vpaes_preheat -## (%edx) = scheduled keys -## -## Output in %xmm0 -## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx -## -## -&function_begin_B("_vpaes_encrypt_core"); - &mov ($magic,16); - &mov ($round,&DWP(240,$key)); - &movdqa ("xmm1","xmm6") - &movdqa ("xmm2",&QWP($k_ipt,$const)); - &pandn ("xmm1","xmm0"); - &movdqu ("xmm5",&QWP(0,$key)); - &psrld ("xmm1",4); - &pand ("xmm0","xmm6"); - &pshufb ("xmm2","xmm0"); - &movdqa ("xmm0",&QWP($k_ipt+16,$const)); - &pshufb ("xmm0","xmm1"); - &pxor ("xmm2","xmm5"); - &pxor ("xmm0","xmm2"); - &add ($key,16); - &lea ($base,&DWP($k_mc_backward,$const)); - &jmp (&label("enc_entry")); - - -&set_label("enc_loop",16); - # middle of middle round - &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u - &pshufb ("xmm4","xmm2"); # 4 = sb1u - &pxor ("xmm4","xmm5"); # 4 = sb1u + k - &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t - &pshufb ("xmm0","xmm3"); # 0 = sb1t - &pxor ("xmm0","xmm4"); # 0 = A - &movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u - &pshufb ("xmm5","xmm2"); # 4 = sb2u - &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[] - &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t - &pshufb ("xmm2","xmm3"); # 2 = sb2t - &pxor ("xmm2","xmm5"); # 2 = 2A - &movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[] - &movdqa ("xmm3","xmm0"); # 3 = A - &pshufb ("xmm0","xmm1"); # 0 = B - &add ($key,16); # next key - &pxor ("xmm0","xmm2"); # 0 = 2A+B - &pshufb ("xmm3","xmm4"); # 3 = D - &add ($magic,16); # next mc - &pxor ("xmm3","xmm0"); # 3 = 2A+B+D - &pshufb ("xmm0","xmm1"); # 0 = 2B+C - &and ($magic,0x30); # ... mod 4 - &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D - &sub ($round,1); # nr-- - -&set_label("enc_entry"); - # top of round - &movdqa ("xmm1","xmm6"); # 1 : i - &pandn ("xmm1","xmm0"); # 1 = i<<4 - &psrld ("xmm1",4); # 1 = i - &pand ("xmm0","xmm6"); # 0 = k - &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k - &pshufb ("xmm5","xmm0"); # 2 = a/k - &pxor ("xmm0","xmm1"); # 0 = j - &movdqa ("xmm3","xmm7"); # 3 : 1/i - &pshufb ("xmm3","xmm1"); # 3 = 1/i - &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k - &movdqa ("xmm4","xmm7"); # 4 : 1/j - &pshufb ("xmm4","xmm0"); # 4 = 1/j - &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k - &movdqa ("xmm2","xmm7"); # 2 : 1/iak - &pshufb ("xmm2","xmm3"); # 2 = 1/iak - &pxor ("xmm2","xmm0"); # 2 = io - &movdqa ("xmm3","xmm7"); # 3 : 1/jak - &movdqu ("xmm5",&QWP(0,$key)); - &pshufb ("xmm3","xmm4"); # 3 = 1/jak - &pxor ("xmm3","xmm1"); # 3 = jo - &jnz (&label("enc_loop")); - - # middle of last round - &movdqa ("xmm4",&QWP($k_sbo,$const)); # 3 : sbou .Lk_sbo - &movdqa ("xmm0",&QWP($k_sbo+16,$const));# 3 : sbot .Lk_sbo+16 - &pshufb ("xmm4","xmm2"); # 4 = sbou - &pxor ("xmm4","xmm5"); # 4 = sb1u + k - &pshufb ("xmm0","xmm3"); # 0 = sb1t - &movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[] - &pxor ("xmm0","xmm4"); # 0 = A - &pshufb ("xmm0","xmm1"); - &ret (); -&function_end_B("_vpaes_encrypt_core"); - -## -## Decryption core -## -## Same API as encryption core. -## -&function_begin_B("_vpaes_decrypt_core"); - &mov ($round,&DWP(240,$key)); - &lea ($base,&DWP($k_dsbd,$const)); - &movdqa ("xmm1","xmm6"); - &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base)); - &pandn ("xmm1","xmm0"); - &mov ($magic,$round); - &psrld ("xmm1",4) - &movdqu ("xmm5",&QWP(0,$key)); - &shl ($magic,4); - &pand ("xmm0","xmm6"); - &pshufb ("xmm2","xmm0"); - &movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base)); - &xor ($magic,0x30); - &pshufb ("xmm0","xmm1"); - &and ($magic,0x30); - &pxor ("xmm2","xmm5"); - &movdqa ("xmm5",&QWP($k_mc_forward+48,$const)); - &pxor ("xmm0","xmm2"); - &add ($key,16); - &lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic)); - &jmp (&label("dec_entry")); - -&set_label("dec_loop",16); -## -## Inverse mix columns -## - &movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u - &pshufb ("xmm4","xmm2"); # 4 = sb9u - &pxor ("xmm4","xmm0"); - &movdqa ("xmm0",&QWP(-0x10,$base)); # 0 : sb9t - &pshufb ("xmm0","xmm3"); # 0 = sb9t - &pxor ("xmm0","xmm4"); # 0 = ch - &add ($key,16); # next round key - - &pshufb ("xmm0","xmm5"); # MC ch - &movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu - &pshufb ("xmm4","xmm2"); # 4 = sbdu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x10,$base)); # 0 : sbdt - &pshufb ("xmm0","xmm3"); # 0 = sbdt - &pxor ("xmm0","xmm4"); # 0 = ch - &sub ($round,1); # nr-- - - &pshufb ("xmm0","xmm5"); # MC ch - &movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu - &pshufb ("xmm4","xmm2"); # 4 = sbbu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x30,$base)); # 0 : sbbt - &pshufb ("xmm0","xmm3"); # 0 = sbbt - &pxor ("xmm0","xmm4"); # 0 = ch - - &pshufb ("xmm0","xmm5"); # MC ch - &movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu - &pshufb ("xmm4","xmm2"); # 4 = sbeu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x50,$base)); # 0 : sbet - &pshufb ("xmm0","xmm3"); # 0 = sbet - &pxor ("xmm0","xmm4"); # 0 = ch - - &palignr("xmm5","xmm5",12); - -&set_label("dec_entry"); - # top of round - &movdqa ("xmm1","xmm6"); # 1 : i - &pandn ("xmm1","xmm0"); # 1 = i<<4 - &psrld ("xmm1",4); # 1 = i - &pand ("xmm0","xmm6"); # 0 = k - &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k - &pshufb ("xmm2","xmm0"); # 2 = a/k - &pxor ("xmm0","xmm1"); # 0 = j - &movdqa ("xmm3","xmm7"); # 3 : 1/i - &pshufb ("xmm3","xmm1"); # 3 = 1/i - &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k - &movdqa ("xmm4","xmm7"); # 4 : 1/j - &pshufb ("xmm4","xmm0"); # 4 = 1/j - &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k - &movdqa ("xmm2","xmm7"); # 2 : 1/iak - &pshufb ("xmm2","xmm3"); # 2 = 1/iak - &pxor ("xmm2","xmm0"); # 2 = io - &movdqa ("xmm3","xmm7"); # 3 : 1/jak - &pshufb ("xmm3","xmm4"); # 3 = 1/jak - &pxor ("xmm3","xmm1"); # 3 = jo - &movdqu ("xmm0",&QWP(0,$key)); - &jnz (&label("dec_loop")); - - # middle of last round - &movdqa ("xmm4",&QWP(0x60,$base)); # 3 : sbou - &pshufb ("xmm4","xmm2"); # 4 = sbou - &pxor ("xmm4","xmm0"); # 4 = sb1u + k - &movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot - &movdqa ("xmm2",&QWP(0,$magic)); - &pshufb ("xmm0","xmm3"); # 0 = sb1t - &pxor ("xmm0","xmm4"); # 0 = A - &pshufb ("xmm0","xmm2"); - &ret (); -&function_end_B("_vpaes_decrypt_core"); - -######################################################## -## ## -## AES key schedule ## -## ## -######################################################## -&function_begin_B("_vpaes_schedule_core"); - &add ($const,&DWP(0,"esp")); - &movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned) - &movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon - - # input transform - &movdqa ("xmm3","xmm0"); - &lea ($base,&DWP($k_ipt,$const)); - &movdqa (&QWP(4,"esp"),"xmm2"); # xmm8 - &call ("_vpaes_schedule_transform"); - &movdqa ("xmm7","xmm0"); - - &test ($out,$out); - &jnz (&label("schedule_am_decrypting")); - - # encrypting, output zeroth round key after transform - &movdqu (&QWP(0,$key),"xmm0"); - &jmp (&label("schedule_go")); - -&set_label("schedule_am_decrypting"); - # decrypting, output zeroth round key after shiftrows - &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); - &pshufb ("xmm3","xmm1"); - &movdqu (&QWP(0,$key),"xmm3"); - &xor ($magic,0x30); - -&set_label("schedule_go"); - &cmp ($round,192); - &ja (&label("schedule_256")); - &je (&label("schedule_192")); - # 128: fall though - -## -## .schedule_128 -## -## 128-bit specific part of key schedule. -## -## This schedule is really simple, because all its parts -## are accomplished by the subroutines. -## -&set_label("schedule_128"); - &mov ($round,10); - -&set_label("loop_schedule_128"); - &call ("_vpaes_schedule_round"); - &dec ($round); - &jz (&label("schedule_mangle_last")); - &call ("_vpaes_schedule_mangle"); # write output - &jmp (&label("loop_schedule_128")); - -## -## .aes_schedule_192 -## -## 192-bit specific part of key schedule. -## -## The main body of this schedule is the same as the 128-bit -## schedule, but with more smearing. The long, high side is -## stored in %xmm7 as before, and the short, low side is in -## the high bits of %xmm6. -## -## This schedule is somewhat nastier, however, because each -## round produces 192 bits of key material, or 1.5 round keys. -## Therefore, on each cycle we do 2 rounds and produce 3 round -## keys. -## -&set_label("schedule_192",16); - &movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned) - &call ("_vpaes_schedule_transform"); # input transform - &movdqa ("xmm6","xmm0"); # save short part - &pxor ("xmm4","xmm4"); # clear 4 - &movhlps("xmm6","xmm4"); # clobber low side with zeros - &mov ($round,4); - -&set_label("loop_schedule_192"); - &call ("_vpaes_schedule_round"); - &palignr("xmm0","xmm6",8); - &call ("_vpaes_schedule_mangle"); # save key n - &call ("_vpaes_schedule_192_smear"); - &call ("_vpaes_schedule_mangle"); # save key n+1 - &call ("_vpaes_schedule_round"); - &dec ($round); - &jz (&label("schedule_mangle_last")); - &call ("_vpaes_schedule_mangle"); # save key n+2 - &call ("_vpaes_schedule_192_smear"); - &jmp (&label("loop_schedule_192")); - -## -## .aes_schedule_256 -## -## 256-bit specific part of key schedule. -## -## The structure here is very similar to the 128-bit -## schedule, but with an additional "low side" in -## %xmm6. The low side's rounds are the same as the -## high side's, except no rcon and no rotation. -## -&set_label("schedule_256",16); - &movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned) - &call ("_vpaes_schedule_transform"); # input transform - &mov ($round,7); - -&set_label("loop_schedule_256"); - &call ("_vpaes_schedule_mangle"); # output low result - &movdqa ("xmm6","xmm0"); # save cur_lo in xmm6 - - # high round - &call ("_vpaes_schedule_round"); - &dec ($round); - &jz (&label("schedule_mangle_last")); - &call ("_vpaes_schedule_mangle"); - - # low round. swap xmm7 and xmm6 - &pshufd ("xmm0","xmm0",0xFF); - &movdqa (&QWP(20,"esp"),"xmm7"); - &movdqa ("xmm7","xmm6"); - &call ("_vpaes_schedule_low_round"); - &movdqa ("xmm7",&QWP(20,"esp")); - - &jmp (&label("loop_schedule_256")); - -## -## .aes_schedule_mangle_last -## -## Mangler for last round of key schedule -## Mangles %xmm0 -## when encrypting, outputs out(%xmm0) ^ 63 -## when decrypting, outputs unskew(%xmm0) -## -## Always called right before return... jumps to cleanup and exits -## -&set_label("schedule_mangle_last",16); - # schedule last round key from xmm0 - &lea ($base,&DWP($k_deskew,$const)); - &test ($out,$out); - &jnz (&label("schedule_mangle_last_dec")); - - # encrypting - &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); - &pshufb ("xmm0","xmm1"); # output permute - &lea ($base,&DWP($k_opt,$const)); # prepare to output transform - &add ($key,32); - -&set_label("schedule_mangle_last_dec"); - &add ($key,-16); - &pxor ("xmm0",&QWP($k_s63,$const)); - &call ("_vpaes_schedule_transform"); # output transform - &movdqu (&QWP(0,$key),"xmm0"); # save last key - - # cleanup - &pxor ("xmm0","xmm0"); - &pxor ("xmm1","xmm1"); - &pxor ("xmm2","xmm2"); - &pxor ("xmm3","xmm3"); - &pxor ("xmm4","xmm4"); - &pxor ("xmm5","xmm5"); - &pxor ("xmm6","xmm6"); - &pxor ("xmm7","xmm7"); - &ret (); -&function_end_B("_vpaes_schedule_core"); - -## -## .aes_schedule_192_smear -## -## Smear the short, low side in the 192-bit key schedule. -## -## Inputs: -## %xmm7: high side, b a x y -## %xmm6: low side, d c 0 0 -## %xmm13: 0 -## -## Outputs: -## %xmm6: b+c+d b+c 0 0 -## %xmm0: b+c+d b+c b a -## -&function_begin_B("_vpaes_schedule_192_smear"); - &pshufd ("xmm0","xmm6",0x80); # d c 0 0 -> c 0 0 0 - &pxor ("xmm6","xmm0"); # -> c+d c 0 0 - &pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a - &pxor ("xmm6","xmm0"); # -> b+c+d b+c b a - &movdqa ("xmm0","xmm6"); - &pxor ("xmm1","xmm1"); - &movhlps("xmm6","xmm1"); # clobber low side with zeros - &ret (); -&function_end_B("_vpaes_schedule_192_smear"); - -## -## .aes_schedule_round -## -## Runs one main round of the key schedule on %xmm0, %xmm7 -## -## Specifically, runs subbytes on the high dword of %xmm0 -## then rotates it by one byte and xors into the low dword of -## %xmm7. -## -## Adds rcon from low byte of %xmm8, then rotates %xmm8 for -## next rcon. -## -## Smears the dwords of %xmm7 by xoring the low into the -## second low, result into third, result into highest. -## -## Returns results in %xmm7 = %xmm0. -## Clobbers %xmm1-%xmm5. -## -&function_begin_B("_vpaes_schedule_round"); - # extract rcon from xmm8 - &movdqa ("xmm2",&QWP(8,"esp")); # xmm8 - &pxor ("xmm1","xmm1"); - &palignr("xmm1","xmm2",15); - &palignr("xmm2","xmm2",15); - &pxor ("xmm7","xmm1"); - - # rotate - &pshufd ("xmm0","xmm0",0xFF); - &palignr("xmm0","xmm0",1); - - # fall through... - &movdqa (&QWP(8,"esp"),"xmm2"); # xmm8 - - # low round: same as high round, but no rotation and no rcon. -&set_label("_vpaes_schedule_low_round"); - # smear xmm7 - &movdqa ("xmm1","xmm7"); - &pslldq ("xmm7",4); - &pxor ("xmm7","xmm1"); - &movdqa ("xmm1","xmm7"); - &pslldq ("xmm7",8); - &pxor ("xmm7","xmm1"); - &pxor ("xmm7",&QWP($k_s63,$const)); - - # subbyte - &movdqa ("xmm4",&QWP($k_s0F,$const)); - &movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j - &movdqa ("xmm1","xmm4"); - &pandn ("xmm1","xmm0"); - &psrld ("xmm1",4); # 1 = i - &pand ("xmm0","xmm4"); # 0 = k - &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k - &pshufb ("xmm2","xmm0"); # 2 = a/k - &pxor ("xmm0","xmm1"); # 0 = j - &movdqa ("xmm3","xmm5"); # 3 : 1/i - &pshufb ("xmm3","xmm1"); # 3 = 1/i - &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k - &movdqa ("xmm4","xmm5"); # 4 : 1/j - &pshufb ("xmm4","xmm0"); # 4 = 1/j - &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k - &movdqa ("xmm2","xmm5"); # 2 : 1/iak - &pshufb ("xmm2","xmm3"); # 2 = 1/iak - &pxor ("xmm2","xmm0"); # 2 = io - &movdqa ("xmm3","xmm5"); # 3 : 1/jak - &pshufb ("xmm3","xmm4"); # 3 = 1/jak - &pxor ("xmm3","xmm1"); # 3 = jo - &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou - &pshufb ("xmm4","xmm2"); # 4 = sbou - &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot - &pshufb ("xmm0","xmm3"); # 0 = sb1t - &pxor ("xmm0","xmm4"); # 0 = sbox output - - # add in smeared stuff - &pxor ("xmm0","xmm7"); - &movdqa ("xmm7","xmm0"); - &ret (); -&function_end_B("_vpaes_schedule_round"); - -## -## .aes_schedule_transform -## -## Linear-transform %xmm0 according to tables at (%ebx) -## -## Output in %xmm0 -## Clobbers %xmm1, %xmm2 -## -&function_begin_B("_vpaes_schedule_transform"); - &movdqa ("xmm2",&QWP($k_s0F,$const)); - &movdqa ("xmm1","xmm2"); - &pandn ("xmm1","xmm0"); - &psrld ("xmm1",4); - &pand ("xmm0","xmm2"); - &movdqa ("xmm2",&QWP(0,$base)); - &pshufb ("xmm2","xmm0"); - &movdqa ("xmm0",&QWP(16,$base)); - &pshufb ("xmm0","xmm1"); - &pxor ("xmm0","xmm2"); - &ret (); -&function_end_B("_vpaes_schedule_transform"); - -## -## .aes_schedule_mangle -## -## Mangle xmm0 from (basis-transformed) standard version -## to our version. -## -## On encrypt, -## xor with 0x63 -## multiply by circulant 0,1,1,1 -## apply shiftrows transform -## -## On decrypt, -## xor with 0x63 -## multiply by "inverse mixcolumns" circulant E,B,D,9 -## deskew -## apply shiftrows transform -## -## -## Writes out to (%edx), and increments or decrements it -## Keeps track of round number mod 4 in %ecx -## Preserves xmm0 -## Clobbers xmm1-xmm5 -## -&function_begin_B("_vpaes_schedule_mangle"); - &movdqa ("xmm4","xmm0"); # save xmm0 for later - &movdqa ("xmm5",&QWP($k_mc_forward,$const)); - &test ($out,$out); - &jnz (&label("schedule_mangle_dec")); - - # encrypting - &add ($key,16); - &pxor ("xmm4",&QWP($k_s63,$const)); - &pshufb ("xmm4","xmm5"); - &movdqa ("xmm3","xmm4"); - &pshufb ("xmm4","xmm5"); - &pxor ("xmm3","xmm4"); - &pshufb ("xmm4","xmm5"); - &pxor ("xmm3","xmm4"); - - &jmp (&label("schedule_mangle_both")); - -&set_label("schedule_mangle_dec",16); - # inverse mix columns - &movdqa ("xmm2",&QWP($k_s0F,$const)); - &lea ($inp,&DWP($k_dksd,$const)); - &movdqa ("xmm1","xmm2"); - &pandn ("xmm1","xmm4"); - &psrld ("xmm1",4); # 1 = hi - &pand ("xmm4","xmm2"); # 4 = lo - - &movdqa ("xmm2",&QWP(0,$inp)); - &pshufb ("xmm2","xmm4"); - &movdqa ("xmm3",&QWP(0x10,$inp)); - &pshufb ("xmm3","xmm1"); - &pxor ("xmm3","xmm2"); - &pshufb ("xmm3","xmm5"); - - &movdqa ("xmm2",&QWP(0x20,$inp)); - &pshufb ("xmm2","xmm4"); - &pxor ("xmm2","xmm3"); - &movdqa ("xmm3",&QWP(0x30,$inp)); - &pshufb ("xmm3","xmm1"); - &pxor ("xmm3","xmm2"); - &pshufb ("xmm3","xmm5"); - - &movdqa ("xmm2",&QWP(0x40,$inp)); - &pshufb ("xmm2","xmm4"); - &pxor ("xmm2","xmm3"); - &movdqa ("xmm3",&QWP(0x50,$inp)); - &pshufb ("xmm3","xmm1"); - &pxor ("xmm3","xmm2"); - &pshufb ("xmm3","xmm5"); - - &movdqa ("xmm2",&QWP(0x60,$inp)); - &pshufb ("xmm2","xmm4"); - &pxor ("xmm2","xmm3"); - &movdqa ("xmm3",&QWP(0x70,$inp)); - &pshufb ("xmm3","xmm1"); - &pxor ("xmm3","xmm2"); - - &add ($key,-16); - -&set_label("schedule_mangle_both"); - &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); - &pshufb ("xmm3","xmm1"); - &add ($magic,-16); - &and ($magic,0x30); - &movdqu (&QWP(0,$key),"xmm3"); - &ret (); -&function_end_B("_vpaes_schedule_mangle"); - -# -# Interface to OpenSSL -# -&function_begin("${PREFIX}_set_encrypt_key"); - &mov ($inp,&wparam(0)); # inp - &lea ($base,&DWP(-56,"esp")); - &mov ($round,&wparam(1)); # bits - &and ($base,-16); - &mov ($key,&wparam(2)); # key - &xchg ($base,"esp"); # alloca - &mov (&DWP(48,"esp"),$base); - - &mov ($base,$round); - &shr ($base,5); - &add ($base,5); - &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5; - &mov ($magic,0x30); - &mov ($out,0); - - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); - &call ("_vpaes_schedule_core"); -&set_label("pic_point"); - - &mov ("esp",&DWP(48,"esp")); - &xor ("eax","eax"); -&function_end("${PREFIX}_set_encrypt_key"); - -&function_begin("${PREFIX}_set_decrypt_key"); - &mov ($inp,&wparam(0)); # inp - &lea ($base,&DWP(-56,"esp")); - &mov ($round,&wparam(1)); # bits - &and ($base,-16); - &mov ($key,&wparam(2)); # key - &xchg ($base,"esp"); # alloca - &mov (&DWP(48,"esp"),$base); - - &mov ($base,$round); - &shr ($base,5); - &add ($base,5); - &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5; - &shl ($base,4); - &lea ($key,&DWP(16,$key,$base)); - - &mov ($out,1); - &mov ($magic,$round); - &shr ($magic,1); - &and ($magic,32); - &xor ($magic,32); # nbist==192?0:32; - - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); - &call ("_vpaes_schedule_core"); -&set_label("pic_point"); - - &mov ("esp",&DWP(48,"esp")); - &xor ("eax","eax"); -&function_end("${PREFIX}_set_decrypt_key"); - -&function_begin("${PREFIX}_encrypt"); - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); - &call ("_vpaes_preheat"); -&set_label("pic_point"); - &mov ($inp,&wparam(0)); # inp - &lea ($base,&DWP(-56,"esp")); - &mov ($out,&wparam(1)); # out - &and ($base,-16); - &mov ($key,&wparam(2)); # key - &xchg ($base,"esp"); # alloca - &mov (&DWP(48,"esp"),$base); - - &movdqu ("xmm0",&QWP(0,$inp)); - &call ("_vpaes_encrypt_core"); - &movdqu (&QWP(0,$out),"xmm0"); - - &mov ("esp",&DWP(48,"esp")); -&function_end("${PREFIX}_encrypt"); - -&function_begin("${PREFIX}_decrypt"); - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); - &call ("_vpaes_preheat"); -&set_label("pic_point"); - &mov ($inp,&wparam(0)); # inp - &lea ($base,&DWP(-56,"esp")); - &mov ($out,&wparam(1)); # out - &and ($base,-16); - &mov ($key,&wparam(2)); # key - &xchg ($base,"esp"); # alloca - &mov (&DWP(48,"esp"),$base); - - &movdqu ("xmm0",&QWP(0,$inp)); - &call ("_vpaes_decrypt_core"); - &movdqu (&QWP(0,$out),"xmm0"); - - &mov ("esp",&DWP(48,"esp")); -&function_end("${PREFIX}_decrypt"); - -&function_begin("${PREFIX}_cbc_encrypt"); - &mov ($inp,&wparam(0)); # inp - &mov ($out,&wparam(1)); # out - &mov ($round,&wparam(2)); # len - &mov ($key,&wparam(3)); # key - &sub ($round,16); - &jc (&label("cbc_abort")); - &lea ($base,&DWP(-56,"esp")); - &mov ($const,&wparam(4)); # ivp - &and ($base,-16); - &mov ($magic,&wparam(5)); # enc - &xchg ($base,"esp"); # alloca - &movdqu ("xmm1",&QWP(0,$const)); # load IV - &sub ($out,$inp); - &mov (&DWP(48,"esp"),$base); - - &mov (&DWP(0,"esp"),$out); # save out - &mov (&DWP(4,"esp"),$key) # save key - &mov (&DWP(8,"esp"),$const); # save ivp - &mov ($out,$round); # $out works as $len - - &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); - &call ("_vpaes_preheat"); -&set_label("pic_point"); - &cmp ($magic,0); - &je (&label("cbc_dec_loop")); - &jmp (&label("cbc_enc_loop")); - -&set_label("cbc_enc_loop",16); - &movdqu ("xmm0",&QWP(0,$inp)); # load input - &pxor ("xmm0","xmm1"); # inp^=iv - &call ("_vpaes_encrypt_core"); - &mov ($base,&DWP(0,"esp")); # restore out - &mov ($key,&DWP(4,"esp")); # restore key - &movdqa ("xmm1","xmm0"); - &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output - &lea ($inp,&DWP(16,$inp)); - &sub ($out,16); - &jnc (&label("cbc_enc_loop")); - &jmp (&label("cbc_done")); - -&set_label("cbc_dec_loop",16); - &movdqu ("xmm0",&QWP(0,$inp)); # load input - &movdqa (&QWP(16,"esp"),"xmm1"); # save IV - &movdqa (&QWP(32,"esp"),"xmm0"); # save future IV - &call ("_vpaes_decrypt_core"); - &mov ($base,&DWP(0,"esp")); # restore out - &mov ($key,&DWP(4,"esp")); # restore key - &pxor ("xmm0",&QWP(16,"esp")); # out^=iv - &movdqa ("xmm1",&QWP(32,"esp")); # load next IV - &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output - &lea ($inp,&DWP(16,$inp)); - &sub ($out,16); - &jnc (&label("cbc_dec_loop")); - -&set_label("cbc_done"); - &mov ($base,&DWP(8,"esp")); # restore ivp - &mov ("esp",&DWP(48,"esp")); - &movdqu (&QWP(0,$base),"xmm1"); # write IV -&set_label("cbc_abort"); -&function_end("${PREFIX}_cbc_encrypt"); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86_64.pl b/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86_64.pl deleted file mode 100644 index bd7f45b850..0000000000 --- a/drivers/builtin_openssl2/crypto/aes/asm/vpaes-x86_64.pl +++ /dev/null @@ -1,1207 +0,0 @@ -#!/usr/bin/env perl - -###################################################################### -## Constant-time SSSE3 AES core implementation. -## version 0.1 -## -## By Mike Hamburg (Stanford University), 2009 -## Public domain. -## -## For details see http://shiftleft.org/papers/vector_aes/ and -## http://crypto.stanford.edu/vpaes/. - -###################################################################### -# September 2011. -# -# Interface to OpenSSL as "almost" drop-in replacement for -# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt -# doesn't handle partial vectors (doesn't have to if called from -# EVP only). "Drop-in" implies that this module doesn't share key -# schedule structure with the original nor does it make assumption -# about its alignment... -# -# Performance summary. aes-x86_64.pl column lists large-block CBC -# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per -# byte processed with 128-bit key, and vpaes-x86_64.pl column - -# [also large-block CBC] encrypt/decrypt. -# -# aes-x86_64.pl vpaes-x86_64.pl -# -# Core 2(**) 30.5/43.7/14.3 21.8/25.7(***) -# Nehalem 30.5/42.2/14.6 9.8/11.8 -# Atom 63.9/79.0/32.1 64.0/84.8(***) -# -# (*) "Hyper-threading" in the context refers rather to cache shared -# among multiple cores, than to specifically Intel HTT. As vast -# majority of contemporary cores share cache, slower code path -# is common place. In other words "with-hyper-threading-off" -# results are presented mostly for reference purposes. -# -# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. -# -# (***) Less impressive improvement on Core 2 and Atom is due to slow -# pshufb, yet it's respectable +40%/78% improvement on Core 2 -# (as implied, over "hyper-threading-safe" code path). -# -# - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$PREFIX="vpaes"; - -$code.=<<___; -.text - -## -## _aes_encrypt_core -## -## AES-encrypt %xmm0. -## -## Inputs: -## %xmm0 = input -## %xmm9-%xmm15 as in _vpaes_preheat -## (%rdx) = scheduled keys -## -## Output in %xmm0 -## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax -## Preserves %xmm6 - %xmm8 so you get some local vectors -## -## -.type _vpaes_encrypt_core,\@abi-omnipotent -.align 16 -_vpaes_encrypt_core: - mov %rdx, %r9 - mov \$16, %r11 - mov 240(%rdx),%eax - movdqa %xmm9, %xmm1 - movdqa .Lk_ipt(%rip), %xmm2 # iptlo - pandn %xmm0, %xmm1 - movdqu (%r9), %xmm5 # round0 key - psrld \$4, %xmm1 - pand %xmm9, %xmm0 - pshufb %xmm0, %xmm2 - movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi - pshufb %xmm1, %xmm0 - pxor %xmm5, %xmm2 - pxor %xmm2, %xmm0 - add \$16, %r9 - lea .Lk_mc_backward(%rip),%r10 - jmp .Lenc_entry - -.align 16 -.Lenc_loop: - # middle of middle round - movdqa %xmm13, %xmm4 # 4 : sb1u - pshufb %xmm2, %xmm4 # 4 = sb1u - pxor %xmm5, %xmm4 # 4 = sb1u + k - movdqa %xmm12, %xmm0 # 0 : sb1t - pshufb %xmm3, %xmm0 # 0 = sb1t - pxor %xmm4, %xmm0 # 0 = A - movdqa %xmm15, %xmm5 # 4 : sb2u - pshufb %xmm2, %xmm5 # 4 = sb2u - movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] - movdqa %xmm14, %xmm2 # 2 : sb2t - pshufb %xmm3, %xmm2 # 2 = sb2t - pxor %xmm5, %xmm2 # 2 = 2A - movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] - movdqa %xmm0, %xmm3 # 3 = A - pshufb %xmm1, %xmm0 # 0 = B - add \$16, %r9 # next key - pxor %xmm2, %xmm0 # 0 = 2A+B - pshufb %xmm4, %xmm3 # 3 = D - add \$16, %r11 # next mc - pxor %xmm0, %xmm3 # 3 = 2A+B+D - pshufb %xmm1, %xmm0 # 0 = 2B+C - and \$0x30, %r11 # ... mod 4 - pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D - sub \$1,%rax # nr-- - -.Lenc_entry: - # top of round - movdqa %xmm9, %xmm1 # 1 : i - pandn %xmm0, %xmm1 # 1 = i<<4 - psrld \$4, %xmm1 # 1 = i - pand %xmm9, %xmm0 # 0 = k - movdqa %xmm11, %xmm5 # 2 : a/k - pshufb %xmm0, %xmm5 # 2 = a/k - pxor %xmm1, %xmm0 # 0 = j - movdqa %xmm10, %xmm3 # 3 : 1/i - pshufb %xmm1, %xmm3 # 3 = 1/i - pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k - movdqa %xmm10, %xmm4 # 4 : 1/j - pshufb %xmm0, %xmm4 # 4 = 1/j - pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k - movdqa %xmm10, %xmm2 # 2 : 1/iak - pshufb %xmm3, %xmm2 # 2 = 1/iak - pxor %xmm0, %xmm2 # 2 = io - movdqa %xmm10, %xmm3 # 3 : 1/jak - movdqu (%r9), %xmm5 - pshufb %xmm4, %xmm3 # 3 = 1/jak - pxor %xmm1, %xmm3 # 3 = jo - jnz .Lenc_loop - - # middle of last round - movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo - movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 - pshufb %xmm2, %xmm4 # 4 = sbou - pxor %xmm5, %xmm4 # 4 = sb1u + k - pshufb %xmm3, %xmm0 # 0 = sb1t - movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] - pxor %xmm4, %xmm0 # 0 = A - pshufb %xmm1, %xmm0 - ret -.size _vpaes_encrypt_core,.-_vpaes_encrypt_core - -## -## Decryption core -## -## Same API as encryption core. -## -.type _vpaes_decrypt_core,\@abi-omnipotent -.align 16 -_vpaes_decrypt_core: - mov %rdx, %r9 # load key - mov 240(%rdx),%eax - movdqa %xmm9, %xmm1 - movdqa .Lk_dipt(%rip), %xmm2 # iptlo - pandn %xmm0, %xmm1 - mov %rax, %r11 - psrld \$4, %xmm1 - movdqu (%r9), %xmm5 # round0 key - shl \$4, %r11 - pand %xmm9, %xmm0 - pshufb %xmm0, %xmm2 - movdqa .Lk_dipt+16(%rip), %xmm0 # ipthi - xor \$0x30, %r11 - lea .Lk_dsbd(%rip),%r10 - pshufb %xmm1, %xmm0 - and \$0x30, %r11 - pxor %xmm5, %xmm2 - movdqa .Lk_mc_forward+48(%rip), %xmm5 - pxor %xmm2, %xmm0 - add \$16, %r9 - add %r10, %r11 - jmp .Ldec_entry - -.align 16 -.Ldec_loop: -## -## Inverse mix columns -## - movdqa -0x20(%r10),%xmm4 # 4 : sb9u - pshufb %xmm2, %xmm4 # 4 = sb9u - pxor %xmm0, %xmm4 - movdqa -0x10(%r10),%xmm0 # 0 : sb9t - pshufb %xmm3, %xmm0 # 0 = sb9t - pxor %xmm4, %xmm0 # 0 = ch - add \$16, %r9 # next round key - - pshufb %xmm5, %xmm0 # MC ch - movdqa 0x00(%r10),%xmm4 # 4 : sbdu - pshufb %xmm2, %xmm4 # 4 = sbdu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x10(%r10),%xmm0 # 0 : sbdt - pshufb %xmm3, %xmm0 # 0 = sbdt - pxor %xmm4, %xmm0 # 0 = ch - sub \$1,%rax # nr-- - - pshufb %xmm5, %xmm0 # MC ch - movdqa 0x20(%r10),%xmm4 # 4 : sbbu - pshufb %xmm2, %xmm4 # 4 = sbbu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x30(%r10),%xmm0 # 0 : sbbt - pshufb %xmm3, %xmm0 # 0 = sbbt - pxor %xmm4, %xmm0 # 0 = ch - - pshufb %xmm5, %xmm0 # MC ch - movdqa 0x40(%r10),%xmm4 # 4 : sbeu - pshufb %xmm2, %xmm4 # 4 = sbeu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x50(%r10),%xmm0 # 0 : sbet - pshufb %xmm3, %xmm0 # 0 = sbet - pxor %xmm4, %xmm0 # 0 = ch - - palignr \$12, %xmm5, %xmm5 - -.Ldec_entry: - # top of round - movdqa %xmm9, %xmm1 # 1 : i - pandn %xmm0, %xmm1 # 1 = i<<4 - psrld \$4, %xmm1 # 1 = i - pand %xmm9, %xmm0 # 0 = k - movdqa %xmm11, %xmm2 # 2 : a/k - pshufb %xmm0, %xmm2 # 2 = a/k - pxor %xmm1, %xmm0 # 0 = j - movdqa %xmm10, %xmm3 # 3 : 1/i - pshufb %xmm1, %xmm3 # 3 = 1/i - pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k - movdqa %xmm10, %xmm4 # 4 : 1/j - pshufb %xmm0, %xmm4 # 4 = 1/j - pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k - movdqa %xmm10, %xmm2 # 2 : 1/iak - pshufb %xmm3, %xmm2 # 2 = 1/iak - pxor %xmm0, %xmm2 # 2 = io - movdqa %xmm10, %xmm3 # 3 : 1/jak - pshufb %xmm4, %xmm3 # 3 = 1/jak - pxor %xmm1, %xmm3 # 3 = jo - movdqu (%r9), %xmm0 - jnz .Ldec_loop - - # middle of last round - movdqa 0x60(%r10), %xmm4 # 3 : sbou - pshufb %xmm2, %xmm4 # 4 = sbou - pxor %xmm0, %xmm4 # 4 = sb1u + k - movdqa 0x70(%r10), %xmm0 # 0 : sbot - movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 - pshufb %xmm3, %xmm0 # 0 = sb1t - pxor %xmm4, %xmm0 # 0 = A - pshufb %xmm2, %xmm0 - ret -.size _vpaes_decrypt_core,.-_vpaes_decrypt_core - -######################################################## -## ## -## AES key schedule ## -## ## -######################################################## -.type _vpaes_schedule_core,\@abi-omnipotent -.align 16 -_vpaes_schedule_core: - # rdi = key - # rsi = size in bits - # rdx = buffer - # rcx = direction. 0=encrypt, 1=decrypt - - call _vpaes_preheat # load the tables - movdqa .Lk_rcon(%rip), %xmm8 # load rcon - movdqu (%rdi), %xmm0 # load key (unaligned) - - # input transform - movdqa %xmm0, %xmm3 - lea .Lk_ipt(%rip), %r11 - call _vpaes_schedule_transform - movdqa %xmm0, %xmm7 - - lea .Lk_sr(%rip),%r10 - test %rcx, %rcx - jnz .Lschedule_am_decrypting - - # encrypting, output zeroth round key after transform - movdqu %xmm0, (%rdx) - jmp .Lschedule_go - -.Lschedule_am_decrypting: - # decrypting, output zeroth round key after shiftrows - movdqa (%r8,%r10),%xmm1 - pshufb %xmm1, %xmm3 - movdqu %xmm3, (%rdx) - xor \$0x30, %r8 - -.Lschedule_go: - cmp \$192, %esi - ja .Lschedule_256 - je .Lschedule_192 - # 128: fall though - -## -## .schedule_128 -## -## 128-bit specific part of key schedule. -## -## This schedule is really simple, because all its parts -## are accomplished by the subroutines. -## -.Lschedule_128: - mov \$10, %esi - -.Loop_schedule_128: - call _vpaes_schedule_round - dec %rsi - jz .Lschedule_mangle_last - call _vpaes_schedule_mangle # write output - jmp .Loop_schedule_128 - -## -## .aes_schedule_192 -## -## 192-bit specific part of key schedule. -## -## The main body of this schedule is the same as the 128-bit -## schedule, but with more smearing. The long, high side is -## stored in %xmm7 as before, and the short, low side is in -## the high bits of %xmm6. -## -## This schedule is somewhat nastier, however, because each -## round produces 192 bits of key material, or 1.5 round keys. -## Therefore, on each cycle we do 2 rounds and produce 3 round -## keys. -## -.align 16 -.Lschedule_192: - movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) - call _vpaes_schedule_transform # input transform - movdqa %xmm0, %xmm6 # save short part - pxor %xmm4, %xmm4 # clear 4 - movhlps %xmm4, %xmm6 # clobber low side with zeros - mov \$4, %esi - -.Loop_schedule_192: - call _vpaes_schedule_round - palignr \$8,%xmm6,%xmm0 - call _vpaes_schedule_mangle # save key n - call _vpaes_schedule_192_smear - call _vpaes_schedule_mangle # save key n+1 - call _vpaes_schedule_round - dec %rsi - jz .Lschedule_mangle_last - call _vpaes_schedule_mangle # save key n+2 - call _vpaes_schedule_192_smear - jmp .Loop_schedule_192 - -## -## .aes_schedule_256 -## -## 256-bit specific part of key schedule. -## -## The structure here is very similar to the 128-bit -## schedule, but with an additional "low side" in -## %xmm6. The low side's rounds are the same as the -## high side's, except no rcon and no rotation. -## -.align 16 -.Lschedule_256: - movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) - call _vpaes_schedule_transform # input transform - mov \$7, %esi - -.Loop_schedule_256: - call _vpaes_schedule_mangle # output low result - movdqa %xmm0, %xmm6 # save cur_lo in xmm6 - - # high round - call _vpaes_schedule_round - dec %rsi - jz .Lschedule_mangle_last - call _vpaes_schedule_mangle - - # low round. swap xmm7 and xmm6 - pshufd \$0xFF, %xmm0, %xmm0 - movdqa %xmm7, %xmm5 - movdqa %xmm6, %xmm7 - call _vpaes_schedule_low_round - movdqa %xmm5, %xmm7 - - jmp .Loop_schedule_256 - - -## -## .aes_schedule_mangle_last -## -## Mangler for last round of key schedule -## Mangles %xmm0 -## when encrypting, outputs out(%xmm0) ^ 63 -## when decrypting, outputs unskew(%xmm0) -## -## Always called right before return... jumps to cleanup and exits -## -.align 16 -.Lschedule_mangle_last: - # schedule last round key from xmm0 - lea .Lk_deskew(%rip),%r11 # prepare to deskew - test %rcx, %rcx - jnz .Lschedule_mangle_last_dec - - # encrypting - movdqa (%r8,%r10),%xmm1 - pshufb %xmm1, %xmm0 # output permute - lea .Lk_opt(%rip), %r11 # prepare to output transform - add \$32, %rdx - -.Lschedule_mangle_last_dec: - add \$-16, %rdx - pxor .Lk_s63(%rip), %xmm0 - call _vpaes_schedule_transform # output transform - movdqu %xmm0, (%rdx) # save last key - - # cleanup - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - pxor %xmm4, %xmm4 - pxor %xmm5, %xmm5 - pxor %xmm6, %xmm6 - pxor %xmm7, %xmm7 - ret -.size _vpaes_schedule_core,.-_vpaes_schedule_core - -## -## .aes_schedule_192_smear -## -## Smear the short, low side in the 192-bit key schedule. -## -## Inputs: -## %xmm7: high side, b a x y -## %xmm6: low side, d c 0 0 -## %xmm13: 0 -## -## Outputs: -## %xmm6: b+c+d b+c 0 0 -## %xmm0: b+c+d b+c b a -## -.type _vpaes_schedule_192_smear,\@abi-omnipotent -.align 16 -_vpaes_schedule_192_smear: - pshufd \$0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0 - pxor %xmm0, %xmm6 # -> c+d c 0 0 - pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a - pxor %xmm0, %xmm6 # -> b+c+d b+c b a - movdqa %xmm6, %xmm0 - pxor %xmm1, %xmm1 - movhlps %xmm1, %xmm6 # clobber low side with zeros - ret -.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear - -## -## .aes_schedule_round -## -## Runs one main round of the key schedule on %xmm0, %xmm7 -## -## Specifically, runs subbytes on the high dword of %xmm0 -## then rotates it by one byte and xors into the low dword of -## %xmm7. -## -## Adds rcon from low byte of %xmm8, then rotates %xmm8 for -## next rcon. -## -## Smears the dwords of %xmm7 by xoring the low into the -## second low, result into third, result into highest. -## -## Returns results in %xmm7 = %xmm0. -## Clobbers %xmm1-%xmm4, %r11. -## -.type _vpaes_schedule_round,\@abi-omnipotent -.align 16 -_vpaes_schedule_round: - # extract rcon from xmm8 - pxor %xmm1, %xmm1 - palignr \$15, %xmm8, %xmm1 - palignr \$15, %xmm8, %xmm8 - pxor %xmm1, %xmm7 - - # rotate - pshufd \$0xFF, %xmm0, %xmm0 - palignr \$1, %xmm0, %xmm0 - - # fall through... - - # low round: same as high round, but no rotation and no rcon. -_vpaes_schedule_low_round: - # smear xmm7 - movdqa %xmm7, %xmm1 - pslldq \$4, %xmm7 - pxor %xmm1, %xmm7 - movdqa %xmm7, %xmm1 - pslldq \$8, %xmm7 - pxor %xmm1, %xmm7 - pxor .Lk_s63(%rip), %xmm7 - - # subbytes - movdqa %xmm9, %xmm1 - pandn %xmm0, %xmm1 - psrld \$4, %xmm1 # 1 = i - pand %xmm9, %xmm0 # 0 = k - movdqa %xmm11, %xmm2 # 2 : a/k - pshufb %xmm0, %xmm2 # 2 = a/k - pxor %xmm1, %xmm0 # 0 = j - movdqa %xmm10, %xmm3 # 3 : 1/i - pshufb %xmm1, %xmm3 # 3 = 1/i - pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k - movdqa %xmm10, %xmm4 # 4 : 1/j - pshufb %xmm0, %xmm4 # 4 = 1/j - pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k - movdqa %xmm10, %xmm2 # 2 : 1/iak - pshufb %xmm3, %xmm2 # 2 = 1/iak - pxor %xmm0, %xmm2 # 2 = io - movdqa %xmm10, %xmm3 # 3 : 1/jak - pshufb %xmm4, %xmm3 # 3 = 1/jak - pxor %xmm1, %xmm3 # 3 = jo - movdqa %xmm13, %xmm4 # 4 : sbou - pshufb %xmm2, %xmm4 # 4 = sbou - movdqa %xmm12, %xmm0 # 0 : sbot - pshufb %xmm3, %xmm0 # 0 = sb1t - pxor %xmm4, %xmm0 # 0 = sbox output - - # add in smeared stuff - pxor %xmm7, %xmm0 - movdqa %xmm0, %xmm7 - ret -.size _vpaes_schedule_round,.-_vpaes_schedule_round - -## -## .aes_schedule_transform -## -## Linear-transform %xmm0 according to tables at (%r11) -## -## Requires that %xmm9 = 0x0F0F... as in preheat -## Output in %xmm0 -## Clobbers %xmm1, %xmm2 -## -.type _vpaes_schedule_transform,\@abi-omnipotent -.align 16 -_vpaes_schedule_transform: - movdqa %xmm9, %xmm1 - pandn %xmm0, %xmm1 - psrld \$4, %xmm1 - pand %xmm9, %xmm0 - movdqa (%r11), %xmm2 # lo - pshufb %xmm0, %xmm2 - movdqa 16(%r11), %xmm0 # hi - pshufb %xmm1, %xmm0 - pxor %xmm2, %xmm0 - ret -.size _vpaes_schedule_transform,.-_vpaes_schedule_transform - -## -## .aes_schedule_mangle -## -## Mangle xmm0 from (basis-transformed) standard version -## to our version. -## -## On encrypt, -## xor with 0x63 -## multiply by circulant 0,1,1,1 -## apply shiftrows transform -## -## On decrypt, -## xor with 0x63 -## multiply by "inverse mixcolumns" circulant E,B,D,9 -## deskew -## apply shiftrows transform -## -## -## Writes out to (%rdx), and increments or decrements it -## Keeps track of round number mod 4 in %r8 -## Preserves xmm0 -## Clobbers xmm1-xmm5 -## -.type _vpaes_schedule_mangle,\@abi-omnipotent -.align 16 -_vpaes_schedule_mangle: - movdqa %xmm0, %xmm4 # save xmm0 for later - movdqa .Lk_mc_forward(%rip),%xmm5 - test %rcx, %rcx - jnz .Lschedule_mangle_dec - - # encrypting - add \$16, %rdx - pxor .Lk_s63(%rip),%xmm4 - pshufb %xmm5, %xmm4 - movdqa %xmm4, %xmm3 - pshufb %xmm5, %xmm4 - pxor %xmm4, %xmm3 - pshufb %xmm5, %xmm4 - pxor %xmm4, %xmm3 - - jmp .Lschedule_mangle_both -.align 16 -.Lschedule_mangle_dec: - # inverse mix columns - lea .Lk_dksd(%rip),%r11 - movdqa %xmm9, %xmm1 - pandn %xmm4, %xmm1 - psrld \$4, %xmm1 # 1 = hi - pand %xmm9, %xmm4 # 4 = lo - - movdqa 0x00(%r11), %xmm2 - pshufb %xmm4, %xmm2 - movdqa 0x10(%r11), %xmm3 - pshufb %xmm1, %xmm3 - pxor %xmm2, %xmm3 - pshufb %xmm5, %xmm3 - - movdqa 0x20(%r11), %xmm2 - pshufb %xmm4, %xmm2 - pxor %xmm3, %xmm2 - movdqa 0x30(%r11), %xmm3 - pshufb %xmm1, %xmm3 - pxor %xmm2, %xmm3 - pshufb %xmm5, %xmm3 - - movdqa 0x40(%r11), %xmm2 - pshufb %xmm4, %xmm2 - pxor %xmm3, %xmm2 - movdqa 0x50(%r11), %xmm3 - pshufb %xmm1, %xmm3 - pxor %xmm2, %xmm3 - pshufb %xmm5, %xmm3 - - movdqa 0x60(%r11), %xmm2 - pshufb %xmm4, %xmm2 - pxor %xmm3, %xmm2 - movdqa 0x70(%r11), %xmm3 - pshufb %xmm1, %xmm3 - pxor %xmm2, %xmm3 - - add \$-16, %rdx - -.Lschedule_mangle_both: - movdqa (%r8,%r10),%xmm1 - pshufb %xmm1,%xmm3 - add \$-16, %r8 - and \$0x30, %r8 - movdqu %xmm3, (%rdx) - ret -.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle - -# -# Interface to OpenSSL -# -.globl ${PREFIX}_set_encrypt_key -.type ${PREFIX}_set_encrypt_key,\@function,3 -.align 16 -${PREFIX}_set_encrypt_key: -___ -$code.=<<___ if ($win64); - lea -0xb8(%rsp),%rsp - movaps %xmm6,0x10(%rsp) - movaps %xmm7,0x20(%rsp) - movaps %xmm8,0x30(%rsp) - movaps %xmm9,0x40(%rsp) - movaps %xmm10,0x50(%rsp) - movaps %xmm11,0x60(%rsp) - movaps %xmm12,0x70(%rsp) - movaps %xmm13,0x80(%rsp) - movaps %xmm14,0x90(%rsp) - movaps %xmm15,0xa0(%rsp) -.Lenc_key_body: -___ -$code.=<<___; - mov %esi,%eax - shr \$5,%eax - add \$5,%eax - mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; - - mov \$0,%ecx - mov \$0x30,%r8d - call _vpaes_schedule_core -___ -$code.=<<___ if ($win64); - movaps 0x10(%rsp),%xmm6 - movaps 0x20(%rsp),%xmm7 - movaps 0x30(%rsp),%xmm8 - movaps 0x40(%rsp),%xmm9 - movaps 0x50(%rsp),%xmm10 - movaps 0x60(%rsp),%xmm11 - movaps 0x70(%rsp),%xmm12 - movaps 0x80(%rsp),%xmm13 - movaps 0x90(%rsp),%xmm14 - movaps 0xa0(%rsp),%xmm15 - lea 0xb8(%rsp),%rsp -.Lenc_key_epilogue: -___ -$code.=<<___; - xor %eax,%eax - ret -.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key - -.globl ${PREFIX}_set_decrypt_key -.type ${PREFIX}_set_decrypt_key,\@function,3 -.align 16 -${PREFIX}_set_decrypt_key: -___ -$code.=<<___ if ($win64); - lea -0xb8(%rsp),%rsp - movaps %xmm6,0x10(%rsp) - movaps %xmm7,0x20(%rsp) - movaps %xmm8,0x30(%rsp) - movaps %xmm9,0x40(%rsp) - movaps %xmm10,0x50(%rsp) - movaps %xmm11,0x60(%rsp) - movaps %xmm12,0x70(%rsp) - movaps %xmm13,0x80(%rsp) - movaps %xmm14,0x90(%rsp) - movaps %xmm15,0xa0(%rsp) -.Ldec_key_body: -___ -$code.=<<___; - mov %esi,%eax - shr \$5,%eax - add \$5,%eax - mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; - shl \$4,%eax - lea 16(%rdx,%rax),%rdx - - mov \$1,%ecx - mov %esi,%r8d - shr \$1,%r8d - and \$32,%r8d - xor \$32,%r8d # nbits==192?0:32 - call _vpaes_schedule_core -___ -$code.=<<___ if ($win64); - movaps 0x10(%rsp),%xmm6 - movaps 0x20(%rsp),%xmm7 - movaps 0x30(%rsp),%xmm8 - movaps 0x40(%rsp),%xmm9 - movaps 0x50(%rsp),%xmm10 - movaps 0x60(%rsp),%xmm11 - movaps 0x70(%rsp),%xmm12 - movaps 0x80(%rsp),%xmm13 - movaps 0x90(%rsp),%xmm14 - movaps 0xa0(%rsp),%xmm15 - lea 0xb8(%rsp),%rsp -.Ldec_key_epilogue: -___ -$code.=<<___; - xor %eax,%eax - ret -.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key - -.globl ${PREFIX}_encrypt -.type ${PREFIX}_encrypt,\@function,3 -.align 16 -${PREFIX}_encrypt: -___ -$code.=<<___ if ($win64); - lea -0xb8(%rsp),%rsp - movaps %xmm6,0x10(%rsp) - movaps %xmm7,0x20(%rsp) - movaps %xmm8,0x30(%rsp) - movaps %xmm9,0x40(%rsp) - movaps %xmm10,0x50(%rsp) - movaps %xmm11,0x60(%rsp) - movaps %xmm12,0x70(%rsp) - movaps %xmm13,0x80(%rsp) - movaps %xmm14,0x90(%rsp) - movaps %xmm15,0xa0(%rsp) -.Lenc_body: -___ -$code.=<<___; - movdqu (%rdi),%xmm0 - call _vpaes_preheat - call _vpaes_encrypt_core - movdqu %xmm0,(%rsi) -___ -$code.=<<___ if ($win64); - movaps 0x10(%rsp),%xmm6 - movaps 0x20(%rsp),%xmm7 - movaps 0x30(%rsp),%xmm8 - movaps 0x40(%rsp),%xmm9 - movaps 0x50(%rsp),%xmm10 - movaps 0x60(%rsp),%xmm11 - movaps 0x70(%rsp),%xmm12 - movaps 0x80(%rsp),%xmm13 - movaps 0x90(%rsp),%xmm14 - movaps 0xa0(%rsp),%xmm15 - lea 0xb8(%rsp),%rsp -.Lenc_epilogue: -___ -$code.=<<___; - ret -.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt - -.globl ${PREFIX}_decrypt -.type ${PREFIX}_decrypt,\@function,3 -.align 16 -${PREFIX}_decrypt: -___ -$code.=<<___ if ($win64); - lea -0xb8(%rsp),%rsp - movaps %xmm6,0x10(%rsp) - movaps %xmm7,0x20(%rsp) - movaps %xmm8,0x30(%rsp) - movaps %xmm9,0x40(%rsp) - movaps %xmm10,0x50(%rsp) - movaps %xmm11,0x60(%rsp) - movaps %xmm12,0x70(%rsp) - movaps %xmm13,0x80(%rsp) - movaps %xmm14,0x90(%rsp) - movaps %xmm15,0xa0(%rsp) -.Ldec_body: -___ -$code.=<<___; - movdqu (%rdi),%xmm0 - call _vpaes_preheat - call _vpaes_decrypt_core - movdqu %xmm0,(%rsi) -___ -$code.=<<___ if ($win64); - movaps 0x10(%rsp),%xmm6 - movaps 0x20(%rsp),%xmm7 - movaps 0x30(%rsp),%xmm8 - movaps 0x40(%rsp),%xmm9 - movaps 0x50(%rsp),%xmm10 - movaps 0x60(%rsp),%xmm11 - movaps 0x70(%rsp),%xmm12 - movaps 0x80(%rsp),%xmm13 - movaps 0x90(%rsp),%xmm14 - movaps 0xa0(%rsp),%xmm15 - lea 0xb8(%rsp),%rsp -.Ldec_epilogue: -___ -$code.=<<___; - ret -.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt -___ -{ -my ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); -# void AES_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivp,const int enc); -$code.=<<___; -.globl ${PREFIX}_cbc_encrypt -.type ${PREFIX}_cbc_encrypt,\@function,6 -.align 16 -${PREFIX}_cbc_encrypt: - xchg $key,$len -___ -($len,$key)=($key,$len); -$code.=<<___; - sub \$16,$len - jc .Lcbc_abort -___ -$code.=<<___ if ($win64); - lea -0xb8(%rsp),%rsp - movaps %xmm6,0x10(%rsp) - movaps %xmm7,0x20(%rsp) - movaps %xmm8,0x30(%rsp) - movaps %xmm9,0x40(%rsp) - movaps %xmm10,0x50(%rsp) - movaps %xmm11,0x60(%rsp) - movaps %xmm12,0x70(%rsp) - movaps %xmm13,0x80(%rsp) - movaps %xmm14,0x90(%rsp) - movaps %xmm15,0xa0(%rsp) -.Lcbc_body: -___ -$code.=<<___; - movdqu ($ivp),%xmm6 # load IV - sub $inp,$out - call _vpaes_preheat - cmp \$0,${enc}d - je .Lcbc_dec_loop - jmp .Lcbc_enc_loop -.align 16 -.Lcbc_enc_loop: - movdqu ($inp),%xmm0 - pxor %xmm6,%xmm0 - call _vpaes_encrypt_core - movdqa %xmm0,%xmm6 - movdqu %xmm0,($out,$inp) - lea 16($inp),$inp - sub \$16,$len - jnc .Lcbc_enc_loop - jmp .Lcbc_done -.align 16 -.Lcbc_dec_loop: - movdqu ($inp),%xmm0 - movdqa %xmm0,%xmm7 - call _vpaes_decrypt_core - pxor %xmm6,%xmm0 - movdqa %xmm7,%xmm6 - movdqu %xmm0,($out,$inp) - lea 16($inp),$inp - sub \$16,$len - jnc .Lcbc_dec_loop -.Lcbc_done: - movdqu %xmm6,($ivp) # save IV -___ -$code.=<<___ if ($win64); - movaps 0x10(%rsp),%xmm6 - movaps 0x20(%rsp),%xmm7 - movaps 0x30(%rsp),%xmm8 - movaps 0x40(%rsp),%xmm9 - movaps 0x50(%rsp),%xmm10 - movaps 0x60(%rsp),%xmm11 - movaps 0x70(%rsp),%xmm12 - movaps 0x80(%rsp),%xmm13 - movaps 0x90(%rsp),%xmm14 - movaps 0xa0(%rsp),%xmm15 - lea 0xb8(%rsp),%rsp -.Lcbc_epilogue: -___ -$code.=<<___; -.Lcbc_abort: - ret -.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt -___ -} -$code.=<<___; -## -## _aes_preheat -## -## Fills register %r10 -> .aes_consts (so you can -fPIC) -## and %xmm9-%xmm15 as specified below. -## -.type _vpaes_preheat,\@abi-omnipotent -.align 16 -_vpaes_preheat: - lea .Lk_s0F(%rip), %r10 - movdqa -0x20(%r10), %xmm10 # .Lk_inv - movdqa -0x10(%r10), %xmm11 # .Lk_inv+16 - movdqa 0x00(%r10), %xmm9 # .Lk_s0F - movdqa 0x30(%r10), %xmm13 # .Lk_sb1 - movdqa 0x40(%r10), %xmm12 # .Lk_sb1+16 - movdqa 0x50(%r10), %xmm15 # .Lk_sb2 - movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16 - ret -.size _vpaes_preheat,.-_vpaes_preheat -######################################################## -## ## -## Constants ## -## ## -######################################################## -.type _vpaes_consts,\@object -.align 64 -_vpaes_consts: -.Lk_inv: # inv, inva - .quad 0x0E05060F0D080180, 0x040703090A0B0C02 - .quad 0x01040A060F0B0780, 0x030D0E0C02050809 - -.Lk_s0F: # s0F - .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F - -.Lk_ipt: # input transform (lo, hi) - .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 - .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 - -.Lk_sb1: # sb1u, sb1t - .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 - .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF -.Lk_sb2: # sb2u, sb2t - .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD - .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A -.Lk_sbo: # sbou, sbot - .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 - .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA - -.Lk_mc_forward: # mc_forward - .quad 0x0407060500030201, 0x0C0F0E0D080B0A09 - .quad 0x080B0A0904070605, 0x000302010C0F0E0D - .quad 0x0C0F0E0D080B0A09, 0x0407060500030201 - .quad 0x000302010C0F0E0D, 0x080B0A0904070605 - -.Lk_mc_backward:# mc_backward - .quad 0x0605040702010003, 0x0E0D0C0F0A09080B - .quad 0x020100030E0D0C0F, 0x0A09080B06050407 - .quad 0x0E0D0C0F0A09080B, 0x0605040702010003 - .quad 0x0A09080B06050407, 0x020100030E0D0C0F - -.Lk_sr: # sr - .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 - .quad 0x030E09040F0A0500, 0x0B06010C07020D08 - .quad 0x0F060D040B020900, 0x070E050C030A0108 - .quad 0x0B0E0104070A0D00, 0x0306090C0F020508 - -.Lk_rcon: # rcon - .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 - -.Lk_s63: # s63: all equal to 0x63 transformed - .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B - -.Lk_opt: # output transform - .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 - .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 - -.Lk_deskew: # deskew tables: inverts the sbox's "skew" - .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A - .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 - -## -## Decryption stuff -## Key schedule constants -## -.Lk_dksd: # decryption key schedule: invskew x*D - .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 - .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E -.Lk_dksb: # decryption key schedule: invskew x*B - .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 - .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 -.Lk_dkse: # decryption key schedule: invskew x*E + 0x63 - .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 - .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 -.Lk_dks9: # decryption key schedule: invskew x*9 - .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC - .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE - -## -## Decryption stuff -## Round function constants -## -.Lk_dipt: # decryption input transform - .quad 0x0F505B040B545F00, 0x154A411E114E451A - .quad 0x86E383E660056500, 0x12771772F491F194 - -.Lk_dsb9: # decryption sbox output *9*u, *9*t - .quad 0x851C03539A86D600, 0xCAD51F504F994CC9 - .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 -.Lk_dsbd: # decryption sbox output *D*u, *D*t - .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 - .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 -.Lk_dsbb: # decryption sbox output *B*u, *B*t - .quad 0xD022649296B44200, 0x602646F6B0F2D404 - .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B -.Lk_dsbe: # decryption sbox output *E*u, *E*t - .quad 0x46F2929626D4D000, 0x2242600464B4F6B0 - .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 -.Lk_dsbo: # decryption sbox final output - .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D - .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C -.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" -.align 64 -.size _vpaes_consts,.-_vpaes_consts -___ - -if ($win64) { -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - lea 16(%rax),%rsi # %xmm save area - lea 512($context),%rdi # &context.Xmm6 - mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0xb8(%rax),%rax # adjust stack pointer - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_${PREFIX}_set_encrypt_key - .rva .LSEH_end_${PREFIX}_set_encrypt_key - .rva .LSEH_info_${PREFIX}_set_encrypt_key - - .rva .LSEH_begin_${PREFIX}_set_decrypt_key - .rva .LSEH_end_${PREFIX}_set_decrypt_key - .rva .LSEH_info_${PREFIX}_set_decrypt_key - - .rva .LSEH_begin_${PREFIX}_encrypt - .rva .LSEH_end_${PREFIX}_encrypt - .rva .LSEH_info_${PREFIX}_encrypt - - .rva .LSEH_begin_${PREFIX}_decrypt - .rva .LSEH_end_${PREFIX}_decrypt - .rva .LSEH_info_${PREFIX}_decrypt - - .rva .LSEH_begin_${PREFIX}_cbc_encrypt - .rva .LSEH_end_${PREFIX}_cbc_encrypt - .rva .LSEH_info_${PREFIX}_cbc_encrypt - -.section .xdata -.align 8 -.LSEH_info_${PREFIX}_set_encrypt_key: - .byte 9,0,0,0 - .rva se_handler - .rva .Lenc_key_body,.Lenc_key_epilogue # HandlerData[] -.LSEH_info_${PREFIX}_set_decrypt_key: - .byte 9,0,0,0 - .rva se_handler - .rva .Ldec_key_body,.Ldec_key_epilogue # HandlerData[] -.LSEH_info_${PREFIX}_encrypt: - .byte 9,0,0,0 - .rva se_handler - .rva .Lenc_body,.Lenc_epilogue # HandlerData[] -.LSEH_info_${PREFIX}_decrypt: - .byte 9,0,0,0 - .rva se_handler - .rva .Ldec_body,.Ldec_epilogue # HandlerData[] -.LSEH_info_${PREFIX}_cbc_encrypt: - .byte 9,0,0,0 - .rva se_handler - .rva .Lcbc_body,.Lcbc_epilogue # HandlerData[] -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/alphacpuid.pl b/drivers/builtin_openssl2/crypto/alphacpuid.pl deleted file mode 100644 index 4b3cbb9827..0000000000 --- a/drivers/builtin_openssl2/crypto/alphacpuid.pl +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env perl -print <<'___'; -.text - -.set noat - -.globl OPENSSL_cpuid_setup -.ent OPENSSL_cpuid_setup -OPENSSL_cpuid_setup: - .frame $30,0,$26 - .prologue 0 - ret ($26) -.end OPENSSL_cpuid_setup - -.globl OPENSSL_wipe_cpu -.ent OPENSSL_wipe_cpu -OPENSSL_wipe_cpu: - .frame $30,0,$26 - .prologue 0 - clr $1 - clr $2 - clr $3 - clr $4 - clr $5 - clr $6 - clr $7 - clr $8 - clr $16 - clr $17 - clr $18 - clr $19 - clr $20 - clr $21 - clr $22 - clr $23 - clr $24 - clr $25 - clr $27 - clr $at - clr $29 - fclr $f0 - fclr $f1 - fclr $f10 - fclr $f11 - fclr $f12 - fclr $f13 - fclr $f14 - fclr $f15 - fclr $f16 - fclr $f17 - fclr $f18 - fclr $f19 - fclr $f20 - fclr $f21 - fclr $f22 - fclr $f23 - fclr $f24 - fclr $f25 - fclr $f26 - fclr $f27 - fclr $f28 - fclr $f29 - fclr $f30 - mov $sp,$0 - ret ($26) -.end OPENSSL_wipe_cpu - -.globl OPENSSL_atomic_add -.ent OPENSSL_atomic_add -OPENSSL_atomic_add: - .frame $30,0,$26 - .prologue 0 -1: ldl_l $0,0($16) - addl $0,$17,$1 - stl_c $1,0($16) - beq $1,1b - addl $0,$17,$0 - ret ($26) -.end OPENSSL_atomic_add - -.globl OPENSSL_rdtsc -.ent OPENSSL_rdtsc -OPENSSL_rdtsc: - .frame $30,0,$26 - .prologue 0 - rpcc $0 - ret ($26) -.end OPENSSL_rdtsc - -.globl OPENSSL_cleanse -.ent OPENSSL_cleanse -OPENSSL_cleanse: - .frame $30,0,$26 - .prologue 0 - beq $17,.Ldone - and $16,7,$0 - bic $17,7,$at - beq $at,.Little - beq $0,.Laligned - -.Little: - subq $0,8,$0 - ldq_u $1,0($16) - mov $16,$2 -.Lalign: - mskbl $1,$16,$1 - lda $16,1($16) - subq $17,1,$17 - addq $0,1,$0 - beq $17,.Lout - bne $0,.Lalign -.Lout: stq_u $1,0($2) - beq $17,.Ldone - bic $17,7,$at - beq $at,.Little - -.Laligned: - stq $31,0($16) - subq $17,8,$17 - lda $16,8($16) - bic $17,7,$at - bne $at,.Laligned - bne $17,.Little -.Ldone: ret ($26) -.end OPENSSL_cleanse -___ diff --git a/drivers/builtin_openssl2/crypto/arm_arch.h b/drivers/builtin_openssl2/crypto/arm_arch.h index b654371625..9d6e58880d 100644 --- a/drivers/builtin_openssl2/crypto/arm_arch.h +++ b/drivers/builtin_openssl2/crypto/arm_arch.h @@ -10,13 +10,24 @@ # define __ARMEL__ # endif # elif defined(__GNUC__) +# if defined(__aarch64__) +# define __ARM_ARCH__ 8 +# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define __ARMEB__ +# else +# define __ARMEL__ +# endif /* * Why doesn't gcc define __ARM_ARCH__? Instead it defines * bunch of below macros. See all_architectires[] table in * gcc/config/arm/arm.c. On a side note it defines * __ARMEL__/__ARMEB__ for little-/big-endian. */ -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ +# elif defined(__ARM_ARCH) +# define __ARM_ARCH__ __ARM_ARCH +# elif defined(__ARM_ARCH_8A__) +# define __ARM_ARCH__ 8 +# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 @@ -41,11 +52,27 @@ # include # endif +# if !defined(__ARM_MAX_ARCH__) +# define __ARM_MAX_ARCH__ __ARM_ARCH__ +# endif + +# if __ARM_MAX_ARCH__<__ARM_ARCH__ +# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__" +# elif __ARM_MAX_ARCH__!=__ARM_ARCH__ +# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__) +# error "can't build universal big-endian binary" +# endif +# endif + # if !__ASSEMBLER__ extern unsigned int OPENSSL_armcap_P; - -# define ARMV7_NEON (1<<0) -# define ARMV7_TICK (1<<1) # endif +# define ARMV7_NEON (1<<0) +# define ARMV7_TICK (1<<1) +# define ARMV8_AES (1<<2) +# define ARMV8_SHA1 (1<<3) +# define ARMV8_SHA256 (1<<4) +# define ARMV8_PMULL (1<<5) + #endif diff --git a/drivers/builtin_openssl2/crypto/armcap.c b/drivers/builtin_openssl2/crypto/armcap.c index 28522ea867..356fa15287 100644 --- a/drivers/builtin_openssl2/crypto/armcap.c +++ b/drivers/builtin_openssl2/crypto/armcap.c @@ -7,8 +7,18 @@ #include "arm_arch.h" -unsigned int OPENSSL_armcap_P; +unsigned int OPENSSL_armcap_P = 0; +#if __ARM_MAX_ARCH__<7 +void OPENSSL_cpuid_setup(void) +{ +} + +unsigned long OPENSSL_rdtsc(void) +{ + return 0; +} +#else static sigset_t all_masked; static sigjmp_buf ill_jmp; @@ -22,9 +32,13 @@ static void ill_handler(int sig) * ARM compilers support inline assembler... */ void _armv7_neon_probe(void); -unsigned int _armv7_tick(void); +void _armv8_aes_probe(void); +void _armv8_sha1_probe(void); +void _armv8_sha256_probe(void); +void _armv8_pmull_probe(void); +unsigned long _armv7_tick(void); -unsigned int OPENSSL_rdtsc(void) +unsigned long OPENSSL_rdtsc(void) { if (OPENSSL_armcap_P & ARMV7_TICK) return _armv7_tick(); @@ -32,9 +46,44 @@ unsigned int OPENSSL_rdtsc(void) return 0; } -#if defined(__GNUC__) && __GNUC__>=2 +/* + * Use a weak reference to getauxval() so we can use it if it is available but + * don't break the build if it is not. + */ +# if defined(__GNUC__) && __GNUC__>=2 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor)); -#endif +extern unsigned long getauxval(unsigned long type) __attribute__ ((weak)); +# else +static unsigned long (*getauxval) (unsigned long) = NULL; +# endif + +/* + * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas + * AArch64 used AT_HWCAP. + */ +# if defined(__arm__) || defined (__arm) +# define HWCAP 16 + /* AT_HWCAP */ +# define HWCAP_NEON (1 << 12) + +# define HWCAP_CE 26 + /* AT_HWCAP2 */ +# define HWCAP_CE_AES (1 << 0) +# define HWCAP_CE_PMULL (1 << 1) +# define HWCAP_CE_SHA1 (1 << 2) +# define HWCAP_CE_SHA256 (1 << 3) +# elif defined(__aarch64__) +# define HWCAP 16 + /* AT_HWCAP */ +# define HWCAP_NEON (1 << 1) + +# define HWCAP_CE HWCAP +# define HWCAP_CE_AES (1 << 3) +# define HWCAP_CE_PMULL (1 << 4) +# define HWCAP_CE_SHA1 (1 << 5) +# define HWCAP_CE_SHA256 (1 << 6) +# endif + void OPENSSL_cpuid_setup(void) { char *e; @@ -47,7 +96,7 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_armcap"))) { - OPENSSL_armcap_P = strtoul(e, NULL, 0); + OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0); return; } @@ -67,9 +116,42 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset); sigaction(SIGILL, &ill_act, &ill_oact); - if (sigsetjmp(ill_jmp, 1) == 0) { + if (getauxval != NULL) { + if (getauxval(HWCAP) & HWCAP_NEON) { + unsigned long hwcap = getauxval(HWCAP_CE); + + OPENSSL_armcap_P |= ARMV7_NEON; + + if (hwcap & HWCAP_CE_AES) + OPENSSL_armcap_P |= ARMV8_AES; + + if (hwcap & HWCAP_CE_PMULL) + OPENSSL_armcap_P |= ARMV8_PMULL; + + if (hwcap & HWCAP_CE_SHA1) + OPENSSL_armcap_P |= ARMV8_SHA1; + + if (hwcap & HWCAP_CE_SHA256) + OPENSSL_armcap_P |= ARMV8_SHA256; + } + } else if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_pmull_probe(); + OPENSSL_armcap_P |= ARMV8_PMULL | ARMV8_AES; + } else if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_aes_probe(); + OPENSSL_armcap_P |= ARMV8_AES; + } + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sha1_probe(); + OPENSSL_armcap_P |= ARMV8_SHA1; + } + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sha256_probe(); + OPENSSL_armcap_P |= ARMV8_SHA256; + } } if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_tick(); @@ -79,3 +161,4 @@ void OPENSSL_cpuid_setup(void) sigaction(SIGILL, &ill_oact, NULL); sigprocmask(SIG_SETMASK, &oset, NULL); } +#endif diff --git a/drivers/builtin_openssl2/crypto/armv4cpuid.S b/drivers/builtin_openssl2/crypto/armv4cpuid.S deleted file mode 100644 index 2d618deaa4..0000000000 --- a/drivers/builtin_openssl2/crypto/armv4cpuid.S +++ /dev/null @@ -1,154 +0,0 @@ -#include "arm_arch.h" - -.text -.code 32 - -.align 5 -.global _armv7_neon_probe -.type _armv7_neon_probe,%function -_armv7_neon_probe: - .word 0xf26ee1fe @ vorr q15,q15,q15 - .word 0xe12fff1e @ bx lr -.size _armv7_neon_probe,.-_armv7_neon_probe - -.global _armv7_tick -.type _armv7_tick,%function -_armv7_tick: - mrc p15,0,r0,c9,c13,0 - .word 0xe12fff1e @ bx lr -.size _armv7_tick,.-_armv7_tick - -.global OPENSSL_atomic_add -.type OPENSSL_atomic_add,%function -OPENSSL_atomic_add: -#if __ARM_ARCH__>=6 -.Ladd: ldrex r2,[r0] - add r3,r2,r1 - strex r2,r3,[r0] - cmp r2,#0 - bne .Ladd - mov r0,r3 - .word 0xe12fff1e @ bx lr -#else - stmdb sp!,{r4-r6,lr} - ldr r2,.Lspinlock - adr r3,.Lspinlock - mov r4,r0 - mov r5,r1 - add r6,r3,r2 @ &spinlock - b .+8 -.Lspin: bl sched_yield - mov r0,#-1 - swp r0,r0,[r6] - cmp r0,#0 - bne .Lspin - - ldr r2,[r4] - add r2,r2,r5 - str r2,[r4] - str r0,[r6] @ release spinlock - ldmia sp!,{r4-r6,lr} - tst lr,#1 - moveq pc,lr - .word 0xe12fff1e @ bx lr -#endif -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add - -.global OPENSSL_cleanse -.type OPENSSL_cleanse,%function -OPENSSL_cleanse: - eor ip,ip,ip - cmp r1,#7 - subhs r1,r1,#4 - bhs .Lot - cmp r1,#0 - beq .Lcleanse_done -.Little: - strb ip,[r0],#1 - subs r1,r1,#1 - bhi .Little - b .Lcleanse_done - -.Lot: tst r0,#3 - beq .Laligned - strb ip,[r0],#1 - sub r1,r1,#1 - b .Lot -.Laligned: - str ip,[r0],#4 - subs r1,r1,#4 - bhs .Laligned - adds r1,r1,#4 - bne .Little -.Lcleanse_done: - tst lr,#1 - moveq pc,lr - .word 0xe12fff1e @ bx lr -.size OPENSSL_cleanse,.-OPENSSL_cleanse - -.global OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,%function -OPENSSL_wipe_cpu: - ldr r0,.LOPENSSL_armcap - adr r1,.LOPENSSL_armcap - ldr r0,[r1,r0] - eor r2,r2,r2 - eor r3,r3,r3 - eor ip,ip,ip - tst r0,#1 - beq .Lwipe_done - .word 0xf3000150 @ veor q0, q0, q0 - .word 0xf3022152 @ veor q1, q1, q1 - .word 0xf3044154 @ veor q2, q2, q2 - .word 0xf3066156 @ veor q3, q3, q3 - .word 0xf34001f0 @ veor q8, q8, q8 - .word 0xf34221f2 @ veor q9, q9, q9 - .word 0xf34441f4 @ veor q10, q10, q10 - .word 0xf34661f6 @ veor q11, q11, q11 - .word 0xf34881f8 @ veor q12, q12, q12 - .word 0xf34aa1fa @ veor q13, q13, q13 - .word 0xf34cc1fc @ veor q14, q14, q14 - .word 0xf34ee1fe @ veor q15, q15, q15 -.Lwipe_done: - mov r0,sp - tst lr,#1 - moveq pc,lr - .word 0xe12fff1e @ bx lr -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu - -.global OPENSSL_instrument_bus -.type OPENSSL_instrument_bus,%function -OPENSSL_instrument_bus: - eor r0,r0,r0 - tst lr,#1 - moveq pc,lr - .word 0xe12fff1e @ bx lr -.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus - -.global OPENSSL_instrument_bus2 -.type OPENSSL_instrument_bus2,%function -OPENSSL_instrument_bus2: - eor r0,r0,r0 - tst lr,#1 - moveq pc,lr - .word 0xe12fff1e @ bx lr -.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 - -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-.LOPENSSL_armcap -#if __ARM_ARCH__>=6 -.align 5 -#else -.Lspinlock: -.word atomic_add_spinlock-.Lspinlock -.align 5 - -.data -.align 2 -atomic_add_spinlock: -.word 0 -#endif - -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P diff --git a/drivers/builtin_openssl2/crypto/asn1/a_gentm.c b/drivers/builtin_openssl2/crypto/asn1/a_gentm.c index 8b3ef716e3..fa76dcac91 100644 --- a/drivers/builtin_openssl2/crypto/asn1/a_gentm.c +++ b/drivers/builtin_openssl2/crypto/asn1/a_gentm.c @@ -65,6 +65,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" #if 0 @@ -117,7 +118,7 @@ ASN1_GENERALIZEDTIME *d2i_ASN1_GENERALIZEDTIME(ASN1_GENERALIZEDTIME **a, #endif -int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) +int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d) { static const int min[9] = { 0, 0, 1, 1, 0, 0, 0, 0, 0 }; static const int max[9] = { 99, 99, 12, 31, 23, 59, 59, 12, 59 }; @@ -139,6 +140,8 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) for (i = 0; i < 7; i++) { if ((i == 6) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) { i++; + if (tm) + tm->tm_sec = 0; break; } if ((a[o] < '0') || (a[o] > '9')) @@ -155,6 +158,31 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + switch (i) { + case 0: + tm->tm_year = n * 100 - 1900; + break; + case 1: + tm->tm_year += n; + break; + case 2: + tm->tm_mon = n - 1; + break; + case 3: + tm->tm_mday = n; + break; + case 4: + tm->tm_hour = n; + break; + case 5: + tm->tm_min = n; + break; + case 6: + tm->tm_sec = n; + break; + } + } } /* * Optional fractional seconds: decimal point followed by one or more @@ -174,6 +202,7 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { + int offsign = a[o] == '-' ? -1 : 1, offset = 0; o++; if (o + 4 > l) goto err; @@ -187,9 +216,17 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) n = (n * 10) + a[o] - '0'; if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + if (i == 7) + offset = n * 3600; + else if (i == 8) + offset += n * 60; + } o++; } - } else { + if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign)) + return 0; + } else if (a[o]) { /* Missing time zone information. */ goto err; } @@ -198,6 +235,11 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) return (0); } +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *d) +{ + return asn1_generalizedtime_to_tm(NULL, d); +} + int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str) { ASN1_GENERALIZEDTIME t; diff --git a/drivers/builtin_openssl2/crypto/asn1/a_time.c b/drivers/builtin_openssl2/crypto/asn1/a_time.c index c81f0de5a9..fcb2d565cd 100644 --- a/drivers/builtin_openssl2/crypto/asn1/a_time.c +++ b/drivers/builtin_openssl2/crypto/asn1/a_time.c @@ -66,6 +66,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" IMPLEMENT_ASN1_MSTRING(ASN1_TIME, B_ASN1_TIME) @@ -196,3 +197,32 @@ int ASN1_TIME_set_string(ASN1_TIME *s, const char *str) return 1; } + +static int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *t) +{ + if (t == NULL) { + time_t now_t; + time(&now_t); + if (OPENSSL_gmtime(&now_t, tm)) + return 1; + return 0; + } + + if (t->type == V_ASN1_UTCTIME) + return asn1_utctime_to_tm(tm, t); + else if (t->type == V_ASN1_GENERALIZEDTIME) + return asn1_generalizedtime_to_tm(tm, t); + + return 0; +} + +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to) +{ + struct tm tm_from, tm_to; + if (!asn1_time_to_tm(&tm_from, from)) + return 0; + if (!asn1_time_to_tm(&tm_to, to)) + return 0; + return OPENSSL_gmtime_diff(pday, psec, &tm_from, &tm_to); +} diff --git a/drivers/builtin_openssl2/crypto/asn1/a_utctm.c b/drivers/builtin_openssl2/crypto/asn1/a_utctm.c index 179de6d395..724a10be4e 100644 --- a/drivers/builtin_openssl2/crypto/asn1/a_utctm.c +++ b/drivers/builtin_openssl2/crypto/asn1/a_utctm.c @@ -61,6 +61,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" #if 0 int i2d_ASN1_UTCTIME(ASN1_UTCTIME *a, unsigned char **pp) @@ -109,7 +110,7 @@ ASN1_UTCTIME *d2i_ASN1_UTCTIME(ASN1_UTCTIME **a, unsigned char **pp, #endif -int ASN1_UTCTIME_check(ASN1_UTCTIME *d) +int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d) { static const int min[8] = { 0, 1, 1, 0, 0, 0, 0, 0 }; static const int max[8] = { 99, 12, 31, 23, 59, 59, 12, 59 }; @@ -127,6 +128,8 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) for (i = 0; i < 6; i++) { if ((i == 5) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) { i++; + if (tm) + tm->tm_sec = 0; break; } if ((a[o] < '0') || (a[o] > '9')) @@ -143,10 +146,33 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + switch (i) { + case 0: + tm->tm_year = n < 50 ? n + 100 : n; + break; + case 1: + tm->tm_mon = n - 1; + break; + case 2: + tm->tm_mday = n; + break; + case 3: + tm->tm_hour = n; + break; + case 4: + tm->tm_min = n; + break; + case 5: + tm->tm_sec = n; + break; + } + } } if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { + int offsign = a[o] == '-' ? -1 : 1, offset = 0; o++; if (o + 4 > l) goto err; @@ -160,12 +186,25 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) n = (n * 10) + a[o] - '0'; if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + if (i == 6) + offset = n * 3600; + else if (i == 7) + offset += n * 60; + } o++; } + if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign)) + return 0; } - return (o == l); + return o == l; err: - return (0); + return 0; +} + +int ASN1_UTCTIME_check(const ASN1_UTCTIME *d) +{ + return asn1_utctime_to_tm(NULL, d); } int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str) @@ -249,43 +288,26 @@ ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t) { - struct tm *tm; - struct tm data; - int offset; - int year; - -#define g2(p) (((p)[0]-'0')*10+(p)[1]-'0') - - if (s->data[12] == 'Z') - offset = 0; - else { - offset = g2(s->data + 13) * 60 + g2(s->data + 15); - if (s->data[12] == '-') - offset = -offset; - } + struct tm stm, ttm; + int day, sec; - t -= offset * 60; /* FIXME: may overflow in extreme cases */ + if (!asn1_utctime_to_tm(&stm, s)) + return -2; - tm = OPENSSL_gmtime(&t, &data); - /* - * NB: -1, 0, 1 already valid return values so use -2 to indicate error. - */ - if (tm == NULL) + if (!OPENSSL_gmtime(&t, &ttm)) return -2; -#define return_cmp(a,b) if ((a)<(b)) return -1; else if ((a)>(b)) return 1 - year = g2(s->data); - if (year < 50) - year += 100; - return_cmp(year, tm->tm_year); - return_cmp(g2(s->data + 2) - 1, tm->tm_mon); - return_cmp(g2(s->data + 4), tm->tm_mday); - return_cmp(g2(s->data + 6), tm->tm_hour); - return_cmp(g2(s->data + 8), tm->tm_min); - return_cmp(g2(s->data + 10), tm->tm_sec); -#undef g2 -#undef return_cmp + if (!OPENSSL_gmtime_diff(&day, &sec, &ttm, &stm)) + return -2; + if (day > 0) + return 1; + if (day < 0) + return -1; + if (sec > 0) + return 1; + if (sec < 0) + return -1; return 0; } diff --git a/drivers/builtin_openssl2/crypto/asn1/ameth_lib.c b/drivers/builtin_openssl2/crypto/asn1/ameth_lib.c index 45f3f40560..5389c04347 100644 --- a/drivers/builtin_openssl2/crypto/asn1/ameth_lib.c +++ b/drivers/builtin_openssl2/crypto/asn1/ameth_lib.c @@ -68,6 +68,7 @@ extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[]; extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[]; extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; +extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth; extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth; extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth; extern const EVP_PKEY_ASN1_METHOD cmac_asn1_meth; @@ -92,7 +93,10 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = { &eckey_asn1_meth, #endif &hmac_asn1_meth, - &cmac_asn1_meth + &cmac_asn1_meth, +#ifndef OPENSSL_NO_DH + &dhx_asn1_meth +#endif }; typedef int sk_cmp_fn_type(const char *const *a, const char *const *b); @@ -460,3 +464,21 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, { ameth->pkey_ctrl = pkey_ctrl; } + +void EVP_PKEY_asn1_set_item(EVP_PKEY_ASN1_METHOD *ameth, + int (*item_verify) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *a, + ASN1_BIT_STRING *sig, + EVP_PKEY *pkey), + int (*item_sign) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *alg1, + X509_ALGOR *alg2, + ASN1_BIT_STRING *sig)) +{ + ameth->item_sign = item_sign; + ameth->item_verify = item_verify; +} diff --git a/drivers/builtin_openssl2/crypto/asn1/asn1_locl.h b/drivers/builtin_openssl2/crypto/asn1/asn1_locl.h index 9f5ed84727..4c004fab9a 100644 --- a/drivers/builtin_openssl2/crypto/asn1/asn1_locl.h +++ b/drivers/builtin_openssl2/crypto/asn1/asn1_locl.h @@ -59,6 +59,9 @@ /* Internal ASN1 structures and functions: not for application use */ +int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d); +int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d); + /* ASN1 print context structure */ struct asn1_pctx_st { diff --git a/drivers/builtin_openssl2/crypto/asn1/charmap.pl b/drivers/builtin_openssl2/crypto/asn1/charmap.pl deleted file mode 100644 index 25ebf2c205..0000000000 --- a/drivers/builtin_openssl2/crypto/asn1/charmap.pl +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/local/bin/perl -w - -# Written by Dr Stephen N Henson (steve@openssl.org). -# Licensed under the terms of the OpenSSL license. - -use strict; - -my ($i, @arr); - -# Set up an array with the type of ASCII characters -# Each set bit represents a character property. - -# RFC2253 character properties -my $RFC2253_ESC = 1; # Character escaped with \ -my $ESC_CTRL = 2; # Escaped control character -# These are used with RFC1779 quoting using " -my $NOESC_QUOTE = 8; # Not escaped if quoted -my $PSTRING_CHAR = 0x10; # Valid PrintableString character -my $RFC2253_FIRST_ESC = 0x20; # Escaped with \ if first character -my $RFC2253_LAST_ESC = 0x40; # Escaped with \ if last character - -for($i = 0; $i < 128; $i++) { - # Set the RFC2253 escape characters (control) - $arr[$i] = 0; - if(($i < 32) || ($i > 126)) { - $arr[$i] |= $ESC_CTRL; - } - - # Some PrintableString characters - if( ( ( $i >= ord("a")) && ( $i <= ord("z")) ) - || ( ( $i >= ord("A")) && ( $i <= ord("Z")) ) - || ( ( $i >= ord("0")) && ( $i <= ord("9")) ) ) { - $arr[$i] |= $PSTRING_CHAR; - } -} - -# Now setup the rest - -# Remaining RFC2253 escaped characters - -$arr[ord(" ")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC | $RFC2253_LAST_ESC; -$arr[ord("#")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC; - -$arr[ord(",")] |= $NOESC_QUOTE | $RFC2253_ESC; -$arr[ord("+")] |= $NOESC_QUOTE | $RFC2253_ESC; -$arr[ord("\"")] |= $RFC2253_ESC; -$arr[ord("\\")] |= $RFC2253_ESC; -$arr[ord("<")] |= $NOESC_QUOTE | $RFC2253_ESC; -$arr[ord(">")] |= $NOESC_QUOTE | $RFC2253_ESC; -$arr[ord(";")] |= $NOESC_QUOTE | $RFC2253_ESC; - -# Remaining PrintableString characters - -$arr[ord(" ")] |= $PSTRING_CHAR; -$arr[ord("'")] |= $PSTRING_CHAR; -$arr[ord("(")] |= $PSTRING_CHAR; -$arr[ord(")")] |= $PSTRING_CHAR; -$arr[ord("+")] |= $PSTRING_CHAR; -$arr[ord(",")] |= $PSTRING_CHAR; -$arr[ord("-")] |= $PSTRING_CHAR; -$arr[ord(".")] |= $PSTRING_CHAR; -$arr[ord("/")] |= $PSTRING_CHAR; -$arr[ord(":")] |= $PSTRING_CHAR; -$arr[ord("=")] |= $PSTRING_CHAR; -$arr[ord("?")] |= $PSTRING_CHAR; - -# Now generate the C code - -print <issuerUID) { + if (BIO_printf(bp, "%8sIssuer Unique ID: ", "") <= 0) + goto err; + if (!X509_signature_dump(bp, ci->issuerUID, 12)) + goto err; + } + if (ci->subjectUID) { + if (BIO_printf(bp, "%8sSubject Unique ID: ", "") <= 0) + goto err; + if (!X509_signature_dump(bp, ci->subjectUID, 12)) + goto err; + } + } + if (!(cflag & X509_FLAG_NO_EXTENSIONS)) X509V3_extensions_print(bp, "X509v3 extensions", ci->extensions, cflag, 8); diff --git a/drivers/builtin_openssl2/crypto/asn1/tasn_dec.c b/drivers/builtin_openssl2/crypto/asn1/tasn_dec.c index 9256049d15..5a507967c8 100644 --- a/drivers/builtin_openssl2/crypto/asn1/tasn_dec.c +++ b/drivers/builtin_openssl2/crypto/asn1/tasn_dec.c @@ -717,7 +717,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, long plen; char cst, inf, free_cont = 0; const unsigned char *p; - BUF_MEM buf; + BUF_MEM buf = { 0, NULL, 0 }; const unsigned char *cont = NULL; long len; if (!pval) { @@ -793,7 +793,6 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, } else { len = p - cont + plen; p += plen; - buf.data = NULL; } } else if (cst) { if (utype == V_ASN1_NULL || utype == V_ASN1_BOOLEAN @@ -802,9 +801,9 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ASN1_R_TYPE_NOT_PRIMITIVE); return 0; } - buf.length = 0; - buf.max = 0; - buf.data = NULL; + + /* Free any returned 'buf' content */ + free_cont = 1; /* * Should really check the internal tags are correct but some things * may get this wrong. The relevant specs say that constructed string @@ -812,18 +811,16 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, * So instead just check for UNIVERSAL class and ignore the tag. */ if (!asn1_collect(&buf, &p, plen, inf, -1, V_ASN1_UNIVERSAL, 0)) { - free_cont = 1; goto err; } len = buf.length; /* Append a final null to string */ if (!BUF_MEM_grow_clean(&buf, len + 1)) { ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ERR_R_MALLOC_FAILURE); - return 0; + goto err; } buf.data[len] = 0; cont = (const unsigned char *)buf.data; - free_cont = 1; } else { cont = p; len = plen; @@ -831,6 +828,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, } /* We now have content length and type: translate into a structure */ + /* asn1_ex_c2i may reuse allocated buffer, and so sets free_cont to 0 */ if (!asn1_ex_c2i(pval, cont, len, utype, &free_cont, it)) goto err; diff --git a/drivers/builtin_openssl2/crypto/asn1/x_crl.c b/drivers/builtin_openssl2/crypto/asn1/x_crl.c index e258c714b2..027950330d 100644 --- a/drivers/builtin_openssl2/crypto/asn1/x_crl.c +++ b/drivers/builtin_openssl2/crypto/asn1/x_crl.c @@ -58,8 +58,8 @@ #include #include "cryptlib.h" -#include "asn1_locl.h" #include +#include "asn1_locl.h" #include #include @@ -341,6 +341,8 @@ ASN1_SEQUENCE_ref(X509_CRL, crl_cb, CRYPTO_LOCK_X509_CRL) = { IMPLEMENT_ASN1_FUNCTIONS(X509_REVOKED) +IMPLEMENT_ASN1_DUP_FUNCTION(X509_REVOKED) + IMPLEMENT_ASN1_FUNCTIONS(X509_CRL_INFO) IMPLEMENT_ASN1_FUNCTIONS(X509_CRL) diff --git a/drivers/builtin_openssl2/crypto/asn1/x_x509.c b/drivers/builtin_openssl2/crypto/asn1/x_x509.c index bcd9166c35..e2cac83694 100644 --- a/drivers/builtin_openssl2/crypto/asn1/x_x509.c +++ b/drivers/builtin_openssl2/crypto/asn1/x_x509.c @@ -207,3 +207,23 @@ int i2d_X509_AUX(X509 *a, unsigned char **pp) length += i2d_X509_CERT_AUX(a->aux, pp); return length; } + +int i2d_re_X509_tbs(X509 *x, unsigned char **pp) +{ + x->cert_info->enc.modified = 1; + return i2d_X509_CINF(x->cert_info, pp); +} + +void X509_get0_signature(ASN1_BIT_STRING **psig, X509_ALGOR **palg, + const X509 *x) +{ + if (psig) + *psig = x->signature; + if (palg) + *palg = x->sig_alg; +} + +int X509_get_signature_nid(const X509 *x) +{ + return OBJ_obj2nid(x->sig_alg->algorithm); +} diff --git a/drivers/builtin_openssl2/crypto/asn1/x_x509a.c b/drivers/builtin_openssl2/crypto/asn1/x_x509a.c index 76bbc1370f..ad93592a71 100644 --- a/drivers/builtin_openssl2/crypto/asn1/x_x509a.c +++ b/drivers/builtin_openssl2/crypto/asn1/x_x509a.c @@ -163,10 +163,13 @@ int X509_add1_reject_object(X509 *x, ASN1_OBJECT *obj) if (!(objtmp = OBJ_dup(obj))) return 0; if (!(aux = aux_get(x))) - return 0; + goto err; if (!aux->reject && !(aux->reject = sk_ASN1_OBJECT_new_null())) - return 0; + goto err; return sk_ASN1_OBJECT_push(aux->reject, objtmp); + err: + ASN1_OBJECT_free(objtmp); + return 0; } void X509_trust_clear(X509 *x) diff --git a/drivers/builtin_openssl2/crypto/bf/asm/bf-586.pl b/drivers/builtin_openssl2/crypto/bf/asm/bf-586.pl deleted file mode 100644 index b74cfbafd4..0000000000 --- a/drivers/builtin_openssl2/crypto/bf/asm/bf-586.pl +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/local/bin/perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; - -&asm_init($ARGV[0],"bf-586.pl",$ARGV[$#ARGV] eq "386"); - -$BF_ROUNDS=16; -$BF_OFF=($BF_ROUNDS+2)*4; -$L="edi"; -$R="esi"; -$P="ebp"; -$tmp1="eax"; -$tmp2="ebx"; -$tmp3="ecx"; -$tmp4="edx"; - -&BF_encrypt("BF_encrypt",1); -&BF_encrypt("BF_decrypt",0); -&cbc("BF_cbc_encrypt","BF_encrypt","BF_decrypt",1,4,5,3,-1,-1); -&asm_finish(); - -sub BF_encrypt - { - local($name,$enc)=@_; - - &function_begin_B($name,""); - - &comment(""); - - &push("ebp"); - &push("ebx"); - &mov($tmp2,&wparam(0)); - &mov($P,&wparam(1)); - &push("esi"); - &push("edi"); - - &comment("Load the 2 words"); - &mov($L,&DWP(0,$tmp2,"",0)); - &mov($R,&DWP(4,$tmp2,"",0)); - - &xor( $tmp1, $tmp1); - - # encrypting part - - if ($enc) - { - &mov($tmp2,&DWP(0,$P,"",0)); - &xor( $tmp3, $tmp3); - - &xor($L,$tmp2); - for ($i=0; $i<$BF_ROUNDS; $i+=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i+1,$R,$L,$P,$tmp1,$tmp2,$tmp3,$tmp4,1); - - &comment(""); - &comment("Round ".sprintf("%d",$i+1)); - &BF_ENCRYPT($i+2,$L,$R,$P,$tmp1,$tmp2,$tmp3,$tmp4,1); - } - # &mov($tmp1,&wparam(0)); In last loop - &mov($tmp4,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - } - else - { - &mov($tmp2,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - &xor( $tmp3, $tmp3); - - &xor($L,$tmp2); - for ($i=$BF_ROUNDS; $i>0; $i-=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i,$R,$L,$P,$tmp1,$tmp2,$tmp3,$tmp4,0); - &comment(""); - &comment("Round ".sprintf("%d",$i-1)); - &BF_ENCRYPT($i-1,$L,$R,$P,$tmp1,$tmp2,$tmp3,$tmp4,0); - } - # &mov($tmp1,&wparam(0)); In last loop - &mov($tmp4,&DWP(0,$P,"",0)); - } - - &xor($R,$tmp4); - &mov(&DWP(4,$tmp1,"",0),$L); - - &mov(&DWP(0,$tmp1,"",0),$R); - &function_end($name); - } - -sub BF_ENCRYPT - { - local($i,$L,$R,$P,$tmp1,$tmp2,$tmp3,$tmp4,$enc)=@_; - - &mov( $tmp4, &DWP(&n2a($i*4),$P,"",0)); # for next round - - &mov( $tmp2, $R); - &xor( $L, $tmp4); - - &shr( $tmp2, 16); - &mov( $tmp4, $R); - - &movb( &LB($tmp1), &HB($tmp2)); # A - &and( $tmp2, 0xff); # B - - &movb( &LB($tmp3), &HB($tmp4)); # C - &and( $tmp4, 0xff); # D - - &mov( $tmp1, &DWP(&n2a($BF_OFF+0x0000),$P,$tmp1,4)); - &mov( $tmp2, &DWP(&n2a($BF_OFF+0x0400),$P,$tmp2,4)); - - &add( $tmp2, $tmp1); - &mov( $tmp1, &DWP(&n2a($BF_OFF+0x0800),$P,$tmp3,4)); - - &xor( $tmp2, $tmp1); - &mov( $tmp4, &DWP(&n2a($BF_OFF+0x0C00),$P,$tmp4,4)); - - &add( $tmp2, $tmp4); - if (($enc && ($i != 16)) || ((!$enc) && ($i != 1))) - { &xor( $tmp1, $tmp1); } - else - { - &comment("Load parameter 0 ($i) enc=$enc"); - &mov($tmp1,&wparam(0)); - } # In last loop - - &xor( $L, $tmp2); - # delay - } - -sub n2a - { - sprintf("%d",$_[0]); - } - diff --git a/drivers/builtin_openssl2/crypto/bf/asm/bf-686.pl b/drivers/builtin_openssl2/crypto/bf/asm/bf-686.pl deleted file mode 100644 index 8e4c25f598..0000000000 --- a/drivers/builtin_openssl2/crypto/bf/asm/bf-686.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/local/bin/perl - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; - -&asm_init($ARGV[0],"bf-686.pl"); - -$BF_ROUNDS=16; -$BF_OFF=($BF_ROUNDS+2)*4; -$L="ecx"; -$R="edx"; -$P="edi"; -$tot="esi"; -$tmp1="eax"; -$tmp2="ebx"; -$tmp3="ebp"; - -&des_encrypt("BF_encrypt",1); -&des_encrypt("BF_decrypt",0); -&cbc("BF_cbc_encrypt","BF_encrypt","BF_decrypt",1,4,5,3,-1,-1); - -&asm_finish(); - -&file_end(); - -sub des_encrypt - { - local($name,$enc)=@_; - - &function_begin($name,""); - - &comment(""); - &comment("Load the 2 words"); - &mov("eax",&wparam(0)); - &mov($L,&DWP(0,"eax","",0)); - &mov($R,&DWP(4,"eax","",0)); - - &comment(""); - &comment("P pointer, s and enc flag"); - &mov($P,&wparam(1)); - - &xor( $tmp1, $tmp1); - &xor( $tmp2, $tmp2); - - # encrypting part - - if ($enc) - { - &xor($L,&DWP(0,$P,"",0)); - for ($i=0; $i<$BF_ROUNDS; $i+=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i+1,$R,$L,$P,$tot,$tmp1,$tmp2,$tmp3); - - &comment(""); - &comment("Round ".sprintf("%d",$i+1)); - &BF_ENCRYPT($i+2,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3); - } - &xor($R,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - - &mov("eax",&wparam(0)); - &mov(&DWP(0,"eax","",0),$R); - &mov(&DWP(4,"eax","",0),$L); - &function_end_A($name); - } - else - { - &xor($L,&DWP(($BF_ROUNDS+1)*4,$P,"",0)); - for ($i=$BF_ROUNDS; $i>0; $i-=2) - { - &comment(""); - &comment("Round $i"); - &BF_ENCRYPT($i,$R,$L,$P,$tot,$tmp1,$tmp2,$tmp3); - &comment(""); - &comment("Round ".sprintf("%d",$i-1)); - &BF_ENCRYPT($i-1,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3); - } - &xor($R,&DWP(0,$P,"",0)); - - &mov("eax",&wparam(0)); - &mov(&DWP(0,"eax","",0),$R); - &mov(&DWP(4,"eax","",0),$L); - &function_end_A($name); - } - - &function_end_B($name); - } - -sub BF_ENCRYPT - { - local($i,$L,$R,$P,$tot,$tmp1,$tmp2,$tmp3)=@_; - - &rotr( $R, 16); - &mov( $tot, &DWP(&n2a($i*4),$P,"",0)); - - &movb( &LB($tmp1), &HB($R)); - &movb( &LB($tmp2), &LB($R)); - - &rotr( $R, 16); - &xor( $L, $tot); - - &mov( $tot, &DWP(&n2a($BF_OFF+0x0000),$P,$tmp1,4)); - &mov( $tmp3, &DWP(&n2a($BF_OFF+0x0400),$P,$tmp2,4)); - - &movb( &LB($tmp1), &HB($R)); - &movb( &LB($tmp2), &LB($R)); - - &add( $tot, $tmp3); - &mov( $tmp1, &DWP(&n2a($BF_OFF+0x0800),$P,$tmp1,4)); # delay - - &xor( $tot, $tmp1); - &mov( $tmp3, &DWP(&n2a($BF_OFF+0x0C00),$P,$tmp2,4)); - - &add( $tot, $tmp3); - &xor( $tmp1, $tmp1); - - &xor( $L, $tot); - # delay - } - -sub n2a - { - sprintf("%d",$_[0]); - } - diff --git a/drivers/builtin_openssl2/crypto/bf/bftest.c b/drivers/builtin_openssl2/crypto/bf/bftest.c deleted file mode 100644 index 0b008f091c..0000000000 --- a/drivers/builtin_openssl2/crypto/bf/bftest.c +++ /dev/null @@ -1,538 +0,0 @@ -/* crypto/bf/bftest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * This has been a quickly hacked 'ideatest.c'. When I add tests for other - * RC2 modes, more of the code will be uncommented. - */ - -#include -#include -#include -#include /* To see if OPENSSL_NO_BF is defined */ - -#include "../e_os.h" - -#ifdef OPENSSL_NO_BF -int main(int argc, char *argv[]) -{ - printf("No BF support\n"); - return (0); -} -#else -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -static char *bf_key[2] = { - "abcdefghijklmnopqrstuvwxyz", - "Who is John Galt?" -}; - -/* big endian */ -static BF_LONG bf_plain[2][2] = { - {0x424c4f57L, 0x46495348L}, - {0xfedcba98L, 0x76543210L} -}; - -static BF_LONG bf_cipher[2][2] = { - {0x324ed0feL, 0xf413a203L}, - {0xcc91732bL, 0x8022f684L} -}; - -/************/ - -/* Lets use the DES test vectors :-) */ -# define NUM_TESTS 34 -static unsigned char ecb_data[NUM_TESTS][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}, - {0x7C, 0xA1, 0x10, 0x45, 0x4A, 0x1A, 0x6E, 0x57}, - {0x01, 0x31, 0xD9, 0x61, 0x9D, 0xC1, 0x37, 0x6E}, - {0x07, 0xA1, 0x13, 0x3E, 0x4A, 0x0B, 0x26, 0x86}, - {0x38, 0x49, 0x67, 0x4C, 0x26, 0x02, 0x31, 0x9E}, - {0x04, 0xB9, 0x15, 0xBA, 0x43, 0xFE, 0xB5, 0xB6}, - {0x01, 0x13, 0xB9, 0x70, 0xFD, 0x34, 0xF2, 0xCE}, - {0x01, 0x70, 0xF1, 0x75, 0x46, 0x8F, 0xB5, 0xE6}, - {0x43, 0x29, 0x7F, 0xAD, 0x38, 0xE3, 0x73, 0xFE}, - {0x07, 0xA7, 0x13, 0x70, 0x45, 0xDA, 0x2A, 0x16}, - {0x04, 0x68, 0x91, 0x04, 0xC2, 0xFD, 0x3B, 0x2F}, - {0x37, 0xD0, 0x6B, 0xB5, 0x16, 0xCB, 0x75, 0x46}, - {0x1F, 0x08, 0x26, 0x0D, 0x1A, 0xC2, 0x46, 0x5E}, - {0x58, 0x40, 0x23, 0x64, 0x1A, 0xBA, 0x61, 0x76}, - {0x02, 0x58, 0x16, 0x16, 0x46, 0x29, 0xB0, 0x07}, - {0x49, 0x79, 0x3E, 0xBC, 0x79, 0xB3, 0x25, 0x8F}, - {0x4F, 0xB0, 0x5E, 0x15, 0x15, 0xAB, 0x73, 0xA7}, - {0x49, 0xE9, 0x5D, 0x6D, 0x4C, 0xA2, 0x29, 0xBF}, - {0x01, 0x83, 0x10, 0xDC, 0x40, 0x9B, 0x26, 0xD6}, - {0x1C, 0x58, 0x7F, 0x1C, 0x13, 0x92, 0x4F, 0xEF}, - {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01}, - {0x1F, 0x1F, 0x1F, 0x1F, 0x0E, 0x0E, 0x0E, 0x0E}, - {0xE0, 0xFE, 0xE0, 0xFE, 0xF1, 0xFE, 0xF1, 0xFE}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10} -}; - -static unsigned char plain_data[NUM_TESTS][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0xA1, 0xD6, 0xD0, 0x39, 0x77, 0x67, 0x42}, - {0x5C, 0xD5, 0x4C, 0xA8, 0x3D, 0xEF, 0x57, 0xDA}, - {0x02, 0x48, 0xD4, 0x38, 0x06, 0xF6, 0x71, 0x72}, - {0x51, 0x45, 0x4B, 0x58, 0x2D, 0xDF, 0x44, 0x0A}, - {0x42, 0xFD, 0x44, 0x30, 0x59, 0x57, 0x7F, 0xA2}, - {0x05, 0x9B, 0x5E, 0x08, 0x51, 0xCF, 0x14, 0x3A}, - {0x07, 0x56, 0xD8, 0xE0, 0x77, 0x47, 0x61, 0xD2}, - {0x76, 0x25, 0x14, 0xB8, 0x29, 0xBF, 0x48, 0x6A}, - {0x3B, 0xDD, 0x11, 0x90, 0x49, 0x37, 0x28, 0x02}, - {0x26, 0x95, 0x5F, 0x68, 0x35, 0xAF, 0x60, 0x9A}, - {0x16, 0x4D, 0x5E, 0x40, 0x4F, 0x27, 0x52, 0x32}, - {0x6B, 0x05, 0x6E, 0x18, 0x75, 0x9F, 0x5C, 0xCA}, - {0x00, 0x4B, 0xD6, 0xEF, 0x09, 0x17, 0x60, 0x62}, - {0x48, 0x0D, 0x39, 0x00, 0x6E, 0xE7, 0x62, 0xF2}, - {0x43, 0x75, 0x40, 0xC8, 0x69, 0x8F, 0x3C, 0xFA}, - {0x07, 0x2D, 0x43, 0xA0, 0x77, 0x07, 0x52, 0x92}, - {0x02, 0xFE, 0x55, 0x77, 0x81, 0x17, 0xF1, 0x2A}, - {0x1D, 0x9D, 0x5C, 0x50, 0x18, 0xF7, 0x28, 0xC2}, - {0x30, 0x55, 0x32, 0x28, 0x6D, 0x6F, 0x29, 0x5A}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} -}; - -static unsigned char cipher_data[NUM_TESTS][8] = { - {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}, - {0x51, 0x86, 0x6F, 0xD5, 0xB8, 0x5E, 0xCB, 0x8A}, - {0x7D, 0x85, 0x6F, 0x9A, 0x61, 0x30, 0x63, 0xF2}, - {0x24, 0x66, 0xDD, 0x87, 0x8B, 0x96, 0x3C, 0x9D}, - {0x61, 0xF9, 0xC3, 0x80, 0x22, 0x81, 0xB0, 0x96}, - {0x7D, 0x0C, 0xC6, 0x30, 0xAF, 0xDA, 0x1E, 0xC7}, - {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}, - {0x0A, 0xCE, 0xAB, 0x0F, 0xC6, 0xA0, 0xA2, 0x8D}, - {0x59, 0xC6, 0x82, 0x45, 0xEB, 0x05, 0x28, 0x2B}, - {0xB1, 0xB8, 0xCC, 0x0B, 0x25, 0x0F, 0x09, 0xA0}, - {0x17, 0x30, 0xE5, 0x77, 0x8B, 0xEA, 0x1D, 0xA4}, - {0xA2, 0x5E, 0x78, 0x56, 0xCF, 0x26, 0x51, 0xEB}, - {0x35, 0x38, 0x82, 0xB1, 0x09, 0xCE, 0x8F, 0x1A}, - {0x48, 0xF4, 0xD0, 0x88, 0x4C, 0x37, 0x99, 0x18}, - {0x43, 0x21, 0x93, 0xB7, 0x89, 0x51, 0xFC, 0x98}, - {0x13, 0xF0, 0x41, 0x54, 0xD6, 0x9D, 0x1A, 0xE5}, - {0x2E, 0xED, 0xDA, 0x93, 0xFF, 0xD3, 0x9C, 0x79}, - {0xD8, 0x87, 0xE0, 0x39, 0x3C, 0x2D, 0xA6, 0xE3}, - {0x5F, 0x99, 0xD0, 0x4F, 0x5B, 0x16, 0x39, 0x69}, - {0x4A, 0x05, 0x7A, 0x3B, 0x24, 0xD3, 0x97, 0x7B}, - {0x45, 0x20, 0x31, 0xC1, 0xE4, 0xFA, 0xDA, 0x8E}, - {0x75, 0x55, 0xAE, 0x39, 0xF5, 0x9B, 0x87, 0xBD}, - {0x53, 0xC5, 0x5F, 0x9C, 0xB4, 0x9F, 0xC0, 0x19}, - {0x7A, 0x8E, 0x7B, 0xFA, 0x93, 0x7E, 0x89, 0xA3}, - {0xCF, 0x9C, 0x5D, 0x7A, 0x49, 0x86, 0xAD, 0xB5}, - {0xD1, 0xAB, 0xB2, 0x90, 0x65, 0x8B, 0xC7, 0x78}, - {0x55, 0xCB, 0x37, 0x74, 0xD1, 0x3E, 0xF2, 0x01}, - {0xFA, 0x34, 0xEC, 0x48, 0x47, 0xB2, 0x68, 0xB2}, - {0xA7, 0x90, 0x79, 0x51, 0x08, 0xEA, 0x3C, 0xAE}, - {0xC3, 0x9E, 0x07, 0x2D, 0x9F, 0xAC, 0x63, 0x1D}, - {0x01, 0x49, 0x33, 0xE0, 0xCD, 0xAF, 0xF6, 0xE4}, - {0xF2, 0x1E, 0x9A, 0x77, 0xB7, 0x1C, 0x49, 0xBC}, - {0x24, 0x59, 0x46, 0x88, 0x57, 0x54, 0x36, 0x9A}, - {0x6B, 0x5C, 0x5A, 0x9C, 0x5D, 0x9E, 0x0A, 0x5A}, -}; - -static unsigned char cbc_key[16] = { - 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, - 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87 -}; -static unsigned char cbc_iv[8] = - { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }; -static char cbc_data[40] = "7654321 Now is the time for "; -static unsigned char cbc_ok[32] = { - 0x6B, 0x77, 0xB4, 0xD6, 0x30, 0x06, 0xDE, 0xE6, - 0x05, 0xB1, 0x56, 0xE2, 0x74, 0x03, 0x97, 0x93, - 0x58, 0xDE, 0xB9, 0xE7, 0x15, 0x46, 0x16, 0xD9, - 0x59, 0xF1, 0x65, 0x2B, 0xD5, 0xFF, 0x92, 0xCC -}; - -static unsigned char cfb64_ok[] = { - 0xE7, 0x32, 0x14, 0xA2, 0x82, 0x21, 0x39, 0xCA, - 0xF2, 0x6E, 0xCF, 0x6D, 0x2E, 0xB9, 0xE7, 0x6E, - 0x3D, 0xA3, 0xDE, 0x04, 0xD1, 0x51, 0x72, 0x00, - 0x51, 0x9D, 0x57, 0xA6, 0xC3 -}; - -static unsigned char ofb64_ok[] = { - 0xE7, 0x32, 0x14, 0xA2, 0x82, 0x21, 0x39, 0xCA, - 0x62, 0xB3, 0x43, 0xCC, 0x5B, 0x65, 0x58, 0x73, - 0x10, 0xDD, 0x90, 0x8D, 0x0C, 0x24, 0x1B, 0x22, - 0x63, 0xC2, 0xCF, 0x80, 0xDA -}; - -# define KEY_TEST_NUM 25 -static unsigned char key_test[KEY_TEST_NUM] = { - 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, - 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f, - 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88 -}; - -static unsigned char key_data[8] = - { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 }; - -static unsigned char key_out[KEY_TEST_NUM][8] = { - {0xF9, 0xAD, 0x59, 0x7C, 0x49, 0xDB, 0x00, 0x5E}, - {0xE9, 0x1D, 0x21, 0xC1, 0xD9, 0x61, 0xA6, 0xD6}, - {0xE9, 0xC2, 0xB7, 0x0A, 0x1B, 0xC6, 0x5C, 0xF3}, - {0xBE, 0x1E, 0x63, 0x94, 0x08, 0x64, 0x0F, 0x05}, - {0xB3, 0x9E, 0x44, 0x48, 0x1B, 0xDB, 0x1E, 0x6E}, - {0x94, 0x57, 0xAA, 0x83, 0xB1, 0x92, 0x8C, 0x0D}, - {0x8B, 0xB7, 0x70, 0x32, 0xF9, 0x60, 0x62, 0x9D}, - {0xE8, 0x7A, 0x24, 0x4E, 0x2C, 0xC8, 0x5E, 0x82}, - {0x15, 0x75, 0x0E, 0x7A, 0x4F, 0x4E, 0xC5, 0x77}, - {0x12, 0x2B, 0xA7, 0x0B, 0x3A, 0xB6, 0x4A, 0xE0}, - {0x3A, 0x83, 0x3C, 0x9A, 0xFF, 0xC5, 0x37, 0xF6}, - {0x94, 0x09, 0xDA, 0x87, 0xA9, 0x0F, 0x6B, 0xF2}, - {0x88, 0x4F, 0x80, 0x62, 0x50, 0x60, 0xB8, 0xB4}, - {0x1F, 0x85, 0x03, 0x1C, 0x19, 0xE1, 0x19, 0x68}, - {0x79, 0xD9, 0x37, 0x3A, 0x71, 0x4C, 0xA3, 0x4F}, - {0x93, 0x14, 0x28, 0x87, 0xEE, 0x3B, 0xE1, 0x5C}, - {0x03, 0x42, 0x9E, 0x83, 0x8C, 0xE2, 0xD1, 0x4B}, - {0xA4, 0x29, 0x9E, 0x27, 0x46, 0x9F, 0xF6, 0x7B}, - {0xAF, 0xD5, 0xAE, 0xD1, 0xC1, 0xBC, 0x96, 0xA8}, - {0x10, 0x85, 0x1C, 0x0E, 0x38, 0x58, 0xDA, 0x9F}, - {0xE6, 0xF5, 0x1E, 0xD7, 0x9B, 0x9D, 0xB2, 0x1F}, - {0x64, 0xA6, 0xE1, 0x4A, 0xFD, 0x36, 0xB4, 0x6F}, - {0x80, 0xC7, 0xD7, 0xD4, 0x5A, 0x54, 0x79, 0xAD}, - {0x05, 0x04, 0x4B, 0x62, 0xFA, 0x52, 0xD0, 0x80}, -}; - -static int test(void); -static int print_test_data(void); -int main(int argc, char *argv[]) -{ - int ret; - - if (argc > 1) - ret = print_test_data(); - else - ret = test(); - -# ifdef OPENSSL_SYS_NETWARE - if (ret) - printf("ERROR: %d\n", ret); -# endif - EXIT(ret); - return (0); -} - -static int print_test_data(void) -{ - unsigned int i, j; - - printf("ecb test data\n"); - printf("key bytes\t\tclear bytes\t\tcipher bytes\n"); - for (i = 0; i < NUM_TESTS; i++) { - for (j = 0; j < 8; j++) - printf("%02X", ecb_data[i][j]); - printf("\t"); - for (j = 0; j < 8; j++) - printf("%02X", plain_data[i][j]); - printf("\t"); - for (j = 0; j < 8; j++) - printf("%02X", cipher_data[i][j]); - printf("\n"); - } - - printf("set_key test data\n"); - printf("data[8]= "); - for (j = 0; j < 8; j++) - printf("%02X", key_data[j]); - printf("\n"); - for (i = 0; i < KEY_TEST_NUM - 1; i++) { - printf("c="); - for (j = 0; j < 8; j++) - printf("%02X", key_out[i][j]); - printf(" k[%2u]=", i + 1); - for (j = 0; j < i + 1; j++) - printf("%02X", key_test[j]); - printf("\n"); - } - - printf("\nchaining mode test data\n"); - printf("key[16] = "); - for (j = 0; j < 16; j++) - printf("%02X", cbc_key[j]); - printf("\niv[8] = "); - for (j = 0; j < 8; j++) - printf("%02X", cbc_iv[j]); - printf("\ndata[%d] = '%s'", (int)strlen(cbc_data) + 1, cbc_data); - printf("\ndata[%d] = ", (int)strlen(cbc_data) + 1); - for (j = 0; j < strlen(cbc_data) + 1; j++) - printf("%02X", cbc_data[j]); - printf("\n"); - printf("cbc cipher text\n"); - printf("cipher[%d]= ", 32); - for (j = 0; j < 32; j++) - printf("%02X", cbc_ok[j]); - printf("\n"); - - printf("cfb64 cipher text\n"); - printf("cipher[%d]= ", (int)strlen(cbc_data) + 1); - for (j = 0; j < strlen(cbc_data) + 1; j++) - printf("%02X", cfb64_ok[j]); - printf("\n"); - - printf("ofb64 cipher text\n"); - printf("cipher[%d]= ", (int)strlen(cbc_data) + 1); - for (j = 0; j < strlen(cbc_data) + 1; j++) - printf("%02X", ofb64_ok[j]); - printf("\n"); - return (0); -} - -static int test(void) -{ - unsigned char cbc_in[40], cbc_out[40], iv[8]; - int i, n, err = 0; - BF_KEY key; - BF_LONG data[2]; - unsigned char out[8]; - BF_LONG len; - -# ifdef CHARSET_EBCDIC - ebcdic2ascii(cbc_data, cbc_data, strlen(cbc_data)); -# endif - - printf("testing blowfish in raw ecb mode\n"); - for (n = 0; n < 2; n++) { -# ifdef CHARSET_EBCDIC - ebcdic2ascii(bf_key[n], bf_key[n], strlen(bf_key[n])); -# endif - BF_set_key(&key, strlen(bf_key[n]), (unsigned char *)bf_key[n]); - - data[0] = bf_plain[n][0]; - data[1] = bf_plain[n][1]; - BF_encrypt(data, &key); - if (memcmp(&(bf_cipher[n][0]), &(data[0]), 8) != 0) { - printf("BF_encrypt error encrypting\n"); - printf("got :"); - for (i = 0; i < 2; i++) - printf("%08lX ", (unsigned long)data[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 2; i++) - printf("%08lX ", (unsigned long)bf_cipher[n][i]); - err = 1; - printf("\n"); - } - - BF_decrypt(&(data[0]), &key); - if (memcmp(&(bf_plain[n][0]), &(data[0]), 8) != 0) { - printf("BF_encrypt error decrypting\n"); - printf("got :"); - for (i = 0; i < 2; i++) - printf("%08lX ", (unsigned long)data[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 2; i++) - printf("%08lX ", (unsigned long)bf_plain[n][i]); - printf("\n"); - err = 1; - } - } - - printf("testing blowfish in ecb mode\n"); - - for (n = 0; n < NUM_TESTS; n++) { - BF_set_key(&key, 8, ecb_data[n]); - - BF_ecb_encrypt(&(plain_data[n][0]), out, &key, BF_ENCRYPT); - if (memcmp(&(cipher_data[n][0]), out, 8) != 0) { - printf("BF_ecb_encrypt blowfish error encrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", cipher_data[n][i]); - err = 1; - printf("\n"); - } - - BF_ecb_encrypt(out, out, &key, BF_DECRYPT); - if (memcmp(&(plain_data[n][0]), out, 8) != 0) { - printf("BF_ecb_encrypt error decrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", plain_data[n][i]); - printf("\n"); - err = 1; - } - } - - printf("testing blowfish set_key\n"); - for (n = 1; n < KEY_TEST_NUM; n++) { - BF_set_key(&key, n, key_test); - BF_ecb_encrypt(key_data, out, &key, BF_ENCRYPT); - /* mips-sgi-irix6.5-gcc vv -mabi=64 bug workaround */ - if (memcmp(out, &(key_out[i = n - 1][0]), 8) != 0) { - printf("blowfish setkey error\n"); - err = 1; - } - } - - printf("testing blowfish in cbc mode\n"); - len = strlen(cbc_data) + 1; - - BF_set_key(&key, 16, cbc_key); - memset(cbc_in, 0, sizeof cbc_in); - memset(cbc_out, 0, sizeof cbc_out); - memcpy(iv, cbc_iv, sizeof iv); - BF_cbc_encrypt((unsigned char *)cbc_data, cbc_out, len, - &key, iv, BF_ENCRYPT); - if (memcmp(cbc_out, cbc_ok, 32) != 0) { - err = 1; - printf("BF_cbc_encrypt encrypt error\n"); - for (i = 0; i < 32; i++) - printf("0x%02X,", cbc_out[i]); - } - memcpy(iv, cbc_iv, 8); - BF_cbc_encrypt(cbc_out, cbc_in, len, &key, iv, BF_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen(cbc_data) + 1) != 0) { - printf("BF_cbc_encrypt decrypt error\n"); - err = 1; - } - - printf("testing blowfish in cfb64 mode\n"); - - BF_set_key(&key, 16, cbc_key); - memset(cbc_in, 0, 40); - memset(cbc_out, 0, 40); - memcpy(iv, cbc_iv, 8); - n = 0; - BF_cfb64_encrypt((unsigned char *)cbc_data, cbc_out, (long)13, - &key, iv, &n, BF_ENCRYPT); - BF_cfb64_encrypt((unsigned char *)&(cbc_data[13]), &(cbc_out[13]), - len - 13, &key, iv, &n, BF_ENCRYPT); - if (memcmp(cbc_out, cfb64_ok, (int)len) != 0) { - err = 1; - printf("BF_cfb64_encrypt encrypt error\n"); - for (i = 0; i < (int)len; i++) - printf("0x%02X,", cbc_out[i]); - } - n = 0; - memcpy(iv, cbc_iv, 8); - BF_cfb64_encrypt(cbc_out, cbc_in, 17, &key, iv, &n, BF_DECRYPT); - BF_cfb64_encrypt(&(cbc_out[17]), &(cbc_in[17]), len - 17, - &key, iv, &n, BF_DECRYPT); - if (memcmp(cbc_in, cbc_data, (int)len) != 0) { - printf("BF_cfb64_encrypt decrypt error\n"); - err = 1; - } - - printf("testing blowfish in ofb64\n"); - - BF_set_key(&key, 16, cbc_key); - memset(cbc_in, 0, 40); - memset(cbc_out, 0, 40); - memcpy(iv, cbc_iv, 8); - n = 0; - BF_ofb64_encrypt((unsigned char *)cbc_data, cbc_out, (long)13, &key, iv, - &n); - BF_ofb64_encrypt((unsigned char *)&(cbc_data[13]), &(cbc_out[13]), - len - 13, &key, iv, &n); - if (memcmp(cbc_out, ofb64_ok, (int)len) != 0) { - err = 1; - printf("BF_ofb64_encrypt encrypt error\n"); - for (i = 0; i < (int)len; i++) - printf("0x%02X,", cbc_out[i]); - } - n = 0; - memcpy(iv, cbc_iv, 8); - BF_ofb64_encrypt(cbc_out, cbc_in, 17, &key, iv, &n); - BF_ofb64_encrypt(&(cbc_out[17]), &(cbc_in[17]), len - 17, &key, iv, &n); - if (memcmp(cbc_in, cbc_data, (int)len) != 0) { - printf("BF_ofb64_encrypt decrypt error\n"); - err = 1; - } - - return (err); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/bio/b_dump.c b/drivers/builtin_openssl2/crypto/bio/b_dump.c index 87c8162c5e..ccf0e287c4 100644 --- a/drivers/builtin_openssl2/crypto/bio/b_dump.c +++ b/drivers/builtin_openssl2/crypto/bio/b_dump.c @@ -181,3 +181,28 @@ int BIO_dump_indent(BIO *bp, const char *s, int len, int indent) { return BIO_dump_indent_cb(write_bio, bp, s, len, indent); } + +int BIO_hex_string(BIO *out, int indent, int width, unsigned char *data, + int datalen) +{ + int i, j = 0; + + if (datalen < 1) + return 1; + + for (i = 0; i < datalen - 1; i++) { + if (i && !j) + BIO_printf(out, "%*s", indent, ""); + + BIO_printf(out, "%02X:", data[i]); + + j = (j + 1) % width; + if (!j) + BIO_printf(out, "\n"); + } + + if (i && !j) + BIO_printf(out, "%*s", indent, ""); + BIO_printf(out, "%02X", data[datalen - 1]); + return 1; +} diff --git a/drivers/builtin_openssl2/crypto/bio/b_sock.c b/drivers/builtin_openssl2/crypto/bio/b_sock.c index bda882c40b..5bad0a2bad 100644 --- a/drivers/builtin_openssl2/crypto/bio/b_sock.c +++ b/drivers/builtin_openssl2/crypto/bio/b_sock.c @@ -225,13 +225,17 @@ int BIO_get_port(const char *str, unsigned short *port_ptr) int BIO_sock_error(int sock) { int j, i; - int size; + union { + size_t s; + int i; + } size; # if defined(OPENSSL_SYS_BEOS_R5) return 0; # endif - size = sizeof(int); + /* heuristic way to adapt for platforms that expect 64-bit optlen */ + size.s = 0, size.i = sizeof(j); /* * Note: under Windows the third parameter is of type (char *) whereas * under other systems it is (void *) if you don't have a cast it will diff --git a/drivers/builtin_openssl2/crypto/bio/bio_err.c b/drivers/builtin_openssl2/crypto/bio/bio_err.c index 6dd6162fc9..d9007aa3d3 100644 --- a/drivers/builtin_openssl2/crypto/bio/bio_err.c +++ b/drivers/builtin_openssl2/crypto/bio/bio_err.c @@ -1,6 +1,6 @@ /* crypto/bio/bio_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/drivers/builtin_openssl2/crypto/bio/bss_acpt.c b/drivers/builtin_openssl2/crypto/bio/bss_acpt.c index d08292c3e9..4a5e39bd38 100644 --- a/drivers/builtin_openssl2/crypto/bio/bss_acpt.c +++ b/drivers/builtin_openssl2/crypto/bio/bss_acpt.c @@ -445,7 +445,7 @@ static int acpt_puts(BIO *bp, const char *str) return (ret); } -BIO *BIO_new_accept(char *str) +BIO *BIO_new_accept(const char *str) { BIO *ret; diff --git a/drivers/builtin_openssl2/crypto/bio/bss_conn.c b/drivers/builtin_openssl2/crypto/bio/bss_conn.c index ed214ca6f1..7d15ad29dc 100644 --- a/drivers/builtin_openssl2/crypto/bio/bss_conn.c +++ b/drivers/builtin_openssl2/crypto/bio/bss_conn.c @@ -594,7 +594,7 @@ static int conn_puts(BIO *bp, const char *str) return (ret); } -BIO *BIO_new_connect(char *str) +BIO *BIO_new_connect(const char *str) { BIO *ret; diff --git a/drivers/builtin_openssl2/crypto/bio/bss_dgram.c b/drivers/builtin_openssl2/crypto/bio/bss_dgram.c index d12b83a8dc..bdd7bf88ea 100644 --- a/drivers/builtin_openssl2/crypto/bio/bss_dgram.c +++ b/drivers/builtin_openssl2/crypto/bio/bss_dgram.c @@ -65,7 +65,7 @@ #include #ifndef OPENSSL_NO_DGRAM -# if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS) +# if defined(OPENSSL_SYS_VMS) # include # endif @@ -80,6 +80,10 @@ # define IP_MTU 14 /* linux is lame */ # endif +# if OPENSSL_USE_IPV6 && !defined(IPPROTO_IPV6) +# define IPPROTO_IPV6 41 /* windows is lame */ +# endif + # if defined(__FreeBSD__) && defined(IN6_IS_ADDR_V4MAPPED) /* Standard definition causes type-punning problems. */ # undef IN6_IS_ADDR_V4MAPPED @@ -496,8 +500,8 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) int *ip; struct sockaddr *to = NULL; bio_dgram_data *data = NULL; -# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU)) int sockopt_val = 0; +# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU)) socklen_t sockopt_len; /* assume that system supporting IP_MTU is * modern enough to define socklen_t */ socklen_t addr_len; @@ -880,6 +884,61 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; break; # endif + case BIO_CTRL_DGRAM_SET_DONT_FRAG: + sockopt_val = num ? 1 : 0; + + switch (data->peer.sa.sa_family) { + case AF_INET: +# if defined(IP_DONTFRAG) + if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAG, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_LINUX) && defined(IP_MTU_DISCOVER) && defined (IP_PMTUDISC_PROBE) + if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT), + (ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_WINDOWS) && defined(IP_DONTFRAGMENT) + if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAGMENT, + (const char *)&sockopt_val, + sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# else + ret = -1; +# endif + break; +# if OPENSSL_USE_IPV6 + case AF_INET6: +# if defined(IPV6_DONTFRAG) + if ((ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_DONTFRAG, + (const void *)&sockopt_val, + sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_LINUX) && defined(IPV6_MTUDISCOVER) + if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT), + (ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# else + ret = -1; +# endif + break; +# endif + default: + ret = -1; + break; + } + break; case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD: ret = dgram_get_mtu_overhead(data); break; @@ -1993,11 +2052,22 @@ int BIO_dgram_non_fatal_error(int err) static void get_current_time(struct timeval *t) { -# ifdef OPENSSL_SYS_WIN32 - struct _timeb tb; - _ftime(&tb); - t->tv_sec = (long)tb.time; - t->tv_usec = (long)tb.millitm * 1000; +# if defined(_WIN32) + SYSTEMTIME st; + union { + unsigned __int64 ul; + FILETIME ft; + } now; + + GetSystemTime(&st); + SystemTimeToFileTime(&st, &now.ft); +# ifdef __MINGW32__ + now.ul -= 116444736000000000ULL; +# else + now.ul -= 116444736000000000UI64; /* re-bias to 1/1/1970 */ +# endif + t->tv_sec = (long)(now.ul / 10000000); + t->tv_usec = ((int)(now.ul % 10000000)) / 10; # elif defined(OPENSSL_SYS_VMS) struct timeb tb; ftime(&tb); diff --git a/drivers/builtin_openssl2/crypto/bio/bss_fd.c b/drivers/builtin_openssl2/crypto/bio/bss_fd.c index ccef578154..5f4e34481b 100644 --- a/drivers/builtin_openssl2/crypto/bio/bss_fd.c +++ b/drivers/builtin_openssl2/crypto/bio/bss_fd.c @@ -63,9 +63,27 @@ #if defined(OPENSSL_NO_POSIX_IO) /* - * One can argue that one should implement dummy placeholder for - * BIO_s_fd here... + * Dummy placeholder for BIO_s_fd... */ +BIO *BIO_new_fd(int fd, int close_flag) +{ + return NULL; +} + +int BIO_fd_non_fatal_error(int err) +{ + return 0; +} + +int BIO_fd_should_retry(int i) +{ + return 0; +} + +BIO_METHOD *BIO_s_fd(void) +{ + return NULL; +} #else /* * As for unconditional usage of "UPLINK" interface in this module. diff --git a/drivers/builtin_openssl2/crypto/bio/bss_mem.c b/drivers/builtin_openssl2/crypto/bio/bss_mem.c index d190765dc2..b0394a960d 100644 --- a/drivers/builtin_openssl2/crypto/bio/bss_mem.c +++ b/drivers/builtin_openssl2/crypto/bio/bss_mem.c @@ -91,7 +91,8 @@ BIO_METHOD *BIO_s_mem(void) return (&mem_method); } -BIO *BIO_new_mem_buf(void *buf, int len) + +BIO *BIO_new_mem_buf(const void *buf, int len) { BIO *ret; BUF_MEM *b; @@ -105,7 +106,8 @@ BIO *BIO_new_mem_buf(void *buf, int len) if (!(ret = BIO_new(BIO_s_mem()))) return NULL; b = (BUF_MEM *)ret->ptr; - b->data = buf; + /* Cast away const and trust in the MEM_RDONLY flag. */ + b->data = (void *)buf; b->length = sz; b->max = sz; ret->flags |= BIO_FLAGS_MEM_RDONLY; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/alpha-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/alpha-mont.pl deleted file mode 100644 index 03596e2014..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/alpha-mont.pl +++ /dev/null @@ -1,321 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# On 21264 RSA sign performance improves by 70/35/20/15 percent for -# 512/1024/2048/4096 bit key lengths. This is against vendor compiler -# instructed to '-tune host' code with in-line assembler. Other -# benchmarks improve by 15-20%. To anchor it to something else, the -# code provides approximately the same performance per GHz as AMD64. -# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x -# difference. - -# int bn_mul_mont( -$rp="a0"; # BN_ULONG *rp, -$ap="a1"; # const BN_ULONG *ap, -$bp="a2"; # const BN_ULONG *bp, -$np="a3"; # const BN_ULONG *np, -$n0="a4"; # const BN_ULONG *n0, -$num="a5"; # int num); - -$lo0="t0"; -$hi0="t1"; -$lo1="t2"; -$hi1="t3"; -$aj="t4"; -$bi="t5"; -$nj="t6"; -$tp="t7"; -$alo="t8"; -$ahi="t9"; -$nlo="t10"; -$nhi="t11"; -$tj="t12"; -$i="s3"; -$j="s4"; -$m1="s5"; - -$code=<<___; -#ifdef __linux__ -#include -#else -#include -#include -#endif - -.text - -.set noat -.set noreorder - -.globl bn_mul_mont -.align 5 -.ent bn_mul_mont -bn_mul_mont: - lda sp,-48(sp) - stq ra,0(sp) - stq s3,8(sp) - stq s4,16(sp) - stq s5,24(sp) - stq fp,32(sp) - mov sp,fp - .mask 0x0400f000,-48 - .frame fp,48,ra - .prologue 0 - - .align 4 - .set reorder - sextl $num,$num - mov 0,v0 - cmplt $num,4,AT - bne AT,.Lexit - - ldq $hi0,0($ap) # ap[0] - s8addq $num,16,AT - ldq $aj,8($ap) - subq sp,AT,sp - ldq $bi,0($bp) # bp[0] - lda AT,-4096(zero) # mov -4096,AT - ldq $n0,0($n0) - and sp,AT,sp - - mulq $hi0,$bi,$lo0 - ldq $hi1,0($np) # np[0] - umulh $hi0,$bi,$hi0 - ldq $nj,8($np) - - mulq $lo0,$n0,$m1 - - mulq $hi1,$m1,$lo1 - umulh $hi1,$m1,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,AT - addq $hi1,AT,$hi1 - - mulq $aj,$bi,$alo - mov 2,$j - umulh $aj,$bi,$ahi - mov sp,$tp - - mulq $nj,$m1,$nlo - s8addq $j,$ap,$aj - umulh $nj,$m1,$nhi - s8addq $j,$np,$nj -.align 4 -.L1st: - .set noreorder - ldq $aj,0($aj) - addl $j,1,$j - ldq $nj,0($nj) - lda $tp,8($tp) - - addq $alo,$hi0,$lo0 - mulq $aj,$bi,$alo - cmpult $lo0,$hi0,AT - addq $nlo,$hi1,$lo1 - - mulq $nj,$m1,$nlo - addq $ahi,AT,$hi0 - cmpult $lo1,$hi1,v0 - cmplt $j,$num,$tj - - umulh $aj,$bi,$ahi - addq $nhi,v0,$hi1 - addq $lo1,$lo0,$lo1 - s8addq $j,$ap,$aj - - umulh $nj,$m1,$nhi - cmpult $lo1,$lo0,v0 - addq $hi1,v0,$hi1 - s8addq $j,$np,$nj - - stq $lo1,-8($tp) - nop - unop - bne $tj,.L1st - .set reorder - - addq $alo,$hi0,$lo0 - addq $nlo,$hi1,$lo1 - cmpult $lo0,$hi0,AT - cmpult $lo1,$hi1,v0 - addq $ahi,AT,$hi0 - addq $nhi,v0,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,v0 - addq $hi1,v0,$hi1 - - stq $lo1,0($tp) - - addq $hi1,$hi0,$hi1 - cmpult $hi1,$hi0,AT - stq $hi1,8($tp) - stq AT,16($tp) - - mov 1,$i -.align 4 -.Louter: - s8addq $i,$bp,$bi - ldq $hi0,0($ap) - ldq $aj,8($ap) - ldq $bi,0($bi) - ldq $hi1,0($np) - ldq $nj,8($np) - ldq $tj,0(sp) - - mulq $hi0,$bi,$lo0 - umulh $hi0,$bi,$hi0 - - addq $lo0,$tj,$lo0 - cmpult $lo0,$tj,AT - addq $hi0,AT,$hi0 - - mulq $lo0,$n0,$m1 - - mulq $hi1,$m1,$lo1 - umulh $hi1,$m1,$hi1 - - addq $lo1,$lo0,$lo1 - cmpult $lo1,$lo0,AT - mov 2,$j - addq $hi1,AT,$hi1 - - mulq $aj,$bi,$alo - mov sp,$tp - umulh $aj,$bi,$ahi - - mulq $nj,$m1,$nlo - s8addq $j,$ap,$aj - umulh $nj,$m1,$nhi -.align 4 -.Linner: - .set noreorder - ldq $tj,8($tp) #L0 - nop #U1 - ldq $aj,0($aj) #L1 - s8addq $j,$np,$nj #U0 - - ldq $nj,0($nj) #L0 - nop #U1 - addq $alo,$hi0,$lo0 #L1 - lda $tp,8($tp) - - mulq $aj,$bi,$alo #U1 - cmpult $lo0,$hi0,AT #L0 - addq $nlo,$hi1,$lo1 #L1 - addl $j,1,$j - - mulq $nj,$m1,$nlo #U1 - addq $ahi,AT,$hi0 #L0 - addq $lo0,$tj,$lo0 #L1 - cmpult $lo1,$hi1,v0 #U0 - - umulh $aj,$bi,$ahi #U1 - cmpult $lo0,$tj,AT #L0 - addq $lo1,$lo0,$lo1 #L1 - addq $nhi,v0,$hi1 #U0 - - umulh $nj,$m1,$nhi #U1 - s8addq $j,$ap,$aj #L0 - cmpult $lo1,$lo0,v0 #L1 - cmplt $j,$num,$tj #U0 # borrow $tj - - addq $hi0,AT,$hi0 #L0 - addq $hi1,v0,$hi1 #U1 - stq $lo1,-8($tp) #L1 - bne $tj,.Linner #U0 - .set reorder - - ldq $tj,8($tp) - addq $alo,$hi0,$lo0 - addq $nlo,$hi1,$lo1 - cmpult $lo0,$hi0,AT - cmpult $lo1,$hi1,v0 - addq $ahi,AT,$hi0 - addq $nhi,v0,$hi1 - - addq $lo0,$tj,$lo0 - cmpult $lo0,$tj,AT - addq $hi0,AT,$hi0 - - ldq $tj,16($tp) - addq $lo1,$lo0,$j - cmpult $j,$lo0,v0 - addq $hi1,v0,$hi1 - - addq $hi1,$hi0,$lo1 - stq $j,0($tp) - cmpult $lo1,$hi0,$hi1 - addq $lo1,$tj,$lo1 - cmpult $lo1,$tj,AT - addl $i,1,$i - addq $hi1,AT,$hi1 - stq $lo1,8($tp) - cmplt $i,$num,$tj # borrow $tj - stq $hi1,16($tp) - bne $tj,.Louter - - s8addq $num,sp,$tj # &tp[num] - mov $rp,$bp # put rp aside - mov sp,$tp - mov sp,$ap - mov 0,$hi0 # clear borrow bit - -.align 4 -.Lsub: ldq $lo0,0($tp) - ldq $lo1,0($np) - lda $tp,8($tp) - lda $np,8($np) - subq $lo0,$lo1,$lo1 # tp[i]-np[i] - cmpult $lo0,$lo1,AT - subq $lo1,$hi0,$lo0 - cmpult $lo1,$lo0,$hi0 - or $hi0,AT,$hi0 - stq $lo0,0($rp) - cmpult $tp,$tj,v0 - lda $rp,8($rp) - bne v0,.Lsub - - subq $hi1,$hi0,$hi0 # handle upmost overflow bit - mov sp,$tp - mov $bp,$rp # restore rp - - and sp,$hi0,$ap - bic $bp,$hi0,$bp - bis $bp,$ap,$ap # ap=borrow?tp:rp - -.align 4 -.Lcopy: ldq $aj,0($ap) # copy or in-place refresh - lda $tp,8($tp) - lda $rp,8($rp) - lda $ap,8($ap) - stq zero,-8($tp) # zap tp - cmpult $tp,$tj,AT - stq $aj,-8($rp) - bne AT,.Lcopy - mov 1,v0 - -.Lexit: - .set noreorder - mov fp,sp - /*ldq ra,0(sp)*/ - ldq s3,8(sp) - ldq s4,16(sp) - ldq s5,24(sp) - ldq fp,32(sp) - lda sp,48(sp) - ret (ra) -.end bn_mul_mont -.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by " -.align 2 -___ - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/armv4-gf2m.pl b/drivers/builtin_openssl2/crypto/bn/asm/armv4-gf2m.pl deleted file mode 100644 index 22ad1f85f9..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/armv4-gf2m.pl +++ /dev/null @@ -1,278 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# May 2011 -# -# The module implements bn_GF2m_mul_2x2 polynomial multiplication -# used in bn_gf2m.c. It's kind of low-hanging mechanical port from -# C for the time being... Except that it has two code paths: pure -# integer code suitable for any ARMv4 and later CPU and NEON code -# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs -# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50% -# faster than compiler-generated code. For ECDH and ECDSA verify (but -# not for ECDSA sign) it means 25%-45% improvement depending on key -# length, more for longer keys. Even though NEON 1x1 multiplication -# runs in even less cycles, ~30, improvement is measurable only on -# longer keys. One has to optimize code elsewhere to get NEON glow... - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } - -$code=<<___; -#include "arm_arch.h" - -.text -.code 32 - -#if __ARM_ARCH__>=7 -.fpu neon - -.type mul_1x1_neon,%function -.align 5 -mul_1x1_neon: - vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a - vmull.p8 `&Q("d0")`,d16,d17 @ a·bb - vshl.u64 `&Dlo("q2")`,d16,#16 - vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb - vshl.u64 `&Dlo("q3")`,d16,#24 - vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb - vshr.u64 `&Dlo("q1")`,#8 - vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb - vshl.u64 `&Dhi("q1")`,#24 - veor d0,`&Dlo("q1")` - vshr.u64 `&Dlo("q2")`,#16 - veor d0,`&Dhi("q1")` - vshl.u64 `&Dhi("q2")`,#16 - veor d0,`&Dlo("q2")` - vshr.u64 `&Dlo("q3")`,#24 - veor d0,`&Dhi("q2")` - vshl.u64 `&Dhi("q3")`,#8 - veor d0,`&Dlo("q3")` - veor d0,`&Dhi("q3")` - bx lr -.size mul_1x1_neon,.-mul_1x1_neon -#endif -___ -################ -# private interface to mul_1x1_ialu -# -$a="r1"; -$b="r0"; - -($a0,$a1,$a2,$a12,$a4,$a14)= -($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12); - -$mask="r12"; - -$code.=<<___; -.type mul_1x1_ialu,%function -.align 5 -mul_1x1_ialu: - mov $a0,#0 - bic $a1,$a,#3<<30 @ a1=a&0x3fffffff - str $a0,[sp,#0] @ tab[0]=0 - add $a2,$a1,$a1 @ a2=a1<<1 - str $a1,[sp,#4] @ tab[1]=a1 - eor $a12,$a1,$a2 @ a1^a2 - str $a2,[sp,#8] @ tab[2]=a2 - mov $a4,$a1,lsl#2 @ a4=a1<<2 - str $a12,[sp,#12] @ tab[3]=a1^a2 - eor $a14,$a1,$a4 @ a1^a4 - str $a4,[sp,#16] @ tab[4]=a4 - eor $a0,$a2,$a4 @ a2^a4 - str $a14,[sp,#20] @ tab[5]=a1^a4 - eor $a12,$a12,$a4 @ a1^a2^a4 - str $a0,[sp,#24] @ tab[6]=a2^a4 - and $i0,$mask,$b,lsl#2 - str $a12,[sp,#28] @ tab[7]=a1^a2^a4 - - and $i1,$mask,$b,lsr#1 - ldr $lo,[sp,$i0] @ tab[b & 0x7] - and $i0,$mask,$b,lsr#4 - ldr $t1,[sp,$i1] @ tab[b >> 3 & 0x7] - and $i1,$mask,$b,lsr#7 - ldr $t0,[sp,$i0] @ tab[b >> 6 & 0x7] - eor $lo,$lo,$t1,lsl#3 @ stall - mov $hi,$t1,lsr#29 - ldr $t1,[sp,$i1] @ tab[b >> 9 & 0x7] - - and $i0,$mask,$b,lsr#10 - eor $lo,$lo,$t0,lsl#6 - eor $hi,$hi,$t0,lsr#26 - ldr $t0,[sp,$i0] @ tab[b >> 12 & 0x7] - - and $i1,$mask,$b,lsr#13 - eor $lo,$lo,$t1,lsl#9 - eor $hi,$hi,$t1,lsr#23 - ldr $t1,[sp,$i1] @ tab[b >> 15 & 0x7] - - and $i0,$mask,$b,lsr#16 - eor $lo,$lo,$t0,lsl#12 - eor $hi,$hi,$t0,lsr#20 - ldr $t0,[sp,$i0] @ tab[b >> 18 & 0x7] - - and $i1,$mask,$b,lsr#19 - eor $lo,$lo,$t1,lsl#15 - eor $hi,$hi,$t1,lsr#17 - ldr $t1,[sp,$i1] @ tab[b >> 21 & 0x7] - - and $i0,$mask,$b,lsr#22 - eor $lo,$lo,$t0,lsl#18 - eor $hi,$hi,$t0,lsr#14 - ldr $t0,[sp,$i0] @ tab[b >> 24 & 0x7] - - and $i1,$mask,$b,lsr#25 - eor $lo,$lo,$t1,lsl#21 - eor $hi,$hi,$t1,lsr#11 - ldr $t1,[sp,$i1] @ tab[b >> 27 & 0x7] - - tst $a,#1<<30 - and $i0,$mask,$b,lsr#28 - eor $lo,$lo,$t0,lsl#24 - eor $hi,$hi,$t0,lsr#8 - ldr $t0,[sp,$i0] @ tab[b >> 30 ] - - eorne $lo,$lo,$b,lsl#30 - eorne $hi,$hi,$b,lsr#2 - tst $a,#1<<31 - eor $lo,$lo,$t1,lsl#27 - eor $hi,$hi,$t1,lsr#5 - eorne $lo,$lo,$b,lsl#31 - eorne $hi,$hi,$b,lsr#1 - eor $lo,$lo,$t0,lsl#30 - eor $hi,$hi,$t0,lsr#2 - - mov pc,lr -.size mul_1x1_ialu,.-mul_1x1_ialu -___ -################ -# void bn_GF2m_mul_2x2(BN_ULONG *r, -# BN_ULONG a1,BN_ULONG a0, -# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 - -($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); - -$code.=<<___; -.global bn_GF2m_mul_2x2 -.type bn_GF2m_mul_2x2,%function -.align 5 -bn_GF2m_mul_2x2: -#if __ARM_ARCH__>=7 - ldr r12,.LOPENSSL_armcap -.Lpic: ldr r12,[pc,r12] - tst r12,#1 - beq .Lialu - - veor $A1,$A1 - vmov.32 $B1,r3,r3 @ two copies of b1 - vmov.32 ${A1}[0],r1 @ a1 - - veor $A0,$A0 - vld1.32 ${B0}[],[sp,:32] @ two copies of b0 - vmov.32 ${A0}[0],r2 @ a0 - mov r12,lr - - vmov d16,$A1 - vmov d17,$B1 - bl mul_1x1_neon @ a1·b1 - vmov $A1B1,d0 - - vmov d16,$A0 - vmov d17,$B0 - bl mul_1x1_neon @ a0·b0 - vmov $A0B0,d0 - - veor d16,$A0,$A1 - veor d17,$B0,$B1 - veor $A0,$A0B0,$A1B1 - bl mul_1x1_neon @ (a0+a1)·(b0+b1) - - veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 - vshl.u64 d1,d0,#32 - vshr.u64 d0,d0,#32 - veor $A0B0,d1 - veor $A1B1,d0 - vst1.32 {${A0B0}[0]},[r0,:32]! - vst1.32 {${A0B0}[1]},[r0,:32]! - vst1.32 {${A1B1}[0]},[r0,:32]! - vst1.32 {${A1B1}[1]},[r0,:32] - bx r12 -.align 4 -.Lialu: -#endif -___ -$ret="r10"; # reassigned 1st argument -$code.=<<___; - stmdb sp!,{r4-r10,lr} - mov $ret,r0 @ reassign 1st argument - mov $b,r3 @ $b=b1 - ldr r3,[sp,#32] @ load b0 - mov $mask,#7<<2 - sub sp,sp,#32 @ allocate tab[8] - - bl mul_1x1_ialu @ a1·b1 - str $lo,[$ret,#8] - str $hi,[$ret,#12] - - eor $b,$b,r3 @ flip b0 and b1 - eor $a,$a,r2 @ flip a0 and a1 - eor r3,r3,$b - eor r2,r2,$a - eor $b,$b,r3 - eor $a,$a,r2 - bl mul_1x1_ialu @ a0·b0 - str $lo,[$ret] - str $hi,[$ret,#4] - - eor $a,$a,r2 - eor $b,$b,r3 - bl mul_1x1_ialu @ (a1+a0)·(b1+b0) -___ -@r=map("r$_",(6..9)); -$code.=<<___; - ldmia $ret,{@r[0]-@r[3]} - eor $lo,$lo,$hi - eor $hi,$hi,@r[1] - eor $lo,$lo,@r[0] - eor $hi,$hi,@r[2] - eor $lo,$lo,@r[3] - eor $hi,$hi,@r[3] - str $hi,[$ret,#8] - eor $lo,$lo,$hi - add sp,sp,#32 @ destroy tab[8] - str $lo,[$ret,#4] - -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r10,pc} -#else - ldmia sp!,{r4-r10,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -#if __ARM_ARCH__>=7 -.align 5 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-(.Lpic+8) -#endif -.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " -.align 5 - -.comm OPENSSL_armcap_P,4,4 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/bn/asm/armv4-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/armv4-mont.pl deleted file mode 100644 index f78a8b5f0f..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/armv4-mont.pl +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# January 2007. - -# Montgomery multiplication for ARMv4. -# -# Performance improvement naturally varies among CPU implementations -# and compilers. The code was observed to provide +65-35% improvement -# [depending on key length, less for longer keys] on ARM920T, and -# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code -# base and compiler generated code with in-lined umull and even umlal -# instructions. The latter means that this code didn't really have an -# "advantage" of utilizing some "secret" instruction. -# -# The code is interoperable with Thumb ISA and is rather compact, less -# than 1/2KB. Windows CE port would be trivial, as it's exclusively -# about decorations, ABI and instruction syntax are identical. - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$num="r0"; # starts as num argument, but holds &tp[num-1] -$ap="r1"; -$bp="r2"; $bi="r2"; $rp="r2"; -$np="r3"; -$tp="r4"; -$aj="r5"; -$nj="r6"; -$tj="r7"; -$n0="r8"; -########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer -$alo="r10"; # sl, gcc uses it to keep @GOT -$ahi="r11"; # fp -$nlo="r12"; # ip -########### # r13 is stack pointer -$nhi="r14"; # lr -########### # r15 is program counter - -#### argument block layout relative to &tp[num-1], a.k.a. $num -$_rp="$num,#12*4"; -# ap permanently resides in r1 -$_bp="$num,#13*4"; -# np permanently resides in r3 -$_n0="$num,#14*4"; -$_num="$num,#15*4"; $_bpend=$_num; - -$code=<<___; -.text - -.global bn_mul_mont -.type bn_mul_mont,%function - -.align 2 -bn_mul_mont: - stmdb sp!,{r0,r2} @ sp points at argument block - ldr $num,[sp,#3*4] @ load num - cmp $num,#2 - movlt r0,#0 - addlt sp,sp,#2*4 - blt .Labrt - - stmdb sp!,{r4-r12,lr} @ save 10 registers - - mov $num,$num,lsl#2 @ rescale $num for byte count - sub sp,sp,$num @ alloca(4*num) - sub sp,sp,#4 @ +extra dword - sub $num,$num,#4 @ "num=num-1" - add $tp,$bp,$num @ &bp[num-1] - - add $num,sp,$num @ $num to point at &tp[num-1] - ldr $n0,[$_n0] @ &n0 - ldr $bi,[$bp] @ bp[0] - ldr $aj,[$ap],#4 @ ap[0],ap++ - ldr $nj,[$np],#4 @ np[0],np++ - ldr $n0,[$n0] @ *n0 - str $tp,[$_bpend] @ save &bp[num] - - umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0] - str $n0,[$_n0] @ save n0 value - mul $n0,$alo,$n0 @ "tp[0]"*n0 - mov $nlo,#0 - umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]" - mov $tp,sp - -.L1st: - ldr $aj,[$ap],#4 @ ap[j],ap++ - mov $alo,$ahi - ldr $nj,[$np],#4 @ np[j],np++ - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] - mov $nhi,#0 - umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 - adds $nlo,$nlo,$alo - str $nlo,[$tp],#4 @ tp[j-1]=,tp++ - adc $nlo,$nhi,#0 - cmp $tp,$num - bne .L1st - - adds $nlo,$nlo,$ahi - ldr $tp,[$_bp] @ restore bp - mov $nhi,#0 - ldr $n0,[$_n0] @ restore n0 - adc $nhi,$nhi,#0 - str $nlo,[$num] @ tp[num-1]= - str $nhi,[$num,#4] @ tp[num]= - -.Louter: - sub $tj,$num,sp @ "original" $num-1 value - sub $ap,$ap,$tj @ "rewind" ap to &ap[1] - ldr $bi,[$tp,#4]! @ *(++bp) - sub $np,$np,$tj @ "rewind" np to &np[1] - ldr $aj,[$ap,#-4] @ ap[0] - ldr $alo,[sp] @ tp[0] - ldr $nj,[$np,#-4] @ np[0] - ldr $tj,[sp,#4] @ tp[1] - - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0] - str $tp,[$_bp] @ save bp - mul $n0,$alo,$n0 - mov $nlo,#0 - umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]" - mov $tp,sp - -.Linner: - ldr $aj,[$ap],#4 @ ap[j],ap++ - adds $alo,$ahi,$tj @ +=tp[j] - ldr $nj,[$np],#4 @ np[j],np++ - mov $ahi,#0 - umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] - mov $nhi,#0 - umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 - adc $ahi,$ahi,#0 - ldr $tj,[$tp,#8] @ tp[j+1] - adds $nlo,$nlo,$alo - str $nlo,[$tp],#4 @ tp[j-1]=,tp++ - adc $nlo,$nhi,#0 - cmp $tp,$num - bne .Linner - - adds $nlo,$nlo,$ahi - mov $nhi,#0 - ldr $tp,[$_bp] @ restore bp - adc $nhi,$nhi,#0 - ldr $n0,[$_n0] @ restore n0 - adds $nlo,$nlo,$tj - ldr $tj,[$_bpend] @ restore &bp[num] - adc $nhi,$nhi,#0 - str $nlo,[$num] @ tp[num-1]= - str $nhi,[$num,#4] @ tp[num]= - - cmp $tp,$tj - bne .Louter - - ldr $rp,[$_rp] @ pull rp - add $num,$num,#4 @ $num to point at &tp[num] - sub $aj,$num,sp @ "original" num value - mov $tp,sp @ "rewind" $tp - mov $ap,$tp @ "borrow" $ap - sub $np,$np,$aj @ "rewind" $np to &np[0] - - subs $tj,$tj,$tj @ "clear" carry flag -.Lsub: ldr $tj,[$tp],#4 - ldr $nj,[$np],#4 - sbcs $tj,$tj,$nj @ tp[j]-np[j] - str $tj,[$rp],#4 @ rp[j]= - teq $tp,$num @ preserve carry - bne .Lsub - sbcs $nhi,$nhi,#0 @ upmost carry - mov $tp,sp @ "rewind" $tp - sub $rp,$rp,$aj @ "rewind" $rp - - and $ap,$tp,$nhi - bic $np,$rp,$nhi - orr $ap,$ap,$np @ ap=borrow?tp:rp - -.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh - str sp,[$tp],#4 @ zap tp - str $tj,[$rp],#4 - cmp $tp,$num - bne .Lcopy - - add sp,$num,#4 @ skip over tp[num+1] - ldmia sp!,{r4-r12,lr} @ restore registers - add sp,sp,#2*4 @ skip over {r0,r2} - mov r0,#1 -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " -.align 2 -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/bn-586.pl b/drivers/builtin_openssl2/crypto/bn/asm/bn-586.pl deleted file mode 100644 index 332ef3e91d..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/bn-586.pl +++ /dev/null @@ -1,774 +0,0 @@ -#!/usr/local/bin/perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&bn_mul_add_words("bn_mul_add_words"); -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_div_words("bn_div_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_sub_part_words("bn_sub_part_words"); - -&asm_finish(); - -sub bn_mul_add_words - { - local($name)=@_; - - &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - - $r="eax"; - $a="edx"; - $c="ecx"; - - if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); - &jnc(&label("maw_non_sse2")); - - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &mov($c,&wparam(2)); - &movd("mm0",&wparam(3)); # mm0 = w - &pxor("mm1","mm1"); # mm1 = carry_in - &jmp(&label("maw_sse2_entry")); - - &set_label("maw_sse2_unrolled",16); - &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] - &paddq("mm1","mm3"); # mm1 = carry_in + r[0] - &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] - &pmuludq("mm2","mm0"); # mm2 = w*a[0] - &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1] - &pmuludq("mm4","mm0"); # mm4 = w*a[1] - &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2] - &pmuludq("mm6","mm0"); # mm6 = w*a[2] - &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3] - &pmuludq("mm7","mm0"); # mm7 = w*a[3] - &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0] - &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1] - &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1] - &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2] - &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2] - &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3] - &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3] - &movd(&DWP(0,$r,"",0),"mm1"); - &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4] - &pmuludq("mm2","mm0"); # mm2 = w*a[4] - &psrlq("mm1",32); # mm1 = carry0 - &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5] - &pmuludq("mm4","mm0"); # mm4 = w*a[5] - &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1] - &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6] - &pmuludq("mm6","mm0"); # mm6 = w*a[6] - &movd(&DWP(4,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry1 - &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7] - &add($a,32); - &pmuludq("mm3","mm0"); # mm3 = w*a[7] - &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2] - &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4] - &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4] - &movd(&DWP(8,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry2 - &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3] - &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5] - &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5] - &movd(&DWP(12,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry3 - &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4] - &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6] - &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6] - &movd(&DWP(16,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry4 - &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5] - &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7] - &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7] - &movd(&DWP(20,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry5 - &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6] - &movd(&DWP(24,$r,"",0),"mm1"); - &psrlq("mm1",32); # mm1 = carry6 - &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] - &movd(&DWP(28,$r,"",0),"mm1"); - &lea($r,&DWP(32,$r)); - &psrlq("mm1",32); # mm1 = carry_out - - &sub($c,8); - &jz(&label("maw_sse2_exit")); - &set_label("maw_sse2_entry"); - &test($c,0xfffffff8); - &jnz(&label("maw_sse2_unrolled")); - - &set_label("maw_sse2_loop",4); - &movd("mm2",&DWP(0,$a)); # mm2 = a[i] - &movd("mm3",&DWP(0,$r)); # mm3 = r[i] - &pmuludq("mm2","mm0"); # a[i] *= w - &lea($a,&DWP(4,$a)); - &paddq("mm1","mm3"); # carry += r[i] - &paddq("mm1","mm2"); # carry += a[i]*w - &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low - &sub($c,1); - &psrlq("mm1",32); # carry = carry_high - &lea($r,&DWP(4,$r)); - &jnz(&label("maw_sse2_loop")); - &set_label("maw_sse2_exit"); - &movd("eax","mm1"); # c = carry_out - &emms(); - &ret(); - - &set_label("maw_non_sse2",16); - } - - # function_begin prologue - &push("ebp"); - &push("ebx"); - &push("esi"); - &push("edi"); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); - - &set_label("maw_loop",16); - - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+= c - &adc("edx",0); # H(t)+=carry - &add("eax",&DWP($i,$r)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &sub("ecx",8); - &lea($a,&DWP(32,$a)); - &lea($r,&DWP(32,$r)); - &jnz(&label("maw_loop")); - - &set_label("maw_finish",0); - &mov("ecx",&wparam(2)); # get num - &and("ecx",7); - &jnz(&label("maw_finish2")); # helps branch prediction - &jmp(&label("maw_end")); - - &set_label("maw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - &adc("edx",0); # H(t)+=carry - &add("eax",&DWP($i*4,$r)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - &jz(&label("maw_end")) if ($i != 7-1); - } - &set_label("maw_end",0); - &mov("eax",$c); - - &pop("ecx"); # clear variable from - - &function_end($name); - } - -sub bn_mul_words - { - local($name)=@_; - - &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - - $r="eax"; - $a="edx"; - $c="ecx"; - - if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); - &jnc(&label("mw_non_sse2")); - - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &mov($c,&wparam(2)); - &movd("mm0",&wparam(3)); # mm0 = w - &pxor("mm1","mm1"); # mm1 = carry = 0 - - &set_label("mw_sse2_loop",16); - &movd("mm2",&DWP(0,$a)); # mm2 = a[i] - &pmuludq("mm2","mm0"); # a[i] *= w - &lea($a,&DWP(4,$a)); - &paddq("mm1","mm2"); # carry += a[i]*w - &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low - &sub($c,1); - &psrlq("mm1",32); # carry = carry_high - &lea($r,&DWP(4,$r)); - &jnz(&label("mw_sse2_loop")); - - &movd("eax","mm1"); # return carry - &emms(); - &ret(); - &set_label("mw_non_sse2",16); - } - - # function_begin prologue - &push("ebp"); - &push("ebx"); - &push("esi"); - &push("edi"); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ecx"; - $r="edi"; - $c="esi"; - $num="ebp"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - &mov($w,&wparam(3)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("mw_finish")); - - &set_label("mw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a,"",0)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); - - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jz(&label("mw_finish")); - &jmp(&label("mw_loop")); - - &set_label("mw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jnz(&label("mw_finish2")); - &jmp(&label("mw_end")); - - &set_label("mw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); - &mov($c,"edx"); # c= H(t); - &dec($num) if ($i != 7-1); - &jz(&label("mw_end")) if ($i != 7-1); - } - &set_label("mw_end",0); - &mov("eax",$c); - - &function_end($name); - } - -sub bn_sqr_words - { - local($name)=@_; - - &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - - $r="eax"; - $a="edx"; - $c="ecx"; - - if ($sse2) { - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt(&DWP(0,"eax"),26); - &jnc(&label("sqr_non_sse2")); - - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &mov($c,&wparam(2)); - - &set_label("sqr_sse2_loop",16); - &movd("mm0",&DWP(0,$a)); # mm0 = a[i] - &pmuludq("mm0","mm0"); # a[i] *= a[i] - &lea($a,&DWP(4,$a)); # a++ - &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i] - &sub($c,1); - &lea($r,&DWP(8,$r)); # r += 2 - &jnz(&label("sqr_sse2_loop")); - - &emms(); - &ret(); - &set_label("sqr_non_sse2",16); - } - - # function_begin prologue - &push("ebp"); - &push("ebx"); - &push("esi"); - &push("edi"); - - &comment(""); - $r="esi"; - $a="edi"; - $num="ebx"; - - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("sw_finish")); - - &set_label("sw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - &mov("eax",&DWP($i,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*2,$r,"",0),"eax"); # - &mov(&DWP($i*2+4,$r,"",0),"edx");# - } - - &comment(""); - &add($a,32); - &add($r,64); - &sub($num,8); - &jnz(&label("sw_loop")); - - &set_label("sw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jz(&label("sw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*8,$r,"",0),"eax"); # - &dec($num) if ($i != 7-1); - &mov(&DWP($i*8+4,$r,"",0),"edx"); - &jz(&label("sw_end")) if ($i != 7-1); - } - &set_label("sw_end",0); - - &function_end($name); - } - -sub bn_div_words - { - local($name)=@_; - - &function_begin_B($name,""); - &mov("edx",&wparam(0)); # - &mov("eax",&wparam(1)); # - &mov("ecx",&wparam(2)); # - &div("ecx"); - &ret(); - &function_end_B($name); - } - -sub bn_add_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -sub bn_sub_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -sub bn_sub_part_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP(0,$a,"",0)); # *a - &mov($tmp2,&DWP(0,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP(0,$r,"",0),$tmp1); # *r - &add($a, 4); - &add($b, 4); - &add($r, 4); - &dec($num) if ($i != 6); - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - - &cmp(&wparam(4),0); - &je(&label("pw_end")); - - &mov($num,&wparam(4)); # get dl - &cmp($num,0); - &je(&label("pw_end")); - &jge(&label("pw_pos")); - - &comment("pw_neg"); - &mov($tmp2,0); - &sub($tmp2,$num); - &mov($num,$tmp2); - &and($num,0xfffffff8); # num / 8 - &jz(&label("pw_neg_finish")); - - &set_label("pw_neg_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("dl<0 Round $i"); - - &mov($tmp1,0); - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_neg_loop")); - - &set_label("pw_neg_finish",0); - &mov($tmp2,&wparam(4)); # get dl - &mov($num,0); - &sub($num,$tmp2); - &and($num,7); - &jz(&label("pw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("dl<0 Tail Round $i"); - &mov($tmp1,0); - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jz(&label("pw_end")) if ($i != 6); - } - - &jmp(&label("pw_end")); - - &set_label("pw_pos",0); - - &and($num,0xfffffff8); # num / 8 - &jz(&label("pw_pos_finish")); - - &set_label("pw_pos_loop",0); - - for ($i=0; $i<8; $i++) - { - &comment("dl>0 Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &sub($tmp1,$c); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jnc(&label("pw_nc".$i)); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_pos_loop")); - - &set_label("pw_pos_finish",0); - &mov($num,&wparam(4)); # get dl - &and($num,7); - &jz(&label("pw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("dl>0 Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &sub($tmp1,$c); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jnc(&label("pw_tail_nc".$i)); - &dec($num) if ($i != 6); - &jz(&label("pw_end")) if ($i != 6); - } - &mov($c,1); - &jmp(&label("pw_end")); - - &set_label("pw_nc_loop",0); - for ($i=0; $i<8; $i++) - { - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &set_label("pw_nc".$i,0); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_nc_loop")); - - &mov($num,&wparam(4)); # get dl - &and($num,7); - &jz(&label("pw_nc_end")); - - for ($i=0; $i<7; $i++) - { - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &set_label("pw_tail_nc".$i,0); - &dec($num) if ($i != 6); - &jz(&label("pw_nc_end")) if ($i != 6); - } - - &set_label("pw_nc_end",0); - &mov($c,0); - - &set_label("pw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - diff --git a/drivers/builtin_openssl2/crypto/bn/asm/co-586.pl b/drivers/builtin_openssl2/crypto/bn/asm/co-586.pl deleted file mode 100644 index 57101a6bd7..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/co-586.pl +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/local/bin/perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -&bn_mul_comba("bn_mul_comba8",8); -&bn_mul_comba("bn_mul_comba4",4); -&bn_sqr_comba("bn_sqr_comba8",8); -&bn_sqr_comba("bn_sqr_comba4",4); - -&asm_finish(); - -sub mul_add_c - { - local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("mul a[$ai]*b[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - &mul("edx"); - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a - &mov("eax",&wparam(0)) if $pos > 0; # load r[] - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a - } - -sub sqr_add_c - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - } - -sub sqr_add_c2 - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$a,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add("eax","eax"); - ### - &adc("edx","edx"); - ### - &adc($c2,0); - &add($c0,"eax"); - &adc($c1,"edx"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - &adc($c2,0); - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); - ### - } - -sub bn_mul_comba - { - local($name,$num)=@_; - local($a,$b,$c0,$c1,$c2); - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($tot,$end); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $b="edi"; - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - &push("esi"); - &mov($a,&wparam(1)); - &push("edi"); - &mov($b,&wparam(2)); - &push("ebp"); - &push("ebx"); - - &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("################## Calculate word $i"); - - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($j+1) == $end) - { - $v=1; - $v=2 if (($i+1) == $tot); - } - else - { $v=0; } - if (($j+1) != $end) - { - $na=($ai-1); - $nb=($bi+1); - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } -#printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; - &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); - if ($v) - { - &comment("saved r[$i]"); - # &mov("eax",&wparam(0)); - # &mov(&DWP($i*4,"eax","",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &comment("save r[$i]"); - # &mov("eax",&wparam(0)); - &mov(&DWP($i*4,"eax","",0),$c0); - - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - -sub bn_sqr_comba - { - local($name,$num)=@_; - local($r,$a,$c0,$c1,$c2)=@_; - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($b,$tot,$end,$half); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $r="edi"; - - &push("esi"); - &push("edi"); - &push("ebp"); - &push("ebx"); - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &xor($c0,$c0); - &xor($c1,$c1); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("############### Calculate word $i"); - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($ai-1) < ($bi+1)) - { - $v=1; - $v=2 if ($i+1) == $tot; - } - else - { $v=0; } - if (!$v) - { - $na=$ai-1; - $nb=$bi+1; - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } - if ($ai == $bi) - { - &sqr_add_c($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - else - { - &sqr_add_c2($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - if ($v) - { - &comment("saved r[$i]"); - #&mov(&DWP($i*4,$r,"",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - last; - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &mov(&DWP($i*4,$r,"",0),$c0); - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } diff --git a/drivers/builtin_openssl2/crypto/bn/asm/ia64-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/ia64-mont.pl deleted file mode 100644 index e258658428..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/ia64-mont.pl +++ /dev/null @@ -1,851 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# January 2010 -# -# "Teaser" Montgomery multiplication module for IA-64. There are -# several possibilities for improvement: -# -# - modulo-scheduling outer loop would eliminate quite a number of -# stalls after ldf8, xma and getf.sig outside inner loop and -# improve shorter key performance; -# - shorter vector support [with input vectors being fetched only -# once] should be added; -# - 2x unroll with help of n0[1] would make the code scalable on -# "wider" IA-64, "wider" than Itanium 2 that is, which is not of -# acute interest, because upcoming Tukwila's individual cores are -# reportedly based on Itanium 2 design; -# - dedicated squaring procedure(?); -# -# January 2010 -# -# Shorter vector support is implemented by zero-padding ap and np -# vectors up to 8 elements, or 512 bits. This means that 256-bit -# inputs will be processed only 2 times faster than 512-bit inputs, -# not 4 [as one would expect, because algorithm complexity is n^2]. -# The reason for padding is that inputs shorter than 512 bits won't -# be processed faster anyway, because minimal critical path of the -# core loop happens to match 512-bit timing. Either way, it resulted -# in >100% improvement of 512-bit RSA sign benchmark and 50% - of -# 1024-bit one [in comparison to original version of *this* module]. -# -# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with* -# this module is: -# sign verify sign/s verify/s -# rsa 512 bits 0.000290s 0.000024s 3452.8 42031.4 -# rsa 1024 bits 0.000793s 0.000058s 1261.7 17172.0 -# rsa 2048 bits 0.005908s 0.000148s 169.3 6754.0 -# rsa 4096 bits 0.033456s 0.000469s 29.9 2133.6 -# dsa 512 bits 0.000253s 0.000198s 3949.9 5057.0 -# dsa 1024 bits 0.000585s 0.000607s 1708.4 1647.4 -# dsa 2048 bits 0.001453s 0.001703s 688.1 587.4 -# -# ... and *without* (but still with ia64.S): -# -# rsa 512 bits 0.000670s 0.000041s 1491.8 24145.5 -# rsa 1024 bits 0.001988s 0.000080s 502.9 12499.3 -# rsa 2048 bits 0.008702s 0.000189s 114.9 5293.9 -# rsa 4096 bits 0.043860s 0.000533s 22.8 1875.9 -# dsa 512 bits 0.000441s 0.000427s 2265.3 2340.6 -# dsa 1024 bits 0.000823s 0.000867s 1215.6 1153.2 -# dsa 2048 bits 0.001894s 0.002179s 528.1 458.9 -# -# As it can be seen, RSA sign performance improves by 130-30%, -# hereafter less for longer keys, while verify - by 74-13%. -# DSA performance improves by 115-30%. - -if ($^O eq "hpux") { - $ADDP="addp4"; - for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } -} else { $ADDP="add"; } - -$code=<<___; -.explicit -.text - -// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap, -// const BN_ULONG *bp,const BN_ULONG *np, -// const BN_ULONG *n0p,int num); -.align 64 -.global bn_mul_mont# -.proc bn_mul_mont# -bn_mul_mont: - .prologue - .body -{ .mmi; cmp4.le p6,p7=2,r37;; -(p6) cmp4.lt.unc p8,p9=8,r37 - mov ret0=r0 };; -{ .bbb; -(p9) br.cond.dptk.many bn_mul_mont_8 -(p8) br.cond.dpnt.many bn_mul_mont_general -(p7) br.ret.spnt.many b0 };; -.endp bn_mul_mont# - -prevfs=r2; prevpr=r3; prevlc=r10; prevsp=r11; - -rptr=r8; aptr=r9; bptr=r14; nptr=r15; -tptr=r16; // &tp[0] -tp_1=r17; // &tp[-1] -num=r18; len=r19; lc=r20; -topbit=r21; // carry bit from tmp[num] - -n0=f6; -m0=f7; -bi=f8; - -.align 64 -.local bn_mul_mont_general# -.proc bn_mul_mont_general# -bn_mul_mont_general: - .prologue -{ .mmi; .save ar.pfs,prevfs - alloc prevfs=ar.pfs,6,2,0,8 - $ADDP aptr=0,in1 - .save ar.lc,prevlc - mov prevlc=ar.lc } -{ .mmi; .vframe prevsp - mov prevsp=sp - $ADDP bptr=0,in2 - .save pr,prevpr - mov prevpr=pr };; - - .body - .rotf alo[6],nlo[4],ahi[8],nhi[6] - .rotr a[3],n[3],t[2] - -{ .mmi; ldf8 bi=[bptr],8 // (*bp++) - ldf8 alo[4]=[aptr],16 // ap[0] - $ADDP r30=8,in1 };; -{ .mmi; ldf8 alo[3]=[r30],16 // ap[1] - ldf8 alo[2]=[aptr],16 // ap[2] - $ADDP in4=0,in4 };; -{ .mmi; ldf8 alo[1]=[r30] // ap[3] - ldf8 n0=[in4] // n0 - $ADDP rptr=0,in0 } -{ .mmi; $ADDP nptr=0,in3 - mov r31=16 - zxt4 num=in5 };; -{ .mmi; ldf8 nlo[2]=[nptr],8 // np[0] - shladd len=num,3,r0 - shladd r31=num,3,r31 };; -{ .mmi; ldf8 nlo[1]=[nptr],8 // np[1] - add lc=-5,num - sub r31=sp,r31 };; -{ .mfb; and sp=-16,r31 // alloca - xmpy.hu ahi[2]=alo[4],bi // ap[0]*bp[0] - nop.b 0 } -{ .mfb; nop.m 0 - xmpy.lu alo[4]=alo[4],bi - brp.loop.imp .L1st_ctop,.L1st_cend-16 - };; -{ .mfi; nop.m 0 - xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[0] - add tp_1=8,sp } -{ .mfi; nop.m 0 - xma.lu alo[3]=alo[3],bi,ahi[2] - mov pr.rot=0x20001f<<16 - // ------^----- (p40) at first (p23) - // ----------^^ p[16:20]=1 - };; -{ .mfi; nop.m 0 - xmpy.lu m0=alo[4],n0 // (ap[0]*bp[0])*n0 - mov ar.lc=lc } -{ .mfi; nop.m 0 - fcvt.fxu.s1 nhi[1]=f0 - mov ar.ec=8 };; - -.align 32 -.L1st_ctop: -.pred.rel "mutex",p40,p42 -{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++) - (p18) xma.hu ahi[0]=alo[2],bi,ahi[1] - (p40) add n[2]=n[2],a[2] } // (p23) } -{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)(p16) - (p18) xma.lu alo[2]=alo[2],bi,ahi[1] - (p42) add n[2]=n[2],a[2],1 };; // (p23) -{ .mfi; (p21) getf.sig a[0]=alo[5] - (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1] - (p42) cmp.leu p41,p39=n[2],a[2] } // (p23) -{ .mfi; (p23) st8 [tp_1]=n[2],8 - (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1] - (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23) -{ .mmb; (p21) getf.sig n[0]=nlo[3] - (p16) nop.m 0 - br.ctop.sptk .L1st_ctop };; -.L1st_cend: - -{ .mmi; getf.sig a[0]=ahi[6] // (p24) - getf.sig n[0]=nhi[4] - add num=-1,num };; // num-- -{ .mmi; .pred.rel "mutex",p40,p42 -(p40) add n[0]=n[0],a[0] -(p42) add n[0]=n[0],a[0],1 - sub aptr=aptr,len };; // rewind -{ .mmi; .pred.rel "mutex",p40,p42 -(p40) cmp.ltu p41,p39=n[0],a[0] -(p42) cmp.leu p41,p39=n[0],a[0] - sub nptr=nptr,len };; -{ .mmi; .pred.rel "mutex",p39,p41 -(p39) add topbit=r0,r0 -(p41) add topbit=r0,r0,1 - nop.i 0 } -{ .mmi; st8 [tp_1]=n[0] - add tptr=16,sp - add tp_1=8,sp };; - -.Louter: -{ .mmi; ldf8 bi=[bptr],8 // (*bp++) - ldf8 ahi[3]=[tptr] // tp[0] - add r30=8,aptr };; -{ .mmi; ldf8 alo[4]=[aptr],16 // ap[0] - ldf8 alo[3]=[r30],16 // ap[1] - add r31=8,nptr };; -{ .mfb; ldf8 alo[2]=[aptr],16 // ap[2] - xma.hu ahi[2]=alo[4],bi,ahi[3] // ap[0]*bp[i]+tp[0] - brp.loop.imp .Linner_ctop,.Linner_cend-16 - } -{ .mfb; ldf8 alo[1]=[r30] // ap[3] - xma.lu alo[4]=alo[4],bi,ahi[3] - clrrrb.pr };; -{ .mfi; ldf8 nlo[2]=[nptr],16 // np[0] - xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[i] - nop.i 0 } -{ .mfi; ldf8 nlo[1]=[r31] // np[1] - xma.lu alo[3]=alo[3],bi,ahi[2] - mov pr.rot=0x20101f<<16 - // ------^----- (p40) at first (p23) - // --------^--- (p30) at first (p22) - // ----------^^ p[16:20]=1 - };; -{ .mfi; st8 [tptr]=r0 // tp[0] is already accounted - xmpy.lu m0=alo[4],n0 // (ap[0]*bp[i]+tp[0])*n0 - mov ar.lc=lc } -{ .mfi; - fcvt.fxu.s1 nhi[1]=f0 - mov ar.ec=8 };; - -// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in -// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7 -// in latter case accounts for two-tick pipeline stall, which means -// that its performance would be ~20% lower than optimal one. No -// attempt was made to address this, because original Itanium is -// hardly represented out in the wild... -.align 32 -.Linner_ctop: -.pred.rel "mutex",p40,p42 -.pred.rel "mutex",p30,p32 -{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++) - (p18) xma.hu ahi[0]=alo[2],bi,ahi[1] - (p40) add n[2]=n[2],a[2] } // (p23) -{ .mfi; (p16) nop.m 0 - (p18) xma.lu alo[2]=alo[2],bi,ahi[1] - (p42) add n[2]=n[2],a[2],1 };; // (p23) -{ .mfi; (p21) getf.sig a[0]=alo[5] - (p16) nop.f 0 - (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23) -{ .mfi; (p21) ld8 t[0]=[tptr],8 - (p16) nop.f 0 - (p42) cmp.leu p41,p39=n[2],a[2] };; // (p23) -{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++) - (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1] - (p30) add a[1]=a[1],t[1] } // (p22) -{ .mfi; (p16) nop.m 0 - (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1] - (p32) add a[1]=a[1],t[1],1 };; // (p22) -{ .mmi; (p21) getf.sig n[0]=nlo[3] - (p16) nop.m 0 - (p30) cmp.ltu p31,p29=a[1],t[1] } // (p22) -{ .mmb; (p23) st8 [tp_1]=n[2],8 - (p32) cmp.leu p31,p29=a[1],t[1] // (p22) - br.ctop.sptk .Linner_ctop };; -.Linner_cend: - -{ .mmi; getf.sig a[0]=ahi[6] // (p24) - getf.sig n[0]=nhi[4] - nop.i 0 };; - -{ .mmi; .pred.rel "mutex",p31,p33 -(p31) add a[0]=a[0],topbit -(p33) add a[0]=a[0],topbit,1 - mov topbit=r0 };; -{ .mfi; .pred.rel "mutex",p31,p33 -(p31) cmp.ltu p32,p30=a[0],topbit -(p33) cmp.leu p32,p30=a[0],topbit - } -{ .mfi; .pred.rel "mutex",p40,p42 -(p40) add n[0]=n[0],a[0] -(p42) add n[0]=n[0],a[0],1 - };; -{ .mmi; .pred.rel "mutex",p44,p46 -(p40) cmp.ltu p41,p39=n[0],a[0] -(p42) cmp.leu p41,p39=n[0],a[0] -(p32) add topbit=r0,r0,1 } - -{ .mmi; st8 [tp_1]=n[0],8 - cmp4.ne p6,p0=1,num - sub aptr=aptr,len };; // rewind -{ .mmi; sub nptr=nptr,len -(p41) add topbit=r0,r0,1 - add tptr=16,sp } -{ .mmb; add tp_1=8,sp - add num=-1,num // num-- -(p6) br.cond.sptk.many .Louter };; - -{ .mbb; add lc=4,lc - brp.loop.imp .Lsub_ctop,.Lsub_cend-16 - clrrrb.pr };; -{ .mii; nop.m 0 - mov pr.rot=0x10001<<16 - // ------^---- (p33) at first (p17) - mov ar.lc=lc } -{ .mii; nop.m 0 - mov ar.ec=3 - nop.i 0 };; - -.Lsub_ctop: -.pred.rel "mutex",p33,p35 -{ .mfi; (p16) ld8 t[0]=[tptr],8 // t=*(tp++) - (p16) nop.f 0 - (p33) sub n[1]=t[1],n[1] } // (p17) -{ .mfi; (p16) ld8 n[0]=[nptr],8 // n=*(np++) - (p16) nop.f 0 - (p35) sub n[1]=t[1],n[1],1 };; // (p17) -{ .mib; (p18) st8 [rptr]=n[2],8 // *(rp++)=r - (p33) cmp.gtu p34,p32=n[1],t[1] // (p17) - (p18) nop.b 0 } -{ .mib; (p18) nop.m 0 - (p35) cmp.geu p34,p32=n[1],t[1] // (p17) - br.ctop.sptk .Lsub_ctop };; -.Lsub_cend: - -{ .mmb; .pred.rel "mutex",p34,p36 -(p34) sub topbit=topbit,r0 // (p19) -(p36) sub topbit=topbit,r0,1 - brp.loop.imp .Lcopy_ctop,.Lcopy_cend-16 - } -{ .mmb; sub rptr=rptr,len // rewind - sub tptr=tptr,len - clrrrb.pr };; -{ .mmi; and aptr=tptr,topbit - andcm bptr=rptr,topbit - mov pr.rot=1<<16 };; -{ .mii; or nptr=aptr,bptr - mov ar.lc=lc - mov ar.ec=3 };; - -.Lcopy_ctop: -{ .mmb; (p16) ld8 n[0]=[nptr],8 - (p18) st8 [tptr]=r0,8 - (p16) nop.b 0 } -{ .mmb; (p16) nop.m 0 - (p18) st8 [rptr]=n[2],8 - br.ctop.sptk .Lcopy_ctop };; -.Lcopy_cend: - -{ .mmi; mov ret0=1 // signal "handled" - rum 1<<5 // clear um.mfh - mov ar.lc=prevlc } -{ .mib; .restore sp - mov sp=prevsp - mov pr=prevpr,0x1ffff - br.ret.sptk.many b0 };; -.endp bn_mul_mont_general# - -a1=r16; a2=r17; a3=r18; a4=r19; a5=r20; a6=r21; a7=r22; a8=r23; -n1=r24; n2=r25; n3=r26; n4=r27; n5=r28; n6=r29; n7=r30; n8=r31; -t0=r15; - -ai0=f8; ai1=f9; ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15; -ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23; - -.align 64 -.skip 48 // aligns loop body -.local bn_mul_mont_8# -.proc bn_mul_mont_8# -bn_mul_mont_8: - .prologue -{ .mmi; .save ar.pfs,prevfs - alloc prevfs=ar.pfs,6,2,0,8 - .vframe prevsp - mov prevsp=sp - .save ar.lc,prevlc - mov prevlc=ar.lc } -{ .mmi; add r17=-6*16,sp - add sp=-7*16,sp - .save pr,prevpr - mov prevpr=pr };; - -{ .mmi; .save.gf 0,0x10 - stf.spill [sp]=f16,-16 - .save.gf 0,0x20 - stf.spill [r17]=f17,32 - add r16=-5*16,prevsp};; -{ .mmi; .save.gf 0,0x40 - stf.spill [r16]=f18,32 - .save.gf 0,0x80 - stf.spill [r17]=f19,32 - $ADDP aptr=0,in1 };; -{ .mmi; .save.gf 0,0x100 - stf.spill [r16]=f20,32 - .save.gf 0,0x200 - stf.spill [r17]=f21,32 - $ADDP r29=8,in1 };; -{ .mmi; .save.gf 0,0x400 - stf.spill [r16]=f22 - .save.gf 0,0x800 - stf.spill [r17]=f23 - $ADDP rptr=0,in0 };; - - .body - .rotf bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10] - .rotr t[8] - -// load input vectors padding them to 8 elements -{ .mmi; ldf8 ai0=[aptr],16 // ap[0] - ldf8 ai1=[r29],16 // ap[1] - $ADDP bptr=0,in2 } -{ .mmi; $ADDP r30=8,in2 - $ADDP nptr=0,in3 - $ADDP r31=8,in3 };; -{ .mmi; ldf8 bj[7]=[bptr],16 // bp[0] - ldf8 bj[6]=[r30],16 // bp[1] - cmp4.le p4,p5=3,in5 } -{ .mmi; ldf8 ni0=[nptr],16 // np[0] - ldf8 ni1=[r31],16 // np[1] - cmp4.le p6,p7=4,in5 };; - -{ .mfi; (p4)ldf8 ai2=[aptr],16 // ap[2] - (p5)fcvt.fxu ai2=f0 - cmp4.le p8,p9=5,in5 } -{ .mfi; (p6)ldf8 ai3=[r29],16 // ap[3] - (p7)fcvt.fxu ai3=f0 - cmp4.le p10,p11=6,in5 } -{ .mfi; (p4)ldf8 bj[5]=[bptr],16 // bp[2] - (p5)fcvt.fxu bj[5]=f0 - cmp4.le p12,p13=7,in5 } -{ .mfi; (p6)ldf8 bj[4]=[r30],16 // bp[3] - (p7)fcvt.fxu bj[4]=f0 - cmp4.le p14,p15=8,in5 } -{ .mfi; (p4)ldf8 ni2=[nptr],16 // np[2] - (p5)fcvt.fxu ni2=f0 - addp4 r28=-1,in5 } -{ .mfi; (p6)ldf8 ni3=[r31],16 // np[3] - (p7)fcvt.fxu ni3=f0 - $ADDP in4=0,in4 };; - -{ .mfi; ldf8 n0=[in4] - fcvt.fxu tf[1]=f0 - nop.i 0 } - -{ .mfi; (p8)ldf8 ai4=[aptr],16 // ap[4] - (p9)fcvt.fxu ai4=f0 - mov t[0]=r0 } -{ .mfi; (p10)ldf8 ai5=[r29],16 // ap[5] - (p11)fcvt.fxu ai5=f0 - mov t[1]=r0 } -{ .mfi; (p8)ldf8 bj[3]=[bptr],16 // bp[4] - (p9)fcvt.fxu bj[3]=f0 - mov t[2]=r0 } -{ .mfi; (p10)ldf8 bj[2]=[r30],16 // bp[5] - (p11)fcvt.fxu bj[2]=f0 - mov t[3]=r0 } -{ .mfi; (p8)ldf8 ni4=[nptr],16 // np[4] - (p9)fcvt.fxu ni4=f0 - mov t[4]=r0 } -{ .mfi; (p10)ldf8 ni5=[r31],16 // np[5] - (p11)fcvt.fxu ni5=f0 - mov t[5]=r0 };; - -{ .mfi; (p12)ldf8 ai6=[aptr],16 // ap[6] - (p13)fcvt.fxu ai6=f0 - mov t[6]=r0 } -{ .mfi; (p14)ldf8 ai7=[r29],16 // ap[7] - (p15)fcvt.fxu ai7=f0 - mov t[7]=r0 } -{ .mfi; (p12)ldf8 bj[1]=[bptr],16 // bp[6] - (p13)fcvt.fxu bj[1]=f0 - mov ar.lc=r28 } -{ .mfi; (p14)ldf8 bj[0]=[r30],16 // bp[7] - (p15)fcvt.fxu bj[0]=f0 - mov ar.ec=1 } -{ .mfi; (p12)ldf8 ni6=[nptr],16 // np[6] - (p13)fcvt.fxu ni6=f0 - mov pr.rot=1<<16 } -{ .mfb; (p14)ldf8 ni7=[r31],16 // np[7] - (p15)fcvt.fxu ni7=f0 - brp.loop.imp .Louter_8_ctop,.Louter_8_cend-16 - };; - -// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt -// to measure with help of Interval Time Counter indicated that the -// factor is a tad higher: 33 or 34, if not 35. Exact measurement and -// addressing the issue is problematic, because I don't have access -// to platform-specific instruction-level profiler. On Itanium it -// should run in 56*n ticks, because of higher xma latency... -.Louter_8_ctop: - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mfi; (p16) nop.m 0 // 0: - (p16) xma.hu ahi[0]=ai0,bj[7],tf[1] // ap[0]*b[i]+t[0] - (p40) add a3=a3,n3 } // (p17) a3+=n3 -{ .mfi; (p42) add a3=a3,n3,1 - (p16) xma.lu alo[0]=ai0,bj[7],tf[1] - (p16) nop.i 0 };; -{ .mii; (p17) getf.sig a7=alo[8] // 1: - (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3 - (p50) add t[6]=t[6],a3,1 };; -{ .mfi; (p17) getf.sig a8=ahi[8] // 2: - (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0 - (p40) cmp.ltu p43,p41=a3,n3 } -{ .mfi; (p42) cmp.leu p43,p41=a3,n3 - (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6] - (p16) nop.i 0 };; -{ .mii; (p17) getf.sig n5=nlo[6] // 3: - (p48) cmp.ltu p51,p49=t[6],a3 - (p50) cmp.leu p51,p49=t[6],a3 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mfi; (p16) nop.m 0 // 4: - (p16) xma.hu ahi[1]=ai1,bj[7],ahi[0] // ap[1]*b[i] - (p41) add a4=a4,n4 } // (p17) a4+=n4 -{ .mfi; (p43) add a4=a4,n4,1 - (p16) xma.lu alo[1]=ai1,bj[7],ahi[0] - (p16) nop.i 0 };; -{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4 - (p16) xmpy.lu mj[0]=alo[0],n0 // (ap[0]*b[i]+t[0])*n0 - (p51) add t[5]=t[5],a4,1 };; -{ .mfi; (p16) nop.m 0 // 6: - (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0 - (p41) cmp.ltu p42,p40=a4,n4 } -{ .mfi; (p43) cmp.leu p42,p40=a4,n4 - (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7] - (p16) nop.i 0 };; -{ .mii; (p17) getf.sig n6=nlo[7] // 7: - (p49) cmp.ltu p50,p48=t[5],a4 - (p51) cmp.leu p50,p48=t[5],a4 };; - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mfi; (p16) nop.m 0 // 8: - (p16) xma.hu ahi[2]=ai2,bj[7],ahi[1] // ap[2]*b[i] - (p40) add a5=a5,n5 } // (p17) a5+=n5 -{ .mfi; (p42) add a5=a5,n5,1 - (p16) xma.lu alo[2]=ai2,bj[7],ahi[1] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig a1=alo[1] // 9: - (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5 - (p50) add t[4]=t[4],a5,1 };; -{ .mfi; (p16) nop.m 0 // 10: - (p16) xma.hu nhi[0]=ni0,mj[0],alo[0] // np[0]*m0 - (p40) cmp.ltu p43,p41=a5,n5 } -{ .mfi; (p42) cmp.leu p43,p41=a5,n5 - (p16) xma.lu nlo[0]=ni0,mj[0],alo[0] - (p16) nop.i 0 };; -{ .mii; (p17) getf.sig n7=nlo[8] // 11: - (p48) cmp.ltu p51,p49=t[4],a5 - (p50) cmp.leu p51,p49=t[4],a5 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mfi; (p17) getf.sig n8=nhi[8] // 12: - (p16) xma.hu ahi[3]=ai3,bj[7],ahi[2] // ap[3]*b[i] - (p41) add a6=a6,n6 } // (p17) a6+=n6 -{ .mfi; (p43) add a6=a6,n6,1 - (p16) xma.lu alo[3]=ai3,bj[7],ahi[2] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig a2=alo[2] // 13: - (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6 - (p51) add t[3]=t[3],a6,1 };; -{ .mfi; (p16) nop.m 0 // 14: - (p16) xma.hu nhi[1]=ni1,mj[0],nhi[0] // np[1]*m0 - (p41) cmp.ltu p42,p40=a6,n6 } -{ .mfi; (p43) cmp.leu p42,p40=a6,n6 - (p16) xma.lu nlo[1]=ni1,mj[0],nhi[0] - (p16) nop.i 0 };; -{ .mii; (p16) nop.m 0 // 15: - (p49) cmp.ltu p50,p48=t[3],a6 - (p51) cmp.leu p50,p48=t[3],a6 };; - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mfi; (p16) nop.m 0 // 16: - (p16) xma.hu ahi[4]=ai4,bj[7],ahi[3] // ap[4]*b[i] - (p40) add a7=a7,n7 } // (p17) a7+=n7 -{ .mfi; (p42) add a7=a7,n7,1 - (p16) xma.lu alo[4]=ai4,bj[7],ahi[3] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig a3=alo[3] // 17: - (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7 - (p50) add t[2]=t[2],a7,1 };; -{ .mfi; (p16) nop.m 0 // 18: - (p16) xma.hu nhi[2]=ni2,mj[0],nhi[1] // np[2]*m0 - (p40) cmp.ltu p43,p41=a7,n7 } -{ .mfi; (p42) cmp.leu p43,p41=a7,n7 - (p16) xma.lu nlo[2]=ni2,mj[0],nhi[1] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig n1=nlo[1] // 19: - (p48) cmp.ltu p51,p49=t[2],a7 - (p50) cmp.leu p51,p49=t[2],a7 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mfi; (p16) nop.m 0 // 20: - (p16) xma.hu ahi[5]=ai5,bj[7],ahi[4] // ap[5]*b[i] - (p41) add a8=a8,n8 } // (p17) a8+=n8 -{ .mfi; (p43) add a8=a8,n8,1 - (p16) xma.lu alo[5]=ai5,bj[7],ahi[4] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig a4=alo[4] // 21: - (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8 - (p51) add t[1]=t[1],a8,1 };; -{ .mfi; (p16) nop.m 0 // 22: - (p16) xma.hu nhi[3]=ni3,mj[0],nhi[2] // np[3]*m0 - (p41) cmp.ltu p42,p40=a8,n8 } -{ .mfi; (p43) cmp.leu p42,p40=a8,n8 - (p16) xma.lu nlo[3]=ni3,mj[0],nhi[2] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig n2=nlo[2] // 23: - (p49) cmp.ltu p50,p48=t[1],a8 - (p51) cmp.leu p50,p48=t[1],a8 };; -{ .mfi; (p16) nop.m 0 // 24: - (p16) xma.hu ahi[6]=ai6,bj[7],ahi[5] // ap[6]*b[i] - (p16) add a1=a1,n1 } // (p16) a1+=n1 -{ .mfi; (p16) nop.m 0 - (p16) xma.lu alo[6]=ai6,bj[7],ahi[5] - (p17) mov t[0]=r0 };; -{ .mii; (p16) getf.sig a5=alo[5] // 25: - (p16) add t0=t[7],a1 // (p16) t[7]+=a1 - (p42) add t[0]=t[0],r0,1 };; -{ .mfi; (p16) setf.sig tf[0]=t0 // 26: - (p16) xma.hu nhi[4]=ni4,mj[0],nhi[3] // np[4]*m0 - (p50) add t[0]=t[0],r0,1 } -{ .mfi; (p16) cmp.ltu.unc p42,p40=a1,n1 - (p16) xma.lu nlo[4]=ni4,mj[0],nhi[3] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig n3=nlo[3] // 27: - (p16) cmp.ltu.unc p50,p48=t0,a1 - (p16) nop.i 0 };; - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mfi; (p16) nop.m 0 // 28: - (p16) xma.hu ahi[7]=ai7,bj[7],ahi[6] // ap[7]*b[i] - (p40) add a2=a2,n2 } // (p16) a2+=n2 -{ .mfi; (p42) add a2=a2,n2,1 - (p16) xma.lu alo[7]=ai7,bj[7],ahi[6] - (p16) nop.i 0 };; -{ .mii; (p16) getf.sig a6=alo[6] // 29: - (p48) add t[6]=t[6],a2 // (p16) t[6]+=a2 - (p50) add t[6]=t[6],a2,1 };; -{ .mfi; (p16) nop.m 0 // 30: - (p16) xma.hu nhi[5]=ni5,mj[0],nhi[4] // np[5]*m0 - (p40) cmp.ltu p41,p39=a2,n2 } -{ .mfi; (p42) cmp.leu p41,p39=a2,n2 - (p16) xma.lu nlo[5]=ni5,mj[0],nhi[4] - (p16) nop.i 0 };; -{ .mfi; (p16) getf.sig n4=nlo[4] // 31: - (p16) nop.f 0 - (p48) cmp.ltu p49,p47=t[6],a2 } -{ .mfb; (p50) cmp.leu p49,p47=t[6],a2 - (p16) nop.f 0 - br.ctop.sptk.many .Louter_8_ctop };; -.Louter_8_cend: - -// above loop has to execute one more time, without (p16), which is -// replaced with merged move of np[8] to GPR bank - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mmi; (p0) getf.sig n1=ni0 // 0: - (p40) add a3=a3,n3 // (p17) a3+=n3 - (p42) add a3=a3,n3,1 };; -{ .mii; (p17) getf.sig a7=alo[8] // 1: - (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3 - (p50) add t[6]=t[6],a3,1 };; -{ .mfi; (p17) getf.sig a8=ahi[8] // 2: - (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0 - (p40) cmp.ltu p43,p41=a3,n3 } -{ .mfi; (p42) cmp.leu p43,p41=a3,n3 - (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6] - (p0) nop.i 0 };; -{ .mii; (p17) getf.sig n5=nlo[6] // 3: - (p48) cmp.ltu p51,p49=t[6],a3 - (p50) cmp.leu p51,p49=t[6],a3 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mmi; (p0) getf.sig n2=ni1 // 4: - (p41) add a4=a4,n4 // (p17) a4+=n4 - (p43) add a4=a4,n4,1 };; -{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4 - (p0) nop.f 0 - (p51) add t[5]=t[5],a4,1 };; -{ .mfi; (p0) getf.sig n3=ni2 // 6: - (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0 - (p41) cmp.ltu p42,p40=a4,n4 } -{ .mfi; (p43) cmp.leu p42,p40=a4,n4 - (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7] - (p0) nop.i 0 };; -{ .mii; (p17) getf.sig n6=nlo[7] // 7: - (p49) cmp.ltu p50,p48=t[5],a4 - (p51) cmp.leu p50,p48=t[5],a4 };; - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mii; (p0) getf.sig n4=ni3 // 8: - (p40) add a5=a5,n5 // (p17) a5+=n5 - (p42) add a5=a5,n5,1 };; -{ .mii; (p0) nop.m 0 // 9: - (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5 - (p50) add t[4]=t[4],a5,1 };; -{ .mii; (p0) nop.m 0 // 10: - (p40) cmp.ltu p43,p41=a5,n5 - (p42) cmp.leu p43,p41=a5,n5 };; -{ .mii; (p17) getf.sig n7=nlo[8] // 11: - (p48) cmp.ltu p51,p49=t[4],a5 - (p50) cmp.leu p51,p49=t[4],a5 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mii; (p17) getf.sig n8=nhi[8] // 12: - (p41) add a6=a6,n6 // (p17) a6+=n6 - (p43) add a6=a6,n6,1 };; -{ .mii; (p0) getf.sig n5=ni4 // 13: - (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6 - (p51) add t[3]=t[3],a6,1 };; -{ .mii; (p0) nop.m 0 // 14: - (p41) cmp.ltu p42,p40=a6,n6 - (p43) cmp.leu p42,p40=a6,n6 };; -{ .mii; (p0) getf.sig n6=ni5 // 15: - (p49) cmp.ltu p50,p48=t[3],a6 - (p51) cmp.leu p50,p48=t[3],a6 };; - .pred.rel "mutex",p40,p42 - .pred.rel "mutex",p48,p50 -{ .mii; (p0) nop.m 0 // 16: - (p40) add a7=a7,n7 // (p17) a7+=n7 - (p42) add a7=a7,n7,1 };; -{ .mii; (p0) nop.m 0 // 17: - (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7 - (p50) add t[2]=t[2],a7,1 };; -{ .mii; (p0) nop.m 0 // 18: - (p40) cmp.ltu p43,p41=a7,n7 - (p42) cmp.leu p43,p41=a7,n7 };; -{ .mii; (p0) getf.sig n7=ni6 // 19: - (p48) cmp.ltu p51,p49=t[2],a7 - (p50) cmp.leu p51,p49=t[2],a7 };; - .pred.rel "mutex",p41,p43 - .pred.rel "mutex",p49,p51 -{ .mii; (p0) nop.m 0 // 20: - (p41) add a8=a8,n8 // (p17) a8+=n8 - (p43) add a8=a8,n8,1 };; -{ .mmi; (p0) nop.m 0 // 21: - (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8 - (p51) add t[1]=t[1],a8,1 } -{ .mmi; (p17) mov t[0]=r0 - (p41) cmp.ltu p42,p40=a8,n8 - (p43) cmp.leu p42,p40=a8,n8 };; -{ .mmi; (p0) getf.sig n8=ni7 // 22: - (p49) cmp.ltu p50,p48=t[1],a8 - (p51) cmp.leu p50,p48=t[1],a8 } -{ .mmi; (p42) add t[0]=t[0],r0,1 - (p0) add r16=-7*16,prevsp - (p0) add r17=-6*16,prevsp };; - -// subtract np[8] from carrybit|tmp[8] -// carrybit|tmp[8] layout upon exit from above loop is: -// t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant) -{ .mmi; (p50)add t[0]=t[0],r0,1 - add r18=-5*16,prevsp - sub n1=t0,n1 };; -{ .mmi; cmp.gtu p34,p32=n1,t0;; - .pred.rel "mutex",p32,p34 - (p32)sub n2=t[7],n2 - (p34)sub n2=t[7],n2,1 };; -{ .mii; (p32)cmp.gtu p35,p33=n2,t[7] - (p34)cmp.geu p35,p33=n2,t[7];; - .pred.rel "mutex",p33,p35 - (p33)sub n3=t[6],n3 } -{ .mmi; (p35)sub n3=t[6],n3,1;; - (p33)cmp.gtu p34,p32=n3,t[6] - (p35)cmp.geu p34,p32=n3,t[6] };; - .pred.rel "mutex",p32,p34 -{ .mii; (p32)sub n4=t[5],n4 - (p34)sub n4=t[5],n4,1;; - (p32)cmp.gtu p35,p33=n4,t[5] } -{ .mmi; (p34)cmp.geu p35,p33=n4,t[5];; - .pred.rel "mutex",p33,p35 - (p33)sub n5=t[4],n5 - (p35)sub n5=t[4],n5,1 };; -{ .mii; (p33)cmp.gtu p34,p32=n5,t[4] - (p35)cmp.geu p34,p32=n5,t[4];; - .pred.rel "mutex",p32,p34 - (p32)sub n6=t[3],n6 } -{ .mmi; (p34)sub n6=t[3],n6,1;; - (p32)cmp.gtu p35,p33=n6,t[3] - (p34)cmp.geu p35,p33=n6,t[3] };; - .pred.rel "mutex",p33,p35 -{ .mii; (p33)sub n7=t[2],n7 - (p35)sub n7=t[2],n7,1;; - (p33)cmp.gtu p34,p32=n7,t[2] } -{ .mmi; (p35)cmp.geu p34,p32=n7,t[2];; - .pred.rel "mutex",p32,p34 - (p32)sub n8=t[1],n8 - (p34)sub n8=t[1],n8,1 };; -{ .mii; (p32)cmp.gtu p35,p33=n8,t[1] - (p34)cmp.geu p35,p33=n8,t[1];; - .pred.rel "mutex",p33,p35 - (p33)sub a8=t[0],r0 } -{ .mmi; (p35)sub a8=t[0],r0,1;; - (p33)cmp.gtu p34,p32=a8,t[0] - (p35)cmp.geu p34,p32=a8,t[0] };; - -// save the result, either tmp[num] or tmp[num]-np[num] - .pred.rel "mutex",p32,p34 -{ .mmi; (p32)st8 [rptr]=n1,8 - (p34)st8 [rptr]=t0,8 - add r19=-4*16,prevsp};; -{ .mmb; (p32)st8 [rptr]=n2,8 - (p34)st8 [rptr]=t[7],8 - (p5)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n3,8 - (p34)st8 [rptr]=t[6],8 - (p7)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n4,8 - (p34)st8 [rptr]=t[5],8 - (p9)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n5,8 - (p34)st8 [rptr]=t[4],8 - (p11)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n6,8 - (p34)st8 [rptr]=t[3],8 - (p13)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n7,8 - (p34)st8 [rptr]=t[2],8 - (p15)br.cond.dpnt.few .Ldone };; -{ .mmb; (p32)st8 [rptr]=n8,8 - (p34)st8 [rptr]=t[1],8 - nop.b 0 };; -.Ldone: // epilogue -{ .mmi; ldf.fill f16=[r16],64 - ldf.fill f17=[r17],64 - nop.i 0 } -{ .mmi; ldf.fill f18=[r18],64 - ldf.fill f19=[r19],64 - mov pr=prevpr,0x1ffff };; -{ .mmi; ldf.fill f20=[r16] - ldf.fill f21=[r17] - mov ar.lc=prevlc } -{ .mmi; ldf.fill f22=[r18] - ldf.fill f23=[r19] - mov ret0=1 } // signal "handled" -{ .mib; rum 1<<5 - .restore sp - mov sp=prevsp - br.ret.sptk.many b0 };; -.endp bn_mul_mont_8# - -.type copyright#,\@object -copyright: -stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by " -___ - -$output=shift and open STDOUT,">$output"; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/ia64.S b/drivers/builtin_openssl2/crypto/bn/asm/ia64.S deleted file mode 100644 index a9a42abfc3..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/ia64.S +++ /dev/null @@ -1,1555 +0,0 @@ -.explicit -.text -.ident "ia64.S, Version 2.1" -.ident "IA-64 ISA artwork by Andy Polyakov " - -// -// ==================================================================== -// Written by Andy Polyakov for the OpenSSL -// project. -// -// Rights for redistribution and usage in source and binary forms are -// granted according to the OpenSSL license. Warranty of any kind is -// disclaimed. -// ==================================================================== -// -// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is -// different from Itanium to this module viewpoint. Most notably, is it -// "wider" than Itanium? Can you experience loop scalability as -// discussed in commentary sections? Not really:-( Itanium2 has 6 -// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to -// spin twice as fast, as I need 8 IALU ports. Amount of floating point -// ports is the same, i.e. 2, while I need 4. In other words, to this -// module Itanium2 remains effectively as "wide" as Itanium. Yet it's -// essentially different in respect to this module, and a re-tune was -// required. Well, because some intruction latencies has changed. Most -// noticeably those intensively used: -// -// Itanium Itanium2 -// ldf8 9 6 L2 hit -// ld8 2 1 L1 hit -// getf 2 5 -// xma[->getf] 7[+1] 4[+0] -// add[->st8] 1[+1] 1[+0] -// -// What does it mean? You might ratiocinate that the original code -// should run just faster... Because sum of latencies is smaller... -// Wrong! Note that getf latency increased. This means that if a loop is -// scheduled for lower latency (as they were), then it will suffer from -// stall condition and the code will therefore turn anti-scalable, e.g. -// original bn_mul_words spun at 5*n or 2.5 times slower than expected -// on Itanium2! What to do? Reschedule loops for Itanium2? But then -// Itanium would exhibit anti-scalability. So I've chosen to reschedule -// for worst latency for every instruction aiming for best *all-round* -// performance. - -// Q. How much faster does it get? -// A. Here is the output from 'openssl speed rsa dsa' for vanilla -// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat -// Linux 7.1 2.96-81): -// -// sign verify sign/s verify/s -// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2 -// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1 -// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9 -// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1 -// sign verify sign/s verify/s -// dsa 512 bits 0.0035s 0.0043s 288.3 234.8 -// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2 -// -// And here is similar output but for this assembler -// implementation:-) -// -// sign verify sign/s verify/s -// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5 -// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1 -// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3 -// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5 -// sign verify sign/s verify/s -// dsa 512 bits 0.0012s 0.0013s 891.9 756.6 -// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2 -// -// Yes, you may argue that it's not fair comparison as it's -// possible to craft the C implementation with BN_UMULT_HIGH -// inline assembler macro. But of course! Here is the output -// with the macro: -// -// sign verify sign/s verify/s -// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0 -// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7 -// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3 -// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7 -// sign verify sign/s verify/s -// dsa 512 bits 0.0016s 0.0020s 613.1 510.5 -// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9 -// -// My code is still way faster, huh:-) And I believe that even -// higher performance can be achieved. Note that as keys get -// longer, performance gain is larger. Why? According to the -// profiler there is another player in the field, namely -// BN_from_montgomery consuming larger and larger portion of CPU -// time as keysize decreases. I therefore consider putting effort -// to assembler implementation of the following routine: -// -// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0) -// { -// int i,j; -// BN_ULONG v; -// -// for (i=0; i for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# This module doesn't present direct interest for OpenSSL, because it -# doesn't provide better performance for longer keys, at least not on -# in-order-execution cores. While 512-bit RSA sign operations can be -# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and -# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from -# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA -# verify:-( All comparisons are against bn_mul_mont-free assembler. -# The module might be of interest to embedded system developers, as -# the code is smaller than 1KB, yet offers >3x improvement on MIPS64 -# and 75-30% [less for longer keys] on MIPS32 over compiler-generated -# code. - -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 - -if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 - $REG_S="sd"; - $REG_L="ld"; - $SZREG=8; -} else { - $PTR_ADD="add"; - $PTR_SUB="sub"; - $REG_S="sw"; - $REG_L="lw"; - $SZREG=4; -} -$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000; -# -# -# -###################################################################### - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -if ($flavour =~ /64|n32/i) { - $LD="ld"; - $ST="sd"; - $MULTU="dmultu"; - $ADDU="daddu"; - $SUBU="dsubu"; - $BNSZ=8; -} else { - $LD="lw"; - $ST="sw"; - $MULTU="multu"; - $ADDU="addu"; - $SUBU="subu"; - $BNSZ=4; -} - -# int bn_mul_mont( -$rp=$a0; # BN_ULONG *rp, -$ap=$a1; # const BN_ULONG *ap, -$bp=$a2; # const BN_ULONG *bp, -$np=$a3; # const BN_ULONG *np, -$n0=$a4; # const BN_ULONG *n0, -$num=$a5; # int num); - -$lo0=$a6; -$hi0=$a7; -$lo1=$t1; -$hi1=$t2; -$aj=$s0; -$bi=$s1; -$nj=$s2; -$tp=$s3; -$alo=$s4; -$ahi=$s5; -$nlo=$s6; -$nhi=$s7; -$tj=$s8; -$i=$s9; -$j=$s10; -$m1=$s11; - -$FRAMESIZE=14; - -$code=<<___; -.text - -.set noat -.set noreorder - -.align 5 -.globl bn_mul_mont -.ent bn_mul_mont -bn_mul_mont: -___ -$code.=<<___ if ($flavour =~ /o32/i); - lw $n0,16($sp) - lw $num,20($sp) -___ -$code.=<<___; - slt $at,$num,4 - bnez $at,1f - li $t0,0 - slt $at,$num,17 # on in-order CPU - bnez $at,bn_mul_mont_internal - nop -1: jr $ra - li $a0,0 -.end bn_mul_mont - -.align 5 -.ent bn_mul_mont_internal -bn_mul_mont_internal: - .frame $fp,$FRAMESIZE*$SZREG,$ra - .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG - $PTR_SUB $sp,$FRAMESIZE*$SZREG - $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp) - $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp) - $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp) - $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp) - $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp) - $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp) - $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp) - $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp) - $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp) - $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp) - $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp) - $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp) -___ -$code.=<<___; - move $fp,$sp - - .set reorder - $LD $n0,0($n0) - $LD $bi,0($bp) # bp[0] - $LD $aj,0($ap) # ap[0] - $LD $nj,0($np) # np[0] - - $PTR_SUB $sp,2*$BNSZ # place for two extra words - sll $num,`log($BNSZ)/log(2)` - li $at,-4096 - $PTR_SUB $sp,$num - and $sp,$at - - $MULTU $aj,$bi - $LD $alo,$BNSZ($ap) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $MULTU $lo0,$n0 - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - $MULTU $nlo,$m1 - $ADDU $lo1,$lo0 - sltu $at,$lo1,$lo0 - $ADDU $hi1,$at - mflo $nlo - mfhi $nhi - - move $tp,$sp - li $j,2*$BNSZ -.align 4 -.L1st: - .set noreorder - $PTR_ADD $aj,$ap,$j - $PTR_ADD $nj,$np,$j - $LD $aj,($aj) - $LD $nj,($nj) - - $MULTU $aj,$bi - $ADDU $lo0,$alo,$hi0 - $ADDU $lo1,$nlo,$hi1 - sltu $at,$lo0,$hi0 - sltu $t0,$lo1,$hi1 - $ADDU $hi0,$ahi,$at - $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi - - $ADDU $lo1,$lo0 - sltu $at,$lo1,$lo0 - $MULTU $nj,$m1 - $ADDU $hi1,$at - addu $j,$BNSZ - $ST $lo1,($tp) - sltu $t0,$j,$num - mflo $nlo - mfhi $nhi - - bnez $t0,.L1st - $PTR_ADD $tp,$BNSZ - .set reorder - - $ADDU $lo0,$alo,$hi0 - sltu $at,$lo0,$hi0 - $ADDU $hi0,$ahi,$at - - $ADDU $lo1,$nlo,$hi1 - sltu $t0,$lo1,$hi1 - $ADDU $hi1,$nhi,$t0 - $ADDU $lo1,$lo0 - sltu $at,$lo1,$lo0 - $ADDU $hi1,$at - - $ST $lo1,($tp) - - $ADDU $hi1,$hi0 - sltu $at,$hi1,$hi0 - $ST $hi1,$BNSZ($tp) - $ST $at,2*$BNSZ($tp) - - li $i,$BNSZ -.align 4 -.Louter: - $PTR_ADD $bi,$bp,$i - $LD $bi,($bi) - $LD $aj,($ap) - $LD $alo,$BNSZ($ap) - $LD $tj,($sp) - - $MULTU $aj,$bi - $LD $nj,($np) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $ADDU $lo0,$tj - $MULTU $lo0,$n0 - sltu $at,$lo0,$tj - $ADDU $hi0,$at - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - - $MULTU $nlo,$m1 - $ADDU $lo1,$lo0 - sltu $at,$lo1,$lo0 - $ADDU $hi1,$at - mflo $nlo - mfhi $nhi - - move $tp,$sp - li $j,2*$BNSZ - $LD $tj,$BNSZ($tp) -.align 4 -.Linner: - .set noreorder - $PTR_ADD $aj,$ap,$j - $PTR_ADD $nj,$np,$j - $LD $aj,($aj) - $LD $nj,($nj) - - $MULTU $aj,$bi - $ADDU $lo0,$alo,$hi0 - $ADDU $lo1,$nlo,$hi1 - sltu $at,$lo0,$hi0 - sltu $t0,$lo1,$hi1 - $ADDU $hi0,$ahi,$at - $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi - - $ADDU $lo0,$tj - addu $j,$BNSZ - $MULTU $nj,$m1 - sltu $at,$lo0,$tj - $ADDU $lo1,$lo0 - $ADDU $hi0,$at - sltu $t0,$lo1,$lo0 - $LD $tj,2*$BNSZ($tp) - $ADDU $hi1,$t0 - sltu $at,$j,$num - mflo $nlo - mfhi $nhi - $ST $lo1,($tp) - bnez $at,.Linner - $PTR_ADD $tp,$BNSZ - .set reorder - - $ADDU $lo0,$alo,$hi0 - sltu $at,$lo0,$hi0 - $ADDU $hi0,$ahi,$at - $ADDU $lo0,$tj - sltu $t0,$lo0,$tj - $ADDU $hi0,$t0 - - $LD $tj,2*$BNSZ($tp) - $ADDU $lo1,$nlo,$hi1 - sltu $at,$lo1,$hi1 - $ADDU $hi1,$nhi,$at - $ADDU $lo1,$lo0 - sltu $t0,$lo1,$lo0 - $ADDU $hi1,$t0 - $ST $lo1,($tp) - - $ADDU $lo1,$hi1,$hi0 - sltu $hi1,$lo1,$hi0 - $ADDU $lo1,$tj - sltu $at,$lo1,$tj - $ADDU $hi1,$at - $ST $lo1,$BNSZ($tp) - $ST $hi1,2*$BNSZ($tp) - - addu $i,$BNSZ - sltu $t0,$i,$num - bnez $t0,.Louter - - .set noreorder - $PTR_ADD $tj,$sp,$num # &tp[num] - move $tp,$sp - move $ap,$sp - li $hi0,0 # clear borrow bit - -.align 4 -.Lsub: $LD $lo0,($tp) - $LD $lo1,($np) - $PTR_ADD $tp,$BNSZ - $PTR_ADD $np,$BNSZ - $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i] - sgtu $at,$lo1,$lo0 - $SUBU $lo0,$lo1,$hi0 - sgtu $hi0,$lo0,$lo1 - $ST $lo0,($rp) - or $hi0,$at - sltu $at,$tp,$tj - bnez $at,.Lsub - $PTR_ADD $rp,$BNSZ - - $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit - move $tp,$sp - $PTR_SUB $rp,$num # restore rp - not $hi1,$hi0 - - and $ap,$hi0,$sp - and $bp,$hi1,$rp - or $ap,$ap,$bp # ap=borrow?tp:rp - -.align 4 -.Lcopy: $LD $aj,($ap) - $PTR_ADD $ap,$BNSZ - $ST $zero,($tp) - $PTR_ADD $tp,$BNSZ - sltu $at,$tp,$tj - $ST $aj,($rp) - bnez $at,.Lcopy - $PTR_ADD $rp,$BNSZ - - li $a0,1 - li $t0,1 - - .set noreorder - move $sp,$fp - $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp) - $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp) - $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp) - $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp) - $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp) - $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp) - $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp) - $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp) - $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp) - $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp) - $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp) - $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE*$SZREG -.end bn_mul_mont_internal -.rdata -.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/mips.pl b/drivers/builtin_openssl2/crypto/bn/asm/mips.pl deleted file mode 100644 index 215c9a7483..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/mips.pl +++ /dev/null @@ -1,2234 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. -# -# Rights for redistribution and usage in source and binary forms are -# granted according to the OpenSSL license. Warranty of any kind is -# disclaimed. -# ==================================================================== - - -# July 1999 -# -# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. -# -# The module is designed to work with either of the "new" MIPS ABI(5), -# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under -# IRIX 5.x not only because it doesn't support new ABIs but also -# because 5.x kernels put R4x00 CPU into 32-bit mode and all those -# 64-bit instructions (daddu, dmultu, etc.) found below gonna only -# cause illegal instruction exception:-( -# -# In addition the code depends on preprocessor flags set up by MIPSpro -# compiler driver (either as or cc) and therefore (probably?) can't be -# compiled by the GNU assembler. GNU C driver manages fine though... -# I mean as long as -mmips-as is specified or is the default option, -# because then it simply invokes /usr/bin/as which in turn takes -# perfect care of the preprocessor definitions. Another neat feature -# offered by the MIPSpro assembler is an optimization pass. This gave -# me the opportunity to have the code looking more regular as all those -# architecture dependent instruction rescheduling details were left to -# the assembler. Cool, huh? -# -# Performance improvement is astonishing! 'apps/openssl speed rsa dsa' -# goes way over 3 times faster! -# -# - -# October 2010 -# -# Adapt the module even for 32-bit ABIs and other OSes. The former was -# achieved by mechanical replacement of 64-bit arithmetic instructions -# such as dmultu, daddu, etc. with their 32-bit counterparts and -# adjusting offsets denoting multiples of BN_ULONG. Above mentioned -# >3x performance improvement naturally does not apply to 32-bit code -# [because there is no instruction 32-bit compiler can't use], one -# has to content with 40-85% improvement depending on benchmark and -# key length, more for longer keys. - -$flavour = shift; -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -if ($flavour =~ /64|n32/i) { - $LD="ld"; - $ST="sd"; - $MULTU="dmultu"; - $DIVU="ddivu"; - $ADDU="daddu"; - $SUBU="dsubu"; - $SRL="dsrl"; - $SLL="dsll"; - $BNSZ=8; - $PTR_ADD="daddu"; - $PTR_SUB="dsubu"; - $SZREG=8; - $REG_S="sd"; - $REG_L="ld"; -} else { - $LD="lw"; - $ST="sw"; - $MULTU="multu"; - $DIVU="divu"; - $ADDU="addu"; - $SUBU="subu"; - $SRL="srl"; - $SLL="sll"; - $BNSZ=4; - $PTR_ADD="addu"; - $PTR_SUB="subu"; - $SZREG=4; - $REG_S="sw"; - $REG_L="lw"; - $code=".set mips2\n"; -} - -# Below is N32/64 register layout used in the original module. -# -($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7); -# -# No special adaptation is required for O32. NUBI on the other hand -# is treated by saving/restoring ($v1,$t0..$t3). - -$gp=$v1 if ($flavour =~ /nubi/i); - -$minus4=$v1; - -$code.=<<___; -.rdata -.asciiz "mips3.s, Version 1.2" -.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov " - -.text -.set noat - -.align 5 -.globl bn_mul_add_words -.ent bn_mul_add_words -bn_mul_add_words: - .set noreorder - bgtz $a2,bn_mul_add_words_internal - move $v0,$zero - jr $ra - move $a0,$v0 -.end bn_mul_add_words - -.align 5 -.ent bn_mul_add_words_internal -bn_mul_add_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - li $minus4,-4 - and $ta0,$a2,$minus4 - beqz $ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - $LD $t0,0($a1) - $MULTU $t0,$a3 - $LD $t1,0($a0) - $LD $t2,$BNSZ($a1) - $LD $t3,$BNSZ($a0) - $LD $ta0,2*$BNSZ($a1) - $LD $ta1,2*$BNSZ($a0) - $ADDU $t1,$v0 - sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit - # values", but it seems to work fine - # even on 64-bit registers. - mflo $at - mfhi $t0 - $ADDU $t1,$at - $ADDU $v0,$t0 - $MULTU $t2,$a3 - sltu $at,$t1,$at - $ST $t1,0($a0) - $ADDU $v0,$at - - $LD $ta2,3*$BNSZ($a1) - $LD $ta3,3*$BNSZ($a0) - $ADDU $t3,$v0 - sltu $v0,$t3,$v0 - mflo $at - mfhi $t2 - $ADDU $t3,$at - $ADDU $v0,$t2 - $MULTU $ta0,$a3 - sltu $at,$t3,$at - $ST $t3,$BNSZ($a0) - $ADDU $v0,$at - - subu $a2,4 - $PTR_ADD $a0,4*$BNSZ - $PTR_ADD $a1,4*$BNSZ - $ADDU $ta1,$v0 - sltu $v0,$ta1,$v0 - mflo $at - mfhi $ta0 - $ADDU $ta1,$at - $ADDU $v0,$ta0 - $MULTU $ta2,$a3 - sltu $at,$ta1,$at - $ST $ta1,-2*$BNSZ($a0) - $ADDU $v0,$at - - - and $ta0,$a2,$minus4 - $ADDU $ta3,$v0 - sltu $v0,$ta3,$v0 - mflo $at - mfhi $ta2 - $ADDU $ta3,$at - $ADDU $v0,$ta2 - sltu $at,$ta3,$at - $ST $ta3,-$BNSZ($a0) - .set noreorder - bgtz $ta0,.L_bn_mul_add_words_loop - $ADDU $v0,$at - - beqz $a2,.L_bn_mul_add_words_return - nop - -.L_bn_mul_add_words_tail: - .set reorder - $LD $t0,0($a1) - $MULTU $t0,$a3 - $LD $t1,0($a0) - subu $a2,1 - $ADDU $t1,$v0 - sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 - $ADDU $t1,$at - $ADDU $v0,$t0 - sltu $at,$t1,$at - $ST $t1,0($a0) - $ADDU $v0,$at - beqz $a2,.L_bn_mul_add_words_return - - $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 - $LD $t1,$BNSZ($a0) - subu $a2,1 - $ADDU $t1,$v0 - sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 - $ADDU $t1,$at - $ADDU $v0,$t0 - sltu $at,$t1,$at - $ST $t1,$BNSZ($a0) - $ADDU $v0,$at - beqz $a2,.L_bn_mul_add_words_return - - $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - $LD $t1,2*$BNSZ($a0) - $ADDU $t1,$v0 - sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 - $ADDU $t1,$at - $ADDU $v0,$t0 - sltu $at,$t1,$at - $ST $t1,2*$BNSZ($a0) - $ADDU $v0,$at - -.L_bn_mul_add_words_return: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 -.end bn_mul_add_words_internal - -.align 5 -.globl bn_mul_words -.ent bn_mul_words -bn_mul_words: - .set noreorder - bgtz $a2,bn_mul_words_internal - move $v0,$zero - jr $ra - move $a0,$v0 -.end bn_mul_words - -.align 5 -.ent bn_mul_words_internal -bn_mul_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - li $minus4,-4 - and $ta0,$a2,$minus4 - beqz $ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - $LD $t0,0($a1) - $MULTU $t0,$a3 - $LD $t2,$BNSZ($a1) - $LD $ta0,2*$BNSZ($a1) - $LD $ta2,3*$BNSZ($a1) - mflo $at - mfhi $t0 - $ADDU $v0,$at - sltu $t1,$v0,$at - $MULTU $t2,$a3 - $ST $v0,0($a0) - $ADDU $v0,$t1,$t0 - - subu $a2,4 - $PTR_ADD $a0,4*$BNSZ - $PTR_ADD $a1,4*$BNSZ - mflo $at - mfhi $t2 - $ADDU $v0,$at - sltu $t3,$v0,$at - $MULTU $ta0,$a3 - $ST $v0,-3*$BNSZ($a0) - $ADDU $v0,$t3,$t2 - - mflo $at - mfhi $ta0 - $ADDU $v0,$at - sltu $ta1,$v0,$at - $MULTU $ta2,$a3 - $ST $v0,-2*$BNSZ($a0) - $ADDU $v0,$ta1,$ta0 - - and $ta0,$a2,$minus4 - mflo $at - mfhi $ta2 - $ADDU $v0,$at - sltu $ta3,$v0,$at - $ST $v0,-$BNSZ($a0) - .set noreorder - bgtz $ta0,.L_bn_mul_words_loop - $ADDU $v0,$ta3,$ta2 - - beqz $a2,.L_bn_mul_words_return - nop - -.L_bn_mul_words_tail: - .set reorder - $LD $t0,0($a1) - $MULTU $t0,$a3 - subu $a2,1 - mflo $at - mfhi $t0 - $ADDU $v0,$at - sltu $t1,$v0,$at - $ST $v0,0($a0) - $ADDU $v0,$t1,$t0 - beqz $a2,.L_bn_mul_words_return - - $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 - subu $a2,1 - mflo $at - mfhi $t0 - $ADDU $v0,$at - sltu $t1,$v0,$at - $ST $v0,$BNSZ($a0) - $ADDU $v0,$t1,$t0 - beqz $a2,.L_bn_mul_words_return - - $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - mflo $at - mfhi $t0 - $ADDU $v0,$at - sltu $t1,$v0,$at - $ST $v0,2*$BNSZ($a0) - $ADDU $v0,$t1,$t0 - -.L_bn_mul_words_return: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 -.end bn_mul_words_internal - -.align 5 -.globl bn_sqr_words -.ent bn_sqr_words -bn_sqr_words: - .set noreorder - bgtz $a2,bn_sqr_words_internal - move $v0,$zero - jr $ra - move $a0,$v0 -.end bn_sqr_words - -.align 5 -.ent bn_sqr_words_internal -bn_sqr_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - li $minus4,-4 - and $ta0,$a2,$minus4 - beqz $ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - $LD $t0,0($a1) - $MULTU $t0,$t0 - $LD $t2,$BNSZ($a1) - $LD $ta0,2*$BNSZ($a1) - $LD $ta2,3*$BNSZ($a1) - mflo $t1 - mfhi $t0 - $ST $t1,0($a0) - $ST $t0,$BNSZ($a0) - - $MULTU $t2,$t2 - subu $a2,4 - $PTR_ADD $a0,8*$BNSZ - $PTR_ADD $a1,4*$BNSZ - mflo $t3 - mfhi $t2 - $ST $t3,-6*$BNSZ($a0) - $ST $t2,-5*$BNSZ($a0) - - $MULTU $ta0,$ta0 - mflo $ta1 - mfhi $ta0 - $ST $ta1,-4*$BNSZ($a0) - $ST $ta0,-3*$BNSZ($a0) - - - $MULTU $ta2,$ta2 - and $ta0,$a2,$minus4 - mflo $ta3 - mfhi $ta2 - $ST $ta3,-2*$BNSZ($a0) - - .set noreorder - bgtz $ta0,.L_bn_sqr_words_loop - $ST $ta2,-$BNSZ($a0) - - beqz $a2,.L_bn_sqr_words_return - nop - -.L_bn_sqr_words_tail: - .set reorder - $LD $t0,0($a1) - $MULTU $t0,$t0 - subu $a2,1 - mflo $t1 - mfhi $t0 - $ST $t1,0($a0) - $ST $t0,$BNSZ($a0) - beqz $a2,.L_bn_sqr_words_return - - $LD $t0,$BNSZ($a1) - $MULTU $t0,$t0 - subu $a2,1 - mflo $t1 - mfhi $t0 - $ST $t1,2*$BNSZ($a0) - $ST $t0,3*$BNSZ($a0) - beqz $a2,.L_bn_sqr_words_return - - $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$t0 - mflo $t1 - mfhi $t0 - $ST $t1,4*$BNSZ($a0) - $ST $t0,5*$BNSZ($a0) - -.L_bn_sqr_words_return: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 - -.end bn_sqr_words_internal - -.align 5 -.globl bn_add_words -.ent bn_add_words -bn_add_words: - .set noreorder - bgtz $a3,bn_add_words_internal - move $v0,$zero - jr $ra - move $a0,$v0 -.end bn_add_words - -.align 5 -.ent bn_add_words_internal -bn_add_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - li $minus4,-4 - and $at,$a3,$minus4 - beqz $at,.L_bn_add_words_tail - -.L_bn_add_words_loop: - $LD $t0,0($a1) - $LD $ta0,0($a2) - subu $a3,4 - $LD $t1,$BNSZ($a1) - and $at,$a3,$minus4 - $LD $t2,2*$BNSZ($a1) - $PTR_ADD $a2,4*$BNSZ - $LD $t3,3*$BNSZ($a1) - $PTR_ADD $a0,4*$BNSZ - $LD $ta1,-3*$BNSZ($a2) - $PTR_ADD $a1,4*$BNSZ - $LD $ta2,-2*$BNSZ($a2) - $LD $ta3,-$BNSZ($a2) - $ADDU $ta0,$t0 - sltu $t8,$ta0,$t0 - $ADDU $t0,$ta0,$v0 - sltu $v0,$t0,$ta0 - $ST $t0,-4*$BNSZ($a0) - $ADDU $v0,$t8 - - $ADDU $ta1,$t1 - sltu $t9,$ta1,$t1 - $ADDU $t1,$ta1,$v0 - sltu $v0,$t1,$ta1 - $ST $t1,-3*$BNSZ($a0) - $ADDU $v0,$t9 - - $ADDU $ta2,$t2 - sltu $t8,$ta2,$t2 - $ADDU $t2,$ta2,$v0 - sltu $v0,$t2,$ta2 - $ST $t2,-2*$BNSZ($a0) - $ADDU $v0,$t8 - - $ADDU $ta3,$t3 - sltu $t9,$ta3,$t3 - $ADDU $t3,$ta3,$v0 - sltu $v0,$t3,$ta3 - $ST $t3,-$BNSZ($a0) - - .set noreorder - bgtz $at,.L_bn_add_words_loop - $ADDU $v0,$t9 - - beqz $a3,.L_bn_add_words_return - nop - -.L_bn_add_words_tail: - .set reorder - $LD $t0,0($a1) - $LD $ta0,0($a2) - $ADDU $ta0,$t0 - subu $a3,1 - sltu $t8,$ta0,$t0 - $ADDU $t0,$ta0,$v0 - sltu $v0,$t0,$ta0 - $ST $t0,0($a0) - $ADDU $v0,$t8 - beqz $a3,.L_bn_add_words_return - - $LD $t1,$BNSZ($a1) - $LD $ta1,$BNSZ($a2) - $ADDU $ta1,$t1 - subu $a3,1 - sltu $t9,$ta1,$t1 - $ADDU $t1,$ta1,$v0 - sltu $v0,$t1,$ta1 - $ST $t1,$BNSZ($a0) - $ADDU $v0,$t9 - beqz $a3,.L_bn_add_words_return - - $LD $t2,2*$BNSZ($a1) - $LD $ta2,2*$BNSZ($a2) - $ADDU $ta2,$t2 - sltu $t8,$ta2,$t2 - $ADDU $t2,$ta2,$v0 - sltu $v0,$t2,$ta2 - $ST $t2,2*$BNSZ($a0) - $ADDU $v0,$t8 - -.L_bn_add_words_return: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 - -.end bn_add_words_internal - -.align 5 -.globl bn_sub_words -.ent bn_sub_words -bn_sub_words: - .set noreorder - bgtz $a3,bn_sub_words_internal - move $v0,$zero - jr $ra - move $a0,$zero -.end bn_sub_words - -.align 5 -.ent bn_sub_words_internal -bn_sub_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - li $minus4,-4 - and $at,$a3,$minus4 - beqz $at,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - $LD $t0,0($a1) - $LD $ta0,0($a2) - subu $a3,4 - $LD $t1,$BNSZ($a1) - and $at,$a3,$minus4 - $LD $t2,2*$BNSZ($a1) - $PTR_ADD $a2,4*$BNSZ - $LD $t3,3*$BNSZ($a1) - $PTR_ADD $a0,4*$BNSZ - $LD $ta1,-3*$BNSZ($a2) - $PTR_ADD $a1,4*$BNSZ - $LD $ta2,-2*$BNSZ($a2) - $LD $ta3,-$BNSZ($a2) - sltu $t8,$t0,$ta0 - $SUBU $ta0,$t0,$ta0 - $SUBU $t0,$ta0,$v0 - sgtu $v0,$t0,$ta0 - $ST $t0,-4*$BNSZ($a0) - $ADDU $v0,$t8 - - sltu $t9,$t1,$ta1 - $SUBU $ta1,$t1,$ta1 - $SUBU $t1,$ta1,$v0 - sgtu $v0,$t1,$ta1 - $ST $t1,-3*$BNSZ($a0) - $ADDU $v0,$t9 - - - sltu $t8,$t2,$ta2 - $SUBU $ta2,$t2,$ta2 - $SUBU $t2,$ta2,$v0 - sgtu $v0,$t2,$ta2 - $ST $t2,-2*$BNSZ($a0) - $ADDU $v0,$t8 - - sltu $t9,$t3,$ta3 - $SUBU $ta3,$t3,$ta3 - $SUBU $t3,$ta3,$v0 - sgtu $v0,$t3,$ta3 - $ST $t3,-$BNSZ($a0) - - .set noreorder - bgtz $at,.L_bn_sub_words_loop - $ADDU $v0,$t9 - - beqz $a3,.L_bn_sub_words_return - nop - -.L_bn_sub_words_tail: - .set reorder - $LD $t0,0($a1) - $LD $ta0,0($a2) - subu $a3,1 - sltu $t8,$t0,$ta0 - $SUBU $ta0,$t0,$ta0 - $SUBU $t0,$ta0,$v0 - sgtu $v0,$t0,$ta0 - $ST $t0,0($a0) - $ADDU $v0,$t8 - beqz $a3,.L_bn_sub_words_return - - $LD $t1,$BNSZ($a1) - subu $a3,1 - $LD $ta1,$BNSZ($a2) - sltu $t9,$t1,$ta1 - $SUBU $ta1,$t1,$ta1 - $SUBU $t1,$ta1,$v0 - sgtu $v0,$t1,$ta1 - $ST $t1,$BNSZ($a0) - $ADDU $v0,$t9 - beqz $a3,.L_bn_sub_words_return - - $LD $t2,2*$BNSZ($a1) - $LD $ta2,2*$BNSZ($a2) - sltu $t8,$t2,$ta2 - $SUBU $ta2,$t2,$ta2 - $SUBU $t2,$ta2,$v0 - sgtu $v0,$t2,$ta2 - $ST $t2,2*$BNSZ($a0) - $ADDU $v0,$t8 - -.L_bn_sub_words_return: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 -.end bn_sub_words_internal - -.align 5 -.globl bn_div_3_words -.ent bn_div_3_words -bn_div_3_words: - .set noreorder - move $a3,$a0 # we know that bn_div_words does not - # touch $a3, $ta2, $ta3 and preserves $a2 - # so that we can save two arguments - # and return address in registers - # instead of stack:-) - - $LD $a0,($a3) - move $ta2,$a1 - bne $a0,$a2,bn_div_3_words_internal - $LD $a1,-$BNSZ($a3) - li $v0,-1 - jr $ra - move $a0,$v0 -.end bn_div_3_words - -.align 5 -.ent bn_div_3_words_internal -bn_div_3_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - move $ta3,$ra - bal bn_div_words_internal - move $ra,$ta3 - $MULTU $ta2,$v0 - $LD $t2,-2*$BNSZ($a3) - move $ta0,$zero - mfhi $t1 - mflo $t0 - sltu $t8,$t1,$a1 -.L_bn_div_3_words_inner_loop: - bnez $t8,.L_bn_div_3_words_inner_loop_done - sgeu $at,$t2,$t0 - seq $t9,$t1,$a1 - and $at,$t9 - sltu $t3,$t0,$ta2 - $ADDU $a1,$a2 - $SUBU $t1,$t3 - $SUBU $t0,$ta2 - sltu $t8,$t1,$a1 - sltu $ta0,$a1,$a2 - or $t8,$ta0 - .set noreorder - beqz $at,.L_bn_div_3_words_inner_loop - $SUBU $v0,1 - $ADDU $v0,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 -.end bn_div_3_words_internal - -.align 5 -.globl bn_div_words -.ent bn_div_words -bn_div_words: - .set noreorder - bnez $a2,bn_div_words_internal - li $v0,-1 # I would rather signal div-by-zero - # which can be done with 'break 7' - jr $ra - move $a0,$v0 -.end bn_div_words - -.align 5 -.ent bn_div_words_internal -bn_div_words_internal: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - move $v1,$zero - bltz $a2,.L_bn_div_words_body - move $t9,$v1 - $SLL $a2,1 - bgtz $a2,.-4 - addu $t9,1 - - .set reorder - negu $t1,$t9 - li $t2,-1 - $SLL $t2,$t1 - and $t2,$a0 - $SRL $at,$a1,$t1 - .set noreorder - beqz $t2,.+12 - nop - break 6 # signal overflow - .set reorder - $SLL $a0,$t9 - $SLL $a1,$t9 - or $a0,$at -___ -$QT=$ta0; -$HH=$ta1; -$DH=$v1; -$code.=<<___; -.L_bn_div_words_body: - $SRL $DH,$a2,4*$BNSZ # bits - sgeu $at,$a0,$a2 - .set noreorder - beqz $at,.+12 - nop - $SUBU $a0,$a2 - .set reorder - - li $QT,-1 - $SRL $HH,$a0,4*$BNSZ # bits - $SRL $QT,4*$BNSZ # q=0xffffffff - beq $DH,$HH,.L_bn_div_words_skip_div1 - $DIVU $zero,$a0,$DH - mflo $QT -.L_bn_div_words_skip_div1: - $MULTU $a2,$QT - $SLL $t3,$a0,4*$BNSZ # bits - $SRL $at,$a1,4*$BNSZ # bits - or $t3,$at - mflo $t0 - mfhi $t1 -.L_bn_div_words_inner_loop1: - sltu $t2,$t3,$t0 - seq $t8,$HH,$t1 - sltu $at,$HH,$t1 - and $t2,$t8 - sltu $v0,$t0,$a2 - or $at,$t2 - .set noreorder - beqz $at,.L_bn_div_words_inner_loop1_done - $SUBU $t1,$v0 - $SUBU $t0,$a2 - b .L_bn_div_words_inner_loop1 - $SUBU $QT,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - $SLL $a1,4*$BNSZ # bits - $SUBU $a0,$t3,$t0 - $SLL $v0,$QT,4*$BNSZ # bits - - li $QT,-1 - $SRL $HH,$a0,4*$BNSZ # bits - $SRL $QT,4*$BNSZ # q=0xffffffff - beq $DH,$HH,.L_bn_div_words_skip_div2 - $DIVU $zero,$a0,$DH - mflo $QT -.L_bn_div_words_skip_div2: - $MULTU $a2,$QT - $SLL $t3,$a0,4*$BNSZ # bits - $SRL $at,$a1,4*$BNSZ # bits - or $t3,$at - mflo $t0 - mfhi $t1 -.L_bn_div_words_inner_loop2: - sltu $t2,$t3,$t0 - seq $t8,$HH,$t1 - sltu $at,$HH,$t1 - and $t2,$t8 - sltu $v1,$t0,$a2 - or $at,$t2 - .set noreorder - beqz $at,.L_bn_div_words_inner_loop2_done - $SUBU $t1,$v1 - $SUBU $t0,$a2 - b .L_bn_div_words_inner_loop2 - $SUBU $QT,1 - .set reorder -.L_bn_div_words_inner_loop2_done: - - $SUBU $a0,$t3,$t0 - or $v0,$QT - $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it - $SRL $a2,$t9 # restore $a2 - - .set noreorder - move $a1,$v1 -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - move $a0,$v0 -.end bn_div_words_internal -___ -undef $HH; undef $QT; undef $DH; - -($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3); -($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3); - -($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1 -($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2 - -($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3); - -$code.=<<___; - -.align 5 -.globl bn_mul_comba8 -.ent bn_mul_comba8 -bn_mul_comba8: - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,12*$SZREG,$ra - .mask 0x803ff008,-$SZREG - $PTR_SUB $sp,12*$SZREG - $REG_S $ra,11*$SZREG($sp) - $REG_S $s5,10*$SZREG($sp) - $REG_S $s4,9*$SZREG($sp) - $REG_S $s3,8*$SZREG($sp) - $REG_S $s2,7*$SZREG($sp) - $REG_S $s1,6*$SZREG($sp) - $REG_S $s0,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___ if ($flavour !~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x003f0000,-$SZREG - $PTR_SUB $sp,6*$SZREG - $REG_S $s5,5*$SZREG($sp) - $REG_S $s4,4*$SZREG($sp) - $REG_S $s3,3*$SZREG($sp) - $REG_S $s2,2*$SZREG($sp) - $REG_S $s1,1*$SZREG($sp) - $REG_S $s0,0*$SZREG($sp) -___ -$code.=<<___; - - .set reorder - $LD $a_0,0($a1) # If compiled with -mips3 option on - # R5000 box assembler barks on this - # 1ine with "should not have mult/div - # as last instruction in bb (R10K - # bug)" warning. If anybody out there - # has a clue about how to circumvent - # this do send me a note. - # - - $LD $b_0,0($a2) - $LD $a_1,$BNSZ($a1) - $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); - $LD $a_3,3*$BNSZ($a1) - $LD $b_1,$BNSZ($a2) - $LD $b_2,2*$BNSZ($a2) - $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 - - $LD $a_4,4*$BNSZ($a1) - $LD $a_5,5*$BNSZ($a1) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); - $LD $a_6,6*$BNSZ($a1) - $LD $a_7,7*$BNSZ($a1) - $LD $b_4,4*$BNSZ($a2) - $LD $b_5,5*$BNSZ($a2) - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); - $ADDU $c_3,$t_2,$at - $LD $b_6,6*$BNSZ($a2) - $LD $b_7,7*$BNSZ($a2) - $ST $c_1,0($a0) # r[0]=c1; - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - $ST $c_2,$BNSZ($a0) # r[1]=c2; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,2*$BNSZ($a0) # r[2]=c3; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,3*$BNSZ($a0) # r[3]=c1; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,4*$BNSZ($a0) # r[4]=c2; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,5*$BNSZ($a0) # r[5]=c3; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,6*$BNSZ($a0) # r[6]=c1; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,7*$BNSZ($a0) # r[7]=c2; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,8*$BNSZ($a0) # r[8]=c3; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,9*$BNSZ($a0) # r[9]=c1; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,10*$BNSZ($a0) # r[10]=c2; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,11*$BNSZ($a0) # r[11]=c3; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,12*$BNSZ($a0) # r[12]=c1; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,13*$BNSZ($a0) # r[13]=c2; - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - $ST $c_3,14*$BNSZ($a0) # r[14]=c3; - $ST $c_1,15*$BNSZ($a0) # r[15]=c1; - - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s5,10*$SZREG($sp) - $REG_L $s4,9*$SZREG($sp) - $REG_L $s3,8*$SZREG($sp) - $REG_L $s2,7*$SZREG($sp) - $REG_L $s1,6*$SZREG($sp) - $REG_L $s0,5*$SZREG($sp) - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - jr $ra - $PTR_ADD $sp,12*$SZREG -___ -$code.=<<___ if ($flavour !~ /nubi/i); - $REG_L $s5,5*$SZREG($sp) - $REG_L $s4,4*$SZREG($sp) - $REG_L $s3,3*$SZREG($sp) - $REG_L $s2,2*$SZREG($sp) - $REG_L $s1,1*$SZREG($sp) - $REG_L $s0,0*$SZREG($sp) - jr $ra - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; -.end bn_mul_comba8 - -.align 5 -.globl bn_mul_comba4 -.ent bn_mul_comba4 -bn_mul_comba4: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - $LD $a_0,0($a1) - $LD $b_0,0($a2) - $LD $a_1,$BNSZ($a1) - $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); - $LD $a_3,3*$BNSZ($a1) - $LD $b_1,$BNSZ($a2) - $LD $b_2,2*$BNSZ($a2) - $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 - $ST $c_1,0($a0) - - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); - $ADDU $c_3,$t_2,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - $ST $c_2,$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,2*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,3*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,4*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,5*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - $ST $c_1,6*$BNSZ($a0) - $ST $c_2,7*$BNSZ($a0) - - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - nop -.end bn_mul_comba4 -___ - -($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); - -sub add_c2 () { -my ($hi,$lo,$c0,$c1,$c2, - $warm, # !$warm denotes first call with specific sequence of - # $c_[XYZ] when there is no Z-carry to accumulate yet; - $an,$bn # these two are arguments for multiplication which - # result is used in *next* step [which is why it's - # commented as "forward multiplication" below]; - )=@_; -$code.=<<___; - mflo $lo - mfhi $hi - $ADDU $c0,$lo - sltu $at,$c0,$lo - $MULTU $an,$bn # forward multiplication - $ADDU $c0,$lo - $ADDU $at,$hi - sltu $lo,$c0,$lo - $ADDU $c1,$at - $ADDU $hi,$lo -___ -$code.=<<___ if (!$warm); - sltu $c2,$c1,$at - $ADDU $c1,$hi - sltu $hi,$c1,$hi - $ADDU $c2,$hi -___ -$code.=<<___ if ($warm); - sltu $at,$c1,$at - $ADDU $c1,$hi - $ADDU $c2,$at - sltu $hi,$c1,$hi - $ADDU $c2,$hi -___ -} - -$code.=<<___; - -.align 5 -.globl bn_sqr_comba8 -.ent bn_sqr_comba8 -bn_sqr_comba8: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - $LD $a_0,0($a1) - $LD $a_1,$BNSZ($a1) - $LD $a_2,2*$BNSZ($a1) - $LD $a_3,3*$BNSZ($a1) - - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); - $LD $a_4,4*$BNSZ($a1) - $LD $a_5,5*$BNSZ($a1) - $LD $a_6,6*$BNSZ($a1) - $LD $a_7,7*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 - $ST $c_1,0($a0) - - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $c_3,$t_2,$at - $ST $c_2,$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,2*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, - $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); -$code.=<<___; - $ST $c_1,3*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, - $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, - $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,4*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, - $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, - $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); -$code.=<<___; - $ST $c_3,5*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, - $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,6*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, - $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, - $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, - $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, - $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); -$code.=<<___; - $ST $c_2,7*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, - $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, - $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,8*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, - $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); -$code.=<<___; - $ST $c_1,9*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, - $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, - $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,10*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, - $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); -$code.=<<___; - $ST $c_3,11*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, - $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - $ST $c_1,12*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, - $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); -$code.=<<___; - $ST $c_2,13*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - $ST $c_3,14*$BNSZ($a0) - $ST $c_1,15*$BNSZ($a0) - - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - nop -.end bn_sqr_comba8 - -.align 5 -.globl bn_sqr_comba4 -.ent bn_sqr_comba4 -bn_sqr_comba4: -___ -$code.=<<___ if ($flavour =~ /nubi/i); - .frame $sp,6*$SZREG,$ra - .mask 0x8000f008,-$SZREG - .set noreorder - $PTR_SUB $sp,6*$SZREG - $REG_S $ra,5*$SZREG($sp) - $REG_S $t3,4*$SZREG($sp) - $REG_S $t2,3*$SZREG($sp) - $REG_S $t1,2*$SZREG($sp) - $REG_S $t0,1*$SZREG($sp) - $REG_S $gp,0*$SZREG($sp) -___ -$code.=<<___; - .set reorder - $LD $a_0,0($a1) - $LD $a_1,$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); - $LD $a_2,2*$BNSZ($a1) - $LD $a_3,3*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 - $ST $c_1,0($a0) - - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $c_3,$t_2,$at - $ST $c_2,$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - $ST $c_3,2*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, - $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); - &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, - $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); -$code.=<<___; - $ST $c_1,3*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, - $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); -$code.=<<___; - mflo $t_1 - mfhi $t_2 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - $ST $c_2,4*$BNSZ($a0) -___ - &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, - $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); -$code.=<<___; - $ST $c_3,5*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - $ST $c_1,6*$BNSZ($a0) - $ST $c_2,7*$BNSZ($a0) - - .set noreorder -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $t3,4*$SZREG($sp) - $REG_L $t2,3*$SZREG($sp) - $REG_L $t1,2*$SZREG($sp) - $REG_L $t0,1*$SZREG($sp) - $REG_L $gp,0*$SZREG($sp) - $PTR_ADD $sp,6*$SZREG -___ -$code.=<<___; - jr $ra - nop -.end bn_sqr_comba4 -___ -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/mips3-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/mips3-mont.pl deleted file mode 100644 index 8f9156e02a..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/mips3-mont.pl +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# This module doesn't present direct interest for OpenSSL, because it -# doesn't provide better performance for longer keys. While 512-bit -# RSA private key operations are 40% faster, 1024-bit ones are hardly -# faster at all, while longer key operations are slower by up to 20%. -# It might be of interest to embedded system developers though, as -# it's smaller than 1KB, yet offers ~3x improvement over compiler -# generated code. -# -# The module targets N32 and N64 MIPS ABIs and currently is a bit -# IRIX-centric, i.e. is likely to require adaptation for other OSes. - -# int bn_mul_mont( -$rp="a0"; # BN_ULONG *rp, -$ap="a1"; # const BN_ULONG *ap, -$bp="a2"; # const BN_ULONG *bp, -$np="a3"; # const BN_ULONG *np, -$n0="a4"; # const BN_ULONG *n0, -$num="a5"; # int num); - -$lo0="a6"; -$hi0="a7"; -$lo1="v0"; -$hi1="v1"; -$aj="t0"; -$bi="t1"; -$nj="t2"; -$tp="t3"; -$alo="s0"; -$ahi="s1"; -$nlo="s2"; -$nhi="s3"; -$tj="s4"; -$i="s5"; -$j="s6"; -$fp="t8"; -$m1="t9"; - -$FRAME=8*(2+8); - -$code=<<___; -#include -#include - -.text - -.set noat -.set reorder - -.align 5 -.globl bn_mul_mont -.ent bn_mul_mont -bn_mul_mont: - .set noreorder - PTR_SUB sp,64 - move $fp,sp - .frame $fp,64,ra - slt AT,$num,4 - li v0,0 - beqzl AT,.Lproceed - nop - jr ra - PTR_ADD sp,$fp,64 - .set reorder -.align 5 -.Lproceed: - ld $n0,0($n0) - ld $bi,0($bp) # bp[0] - ld $aj,0($ap) # ap[0] - ld $nj,0($np) # np[0] - PTR_SUB sp,16 # place for two extra words - sll $num,3 - li AT,-4096 - PTR_SUB sp,$num - and sp,AT - - sd s0,0($fp) - sd s1,8($fp) - sd s2,16($fp) - sd s3,24($fp) - sd s4,32($fp) - sd s5,40($fp) - sd s6,48($fp) - sd s7,56($fp) - - dmultu $aj,$bi - ld $alo,8($ap) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - dmultu $lo0,$n0 - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 -.align 4 -.L1st: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - dmultu $nj,$m1 - daddu $hi1,AT - addu $j,8 - sd $lo1,($tp) - sltu s7,$j,$num - mflo $nlo - mfhi $nhi - - bnez s7,.L1st - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - - daddu $lo1,$nlo,$hi1 - sltu s7,$lo1,$hi1 - daddu $hi1,$nhi,s7 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - - sd $lo1,($tp) - - daddu $hi1,$hi0 - sltu AT,$hi1,$hi0 - sd $hi1,8($tp) - sd AT,16($tp) - - li $i,8 -.align 4 -.Louter: - PTR_ADD $bi,$bp,$i - ld $bi,($bi) - ld $aj,($ap) - ld $alo,8($ap) - ld $tj,(sp) - - dmultu $aj,$bi - ld $nj,($np) - ld $nlo,8($np) - mflo $lo0 - mfhi $hi0 - daddu $lo0,$tj - dmultu $lo0,$n0 - sltu AT,$lo0,$tj - daddu $hi0,AT - mflo $m1 - - dmultu $alo,$bi - mflo $alo - mfhi $ahi - - dmultu $nj,$m1 - mflo $lo1 - mfhi $hi1 - - dmultu $nlo,$m1 - daddu $lo1,$lo0 - sltu AT,$lo1,$lo0 - daddu $hi1,AT - mflo $nlo - mfhi $nhi - - move $tp,sp - li $j,16 - ld $tj,8($tp) -.align 4 -.Linner: - .set noreorder - PTR_ADD $aj,$ap,$j - ld $aj,($aj) - PTR_ADD $nj,$np,$j - ld $nj,($nj) - - dmultu $aj,$bi - daddu $lo0,$alo,$hi0 - daddu $lo1,$nlo,$hi1 - sltu AT,$lo0,$hi0 - sltu s7,$lo1,$hi1 - daddu $hi0,$ahi,AT - daddu $hi1,$nhi,s7 - mflo $alo - mfhi $ahi - - daddu $lo0,$tj - addu $j,8 - dmultu $nj,$m1 - sltu AT,$lo0,$tj - daddu $lo1,$lo0 - daddu $hi0,AT - sltu s7,$lo1,$lo0 - ld $tj,16($tp) - daddu $hi1,s7 - sltu AT,$j,$num - mflo $nlo - mfhi $nhi - sd $lo1,($tp) - bnez AT,.Linner - PTR_ADD $tp,8 - .set reorder - - daddu $lo0,$alo,$hi0 - sltu AT,$lo0,$hi0 - daddu $hi0,$ahi,AT - daddu $lo0,$tj - sltu s7,$lo0,$tj - daddu $hi0,s7 - - ld $tj,16($tp) - daddu $lo1,$nlo,$hi1 - sltu AT,$lo1,$hi1 - daddu $hi1,$nhi,AT - daddu $lo1,$lo0 - sltu s7,$lo1,$lo0 - daddu $hi1,s7 - sd $lo1,($tp) - - daddu $lo1,$hi1,$hi0 - sltu $hi1,$lo1,$hi0 - daddu $lo1,$tj - sltu AT,$lo1,$tj - daddu $hi1,AT - sd $lo1,8($tp) - sd $hi1,16($tp) - - addu $i,8 - sltu s7,$i,$num - bnez s7,.Louter - - .set noreorder - PTR_ADD $tj,sp,$num # &tp[num] - move $tp,sp - move $ap,sp - li $hi0,0 # clear borrow bit - -.align 4 -.Lsub: ld $lo0,($tp) - ld $lo1,($np) - PTR_ADD $tp,8 - PTR_ADD $np,8 - dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] - sgtu AT,$lo1,$lo0 - dsubu $lo0,$lo1,$hi0 - sgtu $hi0,$lo0,$lo1 - sd $lo0,($rp) - or $hi0,AT - sltu AT,$tp,$tj - bnez AT,.Lsub - PTR_ADD $rp,8 - - dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit - move $tp,sp - PTR_SUB $rp,$num # restore rp - not $hi1,$hi0 - - and $ap,$hi0,sp - and $bp,$hi1,$rp - or $ap,$ap,$bp # ap=borrow?tp:rp - -.align 4 -.Lcopy: ld $aj,($ap) - PTR_ADD $ap,8 - PTR_ADD $tp,8 - sd zero,-8($tp) - sltu AT,$tp,$tj - sd $aj,($rp) - bnez AT,.Lcopy - PTR_ADD $rp,8 - - ld s0,0($fp) - ld s1,8($fp) - ld s2,16($fp) - ld s3,24($fp) - ld s4,32($fp) - ld s5,40($fp) - ld s6,48($fp) - ld s7,56($fp) - li v0,1 - jr ra - PTR_ADD sp,$fp,64 - .set reorder -END(bn_mul_mont) -.rdata -.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by " -___ - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/mips3.s b/drivers/builtin_openssl2/crypto/bn/asm/mips3.s deleted file mode 100644 index dca4105c7d..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/mips3.s +++ /dev/null @@ -1,2201 +0,0 @@ -.rdata -.asciiz "mips3.s, Version 1.1" -.asciiz "MIPS III/IV ISA artwork by Andy Polyakov " - -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to the OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * The module is designed to work with either of the "new" MIPS ABI(5), - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under - * IRIX 5.x not only because it doesn't support new ABIs but also - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only - * cause illegal instruction exception:-( - * - * In addition the code depends on preprocessor flags set up by MIPSpro - * compiler driver (either as or cc) and therefore (probably?) can't be - * compiled by the GNU assembler. GNU C driver manages fine though... - * I mean as long as -mmips-as is specified or is the default option, - * because then it simply invokes /usr/bin/as which in turn takes - * perfect care of the preprocessor definitions. Another neat feature - * offered by the MIPSpro assembler is an optimization pass. This gave - * me the opportunity to have the code looking more regular as all those - * architecture dependent instruction rescheduling details were left to - * the assembler. Cool, huh? - * - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' - * goes way over 3 times faster! - * - * - */ -#include -#include - -#if _MIPS_ISA>=4 -#define MOVNZ(cond,dst,src) \ - movn dst,src,cond -#else -#define MOVNZ(cond,dst,src) \ - .set noreorder; \ - bnezl cond,.+8; \ - move dst,src; \ - .set reorder -#endif - -.text - -.set noat -.set reorder - -#define MINUS4 v1 - -.align 5 -LEAF(bn_mul_add_words) - .set noreorder - bgtzl a2,.L_bn_mul_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_add_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - dmultu t0,a3 - ld t1,0(a0) - ld t2,8(a1) - ld t3,8(a0) - ld ta0,16(a1) - ld ta1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 /* All manuals say it "compares 32-bit - * values", but it seems to work fine - * even on 64-bit registers. */ - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - - dmultu t2,a3 - ld ta2,24(a1) - ld ta3,24(a0) - daddu t3,v0 - sltu v0,t3,v0 - mflo AT - mfhi t2 - daddu t3,AT - daddu v0,t2 - sltu AT,t3,AT - sd t3,8(a0) - daddu v0,AT - - dmultu ta0,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - daddu ta1,v0 - sltu v0,ta1,v0 - mflo AT - mfhi ta0 - daddu ta1,AT - daddu v0,ta0 - sltu AT,ta1,AT - sd ta1,-16(a0) - daddu v0,AT - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - daddu ta3,v0 - sltu v0,ta3,v0 - mflo AT - mfhi ta2 - daddu ta3,AT - daddu v0,ta2 - sltu AT,ta3,AT - sd ta3,-8(a0) - daddu v0,AT - .set noreorder - bgtzl ta0,.L_bn_mul_add_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_add_words_return: - jr ra - -.L_bn_mul_add_words_tail: - dmultu t0,a3 - ld t1,0(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,8(a1) - dmultu t0,a3 - ld t1,8(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,8(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,16(a1) - dmultu t0,a3 - ld t1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,16(a0) - daddu v0,AT - jr ra -END(bn_mul_add_words) - -.align 5 -LEAF(bn_mul_words) - .set noreorder - bgtzl a2,.L_bn_mul_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - dmultu t0,a3 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - - dmultu t2,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - mflo AT - mfhi t2 - daddu v0,AT - sltu t3,v0,AT - sd v0,-24(a0) - daddu v0,t3,t2 - - dmultu ta0,a3 - mflo AT - mfhi ta0 - daddu v0,AT - sltu ta1,v0,AT - sd v0,-16(a0) - daddu v0,ta1,ta0 - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - mflo AT - mfhi ta2 - daddu v0,AT - sltu ta3,v0,AT - sd v0,-8(a0) - daddu v0,ta3,ta2 - .set noreorder - bgtzl ta0,.L_bn_mul_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_words_return: - jr ra - -.L_bn_mul_words_tail: - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,8(a1) - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,8(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,16(a1) - dmultu t0,a3 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,16(a0) - daddu v0,t1,t0 - jr ra -END(bn_mul_words) - -.align 5 -LEAF(bn_sqr_words) - .set noreorder - bgtzl a2,.L_bn_sqr_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sqr_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - dmultu t0,t0 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - - dmultu t2,t2 - subu a2,4 - PTR_ADD a0,64 - PTR_ADD a1,32 - mflo t3 - mfhi t2 - sd t3,-48(a0) - sd t2,-40(a0) - - dmultu ta0,ta0 - mflo ta1 - mfhi ta0 - sd ta1,-32(a0) - sd ta0,-24(a0) - - - dmultu ta2,ta2 - and ta0,a2,MINUS4 - mflo ta3 - mfhi ta2 - sd ta3,-16(a0) - sd ta2,-8(a0) - - .set noreorder - bgtzl ta0,.L_bn_sqr_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_sqr_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sqr_words_return: - move v0,zero - jr ra - -.L_bn_sqr_words_tail: - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,8(a1) - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,16(a0) - sd t0,24(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,16(a1) - dmultu t0,t0 - mflo t1 - mfhi t0 - sd t1,32(a0) - sd t0,40(a0) - jr ra -END(bn_sqr_words) - -.align 5 -LEAF(bn_add_words) - .set noreorder - bgtzl a3,.L_bn_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_add_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_add_words_tail - -.L_bn_add_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - daddu ta0,t0 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,-32(a0) - daddu v0,t8 - - daddu ta1,t1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,-24(a0) - daddu v0,t9 - - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,-16(a0) - daddu v0,t8 - - daddu ta3,t3 - sltu t9,ta3,t3 - daddu t3,ta3,v0 - sltu v0,t3,ta3 - sd t3,-8(a0) - daddu v0,t9 - - .set noreorder - bgtzl AT,.L_bn_add_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_add_words_return: - jr ra - -.L_bn_add_words_tail: - ld ta0,0(a2) - daddu ta0,t0 - subu a3,1 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,0(a0) - daddu v0,t8 - beqz a3,.L_bn_add_words_return - - ld t1,8(a1) - ld ta1,8(a2) - daddu ta1,t1 - subu a3,1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,8(a0) - daddu v0,t9 - beqz a3,.L_bn_add_words_return - - ld t2,16(a1) - ld ta2,16(a2) - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,16(a0) - daddu v0,t8 - jr ra -END(bn_add_words) - -.align 5 -LEAF(bn_sub_words) - .set noreorder - bgtzl a3,.L_bn_sub_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sub_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - sd ta0,-32(a0) - MOVNZ (t0,v0,t8) - - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - sd ta1,-24(a0) - MOVNZ (t1,v0,t9) - - - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - sd ta2,-16(a0) - MOVNZ (t2,v0,t8) - - sltu t9,t3,ta3 - dsubu t3,ta3 - dsubu ta3,t3,v0 - sd ta3,-8(a0) - MOVNZ (t3,v0,t9) - - .set noreorder - bgtzl AT,.L_bn_sub_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_sub_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sub_words_return: - jr ra - -.L_bn_sub_words_tail: - ld ta0,0(a2) - subu a3,1 - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - MOVNZ (t0,v0,t8) - sd ta0,0(a0) - beqz a3,.L_bn_sub_words_return - - ld t1,8(a1) - subu a3,1 - ld ta1,8(a2) - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - MOVNZ (t1,v0,t9) - sd ta1,8(a0) - beqz a3,.L_bn_sub_words_return - - ld t2,16(a1) - ld ta2,16(a2) - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - MOVNZ (t2,v0,t8) - sd ta2,16(a0) - jr ra -END(bn_sub_words) - -#undef MINUS4 - -.align 5 -LEAF(bn_div_3_words) - .set reorder - move a3,a0 /* we know that bn_div_words doesn't - * touch a3, ta2, ta3 and preserves a2 - * so that we can save two arguments - * and return address in registers - * instead of stack:-) - */ - ld a0,(a3) - move ta2,a1 - ld a1,-8(a3) - bne a0,a2,.L_bn_div_3_words_proceed - li v0,-1 - jr ra -.L_bn_div_3_words_proceed: - move ta3,ra - bal bn_div_words - move ra,ta3 - dmultu ta2,v0 - ld t2,-16(a3) - move ta0,zero - mfhi t1 - mflo t0 - sltu t8,t1,v1 -.L_bn_div_3_words_inner_loop: - bnez t8,.L_bn_div_3_words_inner_loop_done - sgeu AT,t2,t0 - seq t9,t1,v1 - and AT,t9 - sltu t3,t0,ta2 - daddu v1,a2 - dsubu t1,t3 - dsubu t0,ta2 - sltu t8,t1,v1 - sltu ta0,v1,a2 - or t8,ta0 - .set noreorder - beqzl AT,.L_bn_div_3_words_inner_loop - dsubu v0,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - jr ra -END(bn_div_3_words) - -.align 5 -LEAF(bn_div_words) - .set noreorder - bnezl a2,.L_bn_div_words_proceed - move v1,zero - jr ra - li v0,-1 /* I'd rather signal div-by-zero - * which can be done with 'break 7' */ - -.L_bn_div_words_proceed: - bltz a2,.L_bn_div_words_body - move t9,v1 - dsll a2,1 - bgtz a2,.-4 - addu t9,1 - - .set reorder - negu t1,t9 - li t2,-1 - dsll t2,t1 - and t2,a0 - dsrl AT,a1,t1 - .set noreorder - bnezl t2,.+8 - break 6 /* signal overflow */ - .set reorder - dsll a0,t9 - dsll a1,t9 - or a0,AT - -#define QT ta0 -#define HH ta1 -#define DH v1 -.L_bn_div_words_body: - dsrl DH,a2,32 - sgeu AT,a0,a2 - .set noreorder - bnezl AT,.+8 - dsubu a0,a2 - .set reorder - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div1 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div1: - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop1: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v0,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop1_done - dsubu t1,v0 - dsubu t0,a2 - b .L_bn_div_words_inner_loop1 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - dsll a1,32 - dsubu a0,t3,t0 - dsll v0,QT,32 - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div2 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div2: -#undef DH - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop2: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v1,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop2_done - dsubu t1,v1 - dsubu t0,a2 - b .L_bn_div_words_inner_loop2 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop2_done: -#undef HH - - dsubu a0,t3,t0 - or v0,QT - dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ - dsrl a2,t9 /* restore a2 */ - jr ra -#undef QT -END(bn_div_words) - -#define a_0 t0 -#define a_1 t1 -#define a_2 t2 -#define a_3 t3 -#define b_0 ta0 -#define b_1 ta1 -#define b_2 ta2 -#define b_3 ta3 - -#define a_4 s0 -#define a_5 s2 -#define a_6 s4 -#define a_7 a1 /* once we load a[7] we don't need a anymore */ -#define b_4 s1 -#define b_5 s3 -#define b_6 s5 -#define b_7 a2 /* once we load b[7] we don't need b anymore */ - -#define t_1 t8 -#define t_2 t9 - -#define c_1 v0 -#define c_2 v1 -#define c_3 a3 - -#define FRAME_SIZE 48 - -.align 5 -LEAF(bn_mul_comba8) - .set noreorder - PTR_SUB sp,FRAME_SIZE - .frame sp,64,ra - .set reorder - ld a_0,0(a1) /* If compiled with -mips3 option on - * R5000 box assembler barks on this - * line with "shouldn't have mult/div - * as last instruction in bb (R10K - * bug)" warning. If anybody out there - * has a clue about how to circumvent - * this do send me a note. - * - */ - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - sd s0,0(sp) - sd s1,8(sp) - sd s2,16(sp) - sd s3,24(sp) - sd s4,32(sp) - sd s5,40(sp) - mflo c_1 - mfhi c_2 - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - ld b_4,32(a2) - ld b_5,40(a2) - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - ld b_6,48(a2) - ld b_7,56(a2) - sd c_1,0(a0) /* r[0]=c1; */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) /* r[1]=c2; */ - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) /* r[2]=c3; */ - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) /* r[3]=c1; */ - - dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) /* r[4]=c2; */ - - dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) /* r[5]=c3; */ - - dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) /* r[6]=c1; */ - - dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) /* r[7]=c2; */ - - dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) /* r[8]=c3; */ - - dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) /* r[9]=c1; */ - - dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) /* r[10]=c2; */ - - dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) /* r[11]=c3; */ - - dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) /* r[12]=c1; */ - - dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) /* r[13]=c2; */ - - dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - ld s0,0(sp) - ld s1,8(sp) - ld s2,16(sp) - ld s3,24(sp) - ld s4,32(sp) - ld s5,40(sp) - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) /* r[14]=c3; */ - sd c_1,120(a0) /* r[15]=c1; */ - - PTR_ADD sp,FRAME_SIZE - - jr ra -END(bn_mul_comba8) - -.align 5 -LEAF(bn_mul_comba4) - .set reorder - ld a_0,0(a1) - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_mul_comba4) - -#undef a_4 -#undef a_5 -#undef a_6 -#undef a_7 -#define a_4 b_0 -#define a_5 b_1 -#define a_6 b_2 -#define a_7 b_3 - -.align 5 -LEAF(bn_sqr_comba8) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) - - dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) - - dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) - - dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) - - dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) - - dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) - - dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) - - dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) - - dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) - sd c_1,120(a0) - - jr ra -END(bn_sqr_comba8) - -.align 5 -LEAF(bn_sqr_comba4) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_sqr_comba4) diff --git a/drivers/builtin_openssl2/crypto/bn/asm/modexp512-x86_64.pl b/drivers/builtin_openssl2/crypto/bn/asm/modexp512-x86_64.pl deleted file mode 100644 index bfd6e97541..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/modexp512-x86_64.pl +++ /dev/null @@ -1,1497 +0,0 @@ -#!/usr/bin/env perl -# -# Copyright (c) 2010-2011 Intel Corp. -# Author: Vinodh.Gopal@intel.com -# Jim Guilford -# Erdinc.Ozturk@intel.com -# Maxim.Perminov@intel.com -# -# More information about algorithm used can be found at: -# http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf -# -# ==================================================================== -# Copyright (c) 2011 The OpenSSL Project. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# 3. All advertising materials mentioning features or use of this -# software must display the following acknowledgment: -# "This product includes software developed by the OpenSSL Project -# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" -# -# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to -# endorse or promote products derived from this software without -# prior written permission. For written permission, please contact -# licensing@OpenSSL.org. -# -# 5. Products derived from this software may not be called "OpenSSL" -# nor may "OpenSSL" appear in their names without prior written -# permission of the OpenSSL Project. -# -# 6. Redistributions of any form whatsoever must retain the following -# acknowledgment: -# "This product includes software developed by the OpenSSL Project -# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" -# -# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY -# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR -# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. -# ==================================================================== - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -use strict; -my $code=".text\n\n"; -my $m=0; - -# -# Define x512 macros -# - -#MULSTEP_512_ADD MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2 -# -# uses rax, rdx, and args -sub MULSTEP_512_ADD -{ - my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_; - my @X=@$x; # make a copy -$code.=<<___; - mov (+8*0)($SRC2), %rax - mul $OP # rdx:rax = %OP * [0] - mov ($ASRC), $X[0] - add %rax, $X[0] - adc \$0, %rdx - mov $X[0], $DST -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov %rdx, $TMP - - mov (+8*$i)($SRC2), %rax - mul $OP # rdx:rax = %OP * [$i] - mov (+8*$i)($ASRC), $X[$i] - add %rax, $X[$i] - adc \$0, %rdx - add $TMP, $X[$i] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $X[0] -___ -} - -#MULSTEP_512 MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp -# -# uses rax, rdx, and args -sub MULSTEP_512 -{ - my ($x, $DST, $SRC2, $OP, $TMP)=@_; - my @X=@$x; # make a copy -$code.=<<___; - mov (+8*0)($SRC2), %rax - mul $OP # rdx:rax = %OP * [0] - add %rax, $X[0] - adc \$0, %rdx - mov $X[0], $DST -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov %rdx, $TMP - - mov (+8*$i)($SRC2), %rax - mul $OP # rdx:rax = %OP * [$i] - add %rax, $X[$i] - adc \$0, %rdx - add $TMP, $X[$i] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $X[0] -___ -} - -# -# Swizzle Macros -# - -# macro to copy data from flat space to swizzled table -#MACRO swizzle pDst, pSrc, tmp1, tmp2 -# pDst and pSrc are modified -sub swizzle -{ - my ($pDst, $pSrc, $cnt, $d0)=@_; -$code.=<<___; - mov \$8, $cnt -loop_$m: - mov ($pSrc), $d0 - mov $d0#w, ($pDst) - shr \$16, $d0 - mov $d0#w, (+64*1)($pDst) - shr \$16, $d0 - mov $d0#w, (+64*2)($pDst) - shr \$16, $d0 - mov $d0#w, (+64*3)($pDst) - lea 8($pSrc), $pSrc - lea 64*4($pDst), $pDst - dec $cnt - jnz loop_$m -___ - - $m++; -} - -# macro to copy data from swizzled table to flat space -#MACRO unswizzle pDst, pSrc, tmp*3 -sub unswizzle -{ - my ($pDst, $pSrc, $cnt, $d0, $d1)=@_; -$code.=<<___; - mov \$4, $cnt -loop_$m: - movzxw (+64*3+256*0)($pSrc), $d0 - movzxw (+64*3+256*1)($pSrc), $d1 - shl \$16, $d0 - shl \$16, $d1 - mov (+64*2+256*0)($pSrc), $d0#w - mov (+64*2+256*1)($pSrc), $d1#w - shl \$16, $d0 - shl \$16, $d1 - mov (+64*1+256*0)($pSrc), $d0#w - mov (+64*1+256*1)($pSrc), $d1#w - shl \$16, $d0 - shl \$16, $d1 - mov (+64*0+256*0)($pSrc), $d0#w - mov (+64*0+256*1)($pSrc), $d1#w - mov $d0, (+8*0)($pDst) - mov $d1, (+8*1)($pDst) - lea 256*2($pSrc), $pSrc - lea 8*2($pDst), $pDst - sub \$1, $cnt - jnz loop_$m -___ - - $m++; -} - -# -# Data Structures -# - -# Reduce Data -# -# -# Offset Value -# 0C0 Carries -# 0B8 X2[10] -# 0B0 X2[9] -# 0A8 X2[8] -# 0A0 X2[7] -# 098 X2[6] -# 090 X2[5] -# 088 X2[4] -# 080 X2[3] -# 078 X2[2] -# 070 X2[1] -# 068 X2[0] -# 060 X1[12] P[10] -# 058 X1[11] P[9] Z[8] -# 050 X1[10] P[8] Z[7] -# 048 X1[9] P[7] Z[6] -# 040 X1[8] P[6] Z[5] -# 038 X1[7] P[5] Z[4] -# 030 X1[6] P[4] Z[3] -# 028 X1[5] P[3] Z[2] -# 020 X1[4] P[2] Z[1] -# 018 X1[3] P[1] Z[0] -# 010 X1[2] P[0] Y[2] -# 008 X1[1] Q[1] Y[1] -# 000 X1[0] Q[0] Y[0] - -my $X1_offset = 0; # 13 qwords -my $X2_offset = $X1_offset + 13*8; # 11 qwords -my $Carries_offset = $X2_offset + 11*8; # 1 qword -my $Q_offset = 0; # 2 qwords -my $P_offset = $Q_offset + 2*8; # 11 qwords -my $Y_offset = 0; # 3 qwords -my $Z_offset = $Y_offset + 3*8; # 9 qwords - -my $Red_Data_Size = $Carries_offset + 1*8; # (25 qwords) - -# -# Stack Frame -# -# -# offset value -# ... -# ... -# 280 Garray - -# 278 tmp16[15] -# ... ... -# 200 tmp16[0] - -# 1F8 tmp[7] -# ... ... -# 1C0 tmp[0] - -# 1B8 GT[7] -# ... ... -# 180 GT[0] - -# 178 Reduce Data -# ... ... -# 0B8 Reduce Data -# 0B0 reserved -# 0A8 reserved -# 0A0 reserved -# 098 reserved -# 090 reserved -# 088 reduce result addr -# 080 exp[8] - -# ... -# 048 exp[1] -# 040 exp[0] - -# 038 reserved -# 030 loop_idx -# 028 pg -# 020 i -# 018 pData ; arg 4 -# 010 pG ; arg 2 -# 008 pResult ; arg 1 -# 000 rsp ; stack pointer before subtract - -my $rsp_offset = 0; -my $pResult_offset = 8*1 + $rsp_offset; -my $pG_offset = 8*1 + $pResult_offset; -my $pData_offset = 8*1 + $pG_offset; -my $i_offset = 8*1 + $pData_offset; -my $pg_offset = 8*1 + $i_offset; -my $loop_idx_offset = 8*1 + $pg_offset; -my $reserved1_offset = 8*1 + $loop_idx_offset; -my $exp_offset = 8*1 + $reserved1_offset; -my $red_result_addr_offset= 8*9 + $exp_offset; -my $reserved2_offset = 8*1 + $red_result_addr_offset; -my $Reduce_Data_offset = 8*5 + $reserved2_offset; -my $GT_offset = $Red_Data_Size + $Reduce_Data_offset; -my $tmp_offset = 8*8 + $GT_offset; -my $tmp16_offset = 8*8 + $tmp_offset; -my $garray_offset = 8*16 + $tmp16_offset; -my $mem_size = 8*8*32 + $garray_offset; - -# -# Offsets within Reduce Data -# -# -# struct MODF_2FOLD_MONT_512_C1_DATA { -# UINT64 t[8][8]; -# UINT64 m[8]; -# UINT64 m1[8]; /* 2^768 % m */ -# UINT64 m2[8]; /* 2^640 % m */ -# UINT64 k1[2]; /* (- 1/m) % 2^128 */ -# }; - -my $T = 0; -my $M = 512; # = 8 * 8 * 8 -my $M1 = 576; # = 8 * 8 * 9 /* += 8 * 8 */ -my $M2 = 640; # = 8 * 8 * 10 /* += 8 * 8 */ -my $K1 = 704; # = 8 * 8 * 11 /* += 8 * 8 */ - -# -# FUNCTIONS -# - -{{{ -# -# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords) -# and add 512-bits (8 qwords) -# to get 640 bits (10 qwords) -# Input: 128-bit mul source: [rdi+8*1], rbp -# 512-bit mul source: [rsi+8*n] -# 512-bit add source: r15, r14, ..., r9, r8 -# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0] -# Clobbers all regs except: rcx, rsi, rdi -$code.=<<___; -.type MULADD_128x512,\@abi-omnipotent -.align 16 -MULADD_128x512: -___ - &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx"); -$code.=<<___; - mov (+8*1)(%rdi), %rbp -___ - &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx"); -$code.=<<___; - ret -.size MULADD_128x512,.-MULADD_128x512 -___ -}}} - -{{{ -#MULADD_256x512 MACRO pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0 -# -# Inputs: pDst: Destination (768 bits, 12 qwords) -# pA: Multiplicand (1024 bits, 16 qwords) -# pB: Multiplicand (512 bits, 8 qwords) -# Dst = Ah * B + Al -# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits) -# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0] -# Uses registers: arguments, RAX, RDX -sub MULADD_256x512 -{ - my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_; -$code.=<<___; - mov (+8*12)($pA), $OP -___ - &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*13)($pA), $OP -___ - &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*14)($pA), $OP -___ - &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*15)($pA), $OP -___ - &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); -} - -# -# mont_reduce(UINT64 *x, /* 1024 bits, 16 qwords */ -# UINT64 *m, /* 512 bits, 8 qwords */ -# MODF_2FOLD_MONT_512_C1_DATA *data, -# UINT64 *r) /* 512 bits, 8 qwords */ -# Input: x (number to be reduced): tmp16 (Implicit) -# m (modulus): [pM] (Implicit) -# data (reduce data): [pData] (Implicit) -# Output: r (result): Address in [red_res_addr] -# result also in: r9, r8, r15, r14, r13, r12, r11, r10 - -my @X=map("%r$_",(8..15)); - -$code.=<<___; -.type mont_reduce,\@abi-omnipotent -.align 16 -mont_reduce: -___ - -my $STACK_DEPTH = 8; - # - # X1 = Xh * M1 + Xl -$code.=<<___; - lea (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi # pX1 (Dst) 769 bits, 13 qwords - mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi # pM1 (Bsrc) 512 bits, 8 qwords - add \$$M1, %rsi - lea (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx # X (Asrc) 1024 bits, 16 qwords - -___ - - &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X); # rotates @X 4 times - # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0] - -$code.=<<___; - xor %rax, %rax - # X1 += xl - add (+8*8)(%rcx), $X[4] - adc (+8*9)(%rcx), $X[5] - adc (+8*10)(%rcx), $X[6] - adc (+8*11)(%rcx), $X[7] - adc \$0, %rax - # X1 is now rax, r11-r8, r15-r12, tmp16[3:0] - - # - # check for carry ;; carry stored in rax - mov $X[4], (+8*8)(%rdi) # rdi points to X1 - mov $X[5], (+8*9)(%rdi) - mov $X[6], %rbp - mov $X[7], (+8*11)(%rdi) - - mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) - - mov (+8*0)(%rdi), $X[4] - mov (+8*1)(%rdi), $X[5] - mov (+8*2)(%rdi), $X[6] - mov (+8*3)(%rdi), $X[7] - - # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8 - # rdi -> X1 - # rsi -> M1 - - # - # X2 = Xh * M2 + Xl - # do first part (X2 = Xh * M2) - add \$8*10, %rdi # rdi -> pXh ; 128 bits, 2 qwords - # Xh is actually { [rdi+8*1], rbp } - add \$`$M2-$M1`, %rsi # rsi -> M2 - lea (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx # rcx -> pX2 ; 641 bits, 11 qwords -___ - unshift(@X,pop(@X)); unshift(@X,pop(@X)); -$code.=<<___; - - call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 - # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] - mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax - - # X2 += Xl - add (+8*8-8*10)(%rdi), $X[6] # (-8*10) is to adjust rdi -> Xh to Xl - adc (+8*9-8*10)(%rdi), $X[7] - mov $X[6], (+8*8)(%rcx) - mov $X[7], (+8*9)(%rcx) - - adc %rax, %rax - mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) - - lea (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi # rdi -> pQ ; 128 bits, 2 qwords - add \$`$K1-$M2`, %rsi # rsi -> pK1 ; 128 bits, 2 qwords - - # MUL_128x128t128 rdi, rcx, rsi ; Q = X2 * K1 (bottom half) - # B1:B0 = rsi[1:0] = K1[1:0] - # A1:A0 = rcx[1:0] = X2[1:0] - # Result = rdi[1],rbp = Q[1],rbp - mov (%rsi), %r8 # B0 - mov (+8*1)(%rsi), %rbx # B1 - - mov (%rcx), %rax # A0 - mul %r8 # B0 - mov %rax, %rbp - mov %rdx, %r9 - - mov (+8*1)(%rcx), %rax # A1 - mul %r8 # B0 - add %rax, %r9 - - mov (%rcx), %rax # A0 - mul %rbx # B1 - add %rax, %r9 - - mov %r9, (+8*1)(%rdi) - # end MUL_128x128t128 - - sub \$`$K1-$M`, %rsi - - mov (%rcx), $X[6] - mov (+8*1)(%rcx), $X[7] # r9:r8 = X2[1:0] - - call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 - # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] - - # load first half of m to rdx, rdi, rbx, rax - # moved this here for efficiency - mov (+8*0)(%rsi), %rax - mov (+8*1)(%rsi), %rbx - mov (+8*2)(%rsi), %rdi - mov (+8*3)(%rsi), %rdx - - # continue with reduction - mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp - - add (+8*8)(%rcx), $X[6] - adc (+8*9)(%rcx), $X[7] - - #accumulate the final carry to rbp - adc %rbp, %rbp - - # Add in overflow corrections: R = (X2>>128) += T[overflow] - # R = {r9, r8, r15, r14, ..., r10} - shl \$3, %rbp - mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rcx # rsi -> Data (and points to T) - add %rcx, %rbp # pT ; 512 bits, 8 qwords, spread out - - # rsi will be used to generate a mask after the addition - xor %rsi, %rsi - - add (+8*8*0)(%rbp), $X[0] - adc (+8*8*1)(%rbp), $X[1] - adc (+8*8*2)(%rbp), $X[2] - adc (+8*8*3)(%rbp), $X[3] - adc (+8*8*4)(%rbp), $X[4] - adc (+8*8*5)(%rbp), $X[5] - adc (+8*8*6)(%rbp), $X[6] - adc (+8*8*7)(%rbp), $X[7] - - # if there is a carry: rsi = 0xFFFFFFFFFFFFFFFF - # if carry is clear: rsi = 0x0000000000000000 - sbb \$0, %rsi - - # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m - and %rsi, %rax - and %rsi, %rbx - and %rsi, %rdi - and %rsi, %rdx - - mov \$1, %rbp - sub %rax, $X[0] - sbb %rbx, $X[1] - sbb %rdi, $X[2] - sbb %rdx, $X[3] - - # if there is a borrow: rbp = 0 - # if there is no borrow: rbp = 1 - # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m - sbb \$0, %rbp - - #load second half of m to rdx, rdi, rbx, rax - - add \$$M, %rcx - mov (+8*4)(%rcx), %rax - mov (+8*5)(%rcx), %rbx - mov (+8*6)(%rcx), %rdi - mov (+8*7)(%rcx), %rdx - - # use the rsi mask as before - # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m - and %rsi, %rax - and %rsi, %rbx - and %rsi, %rdi - and %rsi, %rdx - - # if rbp = 0, there was a borrow before, it is moved to the carry flag - # if rbp = 1, there was not a borrow before, carry flag is cleared - sub \$1, %rbp - - sbb %rax, $X[4] - sbb %rbx, $X[5] - sbb %rdi, $X[6] - sbb %rdx, $X[7] - - # write R back to memory - - mov (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi - mov $X[0], (+8*0)(%rsi) - mov $X[1], (+8*1)(%rsi) - mov $X[2], (+8*2)(%rsi) - mov $X[3], (+8*3)(%rsi) - mov $X[4], (+8*4)(%rsi) - mov $X[5], (+8*5)(%rsi) - mov $X[6], (+8*6)(%rsi) - mov $X[7], (+8*7)(%rsi) - - ret -.size mont_reduce,.-mont_reduce -___ -}}} - -{{{ -#MUL_512x512 MACRO pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2 -# -# Inputs: pDst: Destination (1024 bits, 16 qwords) -# pA: Multiplicand (512 bits, 8 qwords) -# pB: Multiplicand (512 bits, 8 qwords) -# Uses registers rax, rdx, args -# B operand in [pB] and also in x7...x0 -sub MUL_512x512 -{ - my ($pDst, $pA, $pB, $x, $OP, $TMP, $pDst_o)=@_; - my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); - my @X=@$x; # make a copy - -$code.=<<___; - mov (+8*0)($pA), $OP - - mov $X[0], %rax - mul $OP # rdx:rax = %OP * [0] - mov %rax, (+$pDst_o+8*0)($pDst) - mov %rdx, $X[0] -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov $X[$i], %rax - mul $OP # rdx:rax = %OP * [$i] - add %rax, $X[$i-1] - adc \$0, %rdx - mov %rdx, $X[$i] -___ -} - -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov (+8*$i)($pA), $OP -___ - - &MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP); - push(@X,shift(@X)); -} - -$code.=<<___; - mov $X[0], (+$pDst_o+8*8)($pDst) - mov $X[1], (+$pDst_o+8*9)($pDst) - mov $X[2], (+$pDst_o+8*10)($pDst) - mov $X[3], (+$pDst_o+8*11)($pDst) - mov $X[4], (+$pDst_o+8*12)($pDst) - mov $X[5], (+$pDst_o+8*13)($pDst) - mov $X[6], (+$pDst_o+8*14)($pDst) - mov $X[7], (+$pDst_o+8*15)($pDst) -___ -} - -# -# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits) -# Input: src1: Address of source 1: rdi -# src2: Address of source 2: rsi -# Output: dst: Address of destination: [red_res_addr] -# src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10 -# Temp: Clobbers [tmp16], all registers -$code.=<<___; -.type mont_mul_a3b,\@abi-omnipotent -.align 16 -mont_mul_a3b: - # - # multiply tmp = src1 * src2 - # For multiply: dst = rcx, src1 = rdi, src2 = rsi - # stack depth is extra 8 from call -___ - &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx"); -$code.=<<___; - # - # Dst = tmp % m - # Call reduce(tmp, m, data, dst) - - # tail recursion optimization: jmp to mont_reduce and return from there - jmp mont_reduce - # call mont_reduce - # ret -.size mont_mul_a3b,.-mont_mul_a3b -___ -}}} - -{{{ -#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4 -# -# Input in memory [pA] and also in x7...x0 -# Uses all argument registers plus rax and rdx -# -# This version computes all of the off-diagonal terms into memory, -# and then it adds in the diagonal terms - -sub SQR_512 -{ - my ($pDst, $pA, $x, $A, $tmp, $x7, $x6, $pDst_o)=@_; - my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); - my @X=@$x; # make a copy -$code.=<<___; - # ------------------ - # first pass 01...07 - # ------------------ - mov $X[0], $A - - mov $X[1],%rax - mul $A - mov %rax, (+$pDst_o+8*1)($pDst) -___ -for(my $i=2;$i<8;$i++) { -$code.=<<___; - mov %rdx, $X[$i-2] - mov $X[$i],%rax - mul $A - add %rax, $X[$i-2] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $x7 - - mov $X[0], (+$pDst_o+8*2)($pDst) - - # ------------------ - # second pass 12...17 - # ------------------ - - mov (+8*1)($pA), $A - - mov (+8*2)($pA),%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*3)($pDst) - - mov %rdx, $X[0] - mov (+8*3)($pA),%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*4)($pDst) - - mov %rdx, $X[0] - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[3] - adc \$0, %rdx - add $X[0], $X[3] - adc \$0, %rdx - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[4] - adc \$0, %rdx - add $X[0], $X[4] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], $x7 - adc \$0, %rdx - - mov %rdx, $X[1] - - # ------------------ - # third pass 23...27 - # ------------------ - mov (+8*2)($pA), $A - - mov (+8*3)($pA),%rax - mul $A - add %rax, $X[3] - adc \$0, %rdx - mov $X[3], (+$pDst_o+8*5)($pDst) - - mov %rdx, $X[0] - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[4] - adc \$0, %rdx - add $X[0], $X[4] - adc \$0, %rdx - mov $X[4], (+$pDst_o+8*6)($pDst) - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], $x7 - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - - mov %rdx, $X[2] - - # ------------------ - # fourth pass 34...37 - # ------------------ - - mov (+8*3)($pA), $A - - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - mov $X[5], (+$pDst_o+8*7)($pDst) - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], $x7 - adc \$0, %rdx - mov $x7, (+$pDst_o+8*8)($pDst) - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - - mov %rdx, $X[5] - - # ------------------ - # fifth pass 45...47 - # ------------------ - mov (+8*4)($pA), $A - - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*9)($pDst) - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*10)($pDst) - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[1] - - # ------------------ - # sixth pass 56...57 - # ------------------ - mov (+8*5)($pA), $A - - mov $X[6],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - mov $X[5], (+$pDst_o+8*11)($pDst) - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*12)($pDst) - - mov %rdx, $X[2] - - # ------------------ - # seventh pass 67 - # ------------------ - mov $X[6], $A - - mov $X[7],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*13)($pDst) - - mov %rdx, (+$pDst_o+8*14)($pDst) - - # start finalize (add in squares, and double off-terms) - mov (+$pDst_o+8*1)($pDst), $X[0] - mov (+$pDst_o+8*2)($pDst), $X[1] - mov (+$pDst_o+8*3)($pDst), $X[2] - mov (+$pDst_o+8*4)($pDst), $X[3] - mov (+$pDst_o+8*5)($pDst), $X[4] - mov (+$pDst_o+8*6)($pDst), $X[5] - - mov (+8*3)($pA), %rax - mul %rax - mov %rax, $x6 - mov %rdx, $X[6] - - add $X[0], $X[0] - adc $X[1], $X[1] - adc $X[2], $X[2] - adc $X[3], $X[3] - adc $X[4], $X[4] - adc $X[5], $X[5] - adc \$0, $X[6] - - mov (+8*0)($pA), %rax - mul %rax - mov %rax, (+$pDst_o+8*0)($pDst) - mov %rdx, $A - - mov (+8*1)($pA), %rax - mul %rax - - add $A, $X[0] - adc %rax, $X[1] - adc \$0, %rdx - - mov %rdx, $A - mov $X[0], (+$pDst_o+8*1)($pDst) - mov $X[1], (+$pDst_o+8*2)($pDst) - - mov (+8*2)($pA), %rax - mul %rax - - add $A, $X[2] - adc %rax, $X[3] - adc \$0, %rdx - - mov %rdx, $A - - mov $X[2], (+$pDst_o+8*3)($pDst) - mov $X[3], (+$pDst_o+8*4)($pDst) - - xor $tmp, $tmp - add $A, $X[4] - adc $x6, $X[5] - adc \$0, $tmp - - mov $X[4], (+$pDst_o+8*5)($pDst) - mov $X[5], (+$pDst_o+8*6)($pDst) - - # %%tmp has 0/1 in column 7 - # %%A6 has a full value in column 7 - - mov (+$pDst_o+8*7)($pDst), $X[0] - mov (+$pDst_o+8*8)($pDst), $X[1] - mov (+$pDst_o+8*9)($pDst), $X[2] - mov (+$pDst_o+8*10)($pDst), $X[3] - mov (+$pDst_o+8*11)($pDst), $X[4] - mov (+$pDst_o+8*12)($pDst), $X[5] - mov (+$pDst_o+8*13)($pDst), $x6 - mov (+$pDst_o+8*14)($pDst), $x7 - - mov $X[7], %rax - mul %rax - mov %rax, $X[7] - mov %rdx, $A - - add $X[0], $X[0] - adc $X[1], $X[1] - adc $X[2], $X[2] - adc $X[3], $X[3] - adc $X[4], $X[4] - adc $X[5], $X[5] - adc $x6, $x6 - adc $x7, $x7 - adc \$0, $A - - add $tmp, $X[0] - - mov (+8*4)($pA), %rax - mul %rax - - add $X[6], $X[0] - adc %rax, $X[1] - adc \$0, %rdx - - mov %rdx, $tmp - - mov $X[0], (+$pDst_o+8*7)($pDst) - mov $X[1], (+$pDst_o+8*8)($pDst) - - mov (+8*5)($pA), %rax - mul %rax - - add $tmp, $X[2] - adc %rax, $X[3] - adc \$0, %rdx - - mov %rdx, $tmp - - mov $X[2], (+$pDst_o+8*9)($pDst) - mov $X[3], (+$pDst_o+8*10)($pDst) - - mov (+8*6)($pA), %rax - mul %rax - - add $tmp, $X[4] - adc %rax, $X[5] - adc \$0, %rdx - - mov $X[4], (+$pDst_o+8*11)($pDst) - mov $X[5], (+$pDst_o+8*12)($pDst) - - add %rdx, $x6 - adc $X[7], $x7 - adc \$0, $A - - mov $x6, (+$pDst_o+8*13)($pDst) - mov $x7, (+$pDst_o+8*14)($pDst) - mov $A, (+$pDst_o+8*15)($pDst) -___ -} - -# -# sqr_reduce: subroutine to compute Result = reduce(Result * Result) -# -# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10 -# -$code.=<<___; -.type sqr_reduce,\@abi-omnipotent -.align 16 -sqr_reduce: - mov (+$pResult_offset+8)(%rsp), %rcx -___ - &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi"); -$code.=<<___; - # tail recursion optimization: jmp to mont_reduce and return from there - jmp mont_reduce - # call mont_reduce - # ret -.size sqr_reduce,.-sqr_reduce -___ -}}} - -# -# MAIN FUNCTION -# - -#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */ -# UINT64 *g, /* 512 bits, 8 qwords */ -# UINT64 *exp, /* 512 bits, 8 qwords */ -# struct mod_ctx_512 *data) - -# window size = 5 -# table size = 2^5 = 32 -#table_entries equ 32 -#table_size equ table_entries * 8 -$code.=<<___; -.globl mod_exp_512 -.type mod_exp_512,\@function,4 -mod_exp_512: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - - # adjust stack down and then align it with cache boundary - mov %rsp, %r8 - sub \$$mem_size, %rsp - and \$-64, %rsp - - # store previous stack pointer and arguments - mov %r8, (+$rsp_offset)(%rsp) - mov %rdi, (+$pResult_offset)(%rsp) - mov %rsi, (+$pG_offset)(%rsp) - mov %rcx, (+$pData_offset)(%rsp) -.Lbody: - # transform g into montgomery space - # GT = reduce(g * C2) = reduce(g * (2^256)) - # reduce expects to have the input in [tmp16] - pxor %xmm4, %xmm4 - movdqu (+16*0)(%rsi), %xmm0 - movdqu (+16*1)(%rsi), %xmm1 - movdqu (+16*2)(%rsi), %xmm2 - movdqu (+16*3)(%rsi), %xmm3 - movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) - movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp) - movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp) - movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp) - movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp) - - # load pExp before rdx gets blown away - movdqu (+16*0)(%rdx), %xmm0 - movdqu (+16*1)(%rdx), %xmm1 - movdqu (+16*2)(%rdx), %xmm2 - movdqu (+16*3)(%rdx), %xmm3 - - lea (+$GT_offset)(%rsp), %rbx - mov %rbx, (+$red_result_addr_offset)(%rsp) - call mont_reduce - - # Initialize tmp = C - lea (+$tmp_offset)(%rsp), %rcx - xor %rax, %rax - mov %rax, (+8*0)(%rcx) - mov %rax, (+8*1)(%rcx) - mov %rax, (+8*3)(%rcx) - mov %rax, (+8*4)(%rcx) - mov %rax, (+8*5)(%rcx) - mov %rax, (+8*6)(%rcx) - mov %rax, (+8*7)(%rcx) - mov %rax, (+$exp_offset+8*8)(%rsp) - movq \$1, (+8*2)(%rcx) - - lea (+$garray_offset)(%rsp), %rbp - mov %rcx, %rsi # pTmp - mov %rbp, %rdi # Garray[][0] -___ - - &swizzle("%rdi", "%rcx", "%rax", "%rbx"); - - # for (rax = 31; rax != 0; rax--) { - # tmp = reduce(tmp * G) - # swizzle(pg, tmp); - # pg += 2; } -$code.=<<___; - mov \$31, %rax - mov %rax, (+$i_offset)(%rsp) - mov %rbp, (+$pg_offset)(%rsp) - # rsi -> pTmp - mov %rsi, (+$red_result_addr_offset)(%rsp) - mov (+8*0)(%rsi), %r10 - mov (+8*1)(%rsi), %r11 - mov (+8*2)(%rsi), %r12 - mov (+8*3)(%rsi), %r13 - mov (+8*4)(%rsi), %r14 - mov (+8*5)(%rsi), %r15 - mov (+8*6)(%rsi), %r8 - mov (+8*7)(%rsi), %r9 -init_loop: - lea (+$GT_offset)(%rsp), %rdi - call mont_mul_a3b - lea (+$tmp_offset)(%rsp), %rsi - mov (+$pg_offset)(%rsp), %rbp - add \$2, %rbp - mov %rbp, (+$pg_offset)(%rsp) - mov %rsi, %rcx # rcx = rsi = addr of tmp -___ - - &swizzle("%rbp", "%rcx", "%rax", "%rbx"); -$code.=<<___; - mov (+$i_offset)(%rsp), %rax - sub \$1, %rax - mov %rax, (+$i_offset)(%rsp) - jne init_loop - - # - # Copy exponent onto stack - movdqa %xmm0, (+$exp_offset+16*0)(%rsp) - movdqa %xmm1, (+$exp_offset+16*1)(%rsp) - movdqa %xmm2, (+$exp_offset+16*2)(%rsp) - movdqa %xmm3, (+$exp_offset+16*3)(%rsp) - - - # - # Do exponentiation - # Initialize result to G[exp{511:507}] - mov (+$exp_offset+62)(%rsp), %eax - mov %rax, %rdx - shr \$11, %rax - and \$0x07FF, %edx - mov %edx, (+$exp_offset+62)(%rsp) - lea (+$garray_offset)(%rsp,%rax,2), %rsi - mov (+$pResult_offset)(%rsp), %rdx -___ - - &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); - - # - # Loop variables - # rcx = [loop_idx] = index: 510-5 to 0 by 5 -$code.=<<___; - movq \$505, (+$loop_idx_offset)(%rsp) - - mov (+$pResult_offset)(%rsp), %rcx - mov %rcx, (+$red_result_addr_offset)(%rsp) - mov (+8*0)(%rcx), %r10 - mov (+8*1)(%rcx), %r11 - mov (+8*2)(%rcx), %r12 - mov (+8*3)(%rcx), %r13 - mov (+8*4)(%rcx), %r14 - mov (+8*5)(%rcx), %r15 - mov (+8*6)(%rcx), %r8 - mov (+8*7)(%rcx), %r9 - jmp sqr_2 - -main_loop_a3b: - call sqr_reduce - call sqr_reduce - call sqr_reduce -sqr_2: - call sqr_reduce - call sqr_reduce - - # - # Do multiply, first look up proper value in Garray - mov (+$loop_idx_offset)(%rsp), %rcx # bit index - mov %rcx, %rax - shr \$4, %rax # rax is word pointer - mov (+$exp_offset)(%rsp,%rax,2), %edx - and \$15, %rcx - shrq %cl, %rdx - and \$0x1F, %rdx - - lea (+$garray_offset)(%rsp,%rdx,2), %rsi - lea (+$tmp_offset)(%rsp), %rdx - mov %rdx, %rdi -___ - - &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); - # rdi = tmp = pG - - # - # Call mod_mul_a1(pDst, pSrc1, pSrc2, pM, pData) - # result result pG M Data -$code.=<<___; - mov (+$pResult_offset)(%rsp), %rsi - call mont_mul_a3b - - # - # finish loop - mov (+$loop_idx_offset)(%rsp), %rcx - sub \$5, %rcx - mov %rcx, (+$loop_idx_offset)(%rsp) - jge main_loop_a3b - - # - -end_main_loop_a3b: - # transform result out of Montgomery space - # result = reduce(result) - mov (+$pResult_offset)(%rsp), %rdx - pxor %xmm4, %xmm4 - movdqu (+16*0)(%rdx), %xmm0 - movdqu (+16*1)(%rdx), %xmm1 - movdqu (+16*2)(%rdx), %xmm2 - movdqu (+16*3)(%rdx), %xmm3 - movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) - movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp) - movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp) - movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp) - movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp) - call mont_reduce - - # If result > m, subract m - # load result into r15:r8 - mov (+$pResult_offset)(%rsp), %rax - mov (+8*0)(%rax), %r8 - mov (+8*1)(%rax), %r9 - mov (+8*2)(%rax), %r10 - mov (+8*3)(%rax), %r11 - mov (+8*4)(%rax), %r12 - mov (+8*5)(%rax), %r13 - mov (+8*6)(%rax), %r14 - mov (+8*7)(%rax), %r15 - - # subtract m - mov (+$pData_offset)(%rsp), %rbx - add \$$M, %rbx - - sub (+8*0)(%rbx), %r8 - sbb (+8*1)(%rbx), %r9 - sbb (+8*2)(%rbx), %r10 - sbb (+8*3)(%rbx), %r11 - sbb (+8*4)(%rbx), %r12 - sbb (+8*5)(%rbx), %r13 - sbb (+8*6)(%rbx), %r14 - sbb (+8*7)(%rbx), %r15 - - # if Carry is clear, replace result with difference - mov (+8*0)(%rax), %rsi - mov (+8*1)(%rax), %rdi - mov (+8*2)(%rax), %rcx - mov (+8*3)(%rax), %rdx - cmovnc %r8, %rsi - cmovnc %r9, %rdi - cmovnc %r10, %rcx - cmovnc %r11, %rdx - mov %rsi, (+8*0)(%rax) - mov %rdi, (+8*1)(%rax) - mov %rcx, (+8*2)(%rax) - mov %rdx, (+8*3)(%rax) - - mov (+8*4)(%rax), %rsi - mov (+8*5)(%rax), %rdi - mov (+8*6)(%rax), %rcx - mov (+8*7)(%rax), %rdx - cmovnc %r12, %rsi - cmovnc %r13, %rdi - cmovnc %r14, %rcx - cmovnc %r15, %rdx - mov %rsi, (+8*4)(%rax) - mov %rdi, (+8*5)(%rax) - mov %rcx, (+8*6)(%rax) - mov %rdx, (+8*7)(%rax) - - mov (+$rsp_offset)(%rsp), %rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbx - mov 40(%rsi),%rbp - lea 48(%rsi),%rsp -.Lepilogue: - ret -.size mod_exp_512, . - mod_exp_512 -___ - -if ($win64) { -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -my $rec="%rcx"; -my $frame="%rdx"; -my $context="%r8"; -my $disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type mod_exp_512_se_handler,\@abi-omnipotent -.align 16 -mod_exp_512_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lbody(%rip),%r10 - cmp %r10,%rbx # context->RipRsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - mov $rsp_offset(%rax),%rax # pull saved Rsp - - mov 32(%rax),%rbx - mov 40(%rax),%rbp - mov 24(%rax),%r12 - mov 16(%rax),%r13 - mov 8(%rax),%r14 - mov 0(%rax),%r15 - lea 48(%rax),%rax - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size mod_exp_512_se_handler,.-mod_exp_512_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_mod_exp_512 - .rva .LSEH_end_mod_exp_512 - .rva .LSEH_info_mod_exp_512 - -.section .xdata -.align 8 -.LSEH_info_mod_exp_512: - .byte 9,0,0,0 - .rva mod_exp_512_se_handler -___ -} - -sub reg_part { -my ($reg,$conv)=@_; - if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } - elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } - elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } - elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } - return $reg; -} - -$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/(\(\+[^)]+\))/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2.s b/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2.s deleted file mode 100644 index f3b16290eb..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2.s +++ /dev/null @@ -1,1618 +0,0 @@ -; -; PA-RISC 2.0 implementation of bn_asm code, based on the -; 64-bit version of the code. This code is effectively the -; same as the 64-bit version except the register model is -; slightly different given all values must be 32-bit between -; function calls. Thus the 64-bit return values are returned -; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit -; -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0N - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 32-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28,r29 (ret0,ret1) -; Stack pointer ; r30 (sp) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -n .reg %r23 - -; -; Note that the "w" argument for bn_mul_add_words and bn_mul_words -; is passed on the stack at a delta of -56 from the top of stack -; as the routine is entered. -; - -; -; Globals used in some routines -; - -top_overflow .reg %r23 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg3 = num -; -56(sp) = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - NOP - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - - STD %r8,40(%sp) ; save r8 - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret1 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret1,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg3 = num -; w on stack at -56(sp) - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret1 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret1,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret1 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just output from the HP C compiler. -; -;------------------------------------------------------------------------------ -bn_div_words - .PROC - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN - .IMPORT BN_num_bits_word,CODE - ;--- not PIC .IMPORT __iob,DATA - ;--- not PIC .IMPORT fprintf,CODE - .IMPORT abort,CODE - .IMPORT $$div2U,MILLICODE - .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .ENTRY - STW %r2,-20(%r30) ;offset 0x8ec - STW,MA %r3,192(%r30) ;offset 0x8f0 - STW %r4,-188(%r30) ;offset 0x8f4 - DEPD %r5,31,32,%r6 ;offset 0x8f8 - STD %r6,-184(%r30) ;offset 0x8fc - DEPD %r7,31,32,%r8 ;offset 0x900 - STD %r8,-176(%r30) ;offset 0x904 - STW %r9,-168(%r30) ;offset 0x908 - LDD -248(%r30),%r3 ;offset 0x90c - COPY %r26,%r4 ;offset 0x910 - COPY %r24,%r5 ;offset 0x914 - DEPD %r25,31,32,%r4 ;offset 0x918 - CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c - DEPD %r23,31,32,%r5 ;offset 0x920 - MOVIB,TR -1,%r29,$00060002 ;offset 0x924 - EXTRD,U %r29,31,32,%r28 ;offset 0x928 -$0006002A - LDO -1(%r29),%r29 ;offset 0x92c - SUB %r23,%r7,%r23 ;offset 0x930 -$00060024 - SUB %r4,%r31,%r25 ;offset 0x934 - AND %r25,%r19,%r26 ;offset 0x938 - CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c - DEPD,Z %r25,31,32,%r20 ;offset 0x940 - OR %r20,%r24,%r21 ;offset 0x944 - CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 - SUB %r31,%r2,%r31 ;offset 0x94c -$00060046 -$0006002E - DEPD,Z %r23,31,32,%r25 ;offset 0x950 - EXTRD,U %r23,31,32,%r26 ;offset 0x954 - AND %r25,%r19,%r24 ;offset 0x958 - ADD,L %r31,%r26,%r31 ;offset 0x95c - CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 - LDO 1(%r31),%r31 ;offset 0x964 -$00060032 - CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 - LDO -1(%r29),%r29 ;offset 0x96c - ADD,L %r4,%r3,%r4 ;offset 0x970 -$00060036 - ADDIB,=,N -1,%r8,$D0 ;offset 0x974 - SUB %r5,%r24,%r28 ;offset 0x978 -$0006003A - SUB %r4,%r31,%r24 ;offset 0x97c - SHRPD %r24,%r28,32,%r4 ;offset 0x980 - DEPD,Z %r29,31,32,%r9 ;offset 0x984 - DEPD,Z %r28,31,32,%r5 ;offset 0x988 -$0006001C - EXTRD,U %r4,31,32,%r31 ;offset 0x98c - CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 - MOVB,TR %r6,%r29,$D1 ;offset 0x994 - STD %r29,-152(%r30) ;offset 0x998 -$0006000C - EXTRD,U %r3,31,32,%r25 ;offset 0x99c - COPY %r3,%r26 ;offset 0x9a0 - EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 - EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 - .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; - B,L BN_num_bits_word,%r2 ;offset 0x9ac - EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 - LDI 64,%r20 ;offset 0x9b4 - DEPD %r7,31,32,%r5 ;offset 0x9b8 - DEPD %r8,31,32,%r4 ;offset 0x9bc - DEPD %r9,31,32,%r3 ;offset 0x9c0 - CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 - COPY %r28,%r24 ;offset 0x9c8 - MTSARCM %r24 ;offset 0x9cc - DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 - CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 -$00060012 - SUBI 64,%r24,%r31 ;offset 0x9d8 - CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc - SUB %r4,%r3,%r4 ;offset 0x9e0 -$00060016 - CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 - COPY %r0,%r9 ;offset 0x9e8 - MTSARCM %r31 ;offset 0x9ec - DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 - SUBI 64,%r31,%r26 ;offset 0x9f4 - MTSAR %r26 ;offset 0x9f8 - SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc - MTSARCM %r31 ;offset 0xa00 - DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 -$0006001A - DEPDI,Z -1,31,32,%r19 ;offset 0xa08 - AND %r3,%r19,%r29 ;offset 0xa0c - EXTRD,U %r29,31,32,%r2 ;offset 0xa10 - DEPDI,Z -1,63,32,%r6 ;offset 0xa14 - MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 - EXTRD,U %r3,63,32,%r7 ;offset 0xa1c -$D2 - ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 - ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 - ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 - ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; - ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c - ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 - .CALL ; - B,L abort,%r2 ;offset 0xa34 - NOP ;offset 0xa38 - B $D3 ;offset 0xa3c - LDW -212(%r30),%r2 ;offset 0xa40 -$00060020 - COPY %r4,%r26 ;offset 0xa44 - EXTRD,U %r4,31,32,%r25 ;offset 0xa48 - COPY %r2,%r24 ;offset 0xa4c - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r31 ;offset 0xa50 - EXTRD,U %r2,31,32,%r23 ;offset 0xa54 - DEPD %r28,31,32,%r29 ;offset 0xa58 -$00060022 - STD %r29,-152(%r30) ;offset 0xa5c -$D1 - AND %r5,%r19,%r24 ;offset 0xa60 - EXTRD,U %r24,31,32,%r24 ;offset 0xa64 - STW %r2,-160(%r30) ;offset 0xa68 - STW %r7,-128(%r30) ;offset 0xa6c - FLDD -152(%r30),%fr4 ;offset 0xa70 - FLDD -152(%r30),%fr7 ;offset 0xa74 - FLDW -160(%r30),%fr8L ;offset 0xa78 - FLDW -128(%r30),%fr5L ;offset 0xa7c - XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 - FSTD %fr10,-136(%r30) ;offset 0xa84 - XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 - FSTD %fr22,-144(%r30) ;offset 0xa8c - XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 - XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 - FSTD %fr11,-112(%r30) ;offset 0xa98 - FSTD %fr23,-120(%r30) ;offset 0xa9c - LDD -136(%r30),%r28 ;offset 0xaa0 - DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 - LDD -144(%r30),%r20 ;offset 0xaa8 - ADD,L %r20,%r31,%r31 ;offset 0xaac - LDD -112(%r30),%r22 ;offset 0xab0 - DEPD,Z %r22,31,32,%r22 ;offset 0xab4 - LDD -120(%r30),%r21 ;offset 0xab8 - B $00060024 ;offset 0xabc - ADD,L %r21,%r22,%r23 ;offset 0xac0 -$D0 - OR %r9,%r29,%r29 ;offset 0xac4 -$00060040 - EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -$00060002 -$L2 - LDW -212(%r30),%r2 ;offset 0xacc -$D3 - LDW -168(%r30),%r9 ;offset 0xad0 - LDD -176(%r30),%r8 ;offset 0xad4 - EXTRD,U %r8,31,32,%r7 ;offset 0xad8 - LDD -184(%r30),%r6 ;offset 0xadc - EXTRD,U %r6,31,32,%r5 ;offset 0xae0 - LDW -188(%r30),%r4 ;offset 0xae4 - BVE (%r2) ;offset 0xae8 - .EXIT - LDW,MB -192(%r30),%r3 ;offset 0xaec - .PROCEND ;in=23,25;out=28,29;fpin=105,107; - - - - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SPACE $PRIVATE$,SORT=16 -;--- not PIC .IMPORT $global$,DATA -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c -;--- not PIC C$7 -;--- not PIC .ALIGN 8 -;--- not PIC .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2W.s b/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2W.s deleted file mode 100644 index a99545754d..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/pa-risc2W.s +++ /dev/null @@ -1,1605 +0,0 @@ -; -; PA-RISC 64-bit implementation of bn_asm code -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0W - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 64-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28 (ret0,ret1) -; Stack pointer ; r30 (sp) -; global data pointer ; r27 (dp) -; argument pointer ; r29 (ap) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -w .reg %r23 -n .reg %r23 - - -; -; Globals used in some routines -; - -top_overflow .reg %r29 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = num -; arg3 = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; Needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - STD %r8,40(%sp) ; save r8 - - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; store w on stack - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret0 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret0,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret0 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; arg3 = w - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; w on stack - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret0 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret0,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret0 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just modified assembly from the compiler, thus the -; lack of variable names. -; -;------------------------------------------------------------------------------ -bn_div_words - .proc - .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .IMPORT BN_num_bits_word,CODE,NO_RELOCATION - .IMPORT __iob,DATA - .IMPORT fprintf,CODE,NO_RELOCATION - .IMPORT abort,CODE,NO_RELOCATION - .IMPORT $$div2U,MILLICODE - .entry - STD %r2,-16(%r30) - STD,MA %r3,352(%r30) - STD %r4,-344(%r30) - STD %r5,-336(%r30) - STD %r6,-328(%r30) - STD %r7,-320(%r30) - STD %r8,-312(%r30) - STD %r9,-304(%r30) - STD %r10,-296(%r30) - - STD %r27,-288(%r30) ; save gp - - COPY %r24,%r3 ; save d - COPY %r26,%r4 ; save h (high 64-bits) - LDO -1(%r0),%ret0 ; return -1 by default - - CMPB,*= %r0,%arg2,$D3 ; if (d == 0) - COPY %r25,%r5 ; save l (low 64-bits) - - LDO -48(%r30),%r29 ; create ap - .CALL ;in=26,29;out=28; - B,L BN_num_bits_word,%r2 - COPY %r3,%r26 - LDD -288(%r30),%r27 ; restore gp - LDI 64,%r21 - - CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) - COPY %ret0,%r24 ; i - MTSARCM %r24 - DEPDI,Z -1,%sar,1,%r29 - CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<= d) - SUB %r4,%r3,%r4 ; h -= d - CMPB,= %r31,%r0,$0000001A ; if (i) - COPY %r0,%r10 ; ret = 0 - MTSARCM %r31 ; i to shift - DEPD,Z %r3,%sar,64,%r3 ; d <<= i; - SUBI 64,%r31,%r19 ; 64 - i; redundent - MTSAR %r19 ; (64 -i) to shift - SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) - MTSARCM %r31 ; i to shift - DEPD,Z %r5,%sar,64,%r5 ; l <<= i; - -$0000001A - DEPDI,Z -1,31,32,%r19 - EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 - EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff - LDO 2(%r0),%r9 - STD %r3,-280(%r30) ; "d" to stack - -$0000001C - DEPDI,Z -1,63,32,%r29 ; - EXTRD,U %r4,31,32,%r31 ; h >> 32 - CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div - COPY %r4,%r26 - EXTRD,U %r4,31,32,%r25 - COPY %r6,%r24 - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r2 - EXTRD,U %r6,31,32,%r23 - DEPD %r28,31,32,%r29 -$D2 - STD %r29,-272(%r30) ; q - AND %r5,%r19,%r24 ; t & 0xffffffff00000000; - EXTRD,U %r24,31,32,%r24 ; ??? - FLDD -272(%r30),%fr7 ; q - FLDD -280(%r30),%fr8 ; d - XMPYU %fr8L,%fr7L,%fr10 - FSTD %fr10,-256(%r30) - XMPYU %fr8L,%fr7R,%fr22 - FSTD %fr22,-264(%r30) - XMPYU %fr8R,%fr7L,%fr11 - XMPYU %fr8R,%fr7R,%fr23 - FSTD %fr11,-232(%r30) - FSTD %fr23,-240(%r30) - LDD -256(%r30),%r28 - DEPD,Z %r28,31,32,%r2 - LDD -264(%r30),%r20 - ADD,L %r20,%r2,%r31 - LDD -232(%r30),%r22 - DEPD,Z %r22,31,32,%r22 - LDD -240(%r30),%r21 - B $00000024 ; enter loop - ADD,L %r21,%r22,%r23 - -$0000002A - LDO -1(%r29),%r29 - SUB %r23,%r8,%r23 -$00000024 - SUB %r4,%r31,%r25 - AND %r25,%r19,%r26 - CMPB,*<>,N %r0,%r26,$00000046 ; (forward) - DEPD,Z %r25,31,32,%r20 - OR %r20,%r24,%r21 - CMPB,*<<,N %r21,%r23,$0000002A ;(backward) - SUB %r31,%r6,%r31 -;-------------Break path--------------------- - -$00000046 - DEPD,Z %r23,31,32,%r25 ;tl - EXTRD,U %r23,31,32,%r26 ;t - AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L - ADD,L %r31,%r26,%r31 ;th += t; - CMPCLR,*>>= %r5,%r24,%r0 ;if (l>32)); - DEPD,Z %r29,31,32,%r10 ; ret = q<<32 - b $0000001C - DEPD,Z %r28,31,32,%r5 ; l = l << 32 - -$D1 - OR %r10,%r29,%r28 ; ret |= q -$D3 - LDD -368(%r30),%r2 -$D0 - LDD -296(%r30),%r10 - LDD -304(%r30),%r9 - LDD -312(%r30),%r8 - LDD -320(%r30),%r7 - LDD -328(%r30),%r6 - LDD -336(%r30),%r5 - LDD -344(%r30),%r4 - BVE (%r2) - .EXIT - LDD,MB -352(%r30),%r3 - -bn_div_err_case - MFIA %r6 - ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 - LDO R'bn_div_words-bn_div_err_case(%r1),%r6 - ADDIL LT'__iob,%r27,%r1 - LDD RT'__iob(%r1),%r26 - ADDIL L'C$4-bn_div_words,%r6,%r1 - LDO R'C$4-bn_div_words(%r1),%r25 - LDO 64(%r26),%r26 - .CALL ;in=24,25,26,29;out=28; - B,L fprintf,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - .CALL ;in=29; - B,L abort,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - B $D0 - LDD -368(%r30),%r2 - .PROCEND ;in=24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SPACE $PRIVATE$,SORT=16 - .IMPORT $global$,DATA - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SUBSPA $LIT$,ACCESS=0x2c -C$4 - .ALIGN 8 - .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/drivers/builtin_openssl2/crypto/bn/asm/parisc-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/parisc-mont.pl deleted file mode 100644 index c02ef6f014..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/parisc-mont.pl +++ /dev/null @@ -1,995 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# On PA-7100LC this module performs ~90-50% better, less for longer -# keys, than code generated by gcc 3.2 for PA-RISC 1.1. Latter means -# that compiler utilized xmpyu instruction to perform 32x32=64-bit -# multiplication, which in turn means that "baseline" performance was -# optimal in respect to instruction set capabilities. Fair comparison -# with vendor compiler is problematic, because OpenSSL doesn't define -# BN_LLONG [presumably] for historical reasons, which drives compiler -# toward 4 times 16x16=32-bit multiplicatons [plus complementary -# shifts and additions] instead. This means that you should observe -# several times improvement over code generated by vendor compiler -# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual -# improvement coefficient was never collected on PA-7100LC, or any -# other 1.1 CPU, because I don't have access to such machine with -# vendor compiler. But to give you a taste, PA-RISC 1.1 code path -# reportedly outperformed code generated by cc +DA1.1 +O3 by factor -# of ~5x on PA-8600. -# -# On PA-RISC 2.0 it has to compete with pa-risc2[W].s, which is -# reportedly ~2x faster than vendor compiler generated code [according -# to comment in pa-risc2[W].s]. Here comes a catch. Execution core of -# this implementation is actually 32-bit one, in the sense that it -# operates on 32-bit values. But pa-risc2[W].s operates on arrays of -# 64-bit BN_LONGs... How do they interoperate then? No problem. This -# module picks halves of 64-bit values in reverse order and pretends -# they were 32-bit BN_LONGs. But can 32-bit core compete with "pure" -# 64-bit code such as pa-risc2[W].s then? Well, the thing is that -# 32x32=64-bit multiplication is the best even PA-RISC 2.0 can do, -# i.e. there is no "wider" multiplication like on most other 64-bit -# platforms. This means that even being effectively 32-bit, this -# implementation performs "64-bit" computational task in same amount -# of arithmetic operations, most notably multiplications. It requires -# more memory references, most notably to tp[num], but this doesn't -# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC -# 2.0 code path provides virtually same performance as pa-risc2[W].s: -# it's ~10% better for shortest key length and ~10% worse for longest -# one. -# -# In case it wasn't clear. The module has two distinct code paths: -# PA-RISC 1.1 and PA-RISC 2.0 ones. Latter features carry-free 64-bit -# additions and 64-bit integer loads, not to mention specific -# instruction scheduling. In 64-bit build naturally only 2.0 code path -# is assembled. In 32-bit application context both code paths are -# assembled, PA-RISC 2.0 CPU is detected at run-time and proper path -# is taken automatically. Also, in 32-bit build the module imposes -# couple of limitations: vector lengths has to be even and vector -# addresses has to be 64-bit aligned. Normally neither is a problem: -# most common key lengths are even and vectors are commonly malloc-ed, -# which ensures alignment. -# -# Special thanks to polarhome.com for providing HP-UX account on -# PA-RISC 1.1 machine, and to correspondent who chose to remain -# anonymous for testing the code on PA-RISC 2.0 machine. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; - -$flavour = shift; -$output = shift; - -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; - $BN_SZ =$SIZE_T; -} else { - $LEVEL ="1.1"; #$LEVEL.="\n\t.ALLOW\t2.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; - $BN_SZ =$SIZE_T; - if (open CONF,"<${dir}../../opensslconf.h") { - while() { - if (m/#\s*define\s+SIXTY_FOUR_BIT/) { - $BN_SZ=8; - $LEVEL="2.0"; - last; - } - } - close CONF; - } -} - -$FRAME=8*$SIZE_T+$FRAME_MARKER; # 8 saved regs + frame marker - # [+ argument transfer] -$LOCALS=$FRAME-$FRAME_MARKER; -$FRAME+=32; # local variables - -$tp="%r31"; -$ti1="%r29"; -$ti0="%r28"; - -$rp="%r26"; -$ap="%r25"; -$bp="%r24"; -$np="%r23"; -$n0="%r22"; # passed through stack in 32-bit -$num="%r21"; # passed through stack in 32-bit -$idx="%r20"; -$arrsz="%r19"; - -$nm1="%r7"; -$nm0="%r6"; -$ab1="%r5"; -$ab0="%r4"; - -$fp="%r3"; -$hi1="%r2"; -$hi0="%r1"; - -$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s - -$fm0="%fr4"; $fti=$fm0; -$fbi="%fr5L"; -$fn0="%fr5R"; -$fai="%fr6"; $fab0="%fr7"; $fab1="%fr8"; -$fni="%fr9"; $fnm0="%fr10"; $fnm1="%fr11"; - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT bn_mul_mont,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR - .ALIGN 64 -bn_mul_mont - .PROC - .CALLINFO FRAME=`$FRAME-8*$SIZE_T`,NO_CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=6 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - ldo -$FRAME(%sp),$fp -___ -$code.=<<___ if ($SIZE_T==4); - ldw `-$FRAME_MARKER-4`($fp),$n0 - ldw `-$FRAME_MARKER-8`($fp),$num - nop - nop ; alignment -___ -$code.=<<___ if ($BN_SZ==4); - comiclr,<= 6,$num,%r0 ; are vectors long enough? - b L\$abort - ldi 0,%r28 ; signal "unhandled" - add,ev %r0,$num,$num ; is $num even? - b L\$abort - nop - or $ap,$np,$ti1 - extru,= $ti1,31,3,%r0 ; are ap and np 64-bit aligned? - b L\$abort - nop - nop ; alignment - nop - - fldws 0($n0),${fn0} - fldws,ma 4($bp),${fbi} ; bp[0] -___ -$code.=<<___ if ($BN_SZ==8); - comib,> 3,$num,L\$abort ; are vectors long enough? - ldi 0,%r28 ; signal "unhandled" - addl $num,$num,$num ; I operate on 32-bit values - - fldws 4($n0),${fn0} ; only low part of n0 - fldws 4($bp),${fbi} ; bp[0] in flipped word order -___ -$code.=<<___; - fldds 0($ap),${fai} ; ap[0,1] - fldds 0($np),${fni} ; np[0,1] - - sh2addl $num,%r0,$arrsz - ldi 31,$hi0 - ldo 36($arrsz),$hi1 ; space for tp[num+1] - andcm $hi1,$hi0,$hi1 ; align - addl $hi1,%sp,%sp - $PUSH $fp,-$SIZE_T(%sp) - - ldo `$LOCALS+16`($fp),$xfer - ldo `$LOCALS+32+4`($fp),$tp - - xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[0] - xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[0] - xmpyu ${fn0},${fab0}R,${fm0} - - addl $arrsz,$ap,$ap ; point at the end - addl $arrsz,$np,$np - subi 0,$arrsz,$idx ; j=0 - ldo 8($idx),$idx ; j++++ - - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m - fstds ${fab0},-16($xfer) - fstds ${fnm0},-8($xfer) - fstds ${fab1},0($xfer) - fstds ${fnm1},8($xfer) - flddx $idx($ap),${fai} ; ap[2,3] - flddx $idx($np),${fni} ; np[2,3] -___ -$code.=<<___ if ($BN_SZ==4); - mtctl $hi0,%cr11 ; $hi0 still holds 31 - extrd,u,*= $hi0,%sar,1,$hi0 ; executes on PA-RISC 1.0 - b L\$parisc11 - nop -___ -$code.=<<___; # PA-RISC 2.0 code-path - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldd -16($xfer),$ab0 - fstds ${fab0},-16($xfer) - - extrd,u $ab0,31,32,$hi0 - extrd,u $ab0,63,32,$ab0 - ldd -8($xfer),$nm0 - fstds ${fnm0},-8($xfer) - ldo 8($idx),$idx ; j++++ - addl $ab0,$nm0,$nm0 ; low part is discarded - extrd,u $nm0,31,32,$hi1 - -L\$1st - xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m - ldd 0($xfer),$ab1 - fstds ${fab1},0($xfer) - addl $hi0,$ab1,$ab1 - extrd,u $ab1,31,32,$hi0 - ldd 8($xfer),$nm1 - fstds ${fnm1},8($xfer) - extrd,u $ab1,63,32,$ab1 - addl $hi1,$nm1,$nm1 - flddx $idx($ap),${fai} ; ap[j,j+1] - flddx $idx($np),${fni} ; np[j,j+1] - addl $ab1,$nm1,$nm1 - extrd,u $nm1,31,32,$hi1 - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldd -16($xfer),$ab0 - fstds ${fab0},-16($xfer) - addl $hi0,$ab0,$ab0 - extrd,u $ab0,31,32,$hi0 - ldd -8($xfer),$nm0 - fstds ${fnm0},-8($xfer) - extrd,u $ab0,63,32,$ab0 - addl $hi1,$nm0,$nm0 - stw $nm1,-4($tp) ; tp[j-1] - addl $ab0,$nm0,$nm0 - stw,ma $nm0,8($tp) ; tp[j-1] - addib,<> 8,$idx,L\$1st ; j++++ - extrd,u $nm0,31,32,$hi1 - - xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m - ldd 0($xfer),$ab1 - fstds ${fab1},0($xfer) - addl $hi0,$ab1,$ab1 - extrd,u $ab1,31,32,$hi0 - ldd 8($xfer),$nm1 - fstds ${fnm1},8($xfer) - extrd,u $ab1,63,32,$ab1 - addl $hi1,$nm1,$nm1 - ldd -16($xfer),$ab0 - addl $ab1,$nm1,$nm1 - ldd -8($xfer),$nm0 - extrd,u $nm1,31,32,$hi1 - - addl $hi0,$ab0,$ab0 - extrd,u $ab0,31,32,$hi0 - stw $nm1,-4($tp) ; tp[j-1] - extrd,u $ab0,63,32,$ab0 - addl $hi1,$nm0,$nm0 - ldd 0($xfer),$ab1 - addl $ab0,$nm0,$nm0 - ldd,mb 8($xfer),$nm1 - extrd,u $nm0,31,32,$hi1 - stw,ma $nm0,8($tp) ; tp[j-1] - - ldo -1($num),$num ; i-- - subi 0,$arrsz,$idx ; j=0 -___ -$code.=<<___ if ($BN_SZ==4); - fldws,ma 4($bp),${fbi} ; bp[1] -___ -$code.=<<___ if ($BN_SZ==8); - fldws 0($bp),${fbi} ; bp[1] in flipped word order -___ -$code.=<<___; - flddx $idx($ap),${fai} ; ap[0,1] - flddx $idx($np),${fni} ; np[0,1] - fldws 8($xfer),${fti}R ; tp[0] - addl $hi0,$ab1,$ab1 - extrd,u $ab1,31,32,$hi0 - extrd,u $ab1,63,32,$ab1 - ldo 8($idx),$idx ; j++++ - xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1] - xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1] - addl $hi1,$nm1,$nm1 - addl $ab1,$nm1,$nm1 - extrd,u $nm1,31,32,$hi1 - fstws,mb ${fab0}L,-8($xfer) ; save high part - stw $nm1,-4($tp) ; tp[j-1] - - fcpy,sgl %fr0,${fti}L ; zero high part - fcpy,sgl %fr0,${fab0}L - addl $hi1,$hi0,$hi0 - extrd,u $hi0,31,32,$hi1 - fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double - fcnvxf,dbl,dbl ${fab0},${fab0} - stw $hi0,0($tp) - stw $hi1,4($tp) - - fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] - fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int - xmpyu ${fn0},${fab0}R,${fm0} - ldo `$LOCALS+32+4`($fp),$tp -L\$outer - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m - fstds ${fab0},-16($xfer) ; 33-bit value - fstds ${fnm0},-8($xfer) - flddx $idx($ap),${fai} ; ap[2] - flddx $idx($np),${fni} ; np[2] - ldo 8($idx),$idx ; j++++ - ldd -16($xfer),$ab0 ; 33-bit value - ldd -8($xfer),$nm0 - ldw 0($xfer),$hi0 ; high part - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - extrd,u $ab0,31,32,$ti0 ; carry bit - extrd,u $ab0,63,32,$ab0 - fstds ${fab1},0($xfer) - addl $ti0,$hi0,$hi0 ; account carry bit - fstds ${fnm1},8($xfer) - addl $ab0,$nm0,$nm0 ; low part is discarded - ldw 0($tp),$ti1 ; tp[1] - extrd,u $nm0,31,32,$hi1 - fstds ${fab0},-16($xfer) - fstds ${fnm0},-8($xfer) - -L\$inner - xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m - ldd 0($xfer),$ab1 - fstds ${fab1},0($xfer) - addl $hi0,$ti1,$ti1 - addl $ti1,$ab1,$ab1 - ldd 8($xfer),$nm1 - fstds ${fnm1},8($xfer) - extrd,u $ab1,31,32,$hi0 - extrd,u $ab1,63,32,$ab1 - flddx $idx($ap),${fai} ; ap[j,j+1] - flddx $idx($np),${fni} ; np[j,j+1] - addl $hi1,$nm1,$nm1 - addl $ab1,$nm1,$nm1 - ldw 4($tp),$ti0 ; tp[j] - stw $nm1,-4($tp) ; tp[j-1] - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldd -16($xfer),$ab0 - fstds ${fab0},-16($xfer) - addl $hi0,$ti0,$ti0 - addl $ti0,$ab0,$ab0 - ldd -8($xfer),$nm0 - fstds ${fnm0},-8($xfer) - extrd,u $ab0,31,32,$hi0 - extrd,u $nm1,31,32,$hi1 - ldw 8($tp),$ti1 ; tp[j] - extrd,u $ab0,63,32,$ab0 - addl $hi1,$nm0,$nm0 - addl $ab0,$nm0,$nm0 - stw,ma $nm0,8($tp) ; tp[j-1] - addib,<> 8,$idx,L\$inner ; j++++ - extrd,u $nm0,31,32,$hi1 - - xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m - ldd 0($xfer),$ab1 - fstds ${fab1},0($xfer) - addl $hi0,$ti1,$ti1 - addl $ti1,$ab1,$ab1 - ldd 8($xfer),$nm1 - fstds ${fnm1},8($xfer) - extrd,u $ab1,31,32,$hi0 - extrd,u $ab1,63,32,$ab1 - ldw 4($tp),$ti0 ; tp[j] - addl $hi1,$nm1,$nm1 - addl $ab1,$nm1,$nm1 - ldd -16($xfer),$ab0 - ldd -8($xfer),$nm0 - extrd,u $nm1,31,32,$hi1 - - addl $hi0,$ab0,$ab0 - addl $ti0,$ab0,$ab0 - stw $nm1,-4($tp) ; tp[j-1] - extrd,u $ab0,31,32,$hi0 - ldw 8($tp),$ti1 ; tp[j] - extrd,u $ab0,63,32,$ab0 - addl $hi1,$nm0,$nm0 - ldd 0($xfer),$ab1 - addl $ab0,$nm0,$nm0 - ldd,mb 8($xfer),$nm1 - extrd,u $nm0,31,32,$hi1 - stw,ma $nm0,8($tp) ; tp[j-1] - - addib,= -1,$num,L\$outerdone ; i-- - subi 0,$arrsz,$idx ; j=0 -___ -$code.=<<___ if ($BN_SZ==4); - fldws,ma 4($bp),${fbi} ; bp[i] -___ -$code.=<<___ if ($BN_SZ==8); - ldi 12,$ti0 ; bp[i] in flipped word order - addl,ev %r0,$num,$num - ldi -4,$ti0 - addl $ti0,$bp,$bp - fldws 0($bp),${fbi} -___ -$code.=<<___; - flddx $idx($ap),${fai} ; ap[0] - addl $hi0,$ab1,$ab1 - flddx $idx($np),${fni} ; np[0] - fldws 8($xfer),${fti}R ; tp[0] - addl $ti1,$ab1,$ab1 - extrd,u $ab1,31,32,$hi0 - extrd,u $ab1,63,32,$ab1 - - ldo 8($idx),$idx ; j++++ - xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i] - xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i] - ldw 4($tp),$ti0 ; tp[j] - - addl $hi1,$nm1,$nm1 - fstws,mb ${fab0}L,-8($xfer) ; save high part - addl $ab1,$nm1,$nm1 - extrd,u $nm1,31,32,$hi1 - fcpy,sgl %fr0,${fti}L ; zero high part - fcpy,sgl %fr0,${fab0}L - stw $nm1,-4($tp) ; tp[j-1] - - fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double - fcnvxf,dbl,dbl ${fab0},${fab0} - addl $hi1,$hi0,$hi0 - fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] - addl $ti0,$hi0,$hi0 - extrd,u $hi0,31,32,$hi1 - fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int - stw $hi0,0($tp) - stw $hi1,4($tp) - xmpyu ${fn0},${fab0}R,${fm0} - - b L\$outer - ldo `$LOCALS+32+4`($fp),$tp - -L\$outerdone - addl $hi0,$ab1,$ab1 - addl $ti1,$ab1,$ab1 - extrd,u $ab1,31,32,$hi0 - extrd,u $ab1,63,32,$ab1 - - ldw 4($tp),$ti0 ; tp[j] - - addl $hi1,$nm1,$nm1 - addl $ab1,$nm1,$nm1 - extrd,u $nm1,31,32,$hi1 - stw $nm1,-4($tp) ; tp[j-1] - - addl $hi1,$hi0,$hi0 - addl $ti0,$hi0,$hi0 - extrd,u $hi0,31,32,$hi1 - stw $hi0,0($tp) - stw $hi1,4($tp) - - ldo `$LOCALS+32`($fp),$tp - sub %r0,%r0,%r0 ; clear borrow -___ -$code.=<<___ if ($BN_SZ==4); - ldws,ma 4($tp),$ti0 - extru,= $rp,31,3,%r0 ; is rp 64-bit aligned? - b L\$sub_pa11 - addl $tp,$arrsz,$tp -L\$sub - ldwx $idx($np),$hi0 - subb $ti0,$hi0,$hi1 - ldwx $idx($tp),$ti0 - addib,<> 4,$idx,L\$sub - stws,ma $hi1,4($rp) - - subb $ti0,%r0,$hi1 - ldo -4($tp),$tp -___ -$code.=<<___ if ($BN_SZ==8); - ldd,ma 8($tp),$ti0 -L\$sub - ldd $idx($np),$hi0 - shrpd $ti0,$ti0,32,$ti0 ; flip word order - std $ti0,-8($tp) ; save flipped value - sub,db $ti0,$hi0,$hi1 - ldd,ma 8($tp),$ti0 - addib,<> 8,$idx,L\$sub - std,ma $hi1,8($rp) - - extrd,u $ti0,31,32,$ti0 ; carry in flipped word order - sub,db $ti0,%r0,$hi1 - ldo -8($tp),$tp -___ -$code.=<<___; - and $tp,$hi1,$ap - andcm $rp,$hi1,$bp - or $ap,$bp,$np - - sub $rp,$arrsz,$rp ; rewind rp - subi 0,$arrsz,$idx - ldo `$LOCALS+32`($fp),$tp -L\$copy - ldd $idx($np),$hi0 - std,ma %r0,8($tp) - addib,<> 8,$idx,.-8 ; L\$copy - std,ma $hi0,8($rp) -___ - -if ($BN_SZ==4) { # PA-RISC 1.1 code-path -$ablo=$ab0; -$abhi=$ab1; -$nmlo0=$nm0; -$nmhi0=$nm1; -$nmlo1="%r9"; -$nmhi1="%r8"; - -$code.=<<___; - b L\$done - nop - - .ALIGN 8 -L\$parisc11 - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldw -12($xfer),$ablo - ldw -16($xfer),$hi0 - ldw -4($xfer),$nmlo0 - ldw -8($xfer),$nmhi0 - fstds ${fab0},-16($xfer) - fstds ${fnm0},-8($xfer) - - ldo 8($idx),$idx ; j++++ - add $ablo,$nmlo0,$nmlo0 ; discarded - addc %r0,$nmhi0,$hi1 - ldw 4($xfer),$ablo - ldw 0($xfer),$abhi - nop - -L\$1st_pa11 - xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] - flddx $idx($ap),${fai} ; ap[j,j+1] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m - flddx $idx($np),${fni} ; np[j,j+1] - add $hi0,$ablo,$ablo - ldw 12($xfer),$nmlo1 - addc %r0,$abhi,$hi0 - ldw 8($xfer),$nmhi1 - add $ablo,$nmlo1,$nmlo1 - fstds ${fab1},0($xfer) - addc %r0,$nmhi1,$nmhi1 - fstds ${fnm1},8($xfer) - add $hi1,$nmlo1,$nmlo1 - ldw -12($xfer),$ablo - addc %r0,$nmhi1,$hi1 - ldw -16($xfer),$abhi - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0] - ldw -4($xfer),$nmlo0 - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldw -8($xfer),$nmhi0 - add $hi0,$ablo,$ablo - stw $nmlo1,-4($tp) ; tp[j-1] - addc %r0,$abhi,$hi0 - fstds ${fab0},-16($xfer) - add $ablo,$nmlo0,$nmlo0 - fstds ${fnm0},-8($xfer) - addc %r0,$nmhi0,$nmhi0 - ldw 0($xfer),$abhi - add $hi1,$nmlo0,$nmlo0 - ldw 4($xfer),$ablo - stws,ma $nmlo0,8($tp) ; tp[j-1] - addib,<> 8,$idx,L\$1st_pa11 ; j++++ - addc %r0,$nmhi0,$hi1 - - ldw 8($xfer),$nmhi1 - ldw 12($xfer),$nmlo1 - xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m - add $hi0,$ablo,$ablo - fstds ${fab1},0($xfer) - addc %r0,$abhi,$hi0 - fstds ${fnm1},8($xfer) - add $ablo,$nmlo1,$nmlo1 - ldw -16($xfer),$abhi - addc %r0,$nmhi1,$nmhi1 - ldw -12($xfer),$ablo - add $hi1,$nmlo1,$nmlo1 - ldw -8($xfer),$nmhi0 - addc %r0,$nmhi1,$hi1 - ldw -4($xfer),$nmlo0 - - add $hi0,$ablo,$ablo - stw $nmlo1,-4($tp) ; tp[j-1] - addc %r0,$abhi,$hi0 - ldw 0($xfer),$abhi - add $ablo,$nmlo0,$nmlo0 - ldw 4($xfer),$ablo - addc %r0,$nmhi0,$nmhi0 - ldws,mb 8($xfer),$nmhi1 - add $hi1,$nmlo0,$nmlo0 - ldw 4($xfer),$nmlo1 - addc %r0,$nmhi0,$hi1 - stws,ma $nmlo0,8($tp) ; tp[j-1] - - ldo -1($num),$num ; i-- - subi 0,$arrsz,$idx ; j=0 - - fldws,ma 4($bp),${fbi} ; bp[1] - flddx $idx($ap),${fai} ; ap[0,1] - flddx $idx($np),${fni} ; np[0,1] - fldws 8($xfer),${fti}R ; tp[0] - add $hi0,$ablo,$ablo - addc %r0,$abhi,$hi0 - ldo 8($idx),$idx ; j++++ - xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1] - xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1] - add $hi1,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$nmhi1 - add $ablo,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$hi1 - fstws,mb ${fab0}L,-8($xfer) ; save high part - stw $nmlo1,-4($tp) ; tp[j-1] - - fcpy,sgl %fr0,${fti}L ; zero high part - fcpy,sgl %fr0,${fab0}L - add $hi1,$hi0,$hi0 - addc %r0,%r0,$hi1 - fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double - fcnvxf,dbl,dbl ${fab0},${fab0} - stw $hi0,0($tp) - stw $hi1,4($tp) - - fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] - fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int - xmpyu ${fn0},${fab0}R,${fm0} - ldo `$LOCALS+32+4`($fp),$tp -L\$outer_pa11 - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m - fstds ${fab0},-16($xfer) ; 33-bit value - fstds ${fnm0},-8($xfer) - flddx $idx($ap),${fai} ; ap[2,3] - flddx $idx($np),${fni} ; np[2,3] - ldw -16($xfer),$abhi ; carry bit actually - ldo 8($idx),$idx ; j++++ - ldw -12($xfer),$ablo - ldw -8($xfer),$nmhi0 - ldw -4($xfer),$nmlo0 - ldw 0($xfer),$hi0 ; high part - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - fstds ${fab1},0($xfer) - addl $abhi,$hi0,$hi0 ; account carry bit - fstds ${fnm1},8($xfer) - add $ablo,$nmlo0,$nmlo0 ; discarded - ldw 0($tp),$ti1 ; tp[1] - addc %r0,$nmhi0,$hi1 - fstds ${fab0},-16($xfer) - fstds ${fnm0},-8($xfer) - ldw 4($xfer),$ablo - ldw 0($xfer),$abhi - -L\$inner_pa11 - xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] - flddx $idx($ap),${fai} ; ap[j,j+1] - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m - flddx $idx($np),${fni} ; np[j,j+1] - add $hi0,$ablo,$ablo - ldw 4($tp),$ti0 ; tp[j] - addc %r0,$abhi,$abhi - ldw 12($xfer),$nmlo1 - add $ti1,$ablo,$ablo - ldw 8($xfer),$nmhi1 - addc %r0,$abhi,$hi0 - fstds ${fab1},0($xfer) - add $ablo,$nmlo1,$nmlo1 - fstds ${fnm1},8($xfer) - addc %r0,$nmhi1,$nmhi1 - ldw -12($xfer),$ablo - add $hi1,$nmlo1,$nmlo1 - ldw -16($xfer),$abhi - addc %r0,$nmhi1,$hi1 - - xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i] - ldw 8($tp),$ti1 ; tp[j] - xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m - ldw -4($xfer),$nmlo0 - add $hi0,$ablo,$ablo - ldw -8($xfer),$nmhi0 - addc %r0,$abhi,$abhi - stw $nmlo1,-4($tp) ; tp[j-1] - add $ti0,$ablo,$ablo - fstds ${fab0},-16($xfer) - addc %r0,$abhi,$hi0 - fstds ${fnm0},-8($xfer) - add $ablo,$nmlo0,$nmlo0 - ldw 4($xfer),$ablo - addc %r0,$nmhi0,$nmhi0 - ldw 0($xfer),$abhi - add $hi1,$nmlo0,$nmlo0 - stws,ma $nmlo0,8($tp) ; tp[j-1] - addib,<> 8,$idx,L\$inner_pa11 ; j++++ - addc %r0,$nmhi0,$hi1 - - xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i] - ldw 12($xfer),$nmlo1 - xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m - ldw 8($xfer),$nmhi1 - add $hi0,$ablo,$ablo - ldw 4($tp),$ti0 ; tp[j] - addc %r0,$abhi,$abhi - fstds ${fab1},0($xfer) - add $ti1,$ablo,$ablo - fstds ${fnm1},8($xfer) - addc %r0,$abhi,$hi0 - ldw -16($xfer),$abhi - add $ablo,$nmlo1,$nmlo1 - ldw -12($xfer),$ablo - addc %r0,$nmhi1,$nmhi1 - ldw -8($xfer),$nmhi0 - add $hi1,$nmlo1,$nmlo1 - ldw -4($xfer),$nmlo0 - addc %r0,$nmhi1,$hi1 - - add $hi0,$ablo,$ablo - stw $nmlo1,-4($tp) ; tp[j-1] - addc %r0,$abhi,$abhi - add $ti0,$ablo,$ablo - ldw 8($tp),$ti1 ; tp[j] - addc %r0,$abhi,$hi0 - ldw 0($xfer),$abhi - add $ablo,$nmlo0,$nmlo0 - ldw 4($xfer),$ablo - addc %r0,$nmhi0,$nmhi0 - ldws,mb 8($xfer),$nmhi1 - add $hi1,$nmlo0,$nmlo0 - ldw 4($xfer),$nmlo1 - addc %r0,$nmhi0,$hi1 - stws,ma $nmlo0,8($tp) ; tp[j-1] - - addib,= -1,$num,L\$outerdone_pa11; i-- - subi 0,$arrsz,$idx ; j=0 - - fldws,ma 4($bp),${fbi} ; bp[i] - flddx $idx($ap),${fai} ; ap[0] - add $hi0,$ablo,$ablo - addc %r0,$abhi,$abhi - flddx $idx($np),${fni} ; np[0] - fldws 8($xfer),${fti}R ; tp[0] - add $ti1,$ablo,$ablo - addc %r0,$abhi,$hi0 - - ldo 8($idx),$idx ; j++++ - xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i] - xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i] - ldw 4($tp),$ti0 ; tp[j] - - add $hi1,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$nmhi1 - fstws,mb ${fab0}L,-8($xfer) ; save high part - add $ablo,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$hi1 - fcpy,sgl %fr0,${fti}L ; zero high part - fcpy,sgl %fr0,${fab0}L - stw $nmlo1,-4($tp) ; tp[j-1] - - fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double - fcnvxf,dbl,dbl ${fab0},${fab0} - add $hi1,$hi0,$hi0 - addc %r0,%r0,$hi1 - fadd,dbl ${fti},${fab0},${fab0} ; add tp[0] - add $ti0,$hi0,$hi0 - addc %r0,$hi1,$hi1 - fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int - stw $hi0,0($tp) - stw $hi1,4($tp) - xmpyu ${fn0},${fab0}R,${fm0} - - b L\$outer_pa11 - ldo `$LOCALS+32+4`($fp),$tp - -L\$outerdone_pa11 - add $hi0,$ablo,$ablo - addc %r0,$abhi,$abhi - add $ti1,$ablo,$ablo - addc %r0,$abhi,$hi0 - - ldw 4($tp),$ti0 ; tp[j] - - add $hi1,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$nmhi1 - add $ablo,$nmlo1,$nmlo1 - addc %r0,$nmhi1,$hi1 - stw $nmlo1,-4($tp) ; tp[j-1] - - add $hi1,$hi0,$hi0 - addc %r0,%r0,$hi1 - add $ti0,$hi0,$hi0 - addc %r0,$hi1,$hi1 - stw $hi0,0($tp) - stw $hi1,4($tp) - - ldo `$LOCALS+32+4`($fp),$tp - sub %r0,%r0,%r0 ; clear borrow - ldw -4($tp),$ti0 - addl $tp,$arrsz,$tp -L\$sub_pa11 - ldwx $idx($np),$hi0 - subb $ti0,$hi0,$hi1 - ldwx $idx($tp),$ti0 - addib,<> 4,$idx,L\$sub_pa11 - stws,ma $hi1,4($rp) - - subb $ti0,%r0,$hi1 - ldo -4($tp),$tp - and $tp,$hi1,$ap - andcm $rp,$hi1,$bp - or $ap,$bp,$np - - sub $rp,$arrsz,$rp ; rewind rp - subi 0,$arrsz,$idx - ldo `$LOCALS+32`($fp),$tp -L\$copy_pa11 - ldwx $idx($np),$hi0 - stws,ma %r0,4($tp) - addib,<> 4,$idx,L\$copy_pa11 - stws,ma $hi0,4($rp) - - nop ; alignment -L\$done -___ -} - -$code.=<<___; - ldi 1,%r28 ; signal "handled" - ldo $FRAME($fp),%sp ; destroy tp[num+1] - - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 -L\$abort - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - .STRINGZ "Montgomery Multiplication for PA-RISC, CRYPTOGAMS by " -___ - -# Explicitly encode PA-RISC 2.0 instructions used in this module, so -# that it can be compiled with .LEVEL 1.0. It should be noted that I -# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 -# directive... - -my $ldd = sub { - my ($mod,$args) = @_; - my $orig = "ldd$mod\t$args"; - - if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4 - { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3; - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5 - { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3; - $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset - $opcode|=(1<<5) if ($mod =~ /^,m/); - $opcode|=(1<<13) if ($mod =~ /^,mb/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $std = sub { - my ($mod,$args) = @_; - my $orig = "std$mod\t$args"; - - if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 6 - { my $opcode=(0x03<<26)|($3<<21)|($1<<16)|(1<<12)|(0xB<<6); - $opcode|=(($2&0xF)<<1)|(($2&0x10)>>4); # encode offset - $opcode|=(1<<5) if ($mod =~ /^,m/); - $opcode|=(1<<13) if ($mod =~ /^,mb/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $extrd = sub { - my ($mod,$args) = @_; - my $orig = "extrd$mod\t$args"; - - # I only have ",u" completer, it's implicitly encoded... - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 - { my $opcode=(0x36<<26)|($1<<21)|($4<<16); - my $len=32-$3; - $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 - { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); - my $len=32-$2; - $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len - $opcode |= (1<<13) if ($mod =~ /,\**=/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $shrpd = sub { - my ($mod,$args) = @_; - my $orig = "shrpd$mod\t$args"; - - if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 - { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; - my $cpos=63-$3; - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $sub = sub { - my ($mod,$args) = @_; - my $orig = "sub$mod\t$args"; - - if ($mod eq ",db" && $args =~ /%r([0-9]+),%r([0-9]+),%r([0-9]+)/) { - my $opcode=(0x02<<26)|($2<<21)|($1<<16)|$3; - $opcode|=(1<<10); # e1 - $opcode|=(1<<8); # e2 - $opcode|=(1<<5); # d - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig - } - else { "\t".$orig; } -}; - -sub assemble { - my ($mnemonic,$mod,$args)=@_; - my $opcode = eval("\$$mnemonic"); - - ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; -} - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - # flip word order in 64-bit mode... - s/(xmpyu\s+)($fai|$fni)([LR])/$1.$2.($3 eq "L"?"R":"L")/e if ($BN_SZ==8); - # assemble 2.0 instructions in 32-bit mode... - s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); - - s/\bbv\b/bve/gm if ($SIZE_T==8); - - print $_,"\n"; -} -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/ppc-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/ppc-mont.pl deleted file mode 100644 index f9b6992ccc..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/ppc-mont.pl +++ /dev/null @@ -1,334 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# April 2006 - -# "Teaser" Montgomery multiplication module for PowerPC. It's possible -# to gain a bit more by modulo-scheduling outer loop, then dedicated -# squaring procedure should give further 20% and code can be adapted -# for 32-bit application running on 64-bit CPU. As for the latter. -# It won't be able to achieve "native" 64-bit performance, because in -# 32-bit application context every addc instruction will have to be -# expanded as addc, twice right shift by 32 and finally adde, etc. -# So far RSA *sign* performance improvement over pre-bn_mul_mont asm -# for 64-bit application running on PPC970/G5 is: -# -# 512-bit +65% -# 1024-bit +35% -# 2048-bit +18% -# 4096-bit +4% - -$flavour = shift; - -if ($flavour =~ /32/) { - $BITS= 32; - $BNSZ= $BITS/8; - $SIZE_T=4; - $RZONE= 224; - - $LD= "lwz"; # load - $LDU= "lwzu"; # load and update - $LDX= "lwzx"; # load indexed - $ST= "stw"; # store - $STU= "stwu"; # store and update - $STX= "stwx"; # store indexed - $STUX= "stwux"; # store indexed and update - $UMULL= "mullw"; # unsigned multiply low - $UMULH= "mulhwu"; # unsigned multiply high - $UCMP= "cmplw"; # unsigned compare - $SHRI= "srwi"; # unsigned shift right by immediate - $PUSH= $ST; - $POP= $LD; -} elsif ($flavour =~ /64/) { - $BITS= 64; - $BNSZ= $BITS/8; - $SIZE_T=8; - $RZONE= 288; - - # same as above, but 64-bit mnemonics... - $LD= "ld"; # load - $LDU= "ldu"; # load and update - $LDX= "ldx"; # load indexed - $ST= "std"; # store - $STU= "stdu"; # store and update - $STX= "stdx"; # store indexed - $STUX= "stdux"; # store indexed and update - $UMULL= "mulld"; # unsigned multiply low - $UMULH= "mulhdu"; # unsigned multiply high - $UCMP= "cmpld"; # unsigned compare - $SHRI= "srdi"; # unsigned shift right by immediate - $PUSH= $ST; - $POP= $LD; -} else { die "nonsense $flavour"; } - -$FRAME=8*$SIZE_T+$RZONE; -$LOCALS=8*$SIZE_T; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$sp="r1"; -$toc="r2"; -$rp="r3"; $ovf="r3"; -$ap="r4"; -$bp="r5"; -$np="r6"; -$n0="r7"; -$num="r8"; -$rp="r9"; # $rp is reassigned -$aj="r10"; -$nj="r11"; -$tj="r12"; -# non-volatile registers -$i="r20"; -$j="r21"; -$tp="r22"; -$m0="r23"; -$m1="r24"; -$lo0="r25"; -$hi0="r26"; -$lo1="r27"; -$hi1="r28"; -$alo="r29"; -$ahi="r30"; -$nlo="r31"; -# -$nhi="r0"; - -$code=<<___; -.machine "any" -.text - -.globl .bn_mul_mont_int -.align 4 -.bn_mul_mont_int: - cmpwi $num,4 - mr $rp,r3 ; $rp is reassigned - li r3,0 - bltlr -___ -$code.=<<___ if ($BNSZ==4); - cmpwi $num,32 ; longer key performance is not better - bgelr -___ -$code.=<<___; - slwi $num,$num,`log($BNSZ)/log(2)` - li $tj,-4096 - addi $ovf,$num,$FRAME - subf $ovf,$ovf,$sp ; $sp-$ovf - and $ovf,$ovf,$tj ; minimize TLB usage - subf $ovf,$sp,$ovf ; $ovf-$sp - mr $tj,$sp - srwi $num,$num,`log($BNSZ)/log(2)` - $STUX $sp,$sp,$ovf - - $PUSH r20,`-12*$SIZE_T`($tj) - $PUSH r21,`-11*$SIZE_T`($tj) - $PUSH r22,`-10*$SIZE_T`($tj) - $PUSH r23,`-9*$SIZE_T`($tj) - $PUSH r24,`-8*$SIZE_T`($tj) - $PUSH r25,`-7*$SIZE_T`($tj) - $PUSH r26,`-6*$SIZE_T`($tj) - $PUSH r27,`-5*$SIZE_T`($tj) - $PUSH r28,`-4*$SIZE_T`($tj) - $PUSH r29,`-3*$SIZE_T`($tj) - $PUSH r30,`-2*$SIZE_T`($tj) - $PUSH r31,`-1*$SIZE_T`($tj) - - $LD $n0,0($n0) ; pull n0[0] value - addi $num,$num,-2 ; adjust $num for counter register - - $LD $m0,0($bp) ; m0=bp[0] - $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$LOCALS - $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0] - $UMULH $hi0,$aj,$m0 - - $LD $aj,$BNSZ($ap) ; ap[1] - $LD $nj,0($np) ; np[0] - - $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0 - - $UMULL $alo,$aj,$m0 ; ap[1]*bp[0] - $UMULH $ahi,$aj,$m0 - - $UMULL $lo1,$nj,$m1 ; np[0]*m1 - $UMULH $hi1,$nj,$m1 - $LD $nj,$BNSZ($np) ; np[1] - addc $lo1,$lo1,$lo0 - addze $hi1,$hi1 - - $UMULL $nlo,$nj,$m1 ; np[1]*m1 - $UMULH $nhi,$nj,$m1 - - mtctr $num - li $j,`2*$BNSZ` -.align 4 -L1st: - $LDX $aj,$ap,$j ; ap[j] - addc $lo0,$alo,$hi0 - $LDX $nj,$np,$j ; np[j] - addze $hi0,$ahi - $UMULL $alo,$aj,$m0 ; ap[j]*bp[0] - addc $lo1,$nlo,$hi1 - $UMULH $ahi,$aj,$m0 - addze $hi1,$nhi - $UMULL $nlo,$nj,$m1 ; np[j]*m1 - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0] - $UMULH $nhi,$nj,$m1 - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - addi $j,$j,$BNSZ ; j++ - addi $tp,$tp,$BNSZ ; tp++ - bdnz- L1st -;L1st - addc $lo0,$alo,$hi0 - addze $hi0,$ahi - - addc $lo1,$nlo,$hi1 - addze $hi1,$nhi - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0] - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - li $ovf,0 - addc $hi1,$hi1,$hi0 - addze $ovf,$ovf ; upmost overflow bit - $ST $hi1,$BNSZ($tp) - - li $i,$BNSZ -.align 4 -Louter: - $LDX $m0,$bp,$i ; m0=bp[i] - $LD $aj,0($ap) ; ap[0] - addi $tp,$sp,$LOCALS - $LD $tj,$LOCALS($sp); tp[0] - $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i] - $UMULH $hi0,$aj,$m0 - $LD $aj,$BNSZ($ap) ; ap[1] - $LD $nj,0($np) ; np[0] - addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0] - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] - addze $hi0,$hi0 - $UMULL $m1,$lo0,$n0 ; tp[0]*n0 - $UMULH $ahi,$aj,$m0 - $UMULL $lo1,$nj,$m1 ; np[0]*m1 - $UMULH $hi1,$nj,$m1 - $LD $nj,$BNSZ($np) ; np[1] - addc $lo1,$lo1,$lo0 - $UMULL $nlo,$nj,$m1 ; np[1]*m1 - addze $hi1,$hi1 - $UMULH $nhi,$nj,$m1 - - mtctr $num - li $j,`2*$BNSZ` -.align 4 -Linner: - $LDX $aj,$ap,$j ; ap[j] - addc $lo0,$alo,$hi0 - $LD $tj,$BNSZ($tp) ; tp[j] - addze $hi0,$ahi - $LDX $nj,$np,$j ; np[j] - addc $lo1,$nlo,$hi1 - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] - addze $hi1,$nhi - $UMULH $ahi,$aj,$m0 - addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] - $UMULL $nlo,$nj,$m1 ; np[j]*m1 - addze $hi0,$hi0 - $UMULH $nhi,$nj,$m1 - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j] - addi $j,$j,$BNSZ ; j++ - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - addi $tp,$tp,$BNSZ ; tp++ - bdnz- Linner -;Linner - $LD $tj,$BNSZ($tp) ; tp[j] - addc $lo0,$alo,$hi0 - addze $hi0,$ahi - addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] - addze $hi0,$hi0 - - addc $lo1,$nlo,$hi1 - addze $hi1,$nhi - addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j] - addze $hi1,$hi1 - $ST $lo1,0($tp) ; tp[j-1] - - addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA] - li $ovf,0 - adde $hi1,$hi1,$hi0 - addze $ovf,$ovf - $ST $hi1,$BNSZ($tp) -; - slwi $tj,$num,`log($BNSZ)/log(2)` - $UCMP $i,$tj - addi $i,$i,$BNSZ - ble- Louter - - addi $num,$num,2 ; restore $num - subfc $j,$j,$j ; j=0 and "clear" XER[CA] - addi $tp,$sp,$LOCALS - mtctr $num - -.align 4 -Lsub: $LDX $tj,$tp,$j - $LDX $nj,$np,$j - subfe $aj,$nj,$tj ; tp[j]-np[j] - $STX $aj,$rp,$j - addi $j,$j,$BNSZ - bdnz- Lsub - - li $j,0 - mtctr $num - subfe $ovf,$j,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - -.align 4 -Lcopy: ; copy or in-place refresh - $LDX $tj,$ap,$j - $STX $tj,$rp,$j - $STX $j,$tp,$j ; zap at once - addi $j,$j,$BNSZ - bdnz- Lcopy - - $POP $tj,0($sp) - li r3,1 - $POP r20,`-12*$SIZE_T`($tj) - $POP r21,`-11*$SIZE_T`($tj) - $POP r22,`-10*$SIZE_T`($tj) - $POP r23,`-9*$SIZE_T`($tj) - $POP r24,`-8*$SIZE_T`($tj) - $POP r25,`-7*$SIZE_T`($tj) - $POP r26,`-6*$SIZE_T`($tj) - $POP r27,`-5*$SIZE_T`($tj) - $POP r28,`-4*$SIZE_T`($tj) - $POP r29,`-3*$SIZE_T`($tj) - $POP r30,`-2*$SIZE_T`($tj) - $POP r31,`-1*$SIZE_T`($tj) - mr $sp,$tj - blr - .long 0 - .byte 0,12,4,0,0x80,12,6,0 - .long 0 - -.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/ppc.pl b/drivers/builtin_openssl2/crypto/bn/asm/ppc.pl deleted file mode 100644 index 1249ce2299..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/ppc.pl +++ /dev/null @@ -1,1998 +0,0 @@ -#!/usr/bin/env perl -# -# Implemented as a Perl wrapper as we want to support several different -# architectures with single file. We pick up the target based on the -# file name we are asked to generate. -# -# It should be noted though that this perl code is nothing like -# /crypto/perlasm/x86*. In this case perl is used pretty much -# as pre-processor to cover for platform differences in name decoration, -# linker tables, 32-/64-bit instruction sets... -# -# As you might know there're several PowerPC ABI in use. Most notably -# Linux and AIX use different 32-bit ABIs. Good news are that these ABIs -# are similar enough to implement leaf(!) functions, which would be ABI -# neutral. And that's what you find here: ABI neutral leaf functions. -# In case you wonder what that is... -# -# AIX performance -# -# MEASUREMENTS WITH cc ON a 200 MhZ PowerPC 604e. -# -# The following is the performance of 32-bit compiler -# generated code: -# -# OpenSSL 0.9.6c 21 dec 2001 -# built on: Tue Jun 11 11:06:51 EDT 2002 -# options:bn(64,32) ... -#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3 -# sign verify sign/s verify/s -#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6 -#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5 -#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1 -#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4 -#dsa 512 bits 0.0087s 0.0106s 114.3 94.5 -#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 -# -# Same bechmark with this assembler code: -# -#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2 -#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1 -#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2 -#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7 -#dsa 512 bits 0.0052s 0.0062s 191.6 162.0 -#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5 -# -# Number of operations increases by at almost 75% -# -# Here are performance numbers for 64-bit compiler -# generated code: -# -# OpenSSL 0.9.6g [engine] 9 Aug 2002 -# built on: Fri Apr 18 16:59:20 EDT 2003 -# options:bn(64,64) ... -# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3 -# sign verify sign/s verify/s -#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4 -#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7 -#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0 -#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1 -#dsa 512 bits 0.0026s 0.0032s 382.5 313.7 -#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6 -# -# Same benchmark with this assembler code: -# -#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7 -#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3 -#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5 -#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0 -#dsa 512 bits 0.0016s 0.0020s 610.7 507.1 -#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2 -# -# Again, performance increases by at about 75% -# -# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code) -# OpenSSL 0.9.7c 30 Sep 2003 -# -# Original code. -# -#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5 -#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1 -#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4 -#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4 -#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5 -#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7 -#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6 -# -# Same benchmark with this assembler code: -# -#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9 -#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6 -#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5 -#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6 -#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2 -#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2 -#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8 -# -# Performance increase of ~60% -# -# If you have comments or suggestions to improve code send -# me a note at schari@us.ibm.com -# - -$flavour = shift; - -if ($flavour =~ /32/) { - $BITS= 32; - $BNSZ= $BITS/8; - $ISA= "\"ppc\""; - - $LD= "lwz"; # load - $LDU= "lwzu"; # load and update - $ST= "stw"; # store - $STU= "stwu"; # store and update - $UMULL= "mullw"; # unsigned multiply low - $UMULH= "mulhwu"; # unsigned multiply high - $UDIV= "divwu"; # unsigned divide - $UCMPI= "cmplwi"; # unsigned compare with immediate - $UCMP= "cmplw"; # unsigned compare - $CNTLZ= "cntlzw"; # count leading zeros - $SHL= "slw"; # shift left - $SHR= "srw"; # unsigned shift right - $SHRI= "srwi"; # unsigned shift right by immediate - $SHLI= "slwi"; # shift left by immediate - $CLRU= "clrlwi"; # clear upper bits - $INSR= "insrwi"; # insert right - $ROTL= "rotlwi"; # rotate left by immediate - $TR= "tw"; # conditional trap -} elsif ($flavour =~ /64/) { - $BITS= 64; - $BNSZ= $BITS/8; - $ISA= "\"ppc64\""; - - # same as above, but 64-bit mnemonics... - $LD= "ld"; # load - $LDU= "ldu"; # load and update - $ST= "std"; # store - $STU= "stdu"; # store and update - $UMULL= "mulld"; # unsigned multiply low - $UMULH= "mulhdu"; # unsigned multiply high - $UDIV= "divdu"; # unsigned divide - $UCMPI= "cmpldi"; # unsigned compare with immediate - $UCMP= "cmpld"; # unsigned compare - $CNTLZ= "cntlzd"; # count leading zeros - $SHL= "sld"; # shift left - $SHR= "srd"; # unsigned shift right - $SHRI= "srdi"; # unsigned shift right by immediate - $SHLI= "sldi"; # shift left by immediate - $CLRU= "clrldi"; # clear upper bits - $INSR= "insrdi"; # insert right - $ROTL= "rotldi"; # rotate left by immediate - $TR= "td"; # conditional trap -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$data=< 0 then result !=0 - # In either case carry bit is set. - beq Lppcasm_sub_adios - addi r4,r4,-$BNSZ - addi r3,r3,-$BNSZ - addi r5,r5,-$BNSZ - mtctr r6 -Lppcasm_sub_mainloop: - $LDU r7,$BNSZ(r4) - $LDU r8,$BNSZ(r5) - subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8) - # if carry = 1 this is r7-r8. Else it - # is r7-r8 -1 as we need. - $STU r6,$BNSZ(r3) - bdnz- Lppcasm_sub_mainloop -Lppcasm_sub_adios: - subfze r3,r0 # if carry bit is set then r3 = 0 else -1 - andi. r3,r3,1 # keep only last bit. - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 - -# -# NOTE: The following label name should be changed to -# "bn_add_words" i.e. remove the first dot -# for the gcc compiler. This should be automatically -# done in the build -# - -.align 4 -.bn_add_words: -# -# Handcoded version of bn_add_words -# -#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -# -# r3 = r -# r4 = a -# r5 = b -# r6 = n -# -# Note: No loop unrolling done since this is not a performance -# critical loop. - - xor r0,r0,r0 -# -# check for r6 = 0. Is this needed? -# - addic. r6,r6,0 #test r6 and clear carry bit. - beq Lppcasm_add_adios - addi r4,r4,-$BNSZ - addi r3,r3,-$BNSZ - addi r5,r5,-$BNSZ - mtctr r6 -Lppcasm_add_mainloop: - $LDU r7,$BNSZ(r4) - $LDU r8,$BNSZ(r5) - adde r8,r7,r8 - $STU r8,$BNSZ(r3) - bdnz- Lppcasm_add_mainloop -Lppcasm_add_adios: - addze r3,r0 #return carry bit. - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 - -# -# NOTE: The following label name should be changed to -# "bn_div_words" i.e. remove the first dot -# for the gcc compiler. This should be automatically -# done in the build -# - -.align 4 -.bn_div_words: -# -# This is a cleaned up version of code generated by -# the AIX compiler. The only optimization is to use -# the PPC instruction to count leading zeros instead -# of call to num_bits_word. Since this was compiled -# only at level -O2 we can possibly squeeze it more? -# -# r3 = h -# r4 = l -# r5 = d - - $UCMPI 0,r5,0 # compare r5 and 0 - bne Lppcasm_div1 # proceed if d!=0 - li r3,-1 # d=0 return -1 - blr -Lppcasm_div1: - xor r0,r0,r0 #r0=0 - li r8,$BITS - $CNTLZ. r7,r5 #r7 = num leading 0s in d. - beq Lppcasm_div2 #proceed if no leading zeros - subf r8,r7,r8 #r8 = BN_num_bits_word(d) - $SHR. r9,r3,r8 #are there any bits above r8'th? - $TR 16,r9,r0 #if there're, signal to dump core... -Lppcasm_div2: - $UCMP 0,r3,r5 #h>=d? - blt Lppcasm_div3 #goto Lppcasm_div3 if not - subf r3,r5,r3 #h-=d ; -Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i - cmpi 0,0,r7,0 # is (i == 0)? - beq Lppcasm_div4 - $SHL r3,r3,r7 # h = (h<< i) - $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) - $SHL r5,r5,r7 # d<<=i - or r3,r3,r8 # h = (h<>(BN_BITS2-i)) - $SHL r4,r4,r7 # l <<=i -Lppcasm_div4: - $SHRI r9,r5,`$BITS/2` # r9 = dh - # dl will be computed when needed - # as it saves registers. - li r6,2 #r6=2 - mtctr r6 #counter will be in count. -Lppcasm_divouterloop: - $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4) - $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 - # compute here for innerloop. - $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh - bne Lppcasm_div5 # goto Lppcasm_div5 if not - - li r8,-1 - $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l - b Lppcasm_div6 -Lppcasm_div5: - $UDIV r8,r3,r9 #q = h/dh -Lppcasm_div6: - $UMULL r12,r9,r8 #th = q*dh - $CLRU r10,r5,`$BITS/2` #r10=dl - $UMULL r6,r8,r10 #tl = q*dl - -Lppcasm_divinnerloop: - subf r10,r12,r3 #t = h -th - $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of... - addic. r7,r7,0 #test if r7 == 0. used below. - # now want to compute - # r7 = (t<>BN_BITS4) - # the following 2 instructions do that - $SHLI r7,r10,`$BITS/2` # r7 = (t<>BN_BITS4) - $UCMP cr1,r6,r7 # compare (tl <= r7) - bne Lppcasm_divinnerexit - ble cr1,Lppcasm_divinnerexit - addi r8,r8,-1 #q-- - subf r12,r9,r12 #th -=dh - $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. - subf r6,r10,r6 #tl -=dl - b Lppcasm_divinnerloop -Lppcasm_divinnerexit: - $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) - $SHLI r11,r6,`$BITS/2` #tl=(tl<=tl) goto Lppcasm_div7 - addi r12,r12,1 # th++ -Lppcasm_div7: - subf r11,r11,r4 #r11=l-tl - $UCMP cr1,r3,r12 #compare h and th - bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 - addi r8,r8,-1 # q-- - add r3,r5,r3 # h+=d -Lppcasm_div8: - subf r12,r12,r3 #r12 = h-th - $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<>BN_BITS4))&BN_MASK2 - # the following 2 instructions will do this. - $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. - $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 - bdz Lppcasm_div9 #if (count==0) break ; - $SHLI r0,r8,`$BITS/2` #ret =q<> 2 - beq Lppcasm_mw_REM - mtctr r7 -Lppcasm_mw_LOOP: - #mul(rp[0],ap[0],w,c1); - $LD r8,`0*$BNSZ`(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - addc r9,r9,r12 - #addze r10,r10 #carry is NOT ignored. - #will be taken care of - #in second spin below - #using adde. - $ST r9,`0*$BNSZ`(r3) - #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) - $UMULL r11,r6,r8 - $UMULH r12,r6,r8 - adde r11,r11,r10 - #addze r12,r12 - $ST r11,`1*$BNSZ`(r3) - #mul(rp[2],ap[2],w,c1); - $LD r8,`2*$BNSZ`(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - adde r9,r9,r12 - #addze r10,r10 - $ST r9,`2*$BNSZ`(r3) - #mul_add(rp[3],ap[3],w,c1); - $LD r8,`3*$BNSZ`(r4) - $UMULL r11,r6,r8 - $UMULH r12,r6,r8 - adde r11,r11,r10 - addze r12,r12 #this spin we collect carry into - #r12 - $ST r11,`3*$BNSZ`(r3) - - addi r3,r3,`4*$BNSZ` - addi r4,r4,`4*$BNSZ` - bdnz- Lppcasm_mw_LOOP - -Lppcasm_mw_REM: - andi. r5,r5,0x3 - beq Lppcasm_mw_OVER - #mul(rp[0],ap[0],w,c1); - $LD r8,`0*$BNSZ`(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - addc r9,r9,r12 - addze r10,r10 - $ST r9,`0*$BNSZ`(r3) - addi r12,r10,0 - - addi r5,r5,-1 - cmpli 0,0,r5,0 - beq Lppcasm_mw_OVER - - - #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - addc r9,r9,r12 - addze r10,r10 - $ST r9,`1*$BNSZ`(r3) - addi r12,r10,0 - - addi r5,r5,-1 - cmpli 0,0,r5,0 - beq Lppcasm_mw_OVER - - #mul_add(rp[2],ap[2],w,c1); - $LD r8,`2*$BNSZ`(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - addc r9,r9,r12 - addze r10,r10 - $ST r9,`2*$BNSZ`(r3) - addi r12,r10,0 - -Lppcasm_mw_OVER: - addi r3,r12,0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 - -# -# NOTE: The following label name should be changed to -# "bn_mul_add_words" i.e. remove the first dot -# for the gcc compiler. This should be automatically -# done in the build -# - -.align 4 -.bn_mul_add_words: -# -# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -# -# r3 = rp -# r4 = ap -# r5 = num -# r6 = w -# -# empirical evidence suggests that unrolled version performs best!! -# - xor r0,r0,r0 #r0 = 0 - xor r12,r12,r12 #r12 = 0 . used for carry - rlwinm. r7,r5,30,2,31 # num >> 2 - beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover - mtctr r7 -Lppcasm_maw_mainloop: - #mul_add(rp[0],ap[0],w,c1); - $LD r8,`0*$BNSZ`(r4) - $LD r11,`0*$BNSZ`(r3) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - addc r9,r9,r12 #r12 is carry. - addze r10,r10 - addc r9,r9,r11 - #addze r10,r10 - #the above instruction addze - #is NOT needed. Carry will NOT - #be ignored. It's not affected - #by multiply and will be collected - #in the next spin - $ST r9,`0*$BNSZ`(r3) - - #mul_add(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) - $LD r9,`1*$BNSZ`(r3) - $UMULL r11,r6,r8 - $UMULH r12,r6,r8 - adde r11,r11,r10 #r10 is carry. - addze r12,r12 - addc r11,r11,r9 - #addze r12,r12 - $ST r11,`1*$BNSZ`(r3) - - #mul_add(rp[2],ap[2],w,c1); - $LD r8,`2*$BNSZ`(r4) - $UMULL r9,r6,r8 - $LD r11,`2*$BNSZ`(r3) - $UMULH r10,r6,r8 - adde r9,r9,r12 - addze r10,r10 - addc r9,r9,r11 - #addze r10,r10 - $ST r9,`2*$BNSZ`(r3) - - #mul_add(rp[3],ap[3],w,c1); - $LD r8,`3*$BNSZ`(r4) - $UMULL r11,r6,r8 - $LD r9,`3*$BNSZ`(r3) - $UMULH r12,r6,r8 - adde r11,r11,r10 - addze r12,r12 - addc r11,r11,r9 - addze r12,r12 - $ST r11,`3*$BNSZ`(r3) - addi r3,r3,`4*$BNSZ` - addi r4,r4,`4*$BNSZ` - bdnz- Lppcasm_maw_mainloop - -Lppcasm_maw_leftover: - andi. r5,r5,0x3 - beq Lppcasm_maw_adios - addi r3,r3,-$BNSZ - addi r4,r4,-$BNSZ - #mul_add(rp[0],ap[0],w,c1); - mtctr r5 - $LDU r8,$BNSZ(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - $LDU r11,$BNSZ(r3) - addc r9,r9,r11 - addze r10,r10 - addc r9,r9,r12 - addze r12,r10 - $ST r9,0(r3) - - bdz Lppcasm_maw_adios - #mul_add(rp[1],ap[1],w,c1); - $LDU r8,$BNSZ(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - $LDU r11,$BNSZ(r3) - addc r9,r9,r11 - addze r10,r10 - addc r9,r9,r12 - addze r12,r10 - $ST r9,0(r3) - - bdz Lppcasm_maw_adios - #mul_add(rp[2],ap[2],w,c1); - $LDU r8,$BNSZ(r4) - $UMULL r9,r6,r8 - $UMULH r10,r6,r8 - $LDU r11,$BNSZ(r3) - addc r9,r9,r11 - addze r10,r10 - addc r9,r9,r12 - addze r12,r10 - $ST r9,0(r3) - -Lppcasm_maw_adios: - addi r3,r12,0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 - .align 4 -EOF -$data =~ s/\`([^\`]*)\`/eval $1/gem; -print $data; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/ppc64-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/ppc64-mont.pl deleted file mode 100644 index a14e769ad0..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/ppc64-mont.pl +++ /dev/null @@ -1,1088 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# December 2007 - -# The reason for undertaken effort is basically following. Even though -# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI -# performance was observed to be less than impressive, essentially as -# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope. -# Well, it's not surprising that IBM had to make some sacrifices to -# boost the clock frequency that much, but no overall improvement? -# Having observed how much difference did switching to FPU make on -# UltraSPARC, playing same stunt on Power 6 appeared appropriate... -# Unfortunately the resulting performance improvement is not as -# impressive, ~30%, and in absolute terms is still very far from what -# one would expect from 4.7GHz CPU. There is a chance that I'm doing -# something wrong, but in the lack of assembler level micro-profiling -# data or at least decent platform guide I can't tell... Or better -# results might be achieved with VMX... Anyway, this module provides -# *worse* performance on other PowerPC implementations, ~40-15% slower -# on PPC970 depending on key length and ~40% slower on Power 5 for all -# key lengths. As it's obviously inappropriate as "best all-round" -# alternative, it has to be complemented with run-time CPU family -# detection. Oh! It should also be noted that unlike other PowerPC -# implementation IALU ppc-mont.pl module performs *suboptimaly* on -# >=1024-bit key lengths on Power 6. It should also be noted that -# *everything* said so far applies to 64-bit builds! As far as 32-bit -# application executed on 64-bit CPU goes, this module is likely to -# become preferred choice, because it's easy to adapt it for such -# case and *is* faster than 32-bit ppc-mont.pl on *all* processors. - -# February 2008 - -# Micro-profiling assisted optimization results in ~15% improvement -# over original ppc64-mont.pl version, or overall ~50% improvement -# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same -# Power 6 CPU, this module is 5-150% faster depending on key length, -# [hereafter] more for longer keys. But if compared to ppc-mont.pl -# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive -# in absolute terms, but it's apparently the way Power 6 is... - -# December 2009 - -# Adapted for 32-bit build this module delivers 25-120%, yes, more -# than *twice* for longer keys, performance improvement over 32-bit -# ppc-mont.pl on 1.8GHz PPC970. However! This implementation utilizes -# even 64-bit integer operations and the trouble is that most PPC -# operating systems don't preserve upper halves of general purpose -# registers upon 32-bit signal delivery. They do preserve them upon -# context switch, but not signalling:-( This means that asynchronous -# signals have to be blocked upon entry to this subroutine. Signal -# masking (and of course complementary unmasking) has quite an impact -# on performance, naturally larger for shorter keys. It's so severe -# that 512-bit key performance can be as low as 1/3 of expected one. -# This is why this routine can be engaged for longer key operations -# only on these OSes, see crypto/ppccap.c for further details. MacOS X -# is an exception from this and doesn't require signal masking, and -# that's where above improvement coefficients were collected. For -# others alternative would be to break dependence on upper halves of -# GPRs by sticking to 32-bit integer operations... - -$flavour = shift; - -if ($flavour =~ /32/) { - $SIZE_T=4; - $RZONE= 224; - $fname= "bn_mul_mont_fpu64"; - - $STUX= "stwux"; # store indexed and update - $PUSH= "stw"; - $POP= "lwz"; -} elsif ($flavour =~ /64/) { - $SIZE_T=8; - $RZONE= 288; - $fname= "bn_mul_mont_fpu64"; - - # same as above, but 64-bit mnemonics... - $STUX= "stdux"; # store indexed and update - $PUSH= "std"; - $POP= "ld"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=64; # padded frame header -$TRANSFER=16*8; - -$carry="r0"; -$sp="r1"; -$toc="r2"; -$rp="r3"; $ovf="r3"; -$ap="r4"; -$bp="r5"; -$np="r6"; -$n0="r7"; -$num="r8"; -$rp="r9"; # $rp is reassigned -$tp="r10"; -$j="r11"; -$i="r12"; -# non-volatile registers -$nap_d="r22"; # interleaved ap and np in double format -$a0="r23"; # ap[0] -$t0="r24"; # temporary registers -$t1="r25"; -$t2="r26"; -$t3="r27"; -$t4="r28"; -$t5="r29"; -$t6="r30"; -$t7="r31"; - -# PPC offers enough register bank capacity to unroll inner loops twice -# -# ..A3A2A1A0 -# dcba -# ----------- -# A0a -# A0b -# A0c -# A0d -# A1a -# A1b -# A1c -# A1d -# A2a -# A2b -# A2c -# A2d -# A3a -# A3b -# A3c -# A3d -# ..a -# ..b -# -$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3"; -$na="f4"; $nb="f5"; $nc="f6"; $nd="f7"; -$dota="f8"; $dotb="f9"; -$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13"; -$N0="f20"; $N1="f21"; $N2="f22"; $N3="f23"; -$T0a="f24"; $T0b="f25"; -$T1a="f26"; $T1b="f27"; -$T2a="f28"; $T2b="f29"; -$T3a="f30"; $T3b="f31"; - -# sp----------->+-------------------------------+ -# | saved sp | -# +-------------------------------+ -# . . -# +64 +-------------------------------+ -# | 16 gpr<->fpr transfer zone | -# . . -# . . -# +16*8 +-------------------------------+ -# | __int64 tmp[-1] | -# +-------------------------------+ -# | __int64 tmp[num] | -# . . -# . . -# . . -# +(num+1)*8 +-------------------------------+ -# | padding to 64 byte boundary | -# . . -# +X +-------------------------------+ -# | double nap_d[4*num] | -# . . -# . . -# . . -# +-------------------------------+ -# . . -# -12*size_t +-------------------------------+ -# | 10 saved gpr, r22-r31 | -# . . -# . . -# -12*8 +-------------------------------+ -# | 12 saved fpr, f20-f31 | -# . . -# . . -# +-------------------------------+ - -$code=<<___; -.machine "any" -.text - -.globl .$fname -.align 5 -.$fname: - cmpwi $num,`3*8/$SIZE_T` - mr $rp,r3 ; $rp is reassigned - li r3,0 ; possible "not handled" return code - bltlr- - andi. r0,$num,`16/$SIZE_T-1` ; $num has to be "even" - bnelr- - - slwi $num,$num,`log($SIZE_T)/log(2)` ; num*=sizeof(BN_LONG) - li $i,-4096 - slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num - add $tp,$tp,$num ; place for tp[num+1] - addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE` - subf $tp,$tp,$sp ; $sp-$tp - and $tp,$tp,$i ; minimize TLB usage - subf $tp,$sp,$tp ; $tp-$sp - mr $i,$sp - $STUX $sp,$sp,$tp ; alloca - - $PUSH r22,`-12*8-10*$SIZE_T`($i) - $PUSH r23,`-12*8-9*$SIZE_T`($i) - $PUSH r24,`-12*8-8*$SIZE_T`($i) - $PUSH r25,`-12*8-7*$SIZE_T`($i) - $PUSH r26,`-12*8-6*$SIZE_T`($i) - $PUSH r27,`-12*8-5*$SIZE_T`($i) - $PUSH r28,`-12*8-4*$SIZE_T`($i) - $PUSH r29,`-12*8-3*$SIZE_T`($i) - $PUSH r30,`-12*8-2*$SIZE_T`($i) - $PUSH r31,`-12*8-1*$SIZE_T`($i) - stfd f20,`-12*8`($i) - stfd f21,`-11*8`($i) - stfd f22,`-10*8`($i) - stfd f23,`-9*8`($i) - stfd f24,`-8*8`($i) - stfd f25,`-7*8`($i) - stfd f26,`-6*8`($i) - stfd f27,`-5*8`($i) - stfd f28,`-4*8`($i) - stfd f29,`-3*8`($i) - stfd f30,`-2*8`($i) - stfd f31,`-1*8`($i) -___ -$code.=<<___ if ($SIZE_T==8); - ld $a0,0($ap) ; pull ap[0] value - ld $n0,0($n0) ; pull n0[0] value - ld $t3,0($bp) ; bp[0] -___ -$code.=<<___ if ($SIZE_T==4); - mr $t1,$n0 - lwz $a0,0($ap) ; pull ap[0,1] value - lwz $t0,4($ap) - lwz $n0,0($t1) ; pull n0[0,1] value - lwz $t1,4($t1) - lwz $t3,0($bp) ; bp[0,1] - lwz $t2,4($bp) - insrdi $a0,$t0,32,0 - insrdi $n0,$t1,32,0 - insrdi $t3,$t2,32,0 -___ -$code.=<<___; - addi $tp,$sp,`$FRAME+$TRANSFER+8+64` - li $i,-64 - add $nap_d,$tp,$num - and $nap_d,$nap_d,$i ; align to 64 bytes - - mulld $t7,$a0,$t3 ; ap[0]*bp[0] - ; nap_d is off by 1, because it's used with stfdu/lfdu - addi $nap_d,$nap_d,-8 - srwi $j,$num,`3+1` ; counter register, num/2 - mulld $t7,$t7,$n0 ; tp[0]*n0 - addi $j,$j,-1 - addi $tp,$sp,`$FRAME+$TRANSFER-8` - li $carry,0 - mtctr $j - - ; transfer bp[0] to FPU as 4x16-bit values - extrdi $t0,$t3,16,48 - extrdi $t1,$t3,16,32 - extrdi $t2,$t3,16,16 - extrdi $t3,$t3,16,0 - std $t0,`$FRAME+0`($sp) - std $t1,`$FRAME+8`($sp) - std $t2,`$FRAME+16`($sp) - std $t3,`$FRAME+24`($sp) - ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values - extrdi $t4,$t7,16,48 - extrdi $t5,$t7,16,32 - extrdi $t6,$t7,16,16 - extrdi $t7,$t7,16,0 - std $t4,`$FRAME+32`($sp) - std $t5,`$FRAME+40`($sp) - std $t6,`$FRAME+48`($sp) - std $t7,`$FRAME+56`($sp) -___ -$code.=<<___ if ($SIZE_T==8); - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair - lwz $t7,8($np) -___ -$code.=<<___ if ($SIZE_T==4); - lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs - lwz $t1,4($ap) - lwz $t2,8($ap) - lwz $t3,12($ap) - lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs - lwz $t5,4($np) - lwz $t6,8($np) - lwz $t7,12($np) -___ -$code.=<<___; - lfd $ba,`$FRAME+0`($sp) - lfd $bb,`$FRAME+8`($sp) - lfd $bc,`$FRAME+16`($sp) - lfd $bd,`$FRAME+24`($sp) - lfd $na,`$FRAME+32`($sp) - lfd $nb,`$FRAME+40`($sp) - lfd $nc,`$FRAME+48`($sp) - lfd $nd,`$FRAME+56`($sp) - std $t0,`$FRAME+64`($sp) - std $t1,`$FRAME+72`($sp) - std $t2,`$FRAME+80`($sp) - std $t3,`$FRAME+88`($sp) - std $t4,`$FRAME+96`($sp) - std $t5,`$FRAME+104`($sp) - std $t6,`$FRAME+112`($sp) - std $t7,`$FRAME+120`($sp) - fcfid $ba,$ba - fcfid $bb,$bb - fcfid $bc,$bc - fcfid $bd,$bd - fcfid $na,$na - fcfid $nb,$nb - fcfid $nc,$nc - fcfid $nd,$nd - - lfd $A0,`$FRAME+64`($sp) - lfd $A1,`$FRAME+72`($sp) - lfd $A2,`$FRAME+80`($sp) - lfd $A3,`$FRAME+88`($sp) - lfd $N0,`$FRAME+96`($sp) - lfd $N1,`$FRAME+104`($sp) - lfd $N2,`$FRAME+112`($sp) - lfd $N3,`$FRAME+120`($sp) - fcfid $A0,$A0 - fcfid $A1,$A1 - fcfid $A2,$A2 - fcfid $A3,$A3 - fcfid $N0,$N0 - fcfid $N1,$N1 - fcfid $N2,$N2 - fcfid $N3,$N3 - addi $ap,$ap,16 - addi $np,$np,16 - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - stfd $A0,8($nap_d) ; save a[j] in double format - stfd $A1,16($nap_d) - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - stfd $A2,24($nap_d) ; save a[j+1] in double format - stfd $A3,32($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - stfd $N0,40($nap_d) ; save n[j] in double format - stfd $N1,48($nap_d) - fmul $T0a,$A0,$ba - fmul $T0b,$A0,$bb - stfd $N2,56($nap_d) ; save n[j+1] in double format - stfdu $N3,64($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - - fctid $T0a,$T0a - fctid $T0b,$T0b - fctid $T1a,$T1a - fctid $T1b,$T1b - fctid $T2a,$T2a - fctid $T2b,$T2b - fctid $T3a,$T3a - fctid $T3b,$T3b - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - -.align 5 -L1st: -___ -$code.=<<___ if ($SIZE_T==8); - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair - lwz $t7,8($np) -___ -$code.=<<___ if ($SIZE_T==4); - lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs - lwz $t1,4($ap) - lwz $t2,8($ap) - lwz $t3,12($ap) - lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs - lwz $t5,4($np) - lwz $t6,8($np) - lwz $t7,12($np) -___ -$code.=<<___; - std $t0,`$FRAME+64`($sp) - std $t1,`$FRAME+72`($sp) - std $t2,`$FRAME+80`($sp) - std $t3,`$FRAME+88`($sp) - std $t4,`$FRAME+96`($sp) - std $t5,`$FRAME+104`($sp) - std $t6,`$FRAME+112`($sp) - std $t7,`$FRAME+120`($sp) - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - lfd $A0,`$FRAME+64`($sp) - lfd $A1,`$FRAME+72`($sp) - lfd $A2,`$FRAME+80`($sp) - lfd $A3,`$FRAME+88`($sp) - lfd $N0,`$FRAME+96`($sp) - lfd $N1,`$FRAME+104`($sp) - lfd $N2,`$FRAME+112`($sp) - lfd $N3,`$FRAME+120`($sp) - fcfid $A0,$A0 - fcfid $A1,$A1 - fcfid $A2,$A2 - fcfid $A3,$A3 - fcfid $N0,$N0 - fcfid $N1,$N1 - fcfid $N2,$N2 - fcfid $N3,$N3 - addi $ap,$ap,16 - addi $np,$np,16 - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - stfd $A0,8($nap_d) ; save a[j] in double format - stfd $A1,16($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmadd $T0a,$A0,$ba,$dota - fmadd $T0b,$A0,$bb,$dotb - stfd $A2,24($nap_d) ; save a[j+1] in double format - stfd $A3,32($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - stfd $N0,40($nap_d) ; save n[j] in double format - stfd $N1,48($nap_d) - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - add $t0,$t0,$carry ; can not overflow - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - stfd $N2,56($nap_d) ; save n[j+1] in double format - stfdu $N3,64($nap_d) - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - insrdi $t0,$t1,16,32 - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - add $t2,$t2,$carry - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - srdi $carry,$t2,16 - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - srdi $carry,$t3,16 - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - insrdi $t0,$t3,16,0 ; 0..63 bits - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - add $t4,$t4,$carry - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - srdi $carry,$t4,16 - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - - fctid $T0a,$T0a - fctid $T0b,$T0b - add $t6,$t6,$carry - fctid $T1a,$T1a - fctid $T1b,$T1b - srdi $carry,$t6,16 - fctid $T2a,$T2a - fctid $T2b,$T2b - insrdi $t4,$t6,16,16 - fctid $T3a,$T3a - fctid $T3b,$T3b - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - std $t0,8($tp) ; tp[j-1] - stdu $t4,16($tp) ; tp[j] - bdnz- L1st - - fctid $dota,$dota - fctid $dotb,$dotb - - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - stfd $dota,`$FRAME+64`($sp) - stfd $dotb,`$FRAME+72`($sp) - - add $t0,$t0,$carry ; can not overflow - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - insrdi $t0,$t1,16,32 - add $t2,$t2,$carry - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - srdi $carry,$t4,16 - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - ld $t6,`$FRAME+64`($sp) - ld $t7,`$FRAME+72`($sp) - - std $t0,8($tp) ; tp[j-1] - stdu $t4,16($tp) ; tp[j] - - add $t6,$t6,$carry ; can not overflow - srdi $carry,$t6,16 - add $t7,$t7,$carry - insrdi $t6,$t7,48,0 - srdi $ovf,$t7,48 - std $t6,8($tp) ; tp[num-1] - - slwi $t7,$num,2 - subf $nap_d,$t7,$nap_d ; rewind pointer - - li $i,8 ; i=1 -.align 5 -Louter: -___ -$code.=<<___ if ($SIZE_T==8); - ldx $t3,$bp,$i ; bp[i] -___ -$code.=<<___ if ($SIZE_T==4); - add $t0,$bp,$i - lwz $t3,0($t0) ; bp[i,i+1] - lwz $t0,4($t0) - insrdi $t3,$t0,32,0 -___ -$code.=<<___; - ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] - mulld $t7,$a0,$t3 ; ap[0]*bp[i] - - addi $tp,$sp,`$FRAME+$TRANSFER` - add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] - li $carry,0 - mulld $t7,$t7,$n0 ; tp[0]*n0 - mtctr $j - - ; transfer bp[i] to FPU as 4x16-bit values - extrdi $t0,$t3,16,48 - extrdi $t1,$t3,16,32 - extrdi $t2,$t3,16,16 - extrdi $t3,$t3,16,0 - std $t0,`$FRAME+0`($sp) - std $t1,`$FRAME+8`($sp) - std $t2,`$FRAME+16`($sp) - std $t3,`$FRAME+24`($sp) - ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values - extrdi $t4,$t7,16,48 - extrdi $t5,$t7,16,32 - extrdi $t6,$t7,16,16 - extrdi $t7,$t7,16,0 - std $t4,`$FRAME+32`($sp) - std $t5,`$FRAME+40`($sp) - std $t6,`$FRAME+48`($sp) - std $t7,`$FRAME+56`($sp) - - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - lfd $N0,40($nap_d) ; load n[j] in double format - lfd $N1,48($nap_d) - lfd $N2,56($nap_d) ; load n[j+1] in double format - lfdu $N3,64($nap_d) - - lfd $ba,`$FRAME+0`($sp) - lfd $bb,`$FRAME+8`($sp) - lfd $bc,`$FRAME+16`($sp) - lfd $bd,`$FRAME+24`($sp) - lfd $na,`$FRAME+32`($sp) - lfd $nb,`$FRAME+40`($sp) - lfd $nc,`$FRAME+48`($sp) - lfd $nd,`$FRAME+56`($sp) - - fcfid $ba,$ba - fcfid $bb,$bb - fcfid $bc,$bc - fcfid $bd,$bd - fcfid $na,$na - fcfid $nb,$nb - fcfid $nc,$nc - fcfid $nd,$nd - - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmul $T0a,$A0,$ba - fmul $T0b,$A0,$bb - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - - fctid $T0a,$T0a - fctid $T0b,$T0b - fctid $T1a,$T1a - fctid $T1b,$T1b - fctid $T2a,$T2a - fctid $T2b,$T2b - fctid $T3a,$T3a - fctid $T3b,$T3b - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - -.align 5 -Linner: - fmul $T1a,$A1,$ba - fmul $T1b,$A1,$bb - fmul $T2a,$A2,$ba - fmul $T2b,$A2,$bb - lfd $N0,40($nap_d) ; load n[j] in double format - lfd $N1,48($nap_d) - fmul $T3a,$A3,$ba - fmul $T3b,$A3,$bb - fmadd $T0a,$A0,$ba,$dota - fmadd $T0b,$A0,$bb,$dotb - lfd $N2,56($nap_d) ; load n[j+1] in double format - lfdu $N3,64($nap_d) - - fmadd $T1a,$A0,$bc,$T1a - fmadd $T1b,$A0,$bd,$T1b - fmadd $T2a,$A1,$bc,$T2a - fmadd $T2b,$A1,$bd,$T2b - lfd $A0,8($nap_d) ; load a[j] in double format - lfd $A1,16($nap_d) - fmadd $T3a,$A2,$bc,$T3a - fmadd $T3b,$A2,$bd,$T3b - fmul $dota,$A3,$bc - fmul $dotb,$A3,$bd - lfd $A2,24($nap_d) ; load a[j+1] in double format - lfd $A3,32($nap_d) - - fmadd $T1a,$N1,$na,$T1a - fmadd $T1b,$N1,$nb,$T1b - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - fmadd $T2a,$N2,$na,$T2a - fmadd $T2b,$N2,$nb,$T2b - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - fmadd $T3a,$N3,$na,$T3a - fmadd $T3b,$N3,$nb,$T3b - add $t0,$t0,$carry ; can not overflow - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - fmadd $T0a,$N0,$na,$T0a - fmadd $T0b,$N0,$nb,$T0b - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - - fmadd $T1a,$N0,$nc,$T1a - fmadd $T1b,$N0,$nd,$T1b - insrdi $t0,$t1,16,32 - ld $t1,8($tp) ; tp[j] - fmadd $T2a,$N1,$nc,$T2a - fmadd $T2b,$N1,$nd,$T2b - add $t2,$t2,$carry - fmadd $T3a,$N2,$nc,$T3a - fmadd $T3b,$N2,$nd,$T3b - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - fmadd $dota,$N3,$nc,$dota - fmadd $dotb,$N3,$nd,$dotb - add $t3,$t3,$carry - ldu $t2,16($tp) ; tp[j+1] - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - - fctid $T0a,$T0a - fctid $T0b,$T0b - srdi $carry,$t4,16 - fctid $T1a,$T1a - fctid $T1b,$T1b - add $t5,$t5,$carry - fctid $T2a,$T2a - fctid $T2b,$T2b - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - fctid $T3a,$T3a - fctid $T3b,$T3b - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - - stfd $T0a,`$FRAME+0`($sp) - stfd $T0b,`$FRAME+8`($sp) - add $t7,$t7,$carry - addc $t3,$t0,$t1 -___ -$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] - extrdi $t0,$t0,32,0 - extrdi $t1,$t1,32,0 - adde $t0,$t0,$t1 -___ -$code.=<<___; - stfd $T1a,`$FRAME+16`($sp) - stfd $T1b,`$FRAME+24`($sp) - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - stfd $T2a,`$FRAME+32`($sp) - stfd $T2b,`$FRAME+40`($sp) - adde $t5,$t4,$t2 -___ -$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] - extrdi $t4,$t4,32,0 - extrdi $t2,$t2,32,0 - adde $t4,$t4,$t2 -___ -$code.=<<___; - stfd $T3a,`$FRAME+48`($sp) - stfd $T3b,`$FRAME+56`($sp) - addze $carry,$carry - std $t3,-16($tp) ; tp[j-1] - std $t5,-8($tp) ; tp[j] - bdnz- Linner - - fctid $dota,$dota - fctid $dotb,$dotb - ld $t0,`$FRAME+0`($sp) - ld $t1,`$FRAME+8`($sp) - ld $t2,`$FRAME+16`($sp) - ld $t3,`$FRAME+24`($sp) - ld $t4,`$FRAME+32`($sp) - ld $t5,`$FRAME+40`($sp) - ld $t6,`$FRAME+48`($sp) - ld $t7,`$FRAME+56`($sp) - stfd $dota,`$FRAME+64`($sp) - stfd $dotb,`$FRAME+72`($sp) - - add $t0,$t0,$carry ; can not overflow - srdi $carry,$t0,16 - add $t1,$t1,$carry - srdi $carry,$t1,16 - insrdi $t0,$t1,16,32 - add $t2,$t2,$carry - ld $t1,8($tp) ; tp[j] - srdi $carry,$t2,16 - insrdi $t0,$t2,16,16 - add $t3,$t3,$carry - ldu $t2,16($tp) ; tp[j+1] - srdi $carry,$t3,16 - insrdi $t0,$t3,16,0 ; 0..63 bits - add $t4,$t4,$carry - srdi $carry,$t4,16 - add $t5,$t5,$carry - srdi $carry,$t5,16 - insrdi $t4,$t5,16,32 - add $t6,$t6,$carry - srdi $carry,$t6,16 - insrdi $t4,$t6,16,16 - add $t7,$t7,$carry - insrdi $t4,$t7,16,0 ; 64..127 bits - srdi $carry,$t7,16 ; upper 33 bits - ld $t6,`$FRAME+64`($sp) - ld $t7,`$FRAME+72`($sp) - - addc $t3,$t0,$t1 -___ -$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] - extrdi $t0,$t0,32,0 - extrdi $t1,$t1,32,0 - adde $t0,$t0,$t1 -___ -$code.=<<___; - adde $t5,$t4,$t2 -___ -$code.=<<___ if ($SIZE_T==4); # adjust XER[CA] - extrdi $t4,$t4,32,0 - extrdi $t2,$t2,32,0 - adde $t4,$t4,$t2 -___ -$code.=<<___; - addze $carry,$carry - - std $t3,-16($tp) ; tp[j-1] - std $t5,-8($tp) ; tp[j] - - add $carry,$carry,$ovf ; comsume upmost overflow - add $t6,$t6,$carry ; can not overflow - srdi $carry,$t6,16 - add $t7,$t7,$carry - insrdi $t6,$t7,48,0 - srdi $ovf,$t7,48 - std $t6,0($tp) ; tp[num-1] - - slwi $t7,$num,2 - addi $i,$i,8 - subf $nap_d,$t7,$nap_d ; rewind pointer - cmpw $i,$num - blt- Louter -___ - -$code.=<<___ if ($SIZE_T==8); - subf $np,$num,$np ; rewind np - addi $j,$j,1 ; restore counter - subfc $i,$i,$i ; j=0 and "clear" XER[CA] - addi $tp,$sp,`$FRAME+$TRANSFER+8` - addi $t4,$sp,`$FRAME+$TRANSFER+16` - addi $t5,$np,8 - addi $t6,$rp,8 - mtctr $j - -.align 4 -Lsub: ldx $t0,$tp,$i - ldx $t1,$np,$i - ldx $t2,$t4,$i - ldx $t3,$t5,$i - subfe $t0,$t1,$t0 ; tp[j]-np[j] - subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1] - stdx $t0,$rp,$i - stdx $t2,$t6,$i - addi $i,$i,16 - bdnz- Lsub - - li $i,0 - subfe $ovf,$i,$ovf ; handle upmost overflow bit - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - addi $t7,$ap,8 - mtctr $j - -.align 4 -Lcopy: ; copy or in-place refresh - ldx $t0,$ap,$i - ldx $t1,$t7,$i - std $i,8($nap_d) ; zap nap_d - std $i,16($nap_d) - std $i,24($nap_d) - std $i,32($nap_d) - std $i,40($nap_d) - std $i,48($nap_d) - std $i,56($nap_d) - stdu $i,64($nap_d) - stdx $t0,$rp,$i - stdx $t1,$t6,$i - stdx $i,$tp,$i ; zap tp at once - stdx $i,$t4,$i - addi $i,$i,16 - bdnz- Lcopy -___ -$code.=<<___ if ($SIZE_T==4); - subf $np,$num,$np ; rewind np - addi $j,$j,1 ; restore counter - subfc $i,$i,$i ; j=0 and "clear" XER[CA] - addi $tp,$sp,`$FRAME+$TRANSFER` - addi $np,$np,-4 - addi $rp,$rp,-4 - addi $ap,$sp,`$FRAME+$TRANSFER+4` - mtctr $j - -.align 4 -Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order - ldu $t2,16($tp) - lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order - lwz $t5,8($np) - lwz $t6,12($np) - lwzu $t7,16($np) - extrdi $t1,$t0,32,0 - extrdi $t3,$t2,32,0 - subfe $t4,$t4,$t0 ; tp[j]-np[j] - stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order - subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] - stw $t1,8($ap) - subfe $t6,$t6,$t2 ; tp[j+2]-np[j+2] - stw $t2,12($ap) - subfe $t7,$t7,$t3 ; tp[j+3]-np[j+3] - stwu $t3,16($ap) - stw $t4,4($rp) - stw $t5,8($rp) - stw $t6,12($rp) - stwu $t7,16($rp) - bdnz- Lsub - - li $i,0 - subfe $ovf,$i,$ovf ; handle upmost overflow bit - addi $tp,$sp,`$FRAME+$TRANSFER+4` - subf $rp,$num,$rp ; rewind rp - and $ap,$tp,$ovf - andc $np,$rp,$ovf - or $ap,$ap,$np ; ap=borrow?tp:rp - addi $tp,$sp,`$FRAME+$TRANSFER` - mtctr $j - -.align 4 -Lcopy: ; copy or in-place refresh - lwz $t0,4($ap) - lwz $t1,8($ap) - lwz $t2,12($ap) - lwzu $t3,16($ap) - std $i,8($nap_d) ; zap nap_d - std $i,16($nap_d) - std $i,24($nap_d) - std $i,32($nap_d) - std $i,40($nap_d) - std $i,48($nap_d) - std $i,56($nap_d) - stdu $i,64($nap_d) - stw $t0,4($rp) - stw $t1,8($rp) - stw $t2,12($rp) - stwu $t3,16($rp) - std $i,8($tp) ; zap tp at once - stdu $i,16($tp) - bdnz- Lcopy -___ - -$code.=<<___; - $POP $i,0($sp) - li r3,1 ; signal "handled" - $POP r22,`-12*8-10*$SIZE_T`($i) - $POP r23,`-12*8-9*$SIZE_T`($i) - $POP r24,`-12*8-8*$SIZE_T`($i) - $POP r25,`-12*8-7*$SIZE_T`($i) - $POP r26,`-12*8-6*$SIZE_T`($i) - $POP r27,`-12*8-5*$SIZE_T`($i) - $POP r28,`-12*8-4*$SIZE_T`($i) - $POP r29,`-12*8-3*$SIZE_T`($i) - $POP r30,`-12*8-2*$SIZE_T`($i) - $POP r31,`-12*8-1*$SIZE_T`($i) - lfd f20,`-12*8`($i) - lfd f21,`-11*8`($i) - lfd f22,`-10*8`($i) - lfd f23,`-9*8`($i) - lfd f24,`-8*8`($i) - lfd f25,`-7*8`($i) - lfd f26,`-6*8`($i) - lfd f27,`-5*8`($i) - lfd f28,`-4*8`($i) - lfd f29,`-3*8`($i) - lfd f30,`-2*8`($i) - lfd f31,`-1*8`($i) - mr $sp,$i - blr - .long 0 - .byte 0,12,4,0,0x8c,10,6,0 - .long 0 - -.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/s390x-gf2m.pl b/drivers/builtin_openssl2/crypto/bn/asm/s390x-gf2m.pl deleted file mode 100644 index 9d18d40e77..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/s390x-gf2m.pl +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# May 2011 -# -# The module implements bn_GF2m_mul_2x2 polynomial multiplication used -# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for -# the time being... gcc 4.3 appeared to generate poor code, therefore -# the effort. And indeed, the module delivers 55%-90%(*) improvement -# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit -# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196. -# This is for 64-bit build. In 32-bit "highgprs" case improvement is -# even higher, for example on z990 it was measured 80%-150%. ECDSA -# sign is modest 9%-12% faster. Keep in mind that these coefficients -# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is -# burnt in it... -# -# (*) gcc 4.1 was observed to deliver better results than gcc 4.3, -# so that improvement coefficients can vary from one specific -# setup to another. - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$stdframe=16*$SIZE_T+4*8; - -$rp="%r2"; -$a1="%r3"; -$a0="%r4"; -$b1="%r5"; -$b0="%r6"; - -$ra="%r14"; -$sp="%r15"; - -@T=("%r0","%r1"); -@i=("%r12","%r13"); - -($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11)); -($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8; - -$code.=<<___; -.text - -.type _mul_1x1,\@function -.align 16 -_mul_1x1: - lgr $a1,$a - sllg $a2,$a,1 - sllg $a4,$a,2 - sllg $a8,$a,3 - - srag $lo,$a1,63 # broadcast 63rd bit - nihh $a1,0x1fff - srag @i[0],$a2,63 # broadcast 62nd bit - nihh $a2,0x3fff - srag @i[1],$a4,63 # broadcast 61st bit - nihh $a4,0x7fff - ngr $lo,$b - ngr @i[0],$b - ngr @i[1],$b - - lghi @T[0],0 - lgr $a12,$a1 - stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0 - xgr $a12,$a2 - stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1 - lgr $a48,$a4 - stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2 - xgr $a48,$a8 - stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2 - xgr $a1,$a4 - - stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4 - xgr $a2,$a4 - stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4 - xgr $a12,$a4 - stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4 - xgr $a1,$a48 - stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4 - xgr $a2,$a48 - - stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8 - xgr $a12,$a48 - stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8 - xgr $a1,$a4 - stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8 - xgr $a2,$a4 - stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8 - - xgr $a12,$a4 - stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8 - srlg $hi,$lo,1 - stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8 - sllg $lo,$lo,63 - stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8 - srlg @T[0],@i[0],2 - stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8 - - lghi $mask,`0xf<<3` - sllg $a1,@i[0],62 - sllg @i[0],$b,3 - srlg @T[1],@i[1],3 - ngr @i[0],$mask - sllg $a2,@i[1],61 - srlg @i[1],$b,4-3 - xgr $hi,@T[0] - ngr @i[1],$mask - xgr $lo,$a1 - xgr $hi,@T[1] - xgr $lo,$a2 - - xg $lo,$stdframe(@i[0],$sp) - srlg @i[0],$b,8-3 - ngr @i[0],$mask -___ -for($n=1;$n<14;$n++) { -$code.=<<___; - lg @T[1],$stdframe(@i[1],$sp) - srlg @i[1],$b,`($n+2)*4`-3 - sllg @T[0],@T[1],`$n*4` - ngr @i[1],$mask - srlg @T[1],@T[1],`64-$n*4` - xgr $lo,@T[0] - xgr $hi,@T[1] -___ - push(@i,shift(@i)); push(@T,shift(@T)); -} -$code.=<<___; - lg @T[1],$stdframe(@i[1],$sp) - sllg @T[0],@T[1],`$n*4` - srlg @T[1],@T[1],`64-$n*4` - xgr $lo,@T[0] - xgr $hi,@T[1] - - lg @T[0],$stdframe(@i[0],$sp) - sllg @T[1],@T[0],`($n+1)*4` - srlg @T[0],@T[0],`64-($n+1)*4` - xgr $lo,@T[1] - xgr $hi,@T[0] - - br $ra -.size _mul_1x1,.-_mul_1x1 - -.globl bn_GF2m_mul_2x2 -.type bn_GF2m_mul_2x2,\@function -.align 16 -bn_GF2m_mul_2x2: - stm${g} %r3,%r15,3*$SIZE_T($sp) - - lghi %r1,-$stdframe-128 - la %r0,0($sp) - la $sp,0(%r1,$sp) # alloca - st${g} %r0,0($sp) # back chain -___ -if ($SIZE_T==8) { -my @r=map("%r$_",(6..9)); -$code.=<<___; - bras $ra,_mul_1x1 # a1·b1 - stmg $lo,$hi,16($rp) - - lg $a,`$stdframe+128+4*$SIZE_T`($sp) - lg $b,`$stdframe+128+6*$SIZE_T`($sp) - bras $ra,_mul_1x1 # a0·b0 - stmg $lo,$hi,0($rp) - - lg $a,`$stdframe+128+3*$SIZE_T`($sp) - lg $b,`$stdframe+128+5*$SIZE_T`($sp) - xg $a,`$stdframe+128+4*$SIZE_T`($sp) - xg $b,`$stdframe+128+6*$SIZE_T`($sp) - bras $ra,_mul_1x1 # (a0+a1)·(b0+b1) - lmg @r[0],@r[3],0($rp) - - xgr $lo,$hi - xgr $hi,@r[1] - xgr $lo,@r[0] - xgr $hi,@r[2] - xgr $lo,@r[3] - xgr $hi,@r[3] - xgr $lo,$hi - stg $hi,16($rp) - stg $lo,8($rp) -___ -} else { -$code.=<<___; - sllg %r3,%r3,32 - sllg %r5,%r5,32 - or %r3,%r4 - or %r5,%r6 - bras $ra,_mul_1x1 - rllg $lo,$lo,32 - rllg $hi,$hi,32 - stmg $lo,$hi,0($rp) -___ -} -$code.=<<___; - lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp) - br $ra -.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/s390x-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/s390x-mont.pl deleted file mode 100644 index 9fd64e81ee..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/s390x-mont.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# April 2007. -# -# Performance improvement over vanilla C code varies from 85% to 45% -# depending on key length and benchmark. Unfortunately in this context -# these are not very impressive results [for code that utilizes "wide" -# 64x64=128-bit multiplication, which is not commonly available to C -# programmers], at least hand-coded bn_asm.c replacement is known to -# provide 30-40% better results for longest keys. Well, on a second -# thought it's not very surprising, because z-CPUs are single-issue -# and _strictly_ in-order execution, while bn_mul_mont is more or less -# dependent on CPU ability to pipe-line instructions and have several -# of them "in-flight" at the same time. I mean while other methods, -# for example Karatsuba, aim to minimize amount of multiplications at -# the cost of other operations increase, bn_mul_mont aim to neatly -# "overlap" multiplications and the other operations [and on most -# platforms even minimize the amount of the other operations, in -# particular references to memory]. But it's possible to improve this -# module performance by implementing dedicated squaring code-path and -# possibly by unrolling loops... - -# January 2009. -# -# Reschedule to minimize/avoid Address Generation Interlock hazard, -# make inner loops counter-based. - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG -# is achieved by swapping words after 64-bit loads, follow _dswap-s. -# On z990 it was measured to perform 2.6-2.2 times better than -# compiler-generated code, less for longer keys... - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$stdframe=16*$SIZE_T+4*8; - -$mn0="%r0"; -$num="%r1"; - -# int bn_mul_mont( -$rp="%r2"; # BN_ULONG *rp, -$ap="%r3"; # const BN_ULONG *ap, -$bp="%r4"; # const BN_ULONG *bp, -$np="%r5"; # const BN_ULONG *np, -$n0="%r6"; # const BN_ULONG *n0, -#$num="160(%r15)" # int num); - -$bi="%r2"; # zaps rp -$j="%r7"; - -$ahi="%r8"; -$alo="%r9"; -$nhi="%r10"; -$nlo="%r11"; -$AHI="%r12"; -$NHI="%r13"; -$count="%r14"; -$sp="%r15"; - -$code.=<<___; -.text -.globl bn_mul_mont -.type bn_mul_mont,\@function -bn_mul_mont: - lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num - sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes - la $bp,0($num,$bp) - - st${g} %r2,2*$SIZE_T($sp) - - cghi $num,16 # - lghi %r2,0 # - blr %r14 # if($num<16) return 0; -___ -$code.=<<___ if ($flavour =~ /3[12]/); - tmll $num,4 - bnzr %r14 # if ($num&1) return 0; -___ -$code.=<<___ if ($flavour !~ /3[12]/); - cghi $num,96 # - bhr %r14 # if($num>96) return 0; -___ -$code.=<<___; - stm${g} %r3,%r15,3*$SIZE_T($sp) - - lghi $rp,-$stdframe-8 # leave room for carry bit - lcgr $j,$num # -$num - lgr %r0,$sp - la $rp,0($rp,$sp) - la $sp,0($j,$rp) # alloca - st${g} %r0,0($sp) # back chain - - sra $num,3 # restore $num - la $bp,0($j,$bp) # restore $bp - ahi $num,-1 # adjust $num for inner loop - lg $n0,0($n0) # pull n0 - _dswap $n0 - - lg $bi,0($bp) - _dswap $bi - lg $alo,0($ap) - _dswap $alo - mlgr $ahi,$bi # ap[0]*bp[0] - lgr $AHI,$ahi - - lgr $mn0,$alo # "tp[0]"*n0 - msgr $mn0,$n0 - - lg $nlo,0($np) # - _dswap $nlo - mlgr $nhi,$mn0 # np[0]*m1 - algr $nlo,$alo # +="tp[0]" - lghi $NHI,0 - alcgr $NHI,$nhi - - la $j,8(%r0) # j=1 - lr $count,$num - -.align 16 -.L1st: - lg $alo,0($j,$ap) - _dswap $alo - mlgr $ahi,$bi # ap[j]*bp[0] - algr $alo,$AHI - lghi $AHI,0 - alcgr $AHI,$ahi - - lg $nlo,0($j,$np) - _dswap $nlo - mlgr $nhi,$mn0 # np[j]*m1 - algr $nlo,$NHI - lghi $NHI,0 - alcgr $nhi,$NHI # +="tp[j]" - algr $nlo,$alo - alcgr $NHI,$nhi - - stg $nlo,$stdframe-8($j,$sp) # tp[j-1]= - la $j,8($j) # j++ - brct $count,.L1st - - algr $NHI,$AHI - lghi $AHI,0 - alcgr $AHI,$AHI # upmost overflow bit - stg $NHI,$stdframe-8($j,$sp) - stg $AHI,$stdframe($j,$sp) - la $bp,8($bp) # bp++ - -.Louter: - lg $bi,0($bp) # bp[i] - _dswap $bi - lg $alo,0($ap) - _dswap $alo - mlgr $ahi,$bi # ap[0]*bp[i] - alg $alo,$stdframe($sp) # +=tp[0] - lghi $AHI,0 - alcgr $AHI,$ahi - - lgr $mn0,$alo - msgr $mn0,$n0 # tp[0]*n0 - - lg $nlo,0($np) # np[0] - _dswap $nlo - mlgr $nhi,$mn0 # np[0]*m1 - algr $nlo,$alo # +="tp[0]" - lghi $NHI,0 - alcgr $NHI,$nhi - - la $j,8(%r0) # j=1 - lr $count,$num - -.align 16 -.Linner: - lg $alo,0($j,$ap) - _dswap $alo - mlgr $ahi,$bi # ap[j]*bp[i] - algr $alo,$AHI - lghi $AHI,0 - alcgr $ahi,$AHI - alg $alo,$stdframe($j,$sp)# +=tp[j] - alcgr $AHI,$ahi - - lg $nlo,0($j,$np) - _dswap $nlo - mlgr $nhi,$mn0 # np[j]*m1 - algr $nlo,$NHI - lghi $NHI,0 - alcgr $nhi,$NHI - algr $nlo,$alo # +="tp[j]" - alcgr $NHI,$nhi - - stg $nlo,$stdframe-8($j,$sp) # tp[j-1]= - la $j,8($j) # j++ - brct $count,.Linner - - algr $NHI,$AHI - lghi $AHI,0 - alcgr $AHI,$AHI - alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit - lghi $ahi,0 - alcgr $AHI,$ahi # new upmost overflow bit - stg $NHI,$stdframe-8($j,$sp) - stg $AHI,$stdframe($j,$sp) - - la $bp,8($bp) # bp++ - cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num] - jne .Louter - - l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp - la $ap,$stdframe($sp) - ahi $num,1 # restore $num, incidentally clears "borrow" - - la $j,0(%r0) - lr $count,$num -.Lsub: lg $alo,0($j,$ap) - lg $nlo,0($j,$np) - _dswap $nlo - slbgr $alo,$nlo - stg $alo,0($j,$rp) - la $j,8($j) - brct $count,.Lsub - lghi $ahi,0 - slbgr $AHI,$ahi # handle upmost carry - - ngr $ap,$AHI - lghi $np,-1 - xgr $np,$AHI - ngr $np,$rp - ogr $ap,$np # ap=borrow?tp:rp - - la $j,0(%r0) - lgr $count,$num -.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh - _dswap $alo - stg $j,$stdframe($j,$sp) # zap tp - stg $alo,0($j,$rp) - la $j,8($j) - brct $count,.Lcopy - - la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp) - lm${g} %r6,%r15,0(%r1) - lghi %r2,1 # signal "processed" - br %r14 -.size bn_mul_mont,.-bn_mul_mont -.string "Montgomery Multiplication for s390x, CRYPTOGAMS by " -___ - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e; - print $_,"\n"; -} -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/s390x.S b/drivers/builtin_openssl2/crypto/bn/asm/s390x.S deleted file mode 100755 index 43fcb79bc0..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/s390x.S +++ /dev/null @@ -1,678 +0,0 @@ -.ident "s390x.S, version 1.1" -// ==================================================================== -// Written by Andy Polyakov for the OpenSSL -// project. -// -// Rights for redistribution and usage in source and binary forms are -// granted according to the OpenSSL license. Warranty of any kind is -// disclaimed. -// ==================================================================== - -.text - -#define zero %r0 - -// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); -.globl bn_mul_add_words -.type bn_mul_add_words,@function -.align 4 -bn_mul_add_words: - lghi zero,0 // zero = 0 - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0; - ltgfr %r4,%r4 - bler %r14 // if (len<=0) return 0; - - stmg %r6,%r10,48(%r15) - lghi %r10,3 - lghi %r8,0 // carry = 0 - nr %r10,%r4 // len%4 - sra %r4,2 // cnt=len/4 - jz .Loop1_madd // carry is incidentally cleared if branch taken - algr zero,zero // clear carry - -.Loop4_madd: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry - alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - stg %r7,0(%r2,%r1) // rp[i]= - - lg %r9,8(%r2,%r3) - mlgr %r8,%r5 - alcgr %r9,%r6 - alcgr %r8,zero - alg %r9,8(%r2,%r1) - stg %r9,8(%r2,%r1) - - lg %r7,16(%r2,%r3) - mlgr %r6,%r5 - alcgr %r7,%r8 - alcgr %r6,zero - alg %r7,16(%r2,%r1) - stg %r7,16(%r2,%r1) - - lg %r9,24(%r2,%r3) - mlgr %r8,%r5 - alcgr %r9,%r6 - alcgr %r8,zero - alg %r9,24(%r2,%r1) - stg %r9,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r4,.Loop4_madd - - la %r10,1(%r10) // see if len%4 is zero ... - brct %r10,.Loop1_madd // without touching condition code:-) - -.Lend_madd: - alcgr %r8,zero // collect carry bit - lgr %r2,%r8 - lmg %r6,%r10,48(%r15) - br %r14 - -.Loop1_madd: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry - alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - stg %r7,0(%r2,%r1) // rp[i]= - - lgr %r8,%r6 - la %r2,8(%r2) // i++ - brct %r10,.Loop1_madd - - j .Lend_madd -.size bn_mul_add_words,.-bn_mul_add_words - -// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5); -.globl bn_mul_words -.type bn_mul_words,@function -.align 4 -bn_mul_words: - lghi zero,0 // zero = 0 - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0; - ltgfr %r4,%r4 - bler %r14 // if (len<=0) return 0; - - stmg %r6,%r10,48(%r15) - lghi %r10,3 - lghi %r8,0 // carry = 0 - nr %r10,%r4 // len%4 - sra %r4,2 // cnt=len/4 - jz .Loop1_mul // carry is incidentally cleared if branch taken - algr zero,zero // clear carry - -.Loop4_mul: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry - stg %r7,0(%r2,%r1) // rp[i]= - - lg %r9,8(%r2,%r3) - mlgr %r8,%r5 - alcgr %r9,%r6 - stg %r9,8(%r2,%r1) - - lg %r7,16(%r2,%r3) - mlgr %r6,%r5 - alcgr %r7,%r8 - stg %r7,16(%r2,%r1) - - lg %r9,24(%r2,%r3) - mlgr %r8,%r5 - alcgr %r9,%r6 - stg %r9,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r4,.Loop4_mul - - la %r10,1(%r10) // see if len%4 is zero ... - brct %r10,.Loop1_mul // without touching condition code:-) - -.Lend_mul: - alcgr %r8,zero // collect carry bit - lgr %r2,%r8 - lmg %r6,%r10,48(%r15) - br %r14 - -.Loop1_mul: - lg %r7,0(%r2,%r3) // ap[i] - mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry - stg %r7,0(%r2,%r1) // rp[i]= - - lgr %r8,%r6 - la %r2,8(%r2) // i++ - brct %r10,.Loop1_mul - - j .Lend_mul -.size bn_mul_words,.-bn_mul_words - -// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4) -.globl bn_sqr_words -.type bn_sqr_words,@function -.align 4 -bn_sqr_words: - ltgfr %r4,%r4 - bler %r14 - - stmg %r6,%r7,48(%r15) - srag %r1,%r4,2 // cnt=len/4 - jz .Loop1_sqr - -.Loop4_sqr: - lg %r7,0(%r3) - mlgr %r6,%r7 - stg %r7,0(%r2) - stg %r6,8(%r2) - - lg %r7,8(%r3) - mlgr %r6,%r7 - stg %r7,16(%r2) - stg %r6,24(%r2) - - lg %r7,16(%r3) - mlgr %r6,%r7 - stg %r7,32(%r2) - stg %r6,40(%r2) - - lg %r7,24(%r3) - mlgr %r6,%r7 - stg %r7,48(%r2) - stg %r6,56(%r2) - - la %r3,32(%r3) - la %r2,64(%r2) - brct %r1,.Loop4_sqr - - lghi %r1,3 - nr %r4,%r1 // cnt=len%4 - jz .Lend_sqr - -.Loop1_sqr: - lg %r7,0(%r3) - mlgr %r6,%r7 - stg %r7,0(%r2) - stg %r6,8(%r2) - - la %r3,8(%r3) - la %r2,16(%r2) - brct %r4,.Loop1_sqr - -.Lend_sqr: - lmg %r6,%r7,48(%r15) - br %r14 -.size bn_sqr_words,.-bn_sqr_words - -// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d); -.globl bn_div_words -.type bn_div_words,@function -.align 4 -bn_div_words: - dlgr %r2,%r4 - lgr %r2,%r3 - br %r14 -.size bn_div_words,.-bn_div_words - -// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); -.globl bn_add_words -.type bn_add_words,@function -.align 4 -bn_add_words: - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0 - ltgfr %r5,%r5 - bler %r14 // if (len<=0) return 0; - - stg %r6,48(%r15) - lghi %r6,3 - nr %r6,%r5 // len%4 - sra %r5,2 // len/4, use sra because it sets condition code - jz .Loop1_add // carry is incidentally cleared if branch taken - algr %r2,%r2 // clear carry - -.Loop4_add: - lg %r0,0(%r2,%r3) - alcg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - lg %r0,8(%r2,%r3) - alcg %r0,8(%r2,%r4) - stg %r0,8(%r2,%r1) - lg %r0,16(%r2,%r3) - alcg %r0,16(%r2,%r4) - stg %r0,16(%r2,%r1) - lg %r0,24(%r2,%r3) - alcg %r0,24(%r2,%r4) - stg %r0,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r5,.Loop4_add - - la %r6,1(%r6) // see if len%4 is zero ... - brct %r6,.Loop1_add // without touching condition code:-) - -.Lexit_add: - lghi %r2,0 - alcgr %r2,%r2 - lg %r6,48(%r15) - br %r14 - -.Loop1_add: - lg %r0,0(%r2,%r3) - alcg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - - la %r2,8(%r2) // i++ - brct %r6,.Loop1_add - - j .Lexit_add -.size bn_add_words,.-bn_add_words - -// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5); -.globl bn_sub_words -.type bn_sub_words,@function -.align 4 -bn_sub_words: - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0 - ltgfr %r5,%r5 - bler %r14 // if (len<=0) return 0; - - stg %r6,48(%r15) - lghi %r6,3 - nr %r6,%r5 // len%4 - sra %r5,2 // len/4, use sra because it sets condition code - jnz .Loop4_sub // borrow is incidentally cleared if branch taken - slgr %r2,%r2 // clear borrow - -.Loop1_sub: - lg %r0,0(%r2,%r3) - slbg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - - la %r2,8(%r2) // i++ - brct %r6,.Loop1_sub - j .Lexit_sub - -.Loop4_sub: - lg %r0,0(%r2,%r3) - slbg %r0,0(%r2,%r4) - stg %r0,0(%r2,%r1) - lg %r0,8(%r2,%r3) - slbg %r0,8(%r2,%r4) - stg %r0,8(%r2,%r1) - lg %r0,16(%r2,%r3) - slbg %r0,16(%r2,%r4) - stg %r0,16(%r2,%r1) - lg %r0,24(%r2,%r3) - slbg %r0,24(%r2,%r4) - stg %r0,24(%r2,%r1) - - la %r2,32(%r2) // i+=4 - brct %r5,.Loop4_sub - - la %r6,1(%r6) // see if len%4 is zero ... - brct %r6,.Loop1_sub // without touching condition code:-) - -.Lexit_sub: - lghi %r2,0 - slbgr %r2,%r2 - lcgr %r2,%r2 - lg %r6,48(%r15) - br %r14 -.size bn_sub_words,.-bn_sub_words - -#define c1 %r1 -#define c2 %r5 -#define c3 %r8 - -#define mul_add_c(ai,bi,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlg %r6,bi*8(%r4); \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); -.globl bn_mul_comba8 -.type bn_mul_comba8,@function -.align 4 -bn_mul_comba8: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - mul_add_c(0,0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - mul_add_c(0,1,c2,c3,c1); - mul_add_c(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - mul_add_c(2,0,c3,c1,c2); - mul_add_c(1,1,c3,c1,c2); - mul_add_c(0,2,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - mul_add_c(0,3,c1,c2,c3); - mul_add_c(1,2,c1,c2,c3); - mul_add_c(2,1,c1,c2,c3); - mul_add_c(3,0,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - mul_add_c(4,0,c2,c3,c1); - mul_add_c(3,1,c2,c3,c1); - mul_add_c(2,2,c2,c3,c1); - mul_add_c(1,3,c2,c3,c1); - mul_add_c(0,4,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - mul_add_c(0,5,c3,c1,c2); - mul_add_c(1,4,c3,c1,c2); - mul_add_c(2,3,c3,c1,c2); - mul_add_c(3,2,c3,c1,c2); - mul_add_c(4,1,c3,c1,c2); - mul_add_c(5,0,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - mul_add_c(6,0,c1,c2,c3); - mul_add_c(5,1,c1,c2,c3); - mul_add_c(4,2,c1,c2,c3); - mul_add_c(3,3,c1,c2,c3); - mul_add_c(2,4,c1,c2,c3); - mul_add_c(1,5,c1,c2,c3); - mul_add_c(0,6,c1,c2,c3); - stg c1,6*8(%r2) - lghi c1,0 - - mul_add_c(0,7,c2,c3,c1); - mul_add_c(1,6,c2,c3,c1); - mul_add_c(2,5,c2,c3,c1); - mul_add_c(3,4,c2,c3,c1); - mul_add_c(4,3,c2,c3,c1); - mul_add_c(5,2,c2,c3,c1); - mul_add_c(6,1,c2,c3,c1); - mul_add_c(7,0,c2,c3,c1); - stg c2,7*8(%r2) - lghi c2,0 - - mul_add_c(7,1,c3,c1,c2); - mul_add_c(6,2,c3,c1,c2); - mul_add_c(5,3,c3,c1,c2); - mul_add_c(4,4,c3,c1,c2); - mul_add_c(3,5,c3,c1,c2); - mul_add_c(2,6,c3,c1,c2); - mul_add_c(1,7,c3,c1,c2); - stg c3,8*8(%r2) - lghi c3,0 - - mul_add_c(2,7,c1,c2,c3); - mul_add_c(3,6,c1,c2,c3); - mul_add_c(4,5,c1,c2,c3); - mul_add_c(5,4,c1,c2,c3); - mul_add_c(6,3,c1,c2,c3); - mul_add_c(7,2,c1,c2,c3); - stg c1,9*8(%r2) - lghi c1,0 - - mul_add_c(7,3,c2,c3,c1); - mul_add_c(6,4,c2,c3,c1); - mul_add_c(5,5,c2,c3,c1); - mul_add_c(4,6,c2,c3,c1); - mul_add_c(3,7,c2,c3,c1); - stg c2,10*8(%r2) - lghi c2,0 - - mul_add_c(4,7,c3,c1,c2); - mul_add_c(5,6,c3,c1,c2); - mul_add_c(6,5,c3,c1,c2); - mul_add_c(7,4,c3,c1,c2); - stg c3,11*8(%r2) - lghi c3,0 - - mul_add_c(7,5,c1,c2,c3); - mul_add_c(6,6,c1,c2,c3); - mul_add_c(5,7,c1,c2,c3); - stg c1,12*8(%r2) - lghi c1,0 - - - mul_add_c(6,7,c2,c3,c1); - mul_add_c(7,6,c2,c3,c1); - stg c2,13*8(%r2) - lghi c2,0 - - mul_add_c(7,7,c3,c1,c2); - stg c3,14*8(%r2) - stg c1,15*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_mul_comba8,.-bn_mul_comba8 - -// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4); -.globl bn_mul_comba4 -.type bn_mul_comba4,@function -.align 4 -bn_mul_comba4: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - mul_add_c(0,0,c1,c2,c3); - stg c1,0*8(%r3) - lghi c1,0 - - mul_add_c(0,1,c2,c3,c1); - mul_add_c(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - mul_add_c(2,0,c3,c1,c2); - mul_add_c(1,1,c3,c1,c2); - mul_add_c(0,2,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - mul_add_c(0,3,c1,c2,c3); - mul_add_c(1,2,c1,c2,c3); - mul_add_c(2,1,c1,c2,c3); - mul_add_c(3,0,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - mul_add_c(3,1,c2,c3,c1); - mul_add_c(2,2,c2,c3,c1); - mul_add_c(1,3,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - mul_add_c(2,3,c3,c1,c2); - mul_add_c(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - mul_add_c(3,3,c1,c2,c3); - stg c1,6*8(%r2) - stg c2,7*8(%r2) - - stmg %r6,%r8,48(%r15) - br %r14 -.size bn_mul_comba4,.-bn_mul_comba4 - -#define sqr_add_c(ai,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlgr %r6,%r7; \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -#define sqr_add_c2(ai,aj,c1,c2,c3) \ - lg %r7,ai*8(%r3); \ - mlg %r6,aj*8(%r3); \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero; \ - algr c1,%r7; \ - alcgr c2,%r6; \ - alcgr c3,zero - -// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3); -.globl bn_sqr_comba8 -.type bn_sqr_comba8,@function -.align 4 -bn_sqr_comba8: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - sqr_add_c(0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - sqr_add_c2(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - sqr_add_c(1,c3,c1,c2); - sqr_add_c2(2,0,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - sqr_add_c2(3,0,c1,c2,c3); - sqr_add_c2(2,1,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - sqr_add_c(2,c2,c3,c1); - sqr_add_c2(3,1,c2,c3,c1); - sqr_add_c2(4,0,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - sqr_add_c2(5,0,c3,c1,c2); - sqr_add_c2(4,1,c3,c1,c2); - sqr_add_c2(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - sqr_add_c(3,c1,c2,c3); - sqr_add_c2(4,2,c1,c2,c3); - sqr_add_c2(5,1,c1,c2,c3); - sqr_add_c2(6,0,c1,c2,c3); - stg c1,6*8(%r2) - lghi c1,0 - - sqr_add_c2(7,0,c2,c3,c1); - sqr_add_c2(6,1,c2,c3,c1); - sqr_add_c2(5,2,c2,c3,c1); - sqr_add_c2(4,3,c2,c3,c1); - stg c2,7*8(%r2) - lghi c2,0 - - sqr_add_c(4,c3,c1,c2); - sqr_add_c2(5,3,c3,c1,c2); - sqr_add_c2(6,2,c3,c1,c2); - sqr_add_c2(7,1,c3,c1,c2); - stg c3,8*8(%r2) - lghi c3,0 - - sqr_add_c2(7,2,c1,c2,c3); - sqr_add_c2(6,3,c1,c2,c3); - sqr_add_c2(5,4,c1,c2,c3); - stg c1,9*8(%r2) - lghi c1,0 - - sqr_add_c(5,c2,c3,c1); - sqr_add_c2(6,4,c2,c3,c1); - sqr_add_c2(7,3,c2,c3,c1); - stg c2,10*8(%r2) - lghi c2,0 - - sqr_add_c2(7,4,c3,c1,c2); - sqr_add_c2(6,5,c3,c1,c2); - stg c3,11*8(%r2) - lghi c3,0 - - sqr_add_c(6,c1,c2,c3); - sqr_add_c2(7,5,c1,c2,c3); - stg c1,12*8(%r2) - lghi c1,0 - - sqr_add_c2(7,6,c2,c3,c1); - stg c2,13*8(%r2) - lghi c2,0 - - sqr_add_c(7,c3,c1,c2); - stg c3,14*8(%r2) - stg c1,15*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_sqr_comba8,.-bn_sqr_comba8 - -// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3); -.globl bn_sqr_comba4 -.type bn_sqr_comba4,@function -.align 4 -bn_sqr_comba4: - stmg %r6,%r8,48(%r15) - - lghi c1,0 - lghi c2,0 - lghi c3,0 - lghi zero,0 - - sqr_add_c(0,c1,c2,c3); - stg c1,0*8(%r2) - lghi c1,0 - - sqr_add_c2(1,0,c2,c3,c1); - stg c2,1*8(%r2) - lghi c2,0 - - sqr_add_c(1,c3,c1,c2); - sqr_add_c2(2,0,c3,c1,c2); - stg c3,2*8(%r2) - lghi c3,0 - - sqr_add_c2(3,0,c1,c2,c3); - sqr_add_c2(2,1,c1,c2,c3); - stg c1,3*8(%r2) - lghi c1,0 - - sqr_add_c(2,c2,c3,c1); - sqr_add_c2(3,1,c2,c3,c1); - stg c2,4*8(%r2) - lghi c2,0 - - sqr_add_c2(3,2,c3,c1,c2); - stg c3,5*8(%r2) - lghi c3,0 - - sqr_add_c(3,c1,c2,c3); - stg c1,6*8(%r2) - stg c2,7*8(%r2) - - lmg %r6,%r8,48(%r15) - br %r14 -.size bn_sqr_comba4,.-bn_sqr_comba4 diff --git a/drivers/builtin_openssl2/crypto/bn/asm/sparcv8.S b/drivers/builtin_openssl2/crypto/bn/asm/sparcv8.S deleted file mode 100644 index 88c5dc480a..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/sparcv8.S +++ /dev/null @@ -1,1458 +0,0 @@ -.ident "sparcv8.s, Version 1.4" -.ident "SPARC v8 ISA artwork by Andy Polyakov " - -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * See bn_asm.sparc.v8plus.S for more details. - */ - -/* - * Revision history. - * - * 1.1 - new loop unrolling model(*); - * 1.2 - made gas friendly; - * 1.3 - fixed problem with /usr/ccs/lib/cpp; - * 1.4 - some retunes; - * - * (*) see bn_asm.sparc.v8plus.S for details - */ - -.section ".text",#alloc,#execinstr -.file "bn_asm.sparc.v8.S" - -.align 32 - -.global bn_mul_add_words -/* - * BN_ULONG bn_mul_add_words(rp,ap,num,w) - * BN_ULONG *rp,*ap; - * int num; - * BN_ULONG w; - */ -bn_mul_add_words: - cmp %o2,0 - bg,a .L_bn_mul_add_words_proceed - ld [%o1],%g2 - retl - clr %o0 - -.L_bn_mul_add_words_proceed: - andcc %o2,-4,%g0 - bz .L_bn_mul_add_words_tail - clr %o5 - -.L_bn_mul_add_words_loop: - ld [%o0],%o4 - ld [%o1+4],%g3 - umul %o3,%g2,%g2 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g2,%o4 - st %o4,[%o0] - addx %g1,0,%o5 - - ld [%o0+4],%o4 - ld [%o1+8],%g2 - umul %o3,%g3,%g3 - dec 4,%o2 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g3,%o4 - st %o4,[%o0+4] - addx %g1,0,%o5 - - ld [%o0+8],%o4 - ld [%o1+12],%g3 - umul %o3,%g2,%g2 - inc 16,%o1 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g2,%o4 - st %o4,[%o0+8] - addx %g1,0,%o5 - - ld [%o0+12],%o4 - umul %o3,%g3,%g3 - inc 16,%o0 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g3,%o4 - st %o4,[%o0-4] - addx %g1,0,%o5 - andcc %o2,-4,%g0 - bnz,a .L_bn_mul_add_words_loop - ld [%o1],%g2 - - tst %o2 - bnz,a .L_bn_mul_add_words_tail - ld [%o1],%g2 -.L_bn_mul_add_words_return: - retl - mov %o5,%o0 - nop - -.L_bn_mul_add_words_tail: - ld [%o0],%o4 - umul %o3,%g2,%g2 - addcc %o4,%o5,%o4 - rd %y,%g1 - addx %g1,0,%g1 - addcc %o4,%g2,%o4 - addx %g1,0,%o5 - deccc %o2 - bz .L_bn_mul_add_words_return - st %o4,[%o0] - - ld [%o1+4],%g2 - ld [%o0+4],%o4 - umul %o3,%g2,%g2 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g2,%o4 - addx %g1,0,%o5 - deccc %o2 - bz .L_bn_mul_add_words_return - st %o4,[%o0+4] - - ld [%o1+8],%g2 - ld [%o0+8],%o4 - umul %o3,%g2,%g2 - rd %y,%g1 - addcc %o4,%o5,%o4 - addx %g1,0,%g1 - addcc %o4,%g2,%o4 - st %o4,[%o0+8] - retl - addx %g1,0,%o0 - -.type bn_mul_add_words,#function -.size bn_mul_add_words,(.-bn_mul_add_words) - -.align 32 - -.global bn_mul_words -/* - * BN_ULONG bn_mul_words(rp,ap,num,w) - * BN_ULONG *rp,*ap; - * int num; - * BN_ULONG w; - */ -bn_mul_words: - cmp %o2,0 - bg,a .L_bn_mul_words_proceeed - ld [%o1],%g2 - retl - clr %o0 - -.L_bn_mul_words_proceeed: - andcc %o2,-4,%g0 - bz .L_bn_mul_words_tail - clr %o5 - -.L_bn_mul_words_loop: - ld [%o1+4],%g3 - umul %o3,%g2,%g2 - addcc %g2,%o5,%g2 - rd %y,%g1 - addx %g1,0,%o5 - st %g2,[%o0] - - ld [%o1+8],%g2 - umul %o3,%g3,%g3 - addcc %g3,%o5,%g3 - rd %y,%g1 - dec 4,%o2 - addx %g1,0,%o5 - st %g3,[%o0+4] - - ld [%o1+12],%g3 - umul %o3,%g2,%g2 - addcc %g2,%o5,%g2 - rd %y,%g1 - inc 16,%o1 - st %g2,[%o0+8] - addx %g1,0,%o5 - - umul %o3,%g3,%g3 - addcc %g3,%o5,%g3 - rd %y,%g1 - inc 16,%o0 - addx %g1,0,%o5 - st %g3,[%o0-4] - andcc %o2,-4,%g0 - nop - bnz,a .L_bn_mul_words_loop - ld [%o1],%g2 - - tst %o2 - bnz,a .L_bn_mul_words_tail - ld [%o1],%g2 -.L_bn_mul_words_return: - retl - mov %o5,%o0 - nop - -.L_bn_mul_words_tail: - umul %o3,%g2,%g2 - addcc %g2,%o5,%g2 - rd %y,%g1 - addx %g1,0,%o5 - deccc %o2 - bz .L_bn_mul_words_return - st %g2,[%o0] - nop - - ld [%o1+4],%g2 - umul %o3,%g2,%g2 - addcc %g2,%o5,%g2 - rd %y,%g1 - addx %g1,0,%o5 - deccc %o2 - bz .L_bn_mul_words_return - st %g2,[%o0+4] - - ld [%o1+8],%g2 - umul %o3,%g2,%g2 - addcc %g2,%o5,%g2 - rd %y,%g1 - st %g2,[%o0+8] - retl - addx %g1,0,%o0 - -.type bn_mul_words,#function -.size bn_mul_words,(.-bn_mul_words) - -.align 32 -.global bn_sqr_words -/* - * void bn_sqr_words(r,a,n) - * BN_ULONG *r,*a; - * int n; - */ -bn_sqr_words: - cmp %o2,0 - bg,a .L_bn_sqr_words_proceeed - ld [%o1],%g2 - retl - clr %o0 - -.L_bn_sqr_words_proceeed: - andcc %o2,-4,%g0 - bz .L_bn_sqr_words_tail - clr %o5 - -.L_bn_sqr_words_loop: - ld [%o1+4],%g3 - umul %g2,%g2,%o4 - st %o4,[%o0] - rd %y,%o5 - st %o5,[%o0+4] - - ld [%o1+8],%g2 - umul %g3,%g3,%o4 - dec 4,%o2 - st %o4,[%o0+8] - rd %y,%o5 - st %o5,[%o0+12] - nop - - ld [%o1+12],%g3 - umul %g2,%g2,%o4 - st %o4,[%o0+16] - rd %y,%o5 - inc 16,%o1 - st %o5,[%o0+20] - - umul %g3,%g3,%o4 - inc 32,%o0 - st %o4,[%o0-8] - rd %y,%o5 - st %o5,[%o0-4] - andcc %o2,-4,%g2 - bnz,a .L_bn_sqr_words_loop - ld [%o1],%g2 - - tst %o2 - nop - bnz,a .L_bn_sqr_words_tail - ld [%o1],%g2 -.L_bn_sqr_words_return: - retl - clr %o0 - -.L_bn_sqr_words_tail: - umul %g2,%g2,%o4 - st %o4,[%o0] - deccc %o2 - rd %y,%o5 - bz .L_bn_sqr_words_return - st %o5,[%o0+4] - - ld [%o1+4],%g2 - umul %g2,%g2,%o4 - st %o4,[%o0+8] - deccc %o2 - rd %y,%o5 - nop - bz .L_bn_sqr_words_return - st %o5,[%o0+12] - - ld [%o1+8],%g2 - umul %g2,%g2,%o4 - st %o4,[%o0+16] - rd %y,%o5 - st %o5,[%o0+20] - retl - clr %o0 - -.type bn_sqr_words,#function -.size bn_sqr_words,(.-bn_sqr_words) - -.align 32 - -.global bn_div_words -/* - * BN_ULONG bn_div_words(h,l,d) - * BN_ULONG h,l,d; - */ -bn_div_words: - wr %o0,%y - udiv %o1,%o2,%o0 - retl - nop - -.type bn_div_words,#function -.size bn_div_words,(.-bn_div_words) - -.align 32 - -.global bn_add_words -/* - * BN_ULONG bn_add_words(rp,ap,bp,n) - * BN_ULONG *rp,*ap,*bp; - * int n; - */ -bn_add_words: - cmp %o3,0 - bg,a .L_bn_add_words_proceed - ld [%o1],%o4 - retl - clr %o0 - -.L_bn_add_words_proceed: - andcc %o3,-4,%g0 - bz .L_bn_add_words_tail - clr %g1 - ba .L_bn_add_words_warn_loop - addcc %g0,0,%g0 ! clear carry flag - -.L_bn_add_words_loop: - ld [%o1],%o4 -.L_bn_add_words_warn_loop: - ld [%o2],%o5 - ld [%o1+4],%g3 - ld [%o2+4],%g4 - dec 4,%o3 - addxcc %o5,%o4,%o5 - st %o5,[%o0] - - ld [%o1+8],%o4 - ld [%o2+8],%o5 - inc 16,%o1 - addxcc %g3,%g4,%g3 - st %g3,[%o0+4] - - ld [%o1-4],%g3 - ld [%o2+12],%g4 - inc 16,%o2 - addxcc %o5,%o4,%o5 - st %o5,[%o0+8] - - inc 16,%o0 - addxcc %g3,%g4,%g3 - st %g3,[%o0-4] - addx %g0,0,%g1 - andcc %o3,-4,%g0 - bnz,a .L_bn_add_words_loop - addcc %g1,-1,%g0 - - tst %o3 - bnz,a .L_bn_add_words_tail - ld [%o1],%o4 -.L_bn_add_words_return: - retl - mov %g1,%o0 - -.L_bn_add_words_tail: - addcc %g1,-1,%g0 - ld [%o2],%o5 - addxcc %o5,%o4,%o5 - addx %g0,0,%g1 - deccc %o3 - bz .L_bn_add_words_return - st %o5,[%o0] - - ld [%o1+4],%o4 - addcc %g1,-1,%g0 - ld [%o2+4],%o5 - addxcc %o5,%o4,%o5 - addx %g0,0,%g1 - deccc %o3 - bz .L_bn_add_words_return - st %o5,[%o0+4] - - ld [%o1+8],%o4 - addcc %g1,-1,%g0 - ld [%o2+8],%o5 - addxcc %o5,%o4,%o5 - st %o5,[%o0+8] - retl - addx %g0,0,%o0 - -.type bn_add_words,#function -.size bn_add_words,(.-bn_add_words) - -.align 32 - -.global bn_sub_words -/* - * BN_ULONG bn_sub_words(rp,ap,bp,n) - * BN_ULONG *rp,*ap,*bp; - * int n; - */ -bn_sub_words: - cmp %o3,0 - bg,a .L_bn_sub_words_proceed - ld [%o1],%o4 - retl - clr %o0 - -.L_bn_sub_words_proceed: - andcc %o3,-4,%g0 - bz .L_bn_sub_words_tail - clr %g1 - ba .L_bn_sub_words_warm_loop - addcc %g0,0,%g0 ! clear carry flag - -.L_bn_sub_words_loop: - ld [%o1],%o4 -.L_bn_sub_words_warm_loop: - ld [%o2],%o5 - ld [%o1+4],%g3 - ld [%o2+4],%g4 - dec 4,%o3 - subxcc %o4,%o5,%o5 - st %o5,[%o0] - - ld [%o1+8],%o4 - ld [%o2+8],%o5 - inc 16,%o1 - subxcc %g3,%g4,%g4 - st %g4,[%o0+4] - - ld [%o1-4],%g3 - ld [%o2+12],%g4 - inc 16,%o2 - subxcc %o4,%o5,%o5 - st %o5,[%o0+8] - - inc 16,%o0 - subxcc %g3,%g4,%g4 - st %g4,[%o0-4] - addx %g0,0,%g1 - andcc %o3,-4,%g0 - bnz,a .L_bn_sub_words_loop - addcc %g1,-1,%g0 - - tst %o3 - nop - bnz,a .L_bn_sub_words_tail - ld [%o1],%o4 -.L_bn_sub_words_return: - retl - mov %g1,%o0 - -.L_bn_sub_words_tail: - addcc %g1,-1,%g0 - ld [%o2],%o5 - subxcc %o4,%o5,%o5 - addx %g0,0,%g1 - deccc %o3 - bz .L_bn_sub_words_return - st %o5,[%o0] - nop - - ld [%o1+4],%o4 - addcc %g1,-1,%g0 - ld [%o2+4],%o5 - subxcc %o4,%o5,%o5 - addx %g0,0,%g1 - deccc %o3 - bz .L_bn_sub_words_return - st %o5,[%o0+4] - - ld [%o1+8],%o4 - addcc %g1,-1,%g0 - ld [%o2+8],%o5 - subxcc %o4,%o5,%o5 - st %o5,[%o0+8] - retl - addx %g0,0,%o0 - -.type bn_sub_words,#function -.size bn_sub_words,(.-bn_sub_words) - -#define FRAME_SIZE -96 - -/* - * Here is register usage map for *all* routines below. - */ -#define t_1 %o0 -#define t_2 %o1 -#define c_1 %o2 -#define c_2 %o3 -#define c_3 %o4 - -#define ap(I) [%i1+4*I] -#define bp(I) [%i2+4*I] -#define rp(I) [%i0+4*I] - -#define a_0 %l0 -#define a_1 %l1 -#define a_2 %l2 -#define a_3 %l3 -#define a_4 %l4 -#define a_5 %l5 -#define a_6 %l6 -#define a_7 %l7 - -#define b_0 %i3 -#define b_1 %i4 -#define b_2 %i5 -#define b_3 %o5 -#define b_4 %g1 -#define b_5 %g2 -#define b_6 %g3 -#define b_7 %g4 - -.align 32 -.global bn_mul_comba8 -/* - * void bn_mul_comba8(r,a,b) - * BN_ULONG *r,*a,*b; - */ -bn_mul_comba8: - save %sp,FRAME_SIZE,%sp - ld ap(0),a_0 - ld bp(0),b_0 - umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3); - ld bp(1),b_1 - rd %y,c_2 - st c_1,rp(0) !r[0]=c1; - - umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1); - ld ap(1),a_1 - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc %g0,t_2,c_3 != - addx %g0,%g0,c_1 - ld ap(2),a_2 - umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - st c_2,rp(1) !r[1]=c2; - addx c_1,%g0,c_1 != - - umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx %g0,%g0,c_2 - ld bp(2),b_2 - umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - ld bp(3),b_3 - addx c_2,%g0,c_2 != - umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - st c_3,rp(2) !r[2]=c3; - - umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 - umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - ld ap(3),a_3 - umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - ld ap(4),a_4 - umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!= - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(3) !r[3]=c1; - - umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - ld bp(4),b_4 - umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - ld bp(5),b_5 - umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - st c_2,rp(4) !r[4]=c2; - - umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - ld ap(5),a_5 - umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - ld ap(6),a_6 - addx c_2,%g0,c_2 != - umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - st c_3,rp(5) !r[5]=c3; - - umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 - umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - ld bp(6),b_6 - addx c_3,%g0,c_3 != - umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - ld bp(7),b_7 - umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - st c_1,rp(6) !r[6]=c1; - addx c_3,%g0,c_3 != - - umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 != - addx %g0,%g0,c_1 - umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - ld ap(7),a_7 - umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - st c_2,rp(7) !r[7]=c2; - - umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 ! - addx c_2,%g0,c_2 - st c_3,rp(8) !r[8]=c3; - - umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 - umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(9) !r[9]=c1; - - umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - st c_2,rp(10) !r[10]=c2; - - umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 != - umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - st c_3,rp(11) !r[11]=c3; - addx c_2,%g0,c_2 != - - umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx %g0,%g0,c_3 - umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - st c_1,rp(12) !r[12]=c1; - addx c_3,%g0,c_3 != - - umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 != - addx %g0,%g0,c_1 - umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - st c_2,rp(13) !r[13]=c2; - - umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - nop != - st c_3,rp(14) !r[14]=c3; - st c_1,rp(15) !r[15]=c1; - - ret - restore %g0,%g0,%o0 - -.type bn_mul_comba8,#function -.size bn_mul_comba8,(.-bn_mul_comba8) - -.align 32 - -.global bn_mul_comba4 -/* - * void bn_mul_comba4(r,a,b) - * BN_ULONG *r,*a,*b; - */ -bn_mul_comba4: - save %sp,FRAME_SIZE,%sp - ld ap(0),a_0 - ld bp(0),b_0 - umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3); - ld bp(1),b_1 - rd %y,c_2 - st c_1,rp(0) !r[0]=c1; - - umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1); - ld ap(1),a_1 - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc %g0,t_2,c_3 - addx %g0,%g0,c_1 - ld ap(2),a_2 - umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - st c_2,rp(1) !r[1]=c2; - - umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - ld bp(2),b_2 - umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - ld bp(3),b_3 - umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - st c_3,rp(2) !r[2]=c3; - - umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 != - umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - ld ap(3),a_3 - umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(3) !r[3]=c1; - - umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - st c_2,rp(4) !r[4]=c2; - - umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - st c_3,rp(5) !r[5]=c3; - addx c_2,%g0,c_2 != - - umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - st c_1,rp(6) !r[6]=c1; - st c_2,rp(7) !r[7]=c2; - - ret - restore %g0,%g0,%o0 - -.type bn_mul_comba4,#function -.size bn_mul_comba4,(.-bn_mul_comba4) - -.align 32 - -.global bn_sqr_comba8 -bn_sqr_comba8: - save %sp,FRAME_SIZE,%sp - ld ap(0),a_0 - ld ap(1),a_1 - umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3); - rd %y,c_2 - st c_1,rp(0) !r[0]=c1; - - ld ap(2),a_2 - umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc %g0,t_2,c_3 - addx %g0,%g0,c_1 != - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 - st c_2,rp(1) !r[1]=c2; - addx c_1,%g0,c_1 != - - umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx %g0,%g0,c_2 - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - ld ap(3),a_3 - umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - st c_3,rp(2) !r[2]=c3; - - umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 != - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - ld ap(4),a_4 - addx c_3,%g0,c_3 != - umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(3) !r[3]=c1; - - umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - ld ap(5),a_5 - umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - st c_2,rp(4) !r[4]=c2; - addx c_1,%g0,c_1 != - - umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx %g0,%g0,c_2 - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - ld ap(6),a_6 - umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - st c_3,rp(5) !r[5]=c3; - - umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx %g0,%g0,c_3 - addcc c_1,t_1,c_1 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3); - addcc c_1,t_1,c_1 != - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 != - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 - ld ap(7),a_7 - umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(6) !r[6]=c1; - - umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - st c_2,rp(7) !r[7]=c2; - - umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - addcc c_3,t_1,c_3 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - addcc c_3,t_1,c_3 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - addcc c_3,t_1,c_3 != - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - st c_3,rp(8) !r[8]=c3; - addx c_2,%g0,c_2 != - - umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx %g0,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(9) !r[9]=c1; - - umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - st c_2,rp(10) !r[10]=c2; - - umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 != - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 - umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx c_2,%g0,c_2 != - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - st c_3,rp(11) !r[11]=c3; - addx c_2,%g0,c_2 != - - umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx %g0,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - st c_1,rp(12) !r[12]=c1; - - umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1); - addcc c_2,t_1,c_2 != - rd %y,t_2 - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - addcc c_2,t_1,c_2 != - addxcc c_3,t_2,c_3 - st c_2,rp(13) !r[13]=c2; - addx c_1,%g0,c_1 != - - umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 != - st c_3,rp(14) !r[14]=c3; - st c_1,rp(15) !r[15]=c1; - - ret - restore %g0,%g0,%o0 - -.type bn_sqr_comba8,#function -.size bn_sqr_comba8,(.-bn_sqr_comba8) - -.align 32 - -.global bn_sqr_comba4 -/* - * void bn_sqr_comba4(r,a) - * BN_ULONG *r,*a; - */ -bn_sqr_comba4: - save %sp,FRAME_SIZE,%sp - ld ap(0),a_0 - umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3); - ld ap(1),a_1 != - rd %y,c_2 - st c_1,rp(0) !r[0]=c1; - - ld ap(2),a_2 - umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 - addxcc %g0,t_2,c_3 - addx %g0,%g0,c_1 != - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 != - st c_2,rp(1) !r[1]=c2; - - umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 != - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 != - addx c_2,%g0,c_2 - ld ap(3),a_3 - umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); - addcc c_3,t_1,c_3 != - rd %y,t_2 - addxcc c_1,t_2,c_1 - st c_3,rp(2) !r[2]=c3; - addx c_2,%g0,c_2 != - - umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx %g0,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - addx c_3,%g0,c_3 - addcc c_1,t_1,c_1 - addxcc c_2,t_2,c_2 - addx c_3,%g0,c_3 != - st c_1,rp(3) !r[3]=c1; - - umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx %g0,%g0,c_1 - addcc c_2,t_1,c_2 - addxcc c_3,t_2,c_3 != - addx c_1,%g0,c_1 - umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); - addcc c_2,t_1,c_2 - rd %y,t_2 != - addxcc c_3,t_2,c_3 - addx c_1,%g0,c_1 - st c_2,rp(4) !r[4]=c2; - - umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2); - addcc c_3,t_1,c_3 - rd %y,t_2 - addxcc c_1,t_2,c_1 - addx %g0,%g0,c_2 != - addcc c_3,t_1,c_3 - addxcc c_1,t_2,c_1 - st c_3,rp(5) !r[5]=c3; - addx c_2,%g0,c_2 != - - umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3); - addcc c_1,t_1,c_1 - rd %y,t_2 - addxcc c_2,t_2,c_2 != - st c_1,rp(6) !r[6]=c1; - st c_2,rp(7) !r[7]=c2; - - ret - restore %g0,%g0,%o0 - -.type bn_sqr_comba4,#function -.size bn_sqr_comba4,(.-bn_sqr_comba4) - -.align 32 diff --git a/drivers/builtin_openssl2/crypto/bn/asm/sparcv8plus.S b/drivers/builtin_openssl2/crypto/bn/asm/sparcv8plus.S deleted file mode 100644 index 63de1860f2..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/sparcv8plus.S +++ /dev/null @@ -1,1558 +0,0 @@ -.ident "sparcv8plus.s, Version 1.4" -.ident "SPARC v9 ISA artwork by Andy Polyakov " - -/* - * ==================================================================== - * Written by Andy Polyakov for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * Questions-n-answers. - * - * Q. How to compile? - * A. With SC4.x/SC5.x: - * - * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o - * - * and with gcc: - * - * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o - * - * or if above fails (it does if you have gas installed): - * - * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o - * - * Quick-n-dirty way to fuse the module into the library. - * Provided that the library is already configured and built - * (in 0.9.2 case with no-asm option): - * - * # cd crypto/bn - * # cp /some/place/bn_asm.sparc.v8plus.S . - * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o - * # make - * # cd ../.. - * # make; make test - * - * Quick-n-dirty way to get rid of it: - * - * # cd crypto/bn - * # touch bn_asm.c - * # make - * # cd ../.. - * # make; make test - * - * Q. V8plus achitecture? What kind of beast is that? - * A. Well, it's rather a programming model than an architecture... - * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under - * special conditions, namely when kernel doesn't preserve upper - * 32 bits of otherwise 64-bit registers during a context switch. - * - * Q. Why just UltraSPARC? What about SuperSPARC? - * A. Original release did target UltraSPARC only. Now SuperSPARC - * version is provided along. Both version share bn_*comba[48] - * implementations (see comment later in code for explanation). - * But what's so special about this UltraSPARC implementation? - * Why didn't I let compiler do the job? Trouble is that most of - * available compilers (well, SC5.0 is the only exception) don't - * attempt to take advantage of UltraSPARC's 64-bitness under - * 32-bit kernels even though it's perfectly possible (see next - * question). - * - * Q. 64-bit registers under 32-bit kernels? Didn't you just say it - * doesn't work? - * A. You can't adress *all* registers as 64-bit wide:-( The catch is - * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully - * preserved if you're in a leaf function, i.e. such never calling - * any other functions. All functions in this module are leaf and - * 10 registers is a handful. And as a matter of fact none-"comba" - * routines don't require even that much and I could even afford to - * not allocate own stack frame for 'em:-) - * - * Q. What about 64-bit kernels? - * A. What about 'em? Just kidding:-) Pure 64-bit version is currently - * under evaluation and development... - * - * Q. What about shared libraries? - * A. What about 'em? Kidding again:-) Code does *not* contain any - * code position dependencies and it's safe to include it into - * shared library as is. - * - * Q. How much faster does it go? - * A. Do you have a good benchmark? In either case below is what I - * experience with crypto/bn/expspeed.c test program: - * - * v8plus module on U10/300MHz against bn_asm.c compiled with: - * - * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12% - * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35% - * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45% - * - * v8 module on SS10/60MHz against bn_asm.c compiled with: - * - * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10% - * cc-4.2 -xarch=v8 -xO5 -xdepend +10% - * egcs-1.1.2 -mv8 -O3 +35-45% - * - * As you can see it's damn hard to beat the new Sun C compiler - * and it's in first place GNU C users who will appreciate this - * assembler implementation:-) - */ - -/* - * Revision history. - * - * 1.0 - initial release; - * 1.1 - new loop unrolling model(*); - * - some more fine tuning; - * 1.2 - made gas friendly; - * - updates to documentation concerning v9; - * - new performance comparison matrix; - * 1.3 - fixed problem with /usr/ccs/lib/cpp; - * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient) - * resulting in slight overall performance kick; - * - some retunes; - * - support for GNU as added; - * - * (*) Originally unrolled loop looked like this: - * for (;;) { - * op(p+0); if (--n==0) break; - * op(p+1); if (--n==0) break; - * op(p+2); if (--n==0) break; - * op(p+3); if (--n==0) break; - * p+=4; - * } - * I unroll according to following: - * while (n&~3) { - * op(p+0); op(p+1); op(p+2); op(p+3); - * p+=4; n=-4; - * } - * if (n) { - * op(p+0); if (--n==0) return; - * op(p+2); if (--n==0) return; - * op(p+3); return; - * } - */ - -#if defined(__SUNPRO_C) && defined(__sparcv9) - /* They've said -xarch=v9 at command line */ - .register %g2,#scratch - .register %g3,#scratch -# define FRAME_SIZE -192 -#elif defined(__GNUC__) && defined(__arch64__) - /* They've said -m64 at command line */ - .register %g2,#scratch - .register %g3,#scratch -# define FRAME_SIZE -192 -#else -# define FRAME_SIZE -96 -#endif -/* - * GNU assembler can't stand stuw:-( - */ -#define stuw st - -.section ".text",#alloc,#execinstr -.file "bn_asm.sparc.v8plus.S" - -.align 32 - -.global bn_mul_add_words -/* - * BN_ULONG bn_mul_add_words(rp,ap,num,w) - * BN_ULONG *rp,*ap; - * int num; - * BN_ULONG w; - */ -bn_mul_add_words: - sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_mul_add_words_proceed - lduw [%o1],%g2 - retl - clr %o0 - nop - nop - nop - -.L_bn_mul_add_words_proceed: - srl %o3,%g0,%o3 ! clruw %o3 - andcc %o2,-4,%g0 - bz,pn %icc,.L_bn_mul_add_words_tail - clr %o5 - -.L_bn_mul_add_words_loop: ! wow! 32 aligned! - lduw [%o0],%g1 - lduw [%o1+4],%g3 - mulx %o3,%g2,%g2 - add %g1,%o5,%o4 - nop - add %o4,%g2,%o4 - stuw %o4,[%o0] - srlx %o4,32,%o5 - - lduw [%o0+4],%g1 - lduw [%o1+8],%g2 - mulx %o3,%g3,%g3 - add %g1,%o5,%o4 - dec 4,%o2 - add %o4,%g3,%o4 - stuw %o4,[%o0+4] - srlx %o4,32,%o5 - - lduw [%o0+8],%g1 - lduw [%o1+12],%g3 - mulx %o3,%g2,%g2 - add %g1,%o5,%o4 - inc 16,%o1 - add %o4,%g2,%o4 - stuw %o4,[%o0+8] - srlx %o4,32,%o5 - - lduw [%o0+12],%g1 - mulx %o3,%g3,%g3 - add %g1,%o5,%o4 - inc 16,%o0 - add %o4,%g3,%o4 - andcc %o2,-4,%g0 - stuw %o4,[%o0-4] - srlx %o4,32,%o5 - bnz,a,pt %icc,.L_bn_mul_add_words_loop - lduw [%o1],%g2 - - brnz,a,pn %o2,.L_bn_mul_add_words_tail - lduw [%o1],%g2 -.L_bn_mul_add_words_return: - retl - mov %o5,%o0 - -.L_bn_mul_add_words_tail: - lduw [%o0],%g1 - mulx %o3,%g2,%g2 - add %g1,%o5,%o4 - dec %o2 - add %o4,%g2,%o4 - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_mul_add_words_return - stuw %o4,[%o0] - - lduw [%o1+4],%g2 - lduw [%o0+4],%g1 - mulx %o3,%g2,%g2 - add %g1,%o5,%o4 - dec %o2 - add %o4,%g2,%o4 - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_mul_add_words_return - stuw %o4,[%o0+4] - - lduw [%o1+8],%g2 - lduw [%o0+8],%g1 - mulx %o3,%g2,%g2 - add %g1,%o5,%o4 - add %o4,%g2,%o4 - stuw %o4,[%o0+8] - retl - srlx %o4,32,%o0 - -.type bn_mul_add_words,#function -.size bn_mul_add_words,(.-bn_mul_add_words) - -.align 32 - -.global bn_mul_words -/* - * BN_ULONG bn_mul_words(rp,ap,num,w) - * BN_ULONG *rp,*ap; - * int num; - * BN_ULONG w; - */ -bn_mul_words: - sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_mul_words_proceeed - lduw [%o1],%g2 - retl - clr %o0 - nop - nop - nop - -.L_bn_mul_words_proceeed: - srl %o3,%g0,%o3 ! clruw %o3 - andcc %o2,-4,%g0 - bz,pn %icc,.L_bn_mul_words_tail - clr %o5 - -.L_bn_mul_words_loop: ! wow! 32 aligned! - lduw [%o1+4],%g3 - mulx %o3,%g2,%g2 - add %g2,%o5,%o4 - nop - stuw %o4,[%o0] - srlx %o4,32,%o5 - - lduw [%o1+8],%g2 - mulx %o3,%g3,%g3 - add %g3,%o5,%o4 - dec 4,%o2 - stuw %o4,[%o0+4] - srlx %o4,32,%o5 - - lduw [%o1+12],%g3 - mulx %o3,%g2,%g2 - add %g2,%o5,%o4 - inc 16,%o1 - stuw %o4,[%o0+8] - srlx %o4,32,%o5 - - mulx %o3,%g3,%g3 - add %g3,%o5,%o4 - inc 16,%o0 - stuw %o4,[%o0-4] - srlx %o4,32,%o5 - andcc %o2,-4,%g0 - bnz,a,pt %icc,.L_bn_mul_words_loop - lduw [%o1],%g2 - nop - nop - - brnz,a,pn %o2,.L_bn_mul_words_tail - lduw [%o1],%g2 -.L_bn_mul_words_return: - retl - mov %o5,%o0 - -.L_bn_mul_words_tail: - mulx %o3,%g2,%g2 - add %g2,%o5,%o4 - dec %o2 - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_mul_words_return - stuw %o4,[%o0] - - lduw [%o1+4],%g2 - mulx %o3,%g2,%g2 - add %g2,%o5,%o4 - dec %o2 - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_mul_words_return - stuw %o4,[%o0+4] - - lduw [%o1+8],%g2 - mulx %o3,%g2,%g2 - add %g2,%o5,%o4 - stuw %o4,[%o0+8] - retl - srlx %o4,32,%o0 - -.type bn_mul_words,#function -.size bn_mul_words,(.-bn_mul_words) - -.align 32 -.global bn_sqr_words -/* - * void bn_sqr_words(r,a,n) - * BN_ULONG *r,*a; - * int n; - */ -bn_sqr_words: - sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_sqr_words_proceeed - lduw [%o1],%g2 - retl - clr %o0 - nop - nop - nop - -.L_bn_sqr_words_proceeed: - andcc %o2,-4,%g0 - nop - bz,pn %icc,.L_bn_sqr_words_tail - nop - -.L_bn_sqr_words_loop: ! wow! 32 aligned! - lduw [%o1+4],%g3 - mulx %g2,%g2,%o4 - stuw %o4,[%o0] - srlx %o4,32,%o5 - stuw %o5,[%o0+4] - nop - - lduw [%o1+8],%g2 - mulx %g3,%g3,%o4 - dec 4,%o2 - stuw %o4,[%o0+8] - srlx %o4,32,%o5 - stuw %o5,[%o0+12] - - lduw [%o1+12],%g3 - mulx %g2,%g2,%o4 - srlx %o4,32,%o5 - stuw %o4,[%o0+16] - inc 16,%o1 - stuw %o5,[%o0+20] - - mulx %g3,%g3,%o4 - inc 32,%o0 - stuw %o4,[%o0-8] - srlx %o4,32,%o5 - andcc %o2,-4,%g2 - stuw %o5,[%o0-4] - bnz,a,pt %icc,.L_bn_sqr_words_loop - lduw [%o1],%g2 - nop - - brnz,a,pn %o2,.L_bn_sqr_words_tail - lduw [%o1],%g2 -.L_bn_sqr_words_return: - retl - clr %o0 - -.L_bn_sqr_words_tail: - mulx %g2,%g2,%o4 - dec %o2 - stuw %o4,[%o0] - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_sqr_words_return - stuw %o5,[%o0+4] - - lduw [%o1+4],%g2 - mulx %g2,%g2,%o4 - dec %o2 - stuw %o4,[%o0+8] - srlx %o4,32,%o5 - brz,pt %o2,.L_bn_sqr_words_return - stuw %o5,[%o0+12] - - lduw [%o1+8],%g2 - mulx %g2,%g2,%o4 - srlx %o4,32,%o5 - stuw %o4,[%o0+16] - stuw %o5,[%o0+20] - retl - clr %o0 - -.type bn_sqr_words,#function -.size bn_sqr_words,(.-bn_sqr_words) - -.align 32 -.global bn_div_words -/* - * BN_ULONG bn_div_words(h,l,d) - * BN_ULONG h,l,d; - */ -bn_div_words: - sllx %o0,32,%o0 - or %o0,%o1,%o0 - udivx %o0,%o2,%o0 - retl - srl %o0,%g0,%o0 ! clruw %o0 - -.type bn_div_words,#function -.size bn_div_words,(.-bn_div_words) - -.align 32 - -.global bn_add_words -/* - * BN_ULONG bn_add_words(rp,ap,bp,n) - * BN_ULONG *rp,*ap,*bp; - * int n; - */ -bn_add_words: - sra %o3,%g0,%o3 ! signx %o3 - brgz,a %o3,.L_bn_add_words_proceed - lduw [%o1],%o4 - retl - clr %o0 - -.L_bn_add_words_proceed: - andcc %o3,-4,%g0 - bz,pn %icc,.L_bn_add_words_tail - addcc %g0,0,%g0 ! clear carry flag - -.L_bn_add_words_loop: ! wow! 32 aligned! - dec 4,%o3 - lduw [%o2],%o5 - lduw [%o1+4],%g1 - lduw [%o2+4],%g2 - lduw [%o1+8],%g3 - lduw [%o2+8],%g4 - addccc %o5,%o4,%o5 - stuw %o5,[%o0] - - lduw [%o1+12],%o4 - lduw [%o2+12],%o5 - inc 16,%o1 - addccc %g1,%g2,%g1 - stuw %g1,[%o0+4] - - inc 16,%o2 - addccc %g3,%g4,%g3 - stuw %g3,[%o0+8] - - inc 16,%o0 - addccc %o5,%o4,%o5 - stuw %o5,[%o0-4] - and %o3,-4,%g1 - brnz,a,pt %g1,.L_bn_add_words_loop - lduw [%o1],%o4 - - brnz,a,pn %o3,.L_bn_add_words_tail - lduw [%o1],%o4 -.L_bn_add_words_return: - clr %o0 - retl - movcs %icc,1,%o0 - nop - -.L_bn_add_words_tail: - lduw [%o2],%o5 - dec %o3 - addccc %o5,%o4,%o5 - brz,pt %o3,.L_bn_add_words_return - stuw %o5,[%o0] - - lduw [%o1+4],%o4 - lduw [%o2+4],%o5 - dec %o3 - addccc %o5,%o4,%o5 - brz,pt %o3,.L_bn_add_words_return - stuw %o5,[%o0+4] - - lduw [%o1+8],%o4 - lduw [%o2+8],%o5 - addccc %o5,%o4,%o5 - stuw %o5,[%o0+8] - clr %o0 - retl - movcs %icc,1,%o0 - -.type bn_add_words,#function -.size bn_add_words,(.-bn_add_words) - -.global bn_sub_words -/* - * BN_ULONG bn_sub_words(rp,ap,bp,n) - * BN_ULONG *rp,*ap,*bp; - * int n; - */ -bn_sub_words: - sra %o3,%g0,%o3 ! signx %o3 - brgz,a %o3,.L_bn_sub_words_proceed - lduw [%o1],%o4 - retl - clr %o0 - -.L_bn_sub_words_proceed: - andcc %o3,-4,%g0 - bz,pn %icc,.L_bn_sub_words_tail - addcc %g0,0,%g0 ! clear carry flag - -.L_bn_sub_words_loop: ! wow! 32 aligned! - dec 4,%o3 - lduw [%o2],%o5 - lduw [%o1+4],%g1 - lduw [%o2+4],%g2 - lduw [%o1+8],%g3 - lduw [%o2+8],%g4 - subccc %o4,%o5,%o5 - stuw %o5,[%o0] - - lduw [%o1+12],%o4 - lduw [%o2+12],%o5 - inc 16,%o1 - subccc %g1,%g2,%g2 - stuw %g2,[%o0+4] - - inc 16,%o2 - subccc %g3,%g4,%g4 - stuw %g4,[%o0+8] - - inc 16,%o0 - subccc %o4,%o5,%o5 - stuw %o5,[%o0-4] - and %o3,-4,%g1 - brnz,a,pt %g1,.L_bn_sub_words_loop - lduw [%o1],%o4 - - brnz,a,pn %o3,.L_bn_sub_words_tail - lduw [%o1],%o4 -.L_bn_sub_words_return: - clr %o0 - retl - movcs %icc,1,%o0 - nop - -.L_bn_sub_words_tail: ! wow! 32 aligned! - lduw [%o2],%o5 - dec %o3 - subccc %o4,%o5,%o5 - brz,pt %o3,.L_bn_sub_words_return - stuw %o5,[%o0] - - lduw [%o1+4],%o4 - lduw [%o2+4],%o5 - dec %o3 - subccc %o4,%o5,%o5 - brz,pt %o3,.L_bn_sub_words_return - stuw %o5,[%o0+4] - - lduw [%o1+8],%o4 - lduw [%o2+8],%o5 - subccc %o4,%o5,%o5 - stuw %o5,[%o0+8] - clr %o0 - retl - movcs %icc,1,%o0 - -.type bn_sub_words,#function -.size bn_sub_words,(.-bn_sub_words) - -/* - * Code below depends on the fact that upper parts of the %l0-%l7 - * and %i0-%i7 are zeroed by kernel after context switch. In - * previous versions this comment stated that "the trouble is that - * it's not feasible to implement the mumbo-jumbo in less V9 - * instructions:-(" which apparently isn't true thanks to - * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement - * results not from the shorter code, but from elimination of - * multicycle none-pairable 'rd %y,%rd' instructions. - * - * Andy. - */ - -/* - * Here is register usage map for *all* routines below. - */ -#define t_1 %o0 -#define t_2 %o1 -#define c_12 %o2 -#define c_3 %o3 - -#define ap(I) [%i1+4*I] -#define bp(I) [%i2+4*I] -#define rp(I) [%i0+4*I] - -#define a_0 %l0 -#define a_1 %l1 -#define a_2 %l2 -#define a_3 %l3 -#define a_4 %l4 -#define a_5 %l5 -#define a_6 %l6 -#define a_7 %l7 - -#define b_0 %i3 -#define b_1 %i4 -#define b_2 %i5 -#define b_3 %o4 -#define b_4 %o5 -#define b_5 %o7 -#define b_6 %g1 -#define b_7 %g4 - -.align 32 -.global bn_mul_comba8 -/* - * void bn_mul_comba8(r,a,b) - * BN_ULONG *r,*a,*b; - */ -bn_mul_comba8: - save %sp,FRAME_SIZE,%sp - mov 1,t_2 - lduw ap(0),a_0 - sllx t_2,32,t_2 - lduw bp(0),b_0 != - lduw bp(1),b_1 - mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3); - srlx t_1,32,c_12 - stuw t_1,rp(0) !=!r[0]=c1; - - lduw ap(1),a_1 - mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(2),a_2 - mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 != - stuw t_1,rp(1) !r[1]=c2; - or c_12,c_3,c_12 - - mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); - addcc c_12,t_1,c_12 != - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw bp(2),b_2 != - mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - lduw bp(3),b_3 - mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(2) !r[2]=c3; - or c_12,c_3,c_12 != - - mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - lduw ap(3),a_3 - mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); - addcc c_12,t_1,c_12 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(4),a_4 - mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!= - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 != - stuw t_1,rp(3) !r[3]=c1; - or c_12,c_3,c_12 - - mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1); - addcc c_12,t_1,c_12 != - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw bp(4),b_4 != - mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - lduw bp(5),b_5 - mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(4) !r[4]=c2; - or c_12,c_3,c_12 != - - mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - lduw ap(5),a_5 - mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2); - addcc c_12,t_1,c_12 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(6),a_6 - mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 != - stuw t_1,rp(5) !r[5]=c3; - or c_12,c_3,c_12 - - mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3); - addcc c_12,t_1,c_12 != - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw bp(6),b_6 != - mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - lduw bp(7),b_7 - mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(6) !r[6]=c1; - or c_12,c_3,c_12 != - - mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - lduw ap(7),a_7 - mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 != - stuw t_1,rp(7) !r[7]=c2; - or c_12,c_3,c_12 - - mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - srlx t_1,32,c_12 - stuw t_1,rp(8) !r[8]=c3; - or c_12,c_3,c_12 - - mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(9) !r[9]=c1; - or c_12,c_3,c_12 != - - mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(10) !r[10]=c2; - or c_12,c_3,c_12 != - - mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(11) !r[11]=c3; - or c_12,c_3,c_12 != - - mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(12) !r[12]=c1; - or c_12,c_3,c_12 != - - mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - st t_1,rp(13) !r[13]=c2; - or c_12,c_3,c_12 != - - mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2); - addcc c_12,t_1,t_1 - srlx t_1,32,c_12 != - stuw t_1,rp(14) !r[14]=c3; - stuw c_12,rp(15) !r[15]=c1; - - ret - restore %g0,%g0,%o0 != - -.type bn_mul_comba8,#function -.size bn_mul_comba8,(.-bn_mul_comba8) - -.align 32 - -.global bn_mul_comba4 -/* - * void bn_mul_comba4(r,a,b) - * BN_ULONG *r,*a,*b; - */ -bn_mul_comba4: - save %sp,FRAME_SIZE,%sp - lduw ap(0),a_0 - mov 1,t_2 - lduw bp(0),b_0 - sllx t_2,32,t_2 != - lduw bp(1),b_1 - mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3); - srlx t_1,32,c_12 - stuw t_1,rp(0) !=!r[0]=c1; - - lduw ap(1),a_1 - mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(2),a_2 - mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 != - stuw t_1,rp(1) !r[1]=c2; - or c_12,c_3,c_12 - - mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); - addcc c_12,t_1,c_12 != - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw bp(2),b_2 != - mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 != - lduw bp(3),b_3 - mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(2) !r[2]=c3; - or c_12,c_3,c_12 != - - mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 != - add c_3,t_2,c_3 - lduw ap(3),a_3 - mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); - addcc c_12,t_1,c_12 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!= - addcc c_12,t_1,t_1 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(3) !=!r[3]=c1; - or c_12,c_3,c_12 - - mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1); - addcc c_12,t_1,c_12 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); - addcc c_12,t_1,t_1 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(4) !=!r[4]=c2; - or c_12,c_3,c_12 - - mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); - addcc c_12,t_1,t_1 != - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(5) !=!r[5]=c3; - or c_12,c_3,c_12 - - mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3); - addcc c_12,t_1,t_1 - srlx t_1,32,c_12 != - stuw t_1,rp(6) !r[6]=c1; - stuw c_12,rp(7) !r[7]=c2; - - ret - restore %g0,%g0,%o0 - -.type bn_mul_comba4,#function -.size bn_mul_comba4,(.-bn_mul_comba4) - -.align 32 - -.global bn_sqr_comba8 -bn_sqr_comba8: - save %sp,FRAME_SIZE,%sp - mov 1,t_2 - lduw ap(0),a_0 - sllx t_2,32,t_2 - lduw ap(1),a_1 - mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3); - srlx t_1,32,c_12 - stuw t_1,rp(0) !r[0]=c1; - - lduw ap(2),a_2 - mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(1) !r[1]=c2; - or c_12,c_3,c_12 - - mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(3),a_3 - mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(2) !r[2]=c3; - or c_12,c_3,c_12 - - mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(4),a_4 - mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - st t_1,rp(3) !r[3]=c1; - or c_12,c_3,c_12 - - mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(5),a_5 - mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(4) !r[4]=c2; - or c_12,c_3,c_12 - - mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(6),a_6 - mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(5) !r[5]=c3; - or c_12,c_3,c_12 - - mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(7),a_7 - mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(6) !r[6]=c1; - or c_12,c_3,c_12 - - mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(7) !r[7]=c2; - or c_12,c_3,c_12 - - mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(8) !r[8]=c3; - or c_12,c_3,c_12 - - mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(9) !r[9]=c1; - or c_12,c_3,c_12 - - mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(10) !r[10]=c2; - or c_12,c_3,c_12 - - mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(11) !r[11]=c3; - or c_12,c_3,c_12 - - mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(12) !r[12]=c1; - or c_12,c_3,c_12 - - mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(13) !r[13]=c2; - or c_12,c_3,c_12 - - mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2); - addcc c_12,t_1,t_1 - srlx t_1,32,c_12 - stuw t_1,rp(14) !r[14]=c3; - stuw c_12,rp(15) !r[15]=c1; - - ret - restore %g0,%g0,%o0 - -.type bn_sqr_comba8,#function -.size bn_sqr_comba8,(.-bn_sqr_comba8) - -.align 32 - -.global bn_sqr_comba4 -/* - * void bn_sqr_comba4(r,a) - * BN_ULONG *r,*a; - */ -bn_sqr_comba4: - save %sp,FRAME_SIZE,%sp - mov 1,t_2 - lduw ap(0),a_0 - sllx t_2,32,t_2 - lduw ap(1),a_1 - mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3); - srlx t_1,32,c_12 - stuw t_1,rp(0) !r[0]=c1; - - lduw ap(2),a_2 - mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(1) !r[1]=c2; - or c_12,c_3,c_12 - - mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - lduw ap(3),a_3 - mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(2) !r[2]=c3; - or c_12,c_3,c_12 - - mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(3) !r[3]=c1; - or c_12,c_3,c_12 - - mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,c_12 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(4) !r[4]=c2; - or c_12,c_3,c_12 - - mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); - addcc c_12,t_1,c_12 - clr c_3 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - addcc c_12,t_1,t_1 - bcs,a %xcc,.+8 - add c_3,t_2,c_3 - srlx t_1,32,c_12 - stuw t_1,rp(5) !r[5]=c3; - or c_12,c_3,c_12 - - mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3); - addcc c_12,t_1,t_1 - srlx t_1,32,c_12 - stuw t_1,rp(6) !r[6]=c1; - stuw c_12,rp(7) !r[7]=c2; - - ret - restore %g0,%g0,%o0 - -.type bn_sqr_comba4,#function -.size bn_sqr_comba4,(.-bn_sqr_comba4) - -.align 32 diff --git a/drivers/builtin_openssl2/crypto/bn/asm/sparcv9-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/sparcv9-mont.pl deleted file mode 100644 index b8fb1e8a25..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/sparcv9-mont.pl +++ /dev/null @@ -1,606 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# December 2005 -# -# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons -# for undertaken effort are multiple. First of all, UltraSPARC is not -# the whole SPARCv9 universe and other VIS-free implementations deserve -# optimized code as much. Secondly, newly introduced UltraSPARC T1, -# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes, -# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with -# several integrated RSA/DSA accelerator circuits accessible through -# kernel driver [only(*)], but having decent user-land software -# implementation is important too. Finally, reasons like desire to -# experiment with dedicated squaring procedure. Yes, this module -# implements one, because it was easiest to draft it in SPARCv9 -# instructions... - -# (*) Engine accessing the driver in question is on my TODO list. -# For reference, acceleator is estimated to give 6 to 10 times -# improvement on single-threaded RSA sign. It should be noted -# that 6-10x improvement coefficient does not actually mean -# something extraordinary in terms of absolute [single-threaded] -# performance, as SPARCv9 instruction set is by all means least -# suitable for high performance crypto among other 64 bit -# platforms. 6-10x factor simply places T1 in same performance -# domain as say AMD64 and IA-64. Improvement of RSA verify don't -# appear impressive at all, but it's the sign operation which is -# far more critical/interesting. - -# You might notice that inner loops are modulo-scheduled:-) This has -# essentially negligible impact on UltraSPARC performance, it's -# Fujitsu SPARC64 V users who should notice and hopefully appreciate -# the advantage... Currently this module surpasses sparcv9a-mont.pl -# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a -# module still have hidden potential [see TODO list there], which is -# estimated to be larger than 20%... - -# int bn_mul_mont( -$rp="%i0"; # BN_ULONG *rp, -$ap="%i1"; # const BN_ULONG *ap, -$bp="%i2"; # const BN_ULONG *bp, -$np="%i3"; # const BN_ULONG *np, -$n0="%i4"; # const BN_ULONG *n0, -$num="%i5"; # int num); - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=128; } - -$car0="%o0"; -$car1="%o1"; -$car2="%o2"; # 1 bit -$acc0="%o3"; -$acc1="%o4"; -$mask="%g1"; # 32 bits, what a waste... -$tmp0="%g4"; -$tmp1="%g5"; - -$i="%l0"; -$j="%l1"; -$mul0="%l2"; -$mul1="%l3"; -$tp="%l4"; -$apj="%l5"; -$npj="%l6"; -$tpj="%l7"; - -$fname="bn_mul_mont_int"; - -$code=<<___; -.section ".text",#alloc,#execinstr - -.global $fname -.align 32 -$fname: - cmp %o5,4 ! 128 bits minimum - bge,pt %icc,.Lenter - sethi %hi(0xffffffff),$mask - retl - clr %o0 -.align 32 -.Lenter: - save %sp,-$frame,%sp - sll $num,2,$num ! num*=4 - or $mask,%lo(0xffffffff),$mask - ld [$n0],$n0 - cmp $ap,$bp - and $num,$mask,$num - ld [$bp],$mul0 ! bp[0] - nop - - add %sp,$bias,%o7 ! real top of stack - ld [$ap],$car0 ! ap[0] ! redundant in squaring context - sub %o7,$num,%o7 - ld [$ap+4],$apj ! ap[1] - and %o7,-1024,%o7 - ld [$np],$car1 ! np[0] - sub %o7,$bias,%sp ! alloca - ld [$np+4],$npj ! np[1] - be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont - mov 12,$j - - mulx $car0,$mul0,$car0 ! ap[0]*bp[0] - mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0] - and $car0,$mask,$acc0 - add %sp,$bias+$frame,$tp - ld [$ap+8],$apj !prologue! - - mulx $n0,$acc0,$mul1 ! "t[0]"*n0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 - mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - ld [$np+8],$npj !prologue! - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.L1st: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - add $j,4,$j ! j++ - mov $tmp0,$acc0 - st $car1,[$tp] - cmp $j,$num - mov $tmp1,$acc1 - srlx $car1,32,$car1 - bl %icc,.L1st - add $tp,4,$tp ! tp++ -!.L1st - - mulx $apj,$mul0,$tmp0 !epilogue! - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $tmp0,$car0,$car0 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car1 - - add $car0,$car1,$car1 - st $car1,[$tp+8] - srlx $car1,32,$car2 - - mov 4,$i ! i++ - ld [$bp+4],$mul0 ! bp[1] -.Louter: - add %sp,$bias+$frame,$tp - ld [$ap],$car0 ! ap[0] - ld [$ap+4],$apj ! ap[1] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - ld [$tp],$tmp1 ! tp[0] - ld [$tp+4],$tpj ! tp[1] - mov 12,$j - - mulx $car0,$mul0,$car0 - mulx $apj,$mul0,$tmp0 !prologue! - add $tmp1,$car0,$car0 - ld [$ap+8],$apj !prologue! - and $car0,$mask,$acc0 - - mulx $n0,$acc0,$mul1 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 - mulx $npj,$mul1,$acc1 !prologue! - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - ld [$np+8],$npj !prologue! - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.Linner: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $tpj,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - add $acc0,$car0,$car0 - add $acc1,$car1,$car1 - ld [$np+$j],$npj ! np[j] - and $car0,$mask,$acc0 - ld [$tp+8],$tpj ! tp[j] - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - add $j,4,$j ! j++ - mov $tmp0,$acc0 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - mov $tmp1,$acc1 - cmp $j,$num - bl %icc,.Linner - add $tp,4,$tp ! tp++ -!.Linner - - mulx $apj,$mul0,$tmp0 !epilogue! - mulx $npj,$mul1,$tmp1 - add $tpj,$car0,$car0 - add $acc0,$car0,$car0 - ld [$tp+8],$tpj ! tp[j] - and $car0,$mask,$acc0 - add $acc1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $tpj,$car0,$car0 - add $tmp0,$car0,$car0 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - add $acc0,$car1,$car1 - st $car1,[$tp+4] ! tp[j-1] - srlx $car0,32,$car0 - add $i,4,$i ! i++ - srlx $car1,32,$car1 - - add $car0,$car1,$car1 - cmp $i,$num - add $car2,$car1,$car1 - st $car1,[$tp+8] - - srlx $car1,32,$car2 - bl,a %icc,.Louter - ld [$bp+$i],$mul0 ! bp[i] -!.Louter - - add $tp,12,$tp - -.Ltail: - add $np,$num,$np - add $rp,$num,$rp - mov $tp,$ap - sub %g0,$num,%o7 ! k=-num - ba .Lsub - subcc %g0,%g0,%g0 ! clear %icc.c -.align 16 -.Lsub: - ld [$tp+%o7],%o0 - ld [$np+%o7],%o1 - subccc %o0,%o1,%o1 ! tp[j]-np[j] - add $rp,%o7,$i - add %o7,4,%o7 - brnz %o7,.Lsub - st %o1,[$i] - subc $car2,0,$car2 ! handle upmost overflow bit - and $tp,$car2,$ap - andn $rp,$car2,$np - or $ap,$np,$ap - sub %g0,$num,%o7 - -.Lcopy: - ld [$ap+%o7],%o0 ! copy or in-place refresh - st %g0,[$tp+%o7] ! zap tp - st %o0,[$rp+%o7] - add %o7,4,%o7 - brnz %o7,.Lcopy - nop - mov 1,%i0 - ret - restore -___ - -######## -######## .Lbn_sqr_mont gives up to 20% *overall* improvement over -######## code without following dedicated squaring procedure. -######## -$sbit="%i2"; # re-use $bp! - -$code.=<<___; -.align 32 -.Lbn_sqr_mont: - mulx $mul0,$mul0,$car0 ! ap[0]*ap[0] - mulx $apj,$mul0,$tmp0 !prologue! - and $car0,$mask,$acc0 - add %sp,$bias+$frame,$tp - ld [$ap+8],$apj !prologue! - - mulx $n0,$acc0,$mul1 ! "t[0]"*n0 - srlx $car0,32,$car0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0 - mulx $npj,$mul1,$acc1 !prologue! - and $car0,1,$sbit - ld [$np+8],$npj !prologue! - srlx $car0,1,$car0 - add $acc0,$car1,$car1 - srlx $car1,32,$car1 - mov $tmp0,$acc0 !prologue! - -.Lsqr_1st: - mulx $apj,$mul0,$tmp0 - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 ! ap[j]*a0+c0 - add $acc1,$car1,$car1 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - mov $tmp1,$acc1 - srlx $acc0,32,$sbit - add $j,4,$j ! j++ - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - st $car1,[$tp] - mov $tmp0,$acc0 - srlx $car1,32,$car1 - bl %icc,.Lsqr_1st - add $tp,4,$tp ! tp++ -!.Lsqr_1st - - mulx $apj,$mul0,$tmp0 ! epilogue - mulx $npj,$mul1,$tmp1 - add $acc0,$car0,$car0 ! ap[j]*a0+c0 - add $acc1,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $tmp0,$car0,$car0 ! ap[j]*a0+c0 - add $tmp1,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - st $car1,[$tp+8] - srlx $car1,32,$car2 - - ld [%sp+$bias+$frame],$tmp0 ! tp[0] - ld [%sp+$bias+$frame+4],$tmp1 ! tp[1] - ld [%sp+$bias+$frame+8],$tpj ! tp[2] - ld [$ap+4],$mul0 ! ap[1] - ld [$ap+8],$apj ! ap[2] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp0,$mul1 - - mulx $mul0,$mul0,$car0 - and $mul1,$mask,$mul1 - - mulx $car1,$mul1,$car1 - mulx $npj,$mul1,$acc1 - add $tmp0,$car1,$car1 - and $car0,$mask,$acc0 - ld [$np+8],$npj ! np[2] - srlx $car1,32,$car1 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add $acc0,$car1,$car1 - and $car0,1,$sbit - add $acc1,$car1,$car1 - srlx $car0,1,$car0 - mov 12,$j - st $car1,[%sp+$bias+$frame] ! tp[0]= - srlx $car1,32,$car1 - add %sp,$bias+$frame+4,$tp - -.Lsqr_2nd: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $acc0,$car0,$car0 - add $tpj,$car1,$car1 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc1,$car1,$car1 - ld [$tp+8],$tpj ! tp[j] - add $acc0,$acc0,$acc0 - add $j,4,$j ! j++ - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - bl %icc,.Lsqr_2nd - add $tp,4,$tp ! tp++ -!.Lsqr_2nd - - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $acc0,$car0,$car0 - add $tpj,$car1,$car1 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc1,$car1,$car1 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - add $car2,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car2 - - ld [%sp+$bias+$frame],$tmp1 ! tp[0] - ld [%sp+$bias+$frame+4],$tpj ! tp[1] - ld [$ap+8],$mul0 ! ap[2] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp1,$mul1 - and $mul1,$mask,$mul1 - mov 8,$i - - mulx $mul0,$mul0,$car0 - mulx $car1,$mul1,$car1 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add %sp,$bias+$frame,$tp - srlx $car1,32,$car1 - and $car0,1,$sbit - srlx $car0,1,$car0 - mov 4,$j - -.Lsqr_outer: -.Lsqr_inner1: - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $j,4,$j - ld [$tp+8],$tpj - cmp $j,$i - add $acc1,$car1,$car1 - ld [$np+$j],$npj - st $car1,[$tp] - srlx $car1,32,$car1 - bl %icc,.Lsqr_inner1 - add $tp,4,$tp -!.Lsqr_inner1 - - add $j,4,$j - ld [$ap+$j],$apj ! ap[j] - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - ld [$np+$j],$npj ! np[j] - add $acc0,$car1,$car1 - ld [$tp+8],$tpj ! tp[j] - add $acc1,$car1,$car1 - st $car1,[$tp] - srlx $car1,32,$car1 - - add $j,4,$j - cmp $j,$num - be,pn %icc,.Lsqr_no_inner2 - add $tp,4,$tp - -.Lsqr_inner2: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $acc0,$car0,$car0 - ld [$ap+$j],$apj ! ap[j] - and $car0,$mask,$acc0 - ld [$np+$j],$npj ! np[j] - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - ld [$tp+8],$tpj ! tp[j] - or $sbit,$acc0,$acc0 - add $j,4,$j ! j++ - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - cmp $j,$num - add $acc0,$car1,$car1 - add $acc1,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - bl %icc,.Lsqr_inner2 - add $tp,4,$tp ! tp++ - -.Lsqr_no_inner2: - mulx $apj,$mul0,$acc0 - mulx $npj,$mul1,$acc1 - add $tpj,$car1,$car1 - add $acc0,$car0,$car0 - and $car0,$mask,$acc0 - srlx $car0,32,$car0 - add $acc0,$acc0,$acc0 - or $sbit,$acc0,$acc0 - srlx $acc0,32,$sbit - and $acc0,$mask,$acc0 - add $acc0,$car1,$car1 - add $acc1,$car1,$car1 - st $car1,[$tp] ! tp[j-1] - srlx $car1,32,$car1 - - add $car0,$car0,$car0 - or $sbit,$car0,$car0 - add $car0,$car1,$car1 - add $car2,$car1,$car1 - st $car1,[$tp+4] - srlx $car1,32,$car2 - - add $i,4,$i ! i++ - ld [%sp+$bias+$frame],$tmp1 ! tp[0] - ld [%sp+$bias+$frame+4],$tpj ! tp[1] - ld [$ap+$i],$mul0 ! ap[j] - ld [$np],$car1 ! np[0] - ld [$np+4],$npj ! np[1] - mulx $n0,$tmp1,$mul1 - and $mul1,$mask,$mul1 - add $i,4,$tmp0 - - mulx $mul0,$mul0,$car0 - mulx $car1,$mul1,$car1 - and $car0,$mask,$acc0 - add $tmp1,$car1,$car1 - srlx $car0,32,$car0 - add %sp,$bias+$frame,$tp - srlx $car1,32,$car1 - and $car0,1,$sbit - srlx $car0,1,$car0 - - cmp $tmp0,$num ! i" -.align 32 -___ -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/sparcv9a-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/sparcv9a-mont.pl deleted file mode 100755 index a14205f2f0..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/sparcv9a-mont.pl +++ /dev/null @@ -1,882 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU? -# Because unlike integer multiplier, which simply stalls whole CPU, -# FPU is fully pipelined and can effectively emit 48 bit partial -# product every cycle. Why not blended SPARC v9? One can argue that -# making this module dependent on UltraSPARC VIS extension limits its -# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!) -# implementations from compatibility matrix. But the rest, whole Sun -# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support -# VIS extension instructions used in this module. This is considered -# good enough to not care about HAL SPARC64 users [if any] who have -# integer-only pure SPARCv9 module to "fall down" to. - -# USI&II cores currently exhibit uniform 2x improvement [over pre- -# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII -# performance improves few percents for shorter keys and worsens few -# percents for longer keys. This is because USIII integer multiplier -# is >3x faster than USI&II one, which is harder to match [but see -# TODO list below]. It should also be noted that SPARC64 V features -# out-of-order execution, which *might* mean that integer multiplier -# is pipelined, which in turn *might* be impossible to match... On -# additional note, SPARC64 V implements FP Multiply-Add instruction, -# which is perfectly usable in this context... In other words, as far -# as Fujitsu SPARC64 V goes, talk to the author:-) - -# The implementation implies following "non-natural" limitations on -# input arguments: -# - num may not be less than 4; -# - num has to be even; -# Failure to meet either condition has no fatal effects, simply -# doesn't give any performance gain. - -# TODO: -# - modulo-schedule inner loop for better performance (on in-order -# execution core such as UltraSPARC this shall result in further -# noticeable(!) improvement); -# - dedicated squaring procedure[?]; - -###################################################################### -# November 2006 -# -# Modulo-scheduled inner loops allow to interleave floating point and -# integer instructions and minimize Read-After-Write penalties. This -# results in *further* 20-50% perfromance improvement [depending on -# key length, more for longer keys] on USI&II cores and 30-80% - on -# USIII&IV. - -$fname="bn_mul_mont_fpu"; -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } - -if ($bits==64) { - $bias=2047; - $frame=192; -} else { - $bias=0; - $frame=128; # 96 rounded up to largest known cache-line -} -$locals=64; - -# In order to provide for 32-/64-bit ABI duality, I keep integers wider -# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used -# exclusively for pointers, indexes and other small values... -# int bn_mul_mont( -$rp="%i0"; # BN_ULONG *rp, -$ap="%i1"; # const BN_ULONG *ap, -$bp="%i2"; # const BN_ULONG *bp, -$np="%i3"; # const BN_ULONG *np, -$n0="%i4"; # const BN_ULONG *n0, -$num="%i5"; # int num); - -$tp="%l0"; # t[num] -$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved -$ap_h="%l2"; # to these four vectors as double-precision FP values. -$np_l="%l3"; # This way a bunch of fxtods are eliminated in second -$np_h="%l4"; # loop and L1-cache aliasing is minimized... -$i="%l5"; -$j="%l6"; -$mask="%l7"; # 16-bit mask, 0xffff - -$n0="%g4"; # reassigned(!) to "64-bit" register -$carry="%i4"; # %i4 reused(!) for a carry bit - -# FP register naming chart -# -# ..HILO -# dcba -# -------- -# LOa -# LOb -# LOc -# LOd -# HIa -# HIb -# HIc -# HId -# ..a -# ..b -$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6"; -$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14"; -$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19"; -$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23"; - -$dota="%f24"; $dotb="%f26"; - -$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38"; -$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46"; -$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54"; -$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62"; - -$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load - -$code=<<___; -.section ".text",#alloc,#execinstr - -.global $fname -.align 32 -$fname: - save %sp,-$frame-$locals,%sp - - cmp $num,4 - bl,a,pn %icc,.Lret - clr %i0 - andcc $num,1,%g0 ! $num has to be even... - bnz,a,pn %icc,.Lret - clr %i0 ! signal "unsupported input value" - - srl $num,1,$num - sethi %hi(0xffff),$mask - ld [%i4+0],$n0 ! $n0 reassigned, remember? - or $mask,%lo(0xffff),$mask - ld [%i4+4],%o0 - sllx %o0,32,%o0 - or %o0,$n0,$n0 ! $n0=n0[1].n0[0] - - sll $num,3,$num ! num*=8 - - add %sp,$bias,%o0 ! real top of stack - sll $num,2,%o1 - add %o1,$num,%o1 ! %o1=num*5 - sub %o0,%o1,%o0 - and %o0,-2048,%o0 ! optimize TLB utilization - sub %o0,$bias,%sp ! alloca(5*num*8) - - rd %asi,%o7 ! save %asi - add %sp,$bias+$frame+$locals,$tp - add $tp,$num,$ap_l - add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends ! - add $ap_l,$num,$ap_h - add $ap_h,$num,$np_l - add $np_l,$num,$np_h - - wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads - - add $rp,$num,$rp ! readjust input pointers to point - add $ap,$num,$ap ! at the ends too... - add $bp,$num,$bp - add $np,$num,$np - - stx %o7,[%sp+$bias+$frame+48] ! save %asi - - sub %g0,$num,$i ! i=-num - sub %g0,$num,$j ! j=-num - - add $ap,$j,%o3 - add $bp,$i,%o4 - - ld [%o3+4],%g1 ! bp[0] - ld [%o3+0],%o0 - ld [%o4+4],%g5 ! ap[0] - sllx %g1,32,%g1 - ld [%o4+0],%o1 - sllx %g5,32,%g5 - or %g1,%o0,%o0 - or %g5,%o1,%o1 - - add $np,$j,%o5 - - mulx %o1,%o0,%o0 ! ap[0]*bp[0] - mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0 - stx %o0,[%sp+$bias+$frame+0] - - ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o3+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+2]%asi,$ba - fxtod $alo,$alo - ldda [%o4+0]%asi,$bb - fxtod $ahi,$ahi - ldda [%o4+6]%asi,$bc - fxtod $nlo,$nlo - ldda [%o4+4]%asi,$bd - fxtod $nhi,$nhi - - ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values - ldda [%sp+$bias+$frame+6]%asi,$na - fxtod $ba,$ba - ldda [%sp+$bias+$frame+4]%asi,$nb - fxtod $bb,$bb - ldda [%sp+$bias+$frame+2]%asi,$nc - fxtod $bc,$bc - ldda [%sp+$bias+$frame+0]%asi,$nd - fxtod $bd,$bd - - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fxtod $na,$na - std $ahi,[$ap_h+$j] - fxtod $nb,$nb - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fxtod $nc,$nc - std $nhi,[$np_h+$j] - fxtod $nd,$nd - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - fmuld $alo,$bd,$alod - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - fmuld $ahi,$ba,$ahia - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - fmuld $ahi,$bb,$ahib - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - add $j,8,$j - std $nlob,[%sp+$bias+$frame+8] - add $ap,$j,%o4 - std $nloc,[%sp+$bias+$frame+16] - add $np,$j,%o5 - std $nlod,[%sp+$bias+$frame+24] - - ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o4+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - fxtod $alo,$alo - fxtod $ahi,$ahi - fxtod $nlo,$nlo - fxtod $nhi,$nhi - - ldx [%sp+$bias+$frame+0],%o0 - fmuld $alo,$ba,$aloa - ldx [%sp+$bias+$frame+8],%o1 - fmuld $nlo,$na,$nloa - ldx [%sp+$bias+$frame+16],%o2 - fmuld $alo,$bb,$alob - ldx [%sp+$bias+$frame+24],%o3 - fmuld $nlo,$nb,$nlob - - srlx %o0,16,%o7 - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fmuld $alo,$bc,$aloc - add %o7,%o1,%o1 - std $ahi,[$ap_h+$j] - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - srlx %o1,16,%o7 - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fmuld $alo,$bd,$alod - add %o7,%o2,%o2 - std $nhi,[$np_h+$j] - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - srlx %o2,16,%o7 - fmuld $ahi,$ba,$ahia - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - !and %o0,$mask,%o0 - !and %o1,$mask,%o1 - !and %o2,$mask,%o2 - !sllx %o1,16,%o1 - !sllx %o2,32,%o2 - !sllx %o3,48,%o7 - !or %o1,%o0,%o0 - !or %o2,%o0,%o0 - !or %o7,%o0,%o0 ! 64-bit result - srlx %o3,16,%g1 ! 34-bit carry - fmuld $ahi,$bb,$ahib - - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $dota,$nloa,$nloa - faddd $dotb,$nlob,$nlob - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - addcc $j,8,$j - std $nloc,[%sp+$bias+$frame+16] - bz,pn %icc,.L1stskip - std $nlod,[%sp+$bias+$frame+24] - -.align 32 ! incidentally already aligned ! -.L1st: - add $ap,$j,%o4 - add $np,$j,%o5 - ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words - fzeros $alo - ld [%o4+4],$ahi_ - fzeros $ahi - ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words - fzeros $nlo - ld [%o5+4],$nhi_ - fzeros $nhi - - fxtod $alo,$alo - fxtod $ahi,$ahi - fxtod $nlo,$nlo - fxtod $nhi,$nhi - - ldx [%sp+$bias+$frame+0],%o0 - fmuld $alo,$ba,$aloa - ldx [%sp+$bias+$frame+8],%o1 - fmuld $nlo,$na,$nloa - ldx [%sp+$bias+$frame+16],%o2 - fmuld $alo,$bb,$alob - ldx [%sp+$bias+$frame+24],%o3 - fmuld $nlo,$nb,$nlob - - srlx %o0,16,%o7 - std $alo,[$ap_l+$j] ! save smashed ap[j] in double format - fmuld $alo,$bc,$aloc - add %o7,%o1,%o1 - std $ahi,[$ap_h+$j] - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - srlx %o1,16,%o7 - std $nlo,[$np_l+$j] ! save smashed np[j] in double format - fmuld $alo,$bd,$alod - add %o7,%o2,%o2 - std $nhi,[$np_h+$j] - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - srlx %o2,16,%o7 - fmuld $ahi,$ba,$ahia - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - and %o1,$mask,%o1 - and %o2,$mask,%o2 - fmuld $ahi,$bb,$ahib - sllx %o1,16,%o1 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - sllx %o2,32,%o2 - fmuld $ahi,$bc,$ahic - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - or %o2,%o0,%o0 - fmuld $ahi,$bd,$ahid - or %o7,%o0,%o0 ! 64-bit result - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - addcc %g1,%o0,%o0 - faddd $dota,$nloa,$nloa - srlx %o3,16,%g1 ! 34-bit carry - faddd $dotb,$nlob,$nlob - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1]= - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - std $nlod,[%sp+$bias+$frame+24] - - addcc $j,8,$j - bnz,pt %icc,.L1st - add $tp,8,$tp - -.L1stskip: - fdtox $dota,$dota - fdtox $dotb,$dotb - - ldx [%sp+$bias+$frame+0],%o0 - ldx [%sp+$bias+$frame+8],%o1 - ldx [%sp+$bias+$frame+16],%o2 - ldx [%sp+$bias+$frame+24],%o3 - - srlx %o0,16,%o7 - std $dota,[%sp+$bias+$frame+32] - add %o7,%o1,%o1 - std $dotb,[%sp+$bias+$frame+40] - srlx %o1,16,%o7 - add %o7,%o2,%o2 - srlx %o2,16,%o7 - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - and %o1,$mask,%o1 - and %o2,$mask,%o2 - sllx %o1,16,%o1 - sllx %o2,32,%o2 - sllx %o3,48,%o7 - or %o1,%o0,%o0 - or %o2,%o0,%o0 - or %o7,%o0,%o0 ! 64-bit result - ldx [%sp+$bias+$frame+32],%o4 - addcc %g1,%o0,%o0 - ldx [%sp+$bias+$frame+40],%o5 - srlx %o3,16,%g1 ! 34-bit carry - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1]= - add $tp,8,$tp - - srlx %o4,16,%o7 - add %o7,%o5,%o5 - and %o4,$mask,%o4 - sllx %o5,16,%o7 - or %o7,%o4,%o4 - addcc %g1,%o4,%o4 - srlx %o5,48,%g1 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - mov %g1,$carry - stx %o4,[$tp] ! tp[num-1]= - - ba .Louter - add $i,8,$i -.align 32 -.Louter: - sub %g0,$num,$j ! j=-num - add %sp,$bias+$frame+$locals,$tp - - add $ap,$j,%o3 - add $bp,$i,%o4 - - ld [%o3+4],%g1 ! bp[i] - ld [%o3+0],%o0 - ld [%o4+4],%g5 ! ap[0] - sllx %g1,32,%g1 - ld [%o4+0],%o1 - sllx %g5,32,%g5 - or %g1,%o0,%o0 - or %g5,%o1,%o1 - - ldx [$tp],%o2 ! tp[0] - mulx %o1,%o0,%o0 - addcc %o2,%o0,%o0 - mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0 - stx %o0,[%sp+$bias+$frame+0] - - ! transfer b[i] to FPU as 4x16-bit values - ldda [%o4+2]%asi,$ba - ldda [%o4+0]%asi,$bb - ldda [%o4+6]%asi,$bc - ldda [%o4+4]%asi,$bd - - ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values - ldda [%sp+$bias+$frame+6]%asi,$na - fxtod $ba,$ba - ldda [%sp+$bias+$frame+4]%asi,$nb - fxtod $bb,$bb - ldda [%sp+$bias+$frame+2]%asi,$nc - fxtod $bc,$bc - ldda [%sp+$bias+$frame+0]%asi,$nd - fxtod $bd,$bd - ldd [$ap_l+$j],$alo ! load a[j] in double format - fxtod $na,$na - ldd [$ap_h+$j],$ahi - fxtod $nb,$nb - ldd [$np_l+$j],$nlo ! load n[j] in double format - fxtod $nc,$nc - ldd [$np_h+$j],$nhi - fxtod $nd,$nd - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - fmuld $alo,$bd,$alod - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - fmuld $ahi,$ba,$ahia - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - fmuld $ahi,$bb,$ahib - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - fmuld $ahi,$bc,$ahic - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - fmuld $ahi,$bd,$ahid - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - - faddd $ahic,$nhic,$dota ! $nhic - faddd $ahid,$nhid,$dotb ! $nhid - - faddd $nloc,$nhia,$nloc - faddd $nlod,$nhib,$nlod - - fdtox $nloa,$nloa - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - add $j,8,$j - std $nlod,[%sp+$bias+$frame+24] - - ldd [$ap_l+$j],$alo ! load a[j] in double format - ldd [$ap_h+$j],$ahi - ldd [$np_l+$j],$nlo ! load n[j] in double format - ldd [$np_h+$j],$nhi - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - ldx [%sp+$bias+$frame+0],%o0 - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - ldx [%sp+$bias+$frame+8],%o1 - fmuld $alo,$bd,$alod - ldx [%sp+$bias+$frame+16],%o2 - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - ldx [%sp+$bias+$frame+24],%o3 - fmuld $ahi,$ba,$ahia - - srlx %o0,16,%o7 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - add %o7,%o1,%o1 - fmuld $ahi,$bb,$ahib - srlx %o1,16,%o7 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - add %o7,%o2,%o2 - fmuld $ahi,$bc,$ahic - srlx %o2,16,%o7 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - ! why? - and %o0,$mask,%o0 - fmuld $ahi,$bd,$ahid - and %o1,$mask,%o1 - and %o2,$mask,%o2 - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - sllx %o1,16,%o1 - faddd $dota,$nloa,$nloa - sllx %o2,32,%o2 - faddd $dotb,$nlob,$nlob - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahic,$nhic,$dota ! $nhic - or %o2,%o0,%o0 - faddd $ahid,$nhid,$dotb ! $nhid - or %o7,%o0,%o0 ! 64-bit result - ldx [$tp],%o7 - faddd $nloc,$nhia,$nloc - addcc %o7,%o0,%o0 - ! end-of-why? - faddd $nlod,$nhib,$nlod - srlx %o3,16,%g1 ! 34-bit carry - fdtox $nloa,$nloa - bcs,a %xcc,.+8 - add %g1,1,%g1 - - fdtox $nlob,$nlob - fdtox $nloc,$nloc - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - addcc $j,8,$j - std $nloc,[%sp+$bias+$frame+16] - bz,pn %icc,.Linnerskip - std $nlod,[%sp+$bias+$frame+24] - - ba .Linner - nop -.align 32 -.Linner: - ldd [$ap_l+$j],$alo ! load a[j] in double format - ldd [$ap_h+$j],$ahi - ldd [$np_l+$j],$nlo ! load n[j] in double format - ldd [$np_h+$j],$nhi - - fmuld $alo,$ba,$aloa - fmuld $nlo,$na,$nloa - fmuld $alo,$bb,$alob - fmuld $nlo,$nb,$nlob - fmuld $alo,$bc,$aloc - ldx [%sp+$bias+$frame+0],%o0 - faddd $aloa,$nloa,$nloa - fmuld $nlo,$nc,$nloc - ldx [%sp+$bias+$frame+8],%o1 - fmuld $alo,$bd,$alod - ldx [%sp+$bias+$frame+16],%o2 - faddd $alob,$nlob,$nlob - fmuld $nlo,$nd,$nlod - ldx [%sp+$bias+$frame+24],%o3 - fmuld $ahi,$ba,$ahia - - srlx %o0,16,%o7 - faddd $aloc,$nloc,$nloc - fmuld $nhi,$na,$nhia - add %o7,%o1,%o1 - fmuld $ahi,$bb,$ahib - srlx %o1,16,%o7 - faddd $alod,$nlod,$nlod - fmuld $nhi,$nb,$nhib - add %o7,%o2,%o2 - fmuld $ahi,$bc,$ahic - srlx %o2,16,%o7 - faddd $ahia,$nhia,$nhia - fmuld $nhi,$nc,$nhic - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - fmuld $ahi,$bd,$ahid - and %o1,$mask,%o1 - and %o2,$mask,%o2 - faddd $ahib,$nhib,$nhib - fmuld $nhi,$nd,$nhid - sllx %o1,16,%o1 - faddd $dota,$nloa,$nloa - sllx %o2,32,%o2 - faddd $dotb,$nlob,$nlob - sllx %o3,48,%o7 - or %o1,%o0,%o0 - faddd $ahic,$nhic,$dota ! $nhic - or %o2,%o0,%o0 - faddd $ahid,$nhid,$dotb ! $nhid - or %o7,%o0,%o0 ! 64-bit result - faddd $nloc,$nhia,$nloc - addcc %g1,%o0,%o0 - ldx [$tp+8],%o7 ! tp[j] - faddd $nlod,$nhib,$nlod - srlx %o3,16,%g1 ! 34-bit carry - fdtox $nloa,$nloa - bcs,a %xcc,.+8 - add %g1,1,%g1 - fdtox $nlob,$nlob - addcc %o7,%o0,%o0 - fdtox $nloc,$nloc - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1] - fdtox $nlod,$nlod - - std $nloa,[%sp+$bias+$frame+0] - std $nlob,[%sp+$bias+$frame+8] - std $nloc,[%sp+$bias+$frame+16] - addcc $j,8,$j - std $nlod,[%sp+$bias+$frame+24] - bnz,pt %icc,.Linner - add $tp,8,$tp - -.Linnerskip: - fdtox $dota,$dota - fdtox $dotb,$dotb - - ldx [%sp+$bias+$frame+0],%o0 - ldx [%sp+$bias+$frame+8],%o1 - ldx [%sp+$bias+$frame+16],%o2 - ldx [%sp+$bias+$frame+24],%o3 - - srlx %o0,16,%o7 - std $dota,[%sp+$bias+$frame+32] - add %o7,%o1,%o1 - std $dotb,[%sp+$bias+$frame+40] - srlx %o1,16,%o7 - add %o7,%o2,%o2 - srlx %o2,16,%o7 - add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15] - and %o0,$mask,%o0 - and %o1,$mask,%o1 - and %o2,$mask,%o2 - sllx %o1,16,%o1 - sllx %o2,32,%o2 - sllx %o3,48,%o7 - or %o1,%o0,%o0 - or %o2,%o0,%o0 - ldx [%sp+$bias+$frame+32],%o4 - or %o7,%o0,%o0 ! 64-bit result - ldx [%sp+$bias+$frame+40],%o5 - addcc %g1,%o0,%o0 - ldx [$tp+8],%o7 ! tp[j] - srlx %o3,16,%g1 ! 34-bit carry - bcs,a %xcc,.+8 - add %g1,1,%g1 - - addcc %o7,%o0,%o0 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - stx %o0,[$tp] ! tp[j-1] - add $tp,8,$tp - - srlx %o4,16,%o7 - add %o7,%o5,%o5 - and %o4,$mask,%o4 - sllx %o5,16,%o7 - or %o7,%o4,%o4 - addcc %g1,%o4,%o4 - srlx %o5,48,%g1 - bcs,a %xcc,.+8 - add %g1,1,%g1 - - addcc $carry,%o4,%o4 - stx %o4,[$tp] ! tp[num-1] - mov %g1,$carry - bcs,a %xcc,.+8 - add $carry,1,$carry - - addcc $i,8,$i - bnz %icc,.Louter - nop - - add $tp,8,$tp ! adjust tp to point at the end - orn %g0,%g0,%g4 - sub %g0,$num,%o7 ! n=-num - ba .Lsub - subcc %g0,%g0,%g0 ! clear %icc.c - -.align 32 -.Lsub: - ldx [$tp+%o7],%o0 - add $np,%o7,%g1 - ld [%g1+0],%o2 - ld [%g1+4],%o3 - srlx %o0,32,%o1 - subccc %o0,%o2,%o2 - add $rp,%o7,%g1 - subccc %o1,%o3,%o3 - st %o2,[%g1+0] - add %o7,8,%o7 - brnz,pt %o7,.Lsub - st %o3,[%g1+4] - subc $carry,0,%g4 - sub %g0,$num,%o7 ! n=-num - ba .Lcopy - nop - -.align 32 -.Lcopy: - ldx [$tp+%o7],%o0 - add $rp,%o7,%g1 - ld [%g1+0],%o2 - ld [%g1+4],%o3 - stx %g0,[$tp+%o7] - and %o0,%g4,%o0 - srlx %o0,32,%o1 - andn %o2,%g4,%o2 - andn %o3,%g4,%o3 - or %o2,%o0,%o0 - or %o3,%o1,%o1 - st %o0,[%g1+0] - add %o7,8,%o7 - brnz,pt %o7,.Lcopy - st %o1,[%g1+4] - sub %g0,$num,%o7 ! n=-num - -.Lzap: - stx %g0,[$ap_l+%o7] - stx %g0,[$ap_h+%o7] - stx %g0,[$np_l+%o7] - stx %g0,[$np_h+%o7] - add %o7,8,%o7 - brnz,pt %o7,.Lzap - nop - - ldx [%sp+$bias+$frame+48],%o7 - wr %g0,%o7,%asi ! restore %asi - - mov 1,%i0 -.Lret: - ret - restore -.type $fname,#function -.size $fname,(.-$fname) -.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by " -.align 32 -___ - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -# Below substitution makes it possible to compile without demanding -# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I -# dare to do this, because VIS capability is detected at run-time now -# and this routine is not called on CPU not capable to execute it. Do -# note that fzeros is not the only VIS dependency! Another dependency -# is implicit and is just _a_ numerical value loaded to %asi register, -# which assembler can't recognize as VIS specific... -$code =~ s/fzeros\s+%f([0-9]+)/ - sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1) - /gem; - -print $code; -# flush -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/via-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/via-mont.pl deleted file mode 100644 index c046a514c8..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/via-mont.pl +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# Wrapper around 'rep montmul', VIA-specific instruction accessing -# PadLock Montgomery Multiplier. The wrapper is designed as drop-in -# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9]. -# -# Below are interleaved outputs from 'openssl speed rsa dsa' for 4 -# different software configurations on 1.5GHz VIA Esther processor. -# Lines marked with "software integer" denote performance of hand- -# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2" -# refers to hand-coded SSE2 Montgomery multiplication procedure found -# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from -# Padlock SDK 2.0.1 available for download from VIA, which naturally -# utilizes the magic 'repz montmul' instruction. And finally "hardware -# this" refers to *this* implementation which also uses 'repz montmul' -# -# sign verify sign/s verify/s -# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer -# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2 -# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK -# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this -# -# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer -# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2 -# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK -# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this -# -# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer -# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2 -# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK -# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this -# -# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer -# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2 -# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK -# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this -# -# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer -# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2 -# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK -# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this -# -# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer -# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2 -# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK -# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this -# -# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer -# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2 -# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK -# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this -# -# To give you some other reference point here is output for 2.4GHz P4 -# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software -# SSE2" in above terms. -# -# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0 -# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0 -# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9 -# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3 -# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1 -# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0 -# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1 -# -# Conclusions: -# - VIA SDK leaves a *lot* of room for improvement (which this -# implementation successfully fills:-); -# - 'rep montmul' gives up to >3x performance improvement depending on -# key length; -# - in terms of absolute performance it delivers approximately as much -# as modern out-of-order 32-bit cores [again, for longer keys]. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"via-mont.pl"); - -# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); -$func="bn_mul_mont_padlock"; - -$pad=16*1; # amount of reserved bytes on top of every vector - -# stack layout -$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA -$A=&DWP(4,"esp"); -$B=&DWP(8,"esp"); -$T=&DWP(12,"esp"); -$M=&DWP(16,"esp"); -$scratch=&DWP(20,"esp"); -$rp=&DWP(24,"esp"); # these are mine -$sp=&DWP(28,"esp"); -# &DWP(32,"esp") # 32 byte scratch area -# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num] -# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num] -# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num] -# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num] -# Note that SDK suggests to unconditionally allocate 2K per vector. This -# has quite an impact on performance. It naturally depends on key length, -# but to give an example 1024 bit private RSA key operations suffer >30% -# penalty. I allocate only as much as actually required... - -&function_begin($func); - &xor ("eax","eax"); - &mov ("ecx",&wparam(5)); # num - # meet VIA's limitations for num [note that the specification - # expresses them in bits, while we work with amount of 32-bit words] - &test ("ecx",3); - &jnz (&label("leave")); # num % 4 != 0 - &cmp ("ecx",8); - &jb (&label("leave")); # num < 8 - &cmp ("ecx",1024); - &ja (&label("leave")); # num > 1024 - - &pushf (); - &cld (); - - &mov ("edi",&wparam(0)); # rp - &mov ("eax",&wparam(1)); # ap - &mov ("ebx",&wparam(2)); # bp - &mov ("edx",&wparam(3)); # np - &mov ("esi",&wparam(4)); # n0 - &mov ("esi",&DWP(0,"esi")); # *n0 - - &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes - &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes - &neg ("ebp"); - &add ("ebp","esp"); - &and ("ebp",-64); # align to cache-line - &xchg ("ebp","esp"); # alloca - - &mov ($rp,"edi"); # save rp - &mov ($sp,"ebp"); # save esp - - &mov ($mZeroPrime,"esi"); - &lea ("esi",&DWP(64,"esp")); # tp - &mov ($T,"esi"); - &lea ("edi",&DWP(32,"esp")); # scratch area - &mov ($scratch,"edi"); - &mov ("esi","eax"); - - &lea ("ebp",&DWP(-$pad,"ecx")); - &shr ("ebp",2); # restore original num value in ebp - - &xor ("eax","eax"); - - &mov ("ecx","ebp"); - &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch - &data_byte(0xf3,0xab); # rep stosl, bzero - - &mov ("ecx","ebp"); - &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy - &mov ($A,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded ap copy... - - &mov ("ecx","ebp"); - &mov ("esi","ebx"); - &mov ($B,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded bp copy... - - &mov ("ecx","ebp"); - &mov ("esi","edx"); - &mov ($M,"edi"); - &data_byte(0xf3,0xa5); # rep movsl, memcpy - &mov ("ecx",$pad/4); - &data_byte(0xf3,0xab); # rep stosl, bzero pad - # edi points at the end of padded np copy... - - # let magic happen... - &mov ("ecx","ebp"); - &mov ("esi","esp"); - &shl ("ecx",5); # convert word counter to bit counter - &align (4); - &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul - - &mov ("ecx","ebp"); - &lea ("esi",&DWP(64,"esp")); # tp - # edi still points at the end of padded np copy... - &neg ("ebp"); - &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind" - &mov ("edi",$rp); # restore rp - &xor ("edx","edx"); # i=0 and clear CF - -&set_label("sub",8); - &mov ("eax",&DWP(0,"esi","edx",4)); - &sbb ("eax",&DWP(0,"ebp","edx",4)); - &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i] - &lea ("edx",&DWP(1,"edx")); # i++ - &loop (&label("sub")); # doesn't affect CF! - - &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit - &sbb ("eax",0); - &and ("esi","eax"); - ¬ ("eax"); - &mov ("ebp","edi"); - &and ("ebp","eax"); - &or ("esi","ebp"); # tp=carry?tp:rp - - &mov ("ecx","edx"); # num - &xor ("edx","edx"); # i=0 - -&set_label("copy",8); - &mov ("eax",&DWP(0,"esi","edx",4)); - &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp - &mov (&DWP(0,"edi","edx",4),"eax"); - &lea ("edx",&DWP(1,"edx")); # i++ - &loop (&label("copy")); - - &mov ("ebp",$sp); - &xor ("eax","eax"); - - &mov ("ecx",64/4); - &mov ("edi","esp"); # zap frame including scratch area - &data_byte(0xf3,0xab); # rep stosl, bzero - - # zap copies of ap, bp and np - &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap - &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2)); - &data_byte(0xf3,0xab); # rep stosl, bzero - - &mov ("esp","ebp"); - &inc ("eax"); # signal "done" - &popf (); -&set_label("leave"); -&function_end($func); - -&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86-gf2m.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86-gf2m.pl deleted file mode 100644 index b579530272..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86-gf2m.pl +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# May 2011 -# -# The module implements bn_GF2m_mul_2x2 polynomial multiplication used -# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for -# the time being... Except that it has three code paths: pure integer -# code suitable for any x86 CPU, MMX code suitable for PIII and later -# and PCLMULQDQ suitable for Westmere and later. Improvement varies -# from one benchmark and µ-arch to another. Below are interval values -# for 163- and 571-bit ECDH benchmarks relative to compiler-generated -# code: -# -# PIII 16%-30% -# P4 12%-12% -# Opteron 18%-40% -# Core2 19%-44% -# Atom 38%-64% -# Westmere 53%-121%(PCLMULQDQ)/20%-32%(MMX) -# Sandy Bridge 72%-127%(PCLMULQDQ)/27%-23%(MMX) -# -# Note that above improvement coefficients are not coefficients for -# bn_GF2m_mul_2x2 itself. For example 120% ECDH improvement is result -# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, benchmark -# is more and more dominated by other subroutines, most notably by -# BN_GF2m_mod[_mul]_arr... - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386"); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -$a="eax"; -$b="ebx"; -($a1,$a2,$a4)=("ecx","edx","ebp"); - -$R="mm0"; -@T=("mm1","mm2"); -($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5"); -@i=("esi","edi"); - - if (!$x86only) { -&function_begin_B("_mul_1x1_mmx"); - &sub ("esp",32+4); - &mov ($a1,$a); - &lea ($a2,&DWP(0,$a,$a)); - &and ($a1,0x3fffffff); - &lea ($a4,&DWP(0,$a2,$a2)); - &mov (&DWP(0*4,"esp"),0); - &and ($a2,0x7fffffff); - &movd ($A,$a); - &movd ($B,$b); - &mov (&DWP(1*4,"esp"),$a1); # a1 - &xor ($a1,$a2); # a1^a2 - &pxor ($B31,$B31); - &pxor ($B30,$B30); - &mov (&DWP(2*4,"esp"),$a2); # a2 - &xor ($a2,$a4); # a2^a4 - &mov (&DWP(3*4,"esp"),$a1); # a1^a2 - &pcmpgtd($B31,$A); # broadcast 31st bit - &paddd ($A,$A); # $A<<=1 - &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4 - &mov (&DWP(4*4,"esp"),$a4); # a4 - &xor ($a4,$a2); # a2=a4^a2^a4 - &pand ($B31,$B); - &pcmpgtd($B30,$A); # broadcast 30th bit - &mov (&DWP(5*4,"esp"),$a1); # a1^a4 - &xor ($a4,$a1); # a1^a2^a4 - &psllq ($B31,31); - &pand ($B30,$B); - &mov (&DWP(6*4,"esp"),$a2); # a2^a4 - &mov (@i[0],0x7); - &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4 - &mov ($a4,@i[0]); - &and (@i[0],$b); - &shr ($b,3); - &mov (@i[1],$a4); - &psllq ($B30,30); - &and (@i[1],$b); - &shr ($b,3); - &movd ($R,&DWP(0,"esp",@i[0],4)); - &mov (@i[0],$a4); - &and (@i[0],$b); - &shr ($b,3); - for($n=1;$n<9;$n++) { - &movd (@T[1],&DWP(0,"esp",@i[1],4)); - &mov (@i[1],$a4); - &psllq (@T[1],3*$n); - &and (@i[1],$b); - &shr ($b,3); - &pxor ($R,@T[1]); - - push(@i,shift(@i)); push(@T,shift(@T)); - } - &movd (@T[1],&DWP(0,"esp",@i[1],4)); - &pxor ($R,$B30); - &psllq (@T[1],3*$n++); - &pxor ($R,@T[1]); - - &movd (@T[0],&DWP(0,"esp",@i[0],4)); - &pxor ($R,$B31); - &psllq (@T[0],3*$n); - &add ("esp",32+4); - &pxor ($R,@T[0]); - &ret (); -&function_end_B("_mul_1x1_mmx"); - } - -($lo,$hi)=("eax","edx"); -@T=("ecx","ebp"); - -&function_begin_B("_mul_1x1_ialu"); - &sub ("esp",32+4); - &mov ($a1,$a); - &lea ($a2,&DWP(0,$a,$a)); - &lea ($a4,&DWP(0,"",$a,4)); - &and ($a1,0x3fffffff); - &lea (@i[1],&DWP(0,$lo,$lo)); - &sar ($lo,31); # broadcast 31st bit - &mov (&DWP(0*4,"esp"),0); - &and ($a2,0x7fffffff); - &mov (&DWP(1*4,"esp"),$a1); # a1 - &xor ($a1,$a2); # a1^a2 - &mov (&DWP(2*4,"esp"),$a2); # a2 - &xor ($a2,$a4); # a2^a4 - &mov (&DWP(3*4,"esp"),$a1); # a1^a2 - &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4 - &mov (&DWP(4*4,"esp"),$a4); # a4 - &xor ($a4,$a2); # a2=a4^a2^a4 - &mov (&DWP(5*4,"esp"),$a1); # a1^a4 - &xor ($a4,$a1); # a1^a2^a4 - &sar (@i[1],31); # broardcast 30th bit - &and ($lo,$b); - &mov (&DWP(6*4,"esp"),$a2); # a2^a4 - &and (@i[1],$b); - &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4 - &mov ($hi,$lo); - &shl ($lo,31); - &mov (@T[0],@i[1]); - &shr ($hi,1); - - &mov (@i[0],0x7); - &shl (@i[1],30); - &and (@i[0],$b); - &shr (@T[0],2); - &xor ($lo,@i[1]); - - &shr ($b,3); - &mov (@i[1],0x7); # 5-byte instruction!? - &and (@i[1],$b); - &shr ($b,3); - &xor ($hi,@T[0]); - &xor ($lo,&DWP(0,"esp",@i[0],4)); - &mov (@i[0],0x7); - &and (@i[0],$b); - &shr ($b,3); - for($n=1;$n<9;$n++) { - &mov (@T[1],&DWP(0,"esp",@i[1],4)); - &mov (@i[1],0x7); - &mov (@T[0],@T[1]); - &shl (@T[1],3*$n); - &and (@i[1],$b); - &shr (@T[0],32-3*$n); - &xor ($lo,@T[1]); - &shr ($b,3); - &xor ($hi,@T[0]); - - push(@i,shift(@i)); push(@T,shift(@T)); - } - &mov (@T[1],&DWP(0,"esp",@i[1],4)); - &mov (@T[0],@T[1]); - &shl (@T[1],3*$n); - &mov (@i[1],&DWP(0,"esp",@i[0],4)); - &shr (@T[0],32-3*$n); $n++; - &mov (@i[0],@i[1]); - &xor ($lo,@T[1]); - &shl (@i[1],3*$n); - &xor ($hi,@T[0]); - &shr (@i[0],32-3*$n); - &xor ($lo,@i[1]); - &xor ($hi,@i[0]); - - &add ("esp",32+4); - &ret (); -&function_end_B("_mul_1x1_ialu"); - -# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); -&function_begin_B("bn_GF2m_mul_2x2"); -if (!$x86only) { - &picmeup("edx","OPENSSL_ia32cap_P"); - &mov ("eax",&DWP(0,"edx")); - &mov ("edx",&DWP(4,"edx")); - &test ("eax",1<<23); # check MMX bit - &jz (&label("ialu")); -if ($sse2) { - &test ("eax",1<<24); # check FXSR bit - &jz (&label("mmx")); - &test ("edx",1<<1); # check PCLMULQDQ bit - &jz (&label("mmx")); - - &movups ("xmm0",&QWP(8,"esp")); - &shufps ("xmm0","xmm0",0b10110001); - &pclmulqdq ("xmm0","xmm0",1); - &mov ("eax",&DWP(4,"esp")); - &movups (&QWP(0,"eax"),"xmm0"); - &ret (); - -&set_label("mmx",16); -} - &push ("ebp"); - &push ("ebx"); - &push ("esi"); - &push ("edi"); - &mov ($a,&wparam(1)); - &mov ($b,&wparam(3)); - &call ("_mul_1x1_mmx"); # a1·b1 - &movq ("mm7",$R); - - &mov ($a,&wparam(2)); - &mov ($b,&wparam(4)); - &call ("_mul_1x1_mmx"); # a0·b0 - &movq ("mm6",$R); - - &mov ($a,&wparam(1)); - &mov ($b,&wparam(3)); - &xor ($a,&wparam(2)); - &xor ($b,&wparam(4)); - &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1) - &pxor ($R,"mm7"); - &mov ($a,&wparam(0)); - &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0 - - &movq ($A,$R); - &psllq ($R,32); - &pop ("edi"); - &psrlq ($A,32); - &pop ("esi"); - &pxor ($R,"mm6"); - &pop ("ebx"); - &pxor ($A,"mm7"); - &movq (&QWP(0,$a),$R); - &pop ("ebp"); - &movq (&QWP(8,$a),$A); - &emms (); - &ret (); -&set_label("ialu",16); -} - &push ("ebp"); - &push ("ebx"); - &push ("esi"); - &push ("edi"); - &stack_push(4+1); - - &mov ($a,&wparam(1)); - &mov ($b,&wparam(3)); - &call ("_mul_1x1_ialu"); # a1·b1 - &mov (&DWP(8,"esp"),$lo); - &mov (&DWP(12,"esp"),$hi); - - &mov ($a,&wparam(2)); - &mov ($b,&wparam(4)); - &call ("_mul_1x1_ialu"); # a0·b0 - &mov (&DWP(0,"esp"),$lo); - &mov (&DWP(4,"esp"),$hi); - - &mov ($a,&wparam(1)); - &mov ($b,&wparam(3)); - &xor ($a,&wparam(2)); - &xor ($b,&wparam(4)); - &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1) - - &mov ("ebp",&wparam(0)); - @r=("ebx","ecx","edi","esi"); - &mov (@r[0],&DWP(0,"esp")); - &mov (@r[1],&DWP(4,"esp")); - &mov (@r[2],&DWP(8,"esp")); - &mov (@r[3],&DWP(12,"esp")); - - &xor ($lo,$hi); - &xor ($hi,@r[1]); - &xor ($lo,@r[0]); - &mov (&DWP(0,"ebp"),@r[0]); - &xor ($hi,@r[2]); - &mov (&DWP(12,"ebp"),@r[3]); - &xor ($lo,@r[3]); - &stack_pop(4+1); - &xor ($hi,@r[3]); - &pop ("edi"); - &xor ($lo,$hi); - &pop ("esi"); - &mov (&DWP(8,"ebp"),$hi); - &pop ("ebx"); - &mov (&DWP(4,"ebp"),$lo); - &pop ("ebp"); - &ret (); -&function_end_B("bn_GF2m_mul_2x2"); - -&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86-mont.pl deleted file mode 100755 index e8f6b05084..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86-mont.pl +++ /dev/null @@ -1,593 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# This is a "teaser" code, as it can be improved in several ways... -# First of all non-SSE2 path should be implemented (yes, for now it -# performs Montgomery multiplication/convolution only on SSE2-capable -# CPUs such as P4, others fall down to original code). Then inner loop -# can be unrolled and modulo-scheduled to improve ILP and possibly -# moved to 128-bit XMM register bank (though it would require input -# rearrangement and/or increase bus bandwidth utilization). Dedicated -# squaring procedure should give further performance improvement... -# Yet, for being draft, the code improves rsa512 *sign* benchmark by -# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-) - -# December 2006 -# -# Modulo-scheduling SSE2 loops results in further 15-20% improvement. -# Integer-only code [being equipped with dedicated squaring procedure] -# gives ~40% on rsa512 sign benchmark... - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&function_begin("bn_mul_mont"); - -$i="edx"; -$j="ecx"; -$ap="esi"; $tp="esi"; # overlapping variables!!! -$rp="edi"; $bp="edi"; # overlapping variables!!! -$np="ebp"; -$num="ebx"; - -$_num=&DWP(4*0,"esp"); # stack top layout -$_rp=&DWP(4*1,"esp"); -$_ap=&DWP(4*2,"esp"); -$_bp=&DWP(4*3,"esp"); -$_np=&DWP(4*4,"esp"); -$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp"); -$_sp=&DWP(4*6,"esp"); -$_bpend=&DWP(4*7,"esp"); -$frame=32; # size of above frame rounded up to 16n - - &xor ("eax","eax"); - &mov ("edi",&wparam(5)); # int num - &cmp ("edi",4); - &jl (&label("just_leave")); - - &lea ("esi",&wparam(0)); # put aside pointer to argument block - &lea ("edx",&wparam(1)); # load ap - &mov ("ebp","esp"); # saved stack pointer! - &add ("edi",2); # extra two words on top of tp - &neg ("edi"); - &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) - &neg ("edi"); - - # minimize cache contention by arraning 2K window between stack - # pointer and ap argument [np is also position sensitive vector, - # but it's assumed to be near ap, as it's allocated at ~same - # time]. - &mov ("eax","esp"); - &sub ("eax","edx"); - &and ("eax",2047); - &sub ("esp","eax"); # this aligns sp and ap modulo 2048 - - &xor ("edx","esp"); - &and ("edx",2048); - &xor ("edx",2048); - &sub ("esp","edx"); # this splits them apart modulo 4096 - - &and ("esp",-64); # align to cache line - - ################################# load argument block... - &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp - &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap - &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp - &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np - &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 - #&mov ("edi",&DWP(5*4,"esi"));# int num - - &mov ("esi",&DWP(0,"esi")); # pull n0[0] - &mov ($_rp,"eax"); # ... save a copy of argument block - &mov ($_ap,"ebx"); - &mov ($_bp,"ecx"); - &mov ($_np,"edx"); - &mov ($_n0,"esi"); - &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling - #&mov ($_num,$num); # redundant as $num is not reused - &mov ($_sp,"ebp"); # saved stack pointer! - -if($sse2) { -$acc0="mm0"; # mmx register bank layout -$acc1="mm1"; -$car0="mm2"; -$car1="mm3"; -$mul0="mm4"; -$mul1="mm5"; -$temp="mm6"; -$mask="mm7"; - - &picmeup("eax","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"eax"),26); - &jnc (&label("non_sse2")); - - &mov ("eax",-1); - &movd ($mask,"eax"); # mask 32 lower bits - - &mov ($ap,$_ap); # load input pointers - &mov ($bp,$_bp); - &mov ($np,$_np); - - &xor ($i,$i); # i=0 - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp)); # bp[0] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($car1,&DWP(0,$np)); # np[0] - - &pmuludq($mul1,$mul0); # ap[0]*bp[0] - &movq ($car0,$mul1); - &movq ($acc0,$mul1); # I wish movd worked for - &pand ($acc0,$mask); # inter-register transfers - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0 - &paddq ($car1,$acc0); - - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &inc ($j); # j++ -&set_label("1st",16); - &pmuludq($acc0,$mul0); # ap[j]*bp[0] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[0]; - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]= - &psrlq ($car1,32); - - &lea ($j,&DWP(1,$j)); - &cmp ($j,$num); - &jl (&label("1st")); - - &pmuludq($acc0,$mul0); # ap[num-1]*bp[0] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[0]; - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - - &psrlq ($car0,32); - &psrlq ($car1,32); - - &paddq ($car1,$car0); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &inc ($i); # i++ -&set_label("outer"); - &xor ($j,$j); # j=0 - - &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i] - &movd ($mul1,&DWP(0,$ap)); # ap[0] - &movd ($temp,&DWP($frame,"esp")); # tp[0] - &movd ($car1,&DWP(0,$np)); # np[0] - &pmuludq($mul1,$mul0); # ap[0]*bp[i] - - &paddq ($mul1,$temp); # +=tp[0] - &movq ($acc0,$mul1); - &movq ($car0,$mul1); - &pand ($acc0,$mask); - - &pmuludq($mul1,$_n0q); # *=n0 - - &pmuludq($car1,$mul1); - &paddq ($car1,$acc0); - - &movd ($temp,&DWP($frame+4,"esp")); # tp[1] - &movd ($acc1,&DWP(4,$np)); # np[1] - &movd ($acc0,&DWP(4,$ap)); # ap[1] - - &psrlq ($car0,32); - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[1] - - &inc ($j); # j++ - &dec ($num); -&set_label("inner"); - &pmuludq($acc0,$mul0); # ap[j]*bp[i] - &pmuludq($acc1,$mul1); # np[j]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1] - &pand ($acc0,$mask); - &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1] - &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j] - &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1] - &psrlq ($car0,32); - &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]= - &psrlq ($car1,32); - &paddq ($car0,$temp); # +=tp[j+1] - - &dec ($num); - &lea ($j,&DWP(1,$j)); # j++ - &jnz (&label("inner")); - - &mov ($num,$j); - &pmuludq($acc0,$mul0); # ap[num-1]*bp[i] - &pmuludq($acc1,$mul1); # np[num-1]*m1 - &paddq ($car0,$acc0); # +=c0 - &paddq ($car1,$acc1); # +=c1 - - &movq ($acc0,$car0); - &pand ($acc0,$mask); - &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1] - &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]= - &psrlq ($car0,32); - &psrlq ($car1,32); - - &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num] - &paddq ($car1,$car0); - &paddq ($car1,$temp); - &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] - - &lea ($i,&DWP(1,$i)); # i++ - &cmp ($i,$num); - &jle (&label("outer")); - - &emms (); # done with mmx bank - &jmp (&label("common_tail")); - -&set_label("non_sse2",16); -} - -if (0) { - &mov ("esp",$_sp); - &xor ("eax","eax"); # signal "not fast enough [yet]" - &jmp (&label("just_leave")); - # While the below code provides competitive performance for - # all key lengthes on modern Intel cores, it's still more - # than 10% slower for 4096-bit key elsewhere:-( "Competitive" - # means compared to the original integer-only assembler. - # 512-bit RSA sign is better by ~40%, but that's about all - # one can say about all CPUs... -} else { -$inp="esi"; # integer path uses these registers differently -$word="edi"; -$carry="ebp"; - - &mov ($inp,$_ap); - &lea ($carry,&DWP(1,$num)); - &mov ($word,$_bp); - &xor ($j,$j); # j=0 - &mov ("edx",$inp); - &and ($carry,1); # see if num is even - &sub ("edx",$word); # see if ap==bp - &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num] - &or ($carry,"edx"); - &mov ($word,&DWP(0,$word)); # bp[0] - &jz (&label("bn_sqr_mont")); - &mov ($_bpend,"eax"); - &mov ("eax",&DWP(0,$inp)); - &xor ("edx","edx"); - -&set_label("mull",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[0] - &add ($carry,"eax"); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("mull")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[0] - &mov ($word,$_n0); - &add ("eax",$carry); - &mov ($inp,$_np); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]= - &xor ($j,$j); - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mov ("eax",&DWP(0,$inp)); # np[0] - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &inc ($j); - - &jmp (&label("2ndmadd")); - -&set_label("1stmadd",16); - &mov ($carry,"edx"); - &mul ($word); # ap[j]*bp[i] - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("1stmadd")); - - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*bp[i] - &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &add ($carry,"eax"); - &adc ("edx",0); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &xor ($j,$j); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]= - &adc ($j,0); - &mov ("eax",&DWP(0,$inp)); # np[0] - &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]= - &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ("eax",&DWP(4,$inp)); # np[1] - &adc ("edx",0); - &mov ($j,1); - -&set_label("2ndmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]= - &jl (&label("2ndmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &xor ("eax","eax"); - &mov ($j,$_bp); # &bp[i] - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &lea ($j,&DWP(4,$j)); - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$_bpend); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(0,$j)); # bp[i+1] - &mov ($inp,$_ap); - &mov ($_bp,$j); # &bp[++i] - &xor ($j,$j); - &xor ("edx","edx"); - &mov ("eax",&DWP(0,$inp)); - &jmp (&label("1stmadd")); - -&set_label("bn_sqr_mont",16); -$sbit=$num; - &mov ($_num,$num); - &mov ($_bp,$j); # i=0 - - &mov ("eax",$word); # ap[0] - &mul ($word); # ap[0]*ap[0] - &mov (&DWP($frame,"esp"),"eax"); # tp[0]= - &mov ($sbit,"edx"); - &shr ("edx",1); - &and ($sbit,1); - &inc ($j); -&set_label("sqr",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[0] - &add ("eax",$carry); - &lea ($j,&DWP(1,$j)); - &adc ("edx",0); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &shr ("eax",31); - &cmp ($j,$_num); - &mov ($sbit,"eax"); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &jl (&label("sqr")); - - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1] - &mov ($carry,"edx"); - &mul ($word); # ap[num-1]*ap[0] - &add ("eax",$carry); - &mov ($word,$_n0); - &adc ("edx",0); - &mov ($inp,$_np); - &lea ($carry,&DWP(0,$sbit,"eax",2)); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - &shr ("eax",31); - &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]= - - &lea ($carry,&DWP(0,"eax","edx",2)); - &mov ("eax",&DWP(0,$inp)); # np[0] - &shr ("edx",31); - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]= - &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &mov ($num,$j); - &adc ("edx",0); - &mov ("eax",&DWP(4,$inp)); # np[1] - &mov ($j,1); - -&set_label("3rdmadd",16); - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1] - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]= - - &mov ($carry,"edx"); - &mul ($word); # np[j+1]*m - &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1] - &lea ($j,&DWP(2,$j)); - &adc ("edx",0); - &add ($carry,"eax"); - &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2] - &adc ("edx",0); - &cmp ($j,$num); - &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]= - &jl (&label("3rdmadd")); - - &mov ($carry,"edx"); - &mul ($word); # np[j]*m - &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1] - &adc ("edx",0); - &add ($carry,"eax"); - &adc ("edx",0); - &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]= - - &mov ($j,$_bp); # i - &xor ("eax","eax"); - &mov ($inp,$_ap); - &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num] - &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1] - &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]= - &cmp ($j,$num); - &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= - &je (&label("common_tail")); - - &mov ($word,&DWP(4,$inp,$j,4)); # ap[i] - &lea ($j,&DWP(1,$j)); - &mov ("eax",$word); - &mov ($_bp,$j); # ++i - &mul ($word); # ap[i]*ap[i] - &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i] - &adc ("edx",0); - &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]= - &xor ($carry,$carry); - &cmp ($j,$num); - &lea ($j,&DWP(1,$j)); - &je (&label("sqrlast")); - - &mov ($sbit,"edx"); # zaps $num - &shr ("edx",1); - &and ($sbit,1); -&set_label("sqradd",16); - &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j] - &mov ($carry,"edx"); - &mul ($word); # ap[j]*ap[i] - &add ("eax",$carry); - &lea ($carry,&DWP(0,"eax","eax")); - &adc ("edx",0); - &shr ("eax",31); - &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j] - &lea ($j,&DWP(1,$j)); - &adc ("eax",0); - &add ($carry,$sbit); - &adc ("eax",0); - &cmp ($j,$_num); - &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]= - &mov ($sbit,"eax"); - &jle (&label("sqradd")); - - &mov ($carry,"edx"); - &add ("edx","edx"); - &shr ($carry,31); - &add ("edx",$sbit); - &adc ($carry,0); -&set_label("sqrlast"); - &mov ($word,$_n0); - &mov ($inp,$_np); - &imul ($word,&DWP($frame,"esp")); # n0*tp[0] - - &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num] - &mov ("eax",&DWP(0,$inp)); # np[0] - &adc ($carry,0); - &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]= - &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]= - - &mul ($word); # np[0]*m - &add ("eax",&DWP($frame,"esp")); # +=tp[0] - &lea ($num,&DWP(-1,$j)); - &adc ("edx",0); - &mov ($j,1); - &mov ("eax",&DWP(4,$inp)); # np[1] - - &jmp (&label("3rdmadd")); -} - -&set_label("common_tail",16); - &mov ($np,$_np); # load modulus pointer - &mov ($rp,$_rp); # load result pointer - &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped] - - &mov ("eax",&DWP(0,$tp)); # tp[0] - &mov ($j,$num); # j=num-1 - &xor ($i,$i); # i=0 and clear CF! - -&set_label("sub",16); - &sbb ("eax",&DWP(0,$np,$i,4)); - &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i] - &dec ($j); # doesn't affect CF! - &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1] - &lea ($i,&DWP(1,$i)); # i++ - &jge (&label("sub")); - - &sbb ("eax",0); # handle upmost overflow bit - &and ($tp,"eax"); - ¬ ("eax"); - &mov ($np,$rp); - &and ($np,"eax"); - &or ($tp,$np); # tp=carry?tp:rp - -&set_label("copy",16); # copy or in-place refresh - &mov ("eax",&DWP(0,$tp,$num,4)); - &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i] - &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector - &dec ($num); - &jge (&label("copy")); - - &mov ("esp",$_sp); # pull saved stack pointer - &mov ("eax",1); -&set_label("just_leave"); -&function_end("bn_mul_mont"); - -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86.pl deleted file mode 100644 index 1bc4f1bb27..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86.pl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/local/bin/perl - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; - -require("x86/mul_add.pl"); -require("x86/mul.pl"); -require("x86/sqr.pl"); -require("x86/div.pl"); -require("x86/add.pl"); -require("x86/sub.pl"); -require("x86/comba.pl"); - -&asm_init($ARGV[0],$0); - -&bn_mul_add_words("bn_mul_add_words"); -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_div_words("bn_div_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_mul_comba("bn_mul_comba8",8); -&bn_mul_comba("bn_mul_comba4",4); -&bn_sqr_comba("bn_sqr_comba8",8); -&bn_sqr_comba("bn_sqr_comba4",4); - -&asm_finish(); - diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/add.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/add.pl deleted file mode 100644 index 0b5cf583e3..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/add.pl +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_add_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &add($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &add($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/comba.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/comba.pl deleted file mode 100644 index 2291253629..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/comba.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub mul_add_c - { - local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("mul a[$ai]*b[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - &mul("edx"); - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a - &mov("eax",&wparam(0)) if $pos > 0; # load r[] - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b - &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a - } - -sub sqr_add_c - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$b,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add($c0,"eax"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - ### - &adc($c1,"edx"); - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); - ### - &adc($c2,0); - # is pos > 1, it means it is the last loop - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - } - -sub sqr_add_c2 - { - local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; - - # pos == -1 if eax and edx are pre-loaded, 0 to load from next - # words, and 1 if load return value - - &comment("sqr a[$ai]*a[$bi]"); - - # "eax" and "edx" will always be pre-loaded. - # &mov("eax",&DWP($ai*4,$a,"",0)) ; - # &mov("edx",&DWP($bi*4,$a,"",0)); - - if ($ai == $bi) - { &mul("eax");} - else - { &mul("edx");} - &add("eax","eax"); - ### - &adc("edx","edx"); - ### - &adc($c2,0); - &add($c0,"eax"); - &adc($c1,"edx"); - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a - &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b - &adc($c2,0); - &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; - &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); - ### - } - -sub bn_mul_comba - { - local($name,$num)=@_; - local($a,$b,$c0,$c1,$c2); - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($tot,$end); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $b="edi"; - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - &push("esi"); - &mov($a,&wparam(1)); - &push("edi"); - &mov($b,&wparam(2)); - &push("ebp"); - &push("ebx"); - - &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("################## Calculate word $i"); - - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($j+1) == $end) - { - $v=1; - $v=2 if (($i+1) == $tot); - } - else - { $v=0; } - if (($j+1) != $end) - { - $na=($ai-1); - $nb=($bi+1); - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } -#printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; - &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); - if ($v) - { - &comment("saved r[$i]"); - # &mov("eax",&wparam(0)); - # &mov(&DWP($i*4,"eax","",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &comment("save r[$i]"); - # &mov("eax",&wparam(0)); - &mov(&DWP($i*4,"eax","",0),$c0); - - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - -sub bn_sqr_comba - { - local($name,$num)=@_; - local($r,$a,$c0,$c1,$c2)=@_; - local($i,$as,$ae,$bs,$be,$ai,$bi); - local($b,$tot,$end,$half); - - &function_begin_B($name,""); - - $c0="ebx"; - $c1="ecx"; - $c2="ebp"; - $a="esi"; - $r="edi"; - - &push("esi"); - &push("edi"); - &push("ebp"); - &push("ebx"); - &mov($r,&wparam(0)); - &mov($a,&wparam(1)); - &xor($c0,$c0); - &xor($c1,$c1); - &mov("eax",&DWP(0,$a,"",0)); # load the first word - - $as=0; - $ae=0; - $bs=0; - $be=0; - $tot=$num+$num-1; - - for ($i=0; $i<$tot; $i++) - { - $ai=$as; - $bi=$bs; - $end=$be+1; - - &comment("############### Calculate word $i"); - for ($j=$bs; $j<$end; $j++) - { - &xor($c2,$c2) if ($j == $bs); - if (($ai-1) < ($bi+1)) - { - $v=1; - $v=2 if ($i+1) == $tot; - } - else - { $v=0; } - if (!$v) - { - $na=$ai-1; - $nb=$bi+1; - } - else - { - $na=$as+($i < ($num-1)); - $nb=$bs+($i >= ($num-1)); - } - if ($ai == $bi) - { - &sqr_add_c($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - else - { - &sqr_add_c2($r,$a,$ai,$bi, - $c0,$c1,$c2,$v,$i,$na,$nb); - } - if ($v) - { - &comment("saved r[$i]"); - #&mov(&DWP($i*4,$r,"",0),$c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - last; - } - $ai--; - $bi++; - } - $as++ if ($i < ($num-1)); - $ae++ if ($i >= ($num-1)); - - $bs++ if ($i >= ($num-1)); - $be++ if ($i < ($num-1)); - } - &mov(&DWP($i*4,$r,"",0),$c0); - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/div.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/div.pl deleted file mode 100644 index 0e90152caa..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/div.pl +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_div_words - { - local($name)=@_; - - &function_begin($name,""); - &mov("edx",&wparam(0)); # - &mov("eax",&wparam(1)); # - &mov("ebx",&wparam(2)); # - &div("ebx"); - &function_end($name); - } -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/mul.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/mul.pl deleted file mode 100644 index 674cb9b055..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/mul.pl +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_mul_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ecx"; - $r="edi"; - $c="esi"; - $num="ebp"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - &mov($w,&wparam(3)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("mw_finish")); - - &set_label("mw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a,"",0)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); - - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jz(&label("mw_finish")); - &jmp(&label("mw_loop")); - - &set_label("mw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jnz(&label("mw_finish2")); - &jmp(&label("mw_end")); - - &set_label("mw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - # XXX - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); - &mov($c,"edx"); # c= H(t); - &dec($num) if ($i != 7-1); - &jz(&label("mw_end")) if ($i != 7-1); - } - &set_label("mw_end",0); - &mov("eax",$c); - - &function_end($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/mul_add.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/mul_add.pl deleted file mode 100644 index 61830d3a90..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/mul_add.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_mul_add_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); - - &set_label("maw_loop",0); - - &mov(&swtmp(0),"ecx"); # - - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - - &mov("eax",&DWP($i,$a,"",0)); # *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+= *r - &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &add("eax",$c); # L(t)+=c - &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - } - - &comment(""); - &mov("ecx",&swtmp(0)); # - &add($a,32); - &add($r,32); - &sub("ecx",8); - &jnz(&label("maw_loop")); - - &set_label("maw_finish",0); - &mov("ecx",&wparam(2)); # get num - &and("ecx",7); - &jnz(&label("maw_finish2")); # helps branch prediction - &jmp(&label("maw_end")); - - &set_label("maw_finish2",1); - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a - &mul($w); # *a * w - &add("eax",$c); # L(t)+=c - &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r - &adc("edx",0); # H(t)+=carry - &add("eax",$c); - &adc("edx",0); # H(t)+=carry - &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); - &jz(&label("maw_end")) if ($i != 7-1); - } - &set_label("maw_end",0); - &mov("eax",$c); - - &pop("ecx"); # clear variable from - - &function_end($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/sqr.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/sqr.pl deleted file mode 100644 index 1f90993cf6..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/sqr.pl +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_sqr_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $r="esi"; - $a="edi"; - $num="ebx"; - - &mov($r,&wparam(0)); # - &mov($a,&wparam(1)); # - &mov($num,&wparam(2)); # - - &and($num,0xfffffff8); # num / 8 - &jz(&label("sw_finish")); - - &set_label("sw_loop",0); - for ($i=0; $i<32; $i+=4) - { - &comment("Round $i"); - &mov("eax",&DWP($i,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*2,$r,"",0),"eax"); # - &mov(&DWP($i*2+4,$r,"",0),"edx");# - } - - &comment(""); - &add($a,32); - &add($r,64); - &sub($num,8); - &jnz(&label("sw_loop")); - - &set_label("sw_finish",0); - &mov($num,&wparam(2)); # get num - &and($num,7); - &jz(&label("sw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0)); # *a - # XXX - &mul("eax"); # *a * *a - &mov(&DWP($i*8,$r,"",0),"eax"); # - &dec($num) if ($i != 7-1); - &mov(&DWP($i*8+4,$r,"",0),"edx"); - &jz(&label("sw_end")) if ($i != 7-1); - } - &set_label("sw_end",0); - - &function_end($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86/sub.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86/sub.pl deleted file mode 100644 index 837b0e1b07..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86/sub.pl +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/local/bin/perl -# x86 assember - -sub bn_sub_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - -1; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-gcc.c b/drivers/builtin_openssl2/crypto/bn/asm/x86_64-gcc.c index 0a5bb285a1..d77dc433d4 100644 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-gcc.c +++ b/drivers/builtin_openssl2/crypto/bn/asm/x86_64-gcc.c @@ -55,7 +55,7 @@ * machine. */ -# ifdef _WIN64 +# if defined(_WIN64) || !defined(__LP64__) # define BN_ULONG unsigned long long # else # define BN_ULONG unsigned long @@ -63,7 +63,6 @@ # undef mul # undef mul_add -# undef sqr /*- * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; @@ -99,8 +98,8 @@ : "cc"); \ (r)=carry, carry=high; \ } while (0) - -# define sqr(r0,r1,a) \ +# undef sqr +# define sqr(r0,r1,a) \ asm ("mulq %2" \ : "=a"(r0),"=d"(r1) \ : "a"(a) \ @@ -204,20 +203,22 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int n) { - BN_ULONG ret = 0, i = 0; + BN_ULONG ret; + size_t i = 0; if (n <= 0) return 0; - asm volatile (" subq %2,%2 \n" + asm volatile (" subq %0,%0 \n" /* clear carry */ + " jmp 1f \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" - " leaq 1(%2),%2 \n" + " lea 1(%2),%2 \n" " loop 1b \n" - " sbbq %0,%0 \n":"=&a" (ret), "+c"(n), - "=&r"(i) + " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), + "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -228,20 +229,22 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int n) { - BN_ULONG ret = 0, i = 0; + BN_ULONG ret; + size_t i = 0; if (n <= 0) return 0; - asm volatile (" subq %2,%2 \n" + asm volatile (" subq %0,%0 \n" /* clear borrow */ + " jmp 1f \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" - " leaq 1(%2),%2 \n" + " lea 1(%2),%2 \n" " loop 1b \n" - " sbbq %0,%0 \n":"=&a" (ret), "+c"(n), - "=&r"(i) + " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), + "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -313,55 +316,58 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) */ # if 0 /* original macros are kept for reference purposes */ -# define mul_add_c(a,b,c0,c1,c2) { \ - BN_ULONG ta=(a),tb=(b); \ - t1 = ta * tb; \ - t2 = BN_UMULT_HIGH(ta,tb); \ - c0 += t1; t2 += (c0 for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# May 2011 -# -# The module implements bn_GF2m_mul_2x2 polynomial multiplication used -# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for -# the time being... Except that it has two code paths: code suitable -# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and -# later. Improvement varies from one benchmark and µ-arch to another. -# Vanilla code path is at most 20% faster than compiler-generated code -# [not very impressive], while PCLMULQDQ - whole 85%-160% better on -# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that -# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not -# all CPU time is burnt in it... - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -($lo,$hi)=("%rax","%rdx"); $a=$lo; -($i0,$i1)=("%rsi","%rdi"); -($t0,$t1)=("%rbx","%rcx"); -($b,$mask)=("%rbp","%r8"); -($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15)); -($R,$Tx)=("%xmm0","%xmm1"); - -$code.=<<___; -.text - -.type _mul_1x1,\@abi-omnipotent -.align 16 -_mul_1x1: - sub \$128+8,%rsp - mov \$-1,$a1 - lea ($a,$a),$i0 - shr \$3,$a1 - lea (,$a,4),$i1 - and $a,$a1 # a1=a&0x1fffffffffffffff - lea (,$a,8),$a8 - sar \$63,$a # broadcast 63rd bit - lea ($a1,$a1),$a2 - sar \$63,$i0 # broadcast 62nd bit - lea (,$a1,4),$a4 - and $b,$a - sar \$63,$i1 # boardcast 61st bit - mov $a,$hi # $a is $lo - shl \$63,$lo - and $b,$i0 - shr \$1,$hi - mov $i0,$t1 - shl \$62,$i0 - and $b,$i1 - shr \$2,$t1 - xor $i0,$lo - mov $i1,$t0 - shl \$61,$i1 - xor $t1,$hi - shr \$3,$t0 - xor $i1,$lo - xor $t0,$hi - - mov $a1,$a12 - movq \$0,0(%rsp) # tab[0]=0 - xor $a2,$a12 # a1^a2 - mov $a1,8(%rsp) # tab[1]=a1 - mov $a4,$a48 - mov $a2,16(%rsp) # tab[2]=a2 - xor $a8,$a48 # a4^a8 - mov $a12,24(%rsp) # tab[3]=a1^a2 - - xor $a4,$a1 - mov $a4,32(%rsp) # tab[4]=a4 - xor $a4,$a2 - mov $a1,40(%rsp) # tab[5]=a1^a4 - xor $a4,$a12 - mov $a2,48(%rsp) # tab[6]=a2^a4 - xor $a48,$a1 # a1^a4^a4^a8=a1^a8 - mov $a12,56(%rsp) # tab[7]=a1^a2^a4 - xor $a48,$a2 # a2^a4^a4^a8=a1^a8 - - mov $a8,64(%rsp) # tab[8]=a8 - xor $a48,$a12 # a1^a2^a4^a4^a8=a1^a2^a8 - mov $a1,72(%rsp) # tab[9]=a1^a8 - xor $a4,$a1 # a1^a8^a4 - mov $a2,80(%rsp) # tab[10]=a2^a8 - xor $a4,$a2 # a2^a8^a4 - mov $a12,88(%rsp) # tab[11]=a1^a2^a8 - - xor $a4,$a12 # a1^a2^a8^a4 - mov $a48,96(%rsp) # tab[12]=a4^a8 - mov $mask,$i0 - mov $a1,104(%rsp) # tab[13]=a1^a4^a8 - and $b,$i0 - mov $a2,112(%rsp) # tab[14]=a2^a4^a8 - shr \$4,$b - mov $a12,120(%rsp) # tab[15]=a1^a2^a4^a8 - mov $mask,$i1 - and $b,$i1 - shr \$4,$b - - movq (%rsp,$i0,8),$R # half of calculations is done in SSE2 - mov $mask,$i0 - and $b,$i0 - shr \$4,$b -___ - for ($n=1;$n<8;$n++) { - $code.=<<___; - mov (%rsp,$i1,8),$t1 - mov $mask,$i1 - mov $t1,$t0 - shl \$`8*$n-4`,$t1 - and $b,$i1 - movq (%rsp,$i0,8),$Tx - shr \$`64-(8*$n-4)`,$t0 - xor $t1,$lo - pslldq \$$n,$Tx - mov $mask,$i0 - shr \$4,$b - xor $t0,$hi - and $b,$i0 - shr \$4,$b - pxor $Tx,$R -___ - } -$code.=<<___; - mov (%rsp,$i1,8),$t1 - mov $t1,$t0 - shl \$`8*$n-4`,$t1 - movq $R,$i0 - shr \$`64-(8*$n-4)`,$t0 - xor $t1,$lo - psrldq \$8,$R - xor $t0,$hi - movq $R,$i1 - xor $i0,$lo - xor $i1,$hi - - add \$128+8,%rsp - ret -.Lend_mul_1x1: -.size _mul_1x1,.-_mul_1x1 -___ - -($rp,$a1,$a0,$b1,$b0) = $win64? ("%rcx","%rdx","%r8", "%r9","%r10") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx","%r8"); # Unix order - -$code.=<<___; -.extern OPENSSL_ia32cap_P -.globl bn_GF2m_mul_2x2 -.type bn_GF2m_mul_2x2,\@abi-omnipotent -.align 16 -bn_GF2m_mul_2x2: - mov OPENSSL_ia32cap_P(%rip),%rax - bt \$33,%rax - jnc .Lvanilla_mul_2x2 - - movq $a1,%xmm0 - movq $b1,%xmm1 - movq $a0,%xmm2 -___ -$code.=<<___ if ($win64); - movq 40(%rsp),%xmm3 -___ -$code.=<<___ if (!$win64); - movq $b0,%xmm3 -___ -$code.=<<___; - movdqa %xmm0,%xmm4 - movdqa %xmm1,%xmm5 - pclmulqdq \$0,%xmm1,%xmm0 # a1·b1 - pxor %xmm2,%xmm4 - pxor %xmm3,%xmm5 - pclmulqdq \$0,%xmm3,%xmm2 # a0·b0 - pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1) - xorps %xmm0,%xmm4 - xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1 - movdqa %xmm4,%xmm5 - pslldq \$8,%xmm4 - psrldq \$8,%xmm5 - pxor %xmm4,%xmm2 - pxor %xmm5,%xmm0 - movdqu %xmm2,0($rp) - movdqu %xmm0,16($rp) - ret - -.align 16 -.Lvanilla_mul_2x2: - lea -8*17(%rsp),%rsp -___ -$code.=<<___ if ($win64); - mov `8*17+40`(%rsp),$b0 - mov %rdi,8*15(%rsp) - mov %rsi,8*16(%rsp) -___ -$code.=<<___; - mov %r14,8*10(%rsp) - mov %r13,8*11(%rsp) - mov %r12,8*12(%rsp) - mov %rbp,8*13(%rsp) - mov %rbx,8*14(%rsp) -.Lbody_mul_2x2: - mov $rp,32(%rsp) # save the arguments - mov $a1,40(%rsp) - mov $a0,48(%rsp) - mov $b1,56(%rsp) - mov $b0,64(%rsp) - - mov \$0xf,$mask - mov $a1,$a - mov $b1,$b - call _mul_1x1 # a1·b1 - mov $lo,16(%rsp) - mov $hi,24(%rsp) - - mov 48(%rsp),$a - mov 64(%rsp),$b - call _mul_1x1 # a0·b0 - mov $lo,0(%rsp) - mov $hi,8(%rsp) - - mov 40(%rsp),$a - mov 56(%rsp),$b - xor 48(%rsp),$a - xor 64(%rsp),$b - call _mul_1x1 # (a0+a1)·(b0+b1) -___ - @r=("%rbx","%rcx","%rdi","%rsi"); -$code.=<<___; - mov 0(%rsp),@r[0] - mov 8(%rsp),@r[1] - mov 16(%rsp),@r[2] - mov 24(%rsp),@r[3] - mov 32(%rsp),%rbp - - xor $hi,$lo - xor @r[1],$hi - xor @r[0],$lo - mov @r[0],0(%rbp) - xor @r[2],$hi - mov @r[3],24(%rbp) - xor @r[3],$lo - xor @r[3],$hi - xor $hi,$lo - mov $hi,16(%rbp) - mov $lo,8(%rbp) - - mov 8*10(%rsp),%r14 - mov 8*11(%rsp),%r13 - mov 8*12(%rsp),%r12 - mov 8*13(%rsp),%rbp - mov 8*14(%rsp),%rbx -___ -$code.=<<___ if ($win64); - mov 8*15(%rsp),%rdi - mov 8*16(%rsp),%rsi -___ -$code.=<<___; - lea 8*17(%rsp),%rsp - ret -.Lend_mul_2x2: -.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by " -.align 16 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind - -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 152($context),%rax # pull context->Rsp - mov 248($context),%rbx # pull context->Rip - - lea .Lbody_mul_2x2(%rip),%r10 - cmp %r10,%rbx # context->Rip<"prologue" label - jb .Lin_prologue - - mov 8*10(%rax),%r14 # mimic epilogue - mov 8*11(%rax),%r13 - mov 8*12(%rax),%r12 - mov 8*13(%rax),%rbp - mov 8*14(%rax),%rbx - mov 8*15(%rax),%rdi - mov 8*16(%rax),%rsi - - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - -.Lin_prologue: - lea 8*17(%rax),%rax - mov %rax,152($context) # restore context->Rsp - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva _mul_1x1 - .rva .Lend_mul_1x1 - .rva .LSEH_info_1x1 - - .rva .Lvanilla_mul_2x2 - .rva .Lend_mul_2x2 - .rva .LSEH_info_2x2 -.section .xdata -.align 8 -.LSEH_info_1x1: - .byte 0x01,0x07,0x02,0x00 - .byte 0x07,0x01,0x11,0x00 # sub rsp,128+8 -.LSEH_info_2x2: - .byte 9,0,0,0 - .rva se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont.pl deleted file mode 100755 index 17fb94c84c..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont.pl +++ /dev/null @@ -1,1681 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005. -# -# Montgomery multiplication routine for x86_64. While it gives modest -# 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more -# than twice, >2x, as fast. Most common rsa1024 sign is improved by -# respectful 50%. It remains to be seen if loop unrolling and -# dedicated squaring routine can provide further improvement... - -# July 2011. -# -# Add dedicated squaring procedure. Performance improvement varies -# from platform to platform, but in average it's ~5%/15%/25%/33% -# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively. - -# August 2011. -# -# Unroll and modulo-schedule inner loops in such manner that they -# are "fallen through" for input lengths of 8, which is critical for -# 1024-bit RSA *sign*. Average performance improvement in comparison -# to *initial* version of this module from 2005 is ~0%/30%/40%/45% -# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -# int bn_mul_mont( -$rp="%rdi"; # BN_ULONG *rp, -$ap="%rsi"; # const BN_ULONG *ap, -$bp="%rdx"; # const BN_ULONG *bp, -$np="%rcx"; # const BN_ULONG *np, -$n0="%r8"; # const BN_ULONG *n0, -$num="%r9"; # int num); -$lo0="%r10"; -$hi0="%r11"; -$hi1="%r13"; -$i="%r14"; -$j="%r15"; -$m0="%rbx"; -$m1="%rbp"; - -$code=<<___; -.text - -.globl bn_mul_mont -.type bn_mul_mont,\@function,6 -.align 16 -bn_mul_mont: - test \$3,${num}d - jnz .Lmul_enter - cmp \$8,${num}d - jb .Lmul_enter - cmp $ap,$bp - jne .Lmul4x_enter - jmp .Lsqr4x_enter - -.align 16 -.Lmul_enter: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - mov ${num}d,${num}d - lea 2($num),%r10 - mov %rsp,%r11 - neg %r10 - lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2)) - and \$-1024,%rsp # minimize TLB usage - - mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp -.Lmul_body: - mov $bp,%r12 # reassign $bp -___ - $bp="%r12"; -$code.=<<___; - mov ($n0),$n0 # pull n0[0] value - mov ($bp),$m0 # m0=bp[0] - mov ($ap),%rax - - xor $i,$i # i=0 - xor $j,$j # j=0 - - mov $n0,$m1 - mulq $m0 # ap[0]*bp[0] - mov %rax,$lo0 - mov ($np),%rax - - imulq $lo0,$m1 # "tp[0]"*n0 - mov %rdx,$hi0 - - mulq $m1 # np[0]*m1 - add %rax,$lo0 # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$hi1 - - lea 1($j),$j # j++ - jmp .L1st_enter - -.align 16 -.L1st: - add %rax,$hi1 - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] - mov $lo0,$hi0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - -.L1st_enter: - mulq $m0 # ap[j]*bp[0] - add %rax,$hi0 - mov ($np,$j,8),%rax - adc \$0,%rdx - lea 1($j),$j # j++ - mov %rdx,$lo0 - - mulq $m1 # np[j]*m1 - cmp $num,$j - jne .L1st - - add %rax,$hi1 - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - mov $lo0,$hi0 - - xor %rdx,%rdx - add $hi0,$hi1 - adc \$0,%rdx - mov $hi1,-8(%rsp,$num,8) - mov %rdx,(%rsp,$num,8) # store upmost overflow bit - - lea 1($i),$i # i++ - jmp .Louter -.align 16 -.Louter: - mov ($bp,$i,8),$m0 # m0=bp[i] - xor $j,$j # j=0 - mov $n0,$m1 - mov (%rsp),$lo0 - mulq $m0 # ap[0]*bp[i] - add %rax,$lo0 # ap[0]*bp[i]+tp[0] - mov ($np),%rax - adc \$0,%rdx - - imulq $lo0,$m1 # tp[0]*n0 - mov %rdx,$hi0 - - mulq $m1 # np[0]*m1 - add %rax,$lo0 # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov 8(%rsp),$lo0 # tp[1] - mov %rdx,$hi1 - - lea 1($j),$j # j++ - jmp .Linner_enter - -.align 16 -.Linner: - add %rax,$hi1 - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] - mov (%rsp,$j,8),$lo0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - -.Linner_enter: - mulq $m0 # ap[j]*bp[i] - add %rax,$hi0 - mov ($np,$j,8),%rax - adc \$0,%rdx - add $hi0,$lo0 # ap[j]*bp[i]+tp[j] - mov %rdx,$hi0 - adc \$0,$hi0 - lea 1($j),$j # j++ - - mulq $m1 # np[j]*m1 - cmp $num,$j - jne .Linner - - add %rax,$hi1 - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] - mov (%rsp,$j,8),$lo0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - - xor %rdx,%rdx - add $hi0,$hi1 - adc \$0,%rdx - add $lo0,$hi1 # pull upmost overflow bit - adc \$0,%rdx - mov $hi1,-8(%rsp,$num,8) - mov %rdx,(%rsp,$num,8) # store upmost overflow bit - - lea 1($i),$i # i++ - cmp $num,$i - jl .Louter - - xor $i,$i # i=0 and clear CF! - mov (%rsp),%rax # tp[0] - lea (%rsp),$ap # borrow ap for tp - mov $num,$j # j=num - jmp .Lsub -.align 16 -.Lsub: sbb ($np,$i,8),%rax - mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 8($ap,$i,8),%rax # tp[i+1] - lea 1($i),$i # i++ - dec $j # doesnn't affect CF! - jnz .Lsub - - sbb \$0,%rax # handle upmost overflow bit - xor $i,$i - and %rax,$ap - not %rax - mov $rp,$np - and %rax,$np - mov $num,$j # j=num - or $np,$ap # ap=borrow?tp:rp -.align 16 -.Lcopy: # copy or in-place refresh - mov ($ap,$i,8),%rax - mov $i,(%rsp,$i,8) # zap temporary vector - mov %rax,($rp,$i,8) # rp[i]=tp[i] - lea 1($i),$i - sub \$1,$j - jnz .Lcopy - - mov 8(%rsp,$num,8),%rsi # restore %rsp - mov \$1,%rax - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lmul_epilogue: - ret -.size bn_mul_mont,.-bn_mul_mont -___ -{{{ -my @A=("%r10","%r11"); -my @N=("%r13","%rdi"); -$code.=<<___; -.type bn_mul4x_mont,\@function,6 -.align 16 -bn_mul4x_mont: -.Lmul4x_enter: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - mov ${num}d,${num}d - lea 4($num),%r10 - mov %rsp,%r11 - neg %r10 - lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+4)) - and \$-1024,%rsp # minimize TLB usage - - mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp -.Lmul4x_body: - mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp - mov %rdx,%r12 # reassign $bp -___ - $bp="%r12"; -$code.=<<___; - mov ($n0),$n0 # pull n0[0] value - mov ($bp),$m0 # m0=bp[0] - mov ($ap),%rax - - xor $i,$i # i=0 - xor $j,$j # j=0 - - mov $n0,$m1 - mulq $m0 # ap[0]*bp[0] - mov %rax,$A[0] - mov ($np),%rax - - imulq $A[0],$m1 # "tp[0]"*n0 - mov %rdx,$A[1] - - mulq $m1 # np[0]*m1 - add %rax,$A[0] # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$N[1] - - mulq $m0 - add %rax,$A[1] - mov 8($np),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 - add %rax,$N[1] - mov 16($ap),%rax - adc \$0,%rdx - add $A[1],$N[1] - lea 4($j),$j # j++ - adc \$0,%rdx - mov $N[1],(%rsp) - mov %rdx,$N[0] - jmp .L1st4x -.align 16 -.L1st4x: - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov ($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov 8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-8(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov 8($np,$j,8),%rax - adc \$0,%rdx - lea 4($j),$j # j++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov -16($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - cmp $num,$j - jl .L1st4x - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - xor $N[1],$N[1] - add $A[0],$N[0] - adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit - - lea 1($i),$i # i++ -.align 4 -.Louter4x: - mov ($bp,$i,8),$m0 # m0=bp[i] - xor $j,$j # j=0 - mov (%rsp),$A[0] - mov $n0,$m1 - mulq $m0 # ap[0]*bp[i] - add %rax,$A[0] # ap[0]*bp[i]+tp[0] - mov ($np),%rax - adc \$0,%rdx - - imulq $A[0],$m1 # tp[0]*n0 - mov %rdx,$A[1] - - mulq $m1 # np[0]*m1 - add %rax,$A[0] # "$N[0]", discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov 8($np),%rax - adc \$0,%rdx - add 8(%rsp),$A[1] # +tp[1] - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov 16($ap),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] - lea 4($j),$j # j+=2 - adc \$0,%rdx - mov $N[1],(%rsp) # tp[j-1] - mov %rdx,$N[0] - jmp .Linner4x -.align 16 -.Linner4x: - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov ($np,$j,8),%rax - adc \$0,%rdx - add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov 8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[0],-8(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov 8($np,$j,8),%rax - adc \$0,%rdx - add 8(%rsp,$j,8),$A[1] - adc \$0,%rdx - lea 4($j),$j # j++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov -16($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - cmp $num,$j - jl .Linner4x - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] - adc \$0,%rdx - lea 1($i),$i # i++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - xor $N[1],$N[1] - add $A[0],$N[0] - adc \$0,$N[1] - add (%rsp,$num,8),$N[0] # pull upmost overflow bit - adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit - - cmp $num,$i - jl .Louter4x -___ -{ -my @ri=("%rax","%rdx",$m0,$m1); -$code.=<<___; - mov 16(%rsp,$num,8),$rp # restore $rp - mov 0(%rsp),@ri[0] # tp[0] - pxor %xmm0,%xmm0 - mov 8(%rsp),@ri[1] # tp[1] - shr \$2,$num # num/=4 - lea (%rsp),$ap # borrow ap for tp - xor $i,$i # i=0 and clear CF! - - sub 0($np),@ri[0] - mov 16($ap),@ri[2] # tp[2] - mov 24($ap),@ri[3] # tp[3] - sbb 8($np),@ri[1] - lea -1($num),$j # j=num/4-1 - jmp .Lsub4x -.align 16 -.Lsub4x: - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 16($np,$i,8),@ri[2] - mov 32($ap,$i,8),@ri[0] # tp[i+1] - mov 40($ap,$i,8),@ri[1] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 32($np,$i,8),@ri[0] - mov 48($ap,$i,8),@ri[2] - mov 56($ap,$i,8),@ri[3] - sbb 40($np,$i,8),@ri[1] - lea 4($i),$i # i++ - dec $j # doesnn't affect CF! - jnz .Lsub4x - - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 32($ap,$i,8),@ri[0] # load overflow bit - sbb 16($np,$i,8),@ri[2] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] - - sbb \$0,@ri[0] # handle upmost overflow bit - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$ap - not @ri[0] - mov $rp,$np - and @ri[0],$np - lea -1($num),$j - or $np,$ap # ap=borrow?tp:rp - - movdqu ($ap),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,($rp) - jmp .Lcopy4x -.align 16 -.Lcopy4x: # copy or in-place refresh - movdqu 16($ap,$i),%xmm2 - movdqu 32($ap,$i),%xmm1 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) - movdqa %xmm0,32(%rsp,$i) - movdqu %xmm1,32($rp,$i) - lea 32($i),$i - dec $j - jnz .Lcopy4x - - shl \$2,$num - movdqu 16($ap,$i),%xmm2 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) -___ -} -$code.=<<___; - mov 8(%rsp,$num,8),%rsi # restore %rsp - mov \$1,%rax - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lmul4x_epilogue: - ret -.size bn_mul4x_mont,.-bn_mul4x_mont -___ -}}} - {{{ -###################################################################### -# void bn_sqr4x_mont( -my $rptr="%rdi"; # const BN_ULONG *rptr, -my $aptr="%rsi"; # const BN_ULONG *aptr, -my $bptr="%rdx"; # not used -my $nptr="%rcx"; # const BN_ULONG *nptr, -my $n0 ="%r8"; # const BN_ULONG *n0); -my $num ="%r9"; # int num, has to be divisible by 4 and - # not less than 8 - -my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); -my @A0=("%r10","%r11"); -my @A1=("%r12","%r13"); -my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); - -$code.=<<___; -.type bn_sqr4x_mont,\@function,6 -.align 16 -bn_sqr4x_mont: -.Lsqr4x_enter: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - shl \$3,${num}d # convert $num to bytes - xor %r10,%r10 - mov %rsp,%r11 # put aside %rsp - sub $num,%r10 # -$num - mov ($n0),$n0 # *n0 - lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num) - and \$-1024,%rsp # minimize TLB usage - ############################################################## - # Stack layout - # - # +0 saved $num, used in reduction section - # +8 &t[2*$num], used in reduction section - # +32 saved $rptr - # +40 saved $nptr - # +48 saved *n0 - # +56 saved %rsp - # +64 t[2*$num] - # - mov $rptr,32(%rsp) # save $rptr - mov $nptr,40(%rsp) - mov $n0, 48(%rsp) - mov %r11, 56(%rsp) # save original %rsp -.Lsqr4x_body: - ############################################################## - # Squaring part: - # - # a) multiply-n-add everything but a[i]*a[i]; - # b) shift result of a) by 1 to the left and accumulate - # a[i]*a[i] products; - # - lea 32(%r10),$i # $i=-($num-32) - lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2] - - mov $num,$j # $j=$num - - # comments apply to $num==8 case - mov -32($aptr,$i),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr,$i),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr,$i),$ai # a[2] - mov %rax,$a1 - - mul $a0 # a[1]*a[0] - mov %rax,$A0[0] # a[1]*a[0] - mov $ai,%rax # a[2] - mov %rdx,$A0[1] - mov $A0[0],-24($tptr,$i) # t[1] - - xor $A0[0],$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr,$i) # t[2] - - lea -16($i),$j # j=-16 - - - mov 8($aptr,$j),$ai # a[3] - mul $a1 # a[2]*a[1] - mov %rax,$A1[0] # a[2]*a[1]+t[3] - mov $ai,%rax - mov %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 16($j),$j - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[3] - jmp .Lsqr4x_1st - -.align 16 -.Lsqr4x_1st: - mov ($aptr,$j),$ai # a[4] - xor $A1[0],$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] # a[3]*a[1]+t[4] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[4]*a[0] - add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],($tptr,$j) # t[4] - - - mov 8($aptr,$j),$ai # a[5] - xor $A1[1],$A1[1] - mul $a1 # a[4]*a[3] - add %rax,$A1[0] # a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - adc \$0,$A0[1] - mul $a0 # a[5]*a[2] - add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],8($tptr,$j) # t[5] - - mov 16($aptr,$j),$ai # a[6] - xor $A1[0],$A1[0] - mul $a1 # a[5]*a[3] - add %rax,$A1[1] # a[5]*a[3]+t[6] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[6]*a[2] - add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],16($tptr,$j) # t[6] - - - mov 24($aptr,$j),$ai # a[7] - xor $A1[1],$A1[1] - mul $a1 # a[6]*a[5] - add %rax,$A1[0] # a[6]*a[5]+t[7] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 32($j),$j - adc \$0,$A0[1] - mul $a0 # a[7]*a[4] - add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[7] - - cmp \$0,$j - jne .Lsqr4x_1st - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc \$0,$A1[0] - mul $a1 # a[7]*a[5] - add %rax,$A1[1] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[8] - lea 16($i),$i - mov $A1[0],8($tptr) # t[9] - jmp .Lsqr4x_outer - -.align 16 -.Lsqr4x_outer: # comments apply to $num==6 case - mov -32($aptr,$i),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr,$i),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr,$i),$ai # a[2] - mov %rax,$a1 - - mov -24($tptr,$i),$A0[0] # t[1] - xor $A0[1],$A0[1] - mul $a0 # a[1]*a[0] - add %rax,$A0[0] # a[1]*a[0]+t[1] - mov $ai,%rax # a[2] - adc %rdx,$A0[1] - mov $A0[0],-24($tptr,$i) # t[1] - - xor $A0[0],$A0[0] - add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2] - adc \$0,$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr,$i) # t[2] - - lea -16($i),$j # j=-16 - xor $A1[0],$A1[0] - - - mov 8($aptr,$j),$ai # a[3] - xor $A1[1],$A1[1] - add 8($tptr,$j),$A1[0] - adc \$0,$A1[1] - mul $a1 # a[2]*a[1] - add %rax,$A1[0] # a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],8($tptr,$j) # t[3] - - lea 16($j),$j - jmp .Lsqr4x_inner - -.align 16 -.Lsqr4x_inner: - mov ($aptr,$j),$ai # a[4] - xor $A1[0],$A1[0] - add ($tptr,$j),$A1[1] - adc \$0,$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] # a[3]*a[1]+t[4] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[4]*a[0] - add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],($tptr,$j) # t[4] - - mov 8($aptr,$j),$ai # a[5] - xor $A1[1],$A1[1] - add 8($tptr,$j),$A1[0] - adc \$0,$A1[1] - mul $a1 # a[4]*a[3] - add %rax,$A1[0] # a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 16($j),$j # j++ - adc \$0,$A0[1] - mul $a0 # a[5]*a[2] - add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below - - cmp \$0,$j - jne .Lsqr4x_inner - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc \$0,$A1[0] - mul $a1 # a[5]*a[3] - add %rax,$A1[1] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[6], "preloaded t[2]" below - mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below - - add \$16,$i - jnz .Lsqr4x_outer - - # comments apply to $num==4 case - mov -32($aptr),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr),$ai # a[2] - mov %rax,$a1 - - xor $A0[1],$A0[1] - mul $a0 # a[1]*a[0] - add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1] - mov $ai,%rax # a[2] - adc %rdx,$A0[1] - mov $A0[0],-24($tptr) # t[1] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2] - adc \$0,$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr) # t[2] - - mov -8($aptr),$ai # a[3] - mul $a1 # a[2]*a[1] - add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3] - mov $ai,%rax - adc \$0,%rdx - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - mov %rdx,$A1[1] - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr) # t[3] - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc \$0,$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] - mov -16($aptr),%rax # a[2] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[4] - mov $A1[0],8($tptr) # t[5] - - mul $ai # a[2]*a[3] -___ -{ -my ($shift,$carry)=($a0,$a1); -my @S=(@A1,$ai,$n0); -$code.=<<___; - add \$16,$i - xor $shift,$shift - sub $num,$i # $i=16-$num - xor $carry,$carry - - add $A1[0],%rax # t[5] - adc \$0,%rdx - mov %rax,8($tptr) # t[5] - mov %rdx,16($tptr) # t[6] - mov $carry,24($tptr) # t[7] - - mov -16($aptr,$i),%rax # a[0] - lea 64(%rsp,$num,2),$tptr - xor $A0[0],$A0[0] # t[0] - mov -24($tptr,$i,2),$A0[1] # t[1] - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr,$i,2) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],-24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 0($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],-16($tptr,$i,2) - adc %rdx,$S[3] - lea 16($i),$i - mov $S[3],-40($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - jmp .Lsqr4x_shift_n_add - -.align 16 -.Lsqr4x_shift_n_add: - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr,$i,2) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],-24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 0($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],-16($tptr,$i,2) - adc %rdx,$S[3] - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - mov $S[3],-8($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov 8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],0($tptr,$i,2) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],8($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 16($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],16($tptr,$i,2) - adc %rdx,$S[3] - mov $S[3],24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - add \$32,$i - jnz .Lsqr4x_shift_n_add - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift - mov $S[1],-24($tptr) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - adc %rax,$S[2] - adc %rdx,$S[3] - mov $S[2],-16($tptr) - mov $S[3],-8($tptr) -___ -} -############################################################## -# Montgomery reduction part, "word-by-word" algorithm. -# -{ -my ($topbit,$nptr)=("%rbp",$aptr); -my ($m0,$m1)=($a0,$a1); -my @Ni=("%rbx","%r9"); -$code.=<<___; - mov 40(%rsp),$nptr # restore $nptr - mov 48(%rsp),$n0 # restore *n0 - xor $j,$j - mov $num,0(%rsp) # save $num - sub $num,$j # $j=-$num - mov 64(%rsp),$A0[0] # t[0] # modsched # - mov $n0,$m0 # # modsched # - lea 64(%rsp,$num,2),%rax # end of t[] buffer - lea 64(%rsp,$num),$tptr # end of t[] window - mov %rax,8(%rsp) # save end of t[] buffer - lea ($nptr,$num),$nptr # end of n[] buffer - xor $topbit,$topbit # $topbit=0 - - mov 0($nptr,$j),%rax # n[0] # modsched # - mov 8($nptr,$j),$Ni[1] # n[1] # modsched # - imulq $A0[0],$m0 # m0=t[0]*n0 # modsched # - mov %rax,$Ni[0] # # modsched # - jmp .Lsqr4x_mont_outer - -.align 16 -.Lsqr4x_mont_outer: - xor $A0[1],$A0[1] - mul $m0 # n[0]*m0 - add %rax,$A0[0] # n[0]*m0+t[0] - mov $Ni[1],%rax - adc %rdx,$A0[1] - mov $n0,$m1 - - xor $A0[0],$A0[0] - add 8($tptr,$j),$A0[1] - adc \$0,$A0[0] - mul $m0 # n[1]*m0 - add %rax,$A0[1] # n[1]*m0+t[1] - mov $Ni[0],%rax - adc %rdx,$A0[0] - - imulq $A0[1],$m1 - - mov 16($nptr,$j),$Ni[0] # n[2] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[0]*m1 - add %rax,$A1[0] # n[0]*m1+"t[1]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],8($tptr,$j) # "t[1]" - - xor $A0[1],$A0[1] - add 16($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[2]*m0 - add %rax,$A0[0] # n[2]*m0+t[2] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 24($nptr,$j),$Ni[1] # n[3] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[1]*m1 - add %rax,$A1[1] # n[1]*m1+"t[2]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],16($tptr,$j) # "t[2]" - - xor $A0[0],$A0[0] - add 24($tptr,$j),$A0[1] - lea 32($j),$j - adc \$0,$A0[0] - mul $m0 # n[3]*m0 - add %rax,$A0[1] # n[3]*m0+t[3] - mov $Ni[0],%rax - adc %rdx,$A0[0] - jmp .Lsqr4x_mont_inner - -.align 16 -.Lsqr4x_mont_inner: - mov ($nptr,$j),$Ni[0] # n[4] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[2]*m1 - add %rax,$A1[0] # n[2]*m1+"t[3]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],-8($tptr,$j) # "t[3]" - - xor $A0[1],$A0[1] - add ($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[4]*m0 - add %rax,$A0[0] # n[4]*m0+t[4] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 8($nptr,$j),$Ni[1] # n[5] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[3]*m1 - add %rax,$A1[1] # n[3]*m1+"t[4]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],($tptr,$j) # "t[4]" - - xor $A0[0],$A0[0] - add 8($tptr,$j),$A0[1] - adc \$0,$A0[0] - mul $m0 # n[5]*m0 - add %rax,$A0[1] # n[5]*m0+t[5] - mov $Ni[0],%rax - adc %rdx,$A0[0] - - - mov 16($nptr,$j),$Ni[0] # n[6] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[4]*m1 - add %rax,$A1[0] # n[4]*m1+"t[5]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],8($tptr,$j) # "t[5]" - - xor $A0[1],$A0[1] - add 16($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[6]*m0 - add %rax,$A0[0] # n[6]*m0+t[6] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 24($nptr,$j),$Ni[1] # n[7] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[5]*m1 - add %rax,$A1[1] # n[5]*m1+"t[6]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],16($tptr,$j) # "t[6]" - - xor $A0[0],$A0[0] - add 24($tptr,$j),$A0[1] - lea 32($j),$j - adc \$0,$A0[0] - mul $m0 # n[7]*m0 - add %rax,$A0[1] # n[7]*m0+t[7] - mov $Ni[0],%rax - adc %rdx,$A0[0] - cmp \$0,$j - jne .Lsqr4x_mont_inner - - sub 0(%rsp),$j # $j=-$num # modsched # - mov $n0,$m0 # # modsched # - - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[6]*m1 - add %rax,$A1[0] # n[6]*m1+"t[7]" - mov $Ni[1],%rax - adc %rdx,$A1[1] - mov $A1[0],-8($tptr) # "t[7]" - - xor $A0[1],$A0[1] - add ($tptr),$A0[0] # +t[8] - adc \$0,$A0[1] - mov 0($nptr,$j),$Ni[0] # n[0] # modsched # - add $topbit,$A0[0] - adc \$0,$A0[1] - - imulq 16($tptr,$j),$m0 # m0=t[0]*n0 # modsched # - xor $A1[0],$A1[0] - mov 8($nptr,$j),$Ni[1] # n[1] # modsched # - add $A0[0],$A1[1] - mov 16($tptr,$j),$A0[0] # t[0] # modsched # - adc \$0,$A1[0] - mul $m1 # n[7]*m1 - add %rax,$A1[1] # n[7]*m1+"t[8]" - mov $Ni[0],%rax # # modsched # - adc %rdx,$A1[0] - mov $A1[1],($tptr) # "t[8]" - - xor $topbit,$topbit - add 8($tptr),$A1[0] # +t[9] - adc $topbit,$topbit - add $A0[1],$A1[0] - lea 16($tptr),$tptr # "t[$num]>>128" - adc \$0,$topbit - mov $A1[0],-8($tptr) # "t[9]" - cmp 8(%rsp),$tptr # are we done? - jb .Lsqr4x_mont_outer - - mov 0(%rsp),$num # restore $num - mov $topbit,($tptr) # save $topbit -___ -} -############################################################## -# Post-condition, 4x unrolled copy from bn_mul_mont -# -{ -my ($tptr,$nptr)=("%rbx",$aptr); -my @ri=("%rax","%rdx","%r10","%r11"); -$code.=<<___; - mov 64(%rsp,$num),@ri[0] # tp[0] - lea 64(%rsp,$num),$tptr # upper half of t[2*$num] holds result - mov 40(%rsp),$nptr # restore $nptr - shr \$5,$num # num/4 - mov 8($tptr),@ri[1] # t[1] - xor $i,$i # i=0 and clear CF! - - mov 32(%rsp),$rptr # restore $rptr - sub 0($nptr),@ri[0] - mov 16($tptr),@ri[2] # t[2] - mov 24($tptr),@ri[3] # t[3] - sbb 8($nptr),@ri[1] - lea -1($num),$j # j=num/4-1 - jmp .Lsqr4x_sub -.align 16 -.Lsqr4x_sub: - mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 16($nptr,$i,8),@ri[2] - mov 32($tptr,$i,8),@ri[0] # tp[i+1] - mov 40($tptr,$i,8),@ri[1] - sbb 24($nptr,$i,8),@ri[3] - mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 32($nptr,$i,8),@ri[0] - mov 48($tptr,$i,8),@ri[2] - mov 56($tptr,$i,8),@ri[3] - sbb 40($nptr,$i,8),@ri[1] - lea 4($i),$i # i++ - dec $j # doesn't affect CF! - jnz .Lsqr4x_sub - - mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov 32($tptr,$i,8),@ri[0] # load overflow bit - sbb 16($nptr,$i,8),@ri[2] - mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 24($nptr,$i,8),@ri[3] - mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] - - sbb \$0,@ri[0] # handle upmost overflow bit - mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$tptr - not @ri[0] - mov $rptr,$nptr - and @ri[0],$nptr - lea -1($num),$j - or $nptr,$tptr # tp=borrow?tp:rp - - pxor %xmm0,%xmm0 - lea 64(%rsp,$num,8),$nptr - movdqu ($tptr),%xmm1 - lea ($nptr,$num,8),$nptr - movdqa %xmm0,64(%rsp) # zap lower half of temporary vector - movdqa %xmm0,($nptr) # zap upper half of temporary vector - movdqu %xmm1,($rptr) - jmp .Lsqr4x_copy -.align 16 -.Lsqr4x_copy: # copy or in-place refresh - movdqu 16($tptr,$i),%xmm2 - movdqu 32($tptr,$i),%xmm1 - movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,96(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector - movdqa %xmm0,32($nptr,$i) # zap upper half of temporary vector - movdqu %xmm2,16($rptr,$i) - movdqu %xmm1,32($rptr,$i) - lea 32($i),$i - dec $j - jnz .Lsqr4x_copy - - movdqu 16($tptr,$i),%xmm2 - movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector - movdqu %xmm2,16($rptr,$i) -___ -} -$code.=<<___; - mov 56(%rsp),%rsi # restore %rsp - mov \$1,%rax - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lsqr4x_epilogue: - ret -.size bn_sqr4x_mont,.-bn_sqr4x_mont -___ -}}} -$code.=<<___; -.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by " -.align 16 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type mul_handler,\@abi-omnipotent -.align 16 -mul_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # end of prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - mov 192($context),%r10 # pull $num - mov 8(%rax,%r10,8),%rax # pull saved stack pointer - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - - jmp .Lcommon_seh_tail -.size mul_handler,.-mul_handler - -.type sqr_handler,\@abi-omnipotent -.align 16 -sqr_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lsqr4x_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lsqr_body - jb .Lcommon_seh_tail - - mov 152($context),%rax # pull context->Rsp - - lea .Lsqr4x_epilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue - jae .Lcommon_seh_tail - - mov 56(%rax),%rax # pull saved stack pointer - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lcommon_seh_tail: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size sqr_handler,.-sqr_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_bn_mul_mont - .rva .LSEH_end_bn_mul_mont - .rva .LSEH_info_bn_mul_mont - - .rva .LSEH_begin_bn_mul4x_mont - .rva .LSEH_end_bn_mul4x_mont - .rva .LSEH_info_bn_mul4x_mont - - .rva .LSEH_begin_bn_sqr4x_mont - .rva .LSEH_end_bn_sqr4x_mont - .rva .LSEH_info_bn_sqr4x_mont - -.section .xdata -.align 8 -.LSEH_info_bn_mul_mont: - .byte 9,0,0,0 - .rva mul_handler - .rva .Lmul_body,.Lmul_epilogue # HandlerData[] -.LSEH_info_bn_mul4x_mont: - .byte 9,0,0,0 - .rva mul_handler - .rva .Lmul4x_body,.Lmul4x_epilogue # HandlerData[] -.LSEH_info_bn_sqr4x_mont: - .byte 9,0,0,0 - .rva sqr_handler -___ -} - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont5.pl b/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont5.pl deleted file mode 100755 index 235979181f..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/asm/x86_64-mont5.pl +++ /dev/null @@ -1,1186 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# August 2011. -# -# Companion to x86_64-mont.pl that optimizes cache-timing attack -# countermeasures. The subroutines are produced by replacing bp[i] -# references in their x86_64-mont.pl counterparts with cache-neutral -# references to powers table computed in BN_mod_exp_mont_consttime. -# In addition subroutine that scatters elements of the powers table -# is implemented, so that scatter-/gathering can be tuned without -# bn_exp.c modifications. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -# int bn_mul_mont_gather5( -$rp="%rdi"; # BN_ULONG *rp, -$ap="%rsi"; # const BN_ULONG *ap, -$bp="%rdx"; # const BN_ULONG *bp, -$np="%rcx"; # const BN_ULONG *np, -$n0="%r8"; # const BN_ULONG *n0, -$num="%r9"; # int num, - # int idx); # 0 to 2^5-1, "index" in $bp holding - # pre-computed powers of a', interlaced - # in such manner that b[0] is $bp[idx], - # b[1] is [2^5+idx], etc. -$lo0="%r10"; -$hi0="%r11"; -$hi1="%r13"; -$i="%r14"; -$j="%r15"; -$m0="%rbx"; -$m1="%rbp"; - -$code=<<___; -.text - -.globl bn_mul_mont_gather5 -.type bn_mul_mont_gather5,\@function,6 -.align 64 -bn_mul_mont_gather5: - test \$3,${num}d - jnz .Lmul_enter - cmp \$8,${num}d - jb .Lmul_enter - jmp .Lmul4x_enter - -.align 16 -.Lmul_enter: - mov ${num}d,${num}d - movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument - lea .Linc(%rip),%r10 - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - -.Lmul_alloca: - mov %rsp,%rax - lea 2($num),%r11 - neg %r11 - lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8) - and \$-1024,%rsp # minimize TLB usage - - mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp -.Lmul_body: - lea 128($bp),%r12 # reassign $bp (+size optimization) -___ - $bp="%r12"; - $STRIDE=2**5*8; # 5 is "window size" - $N=$STRIDE/4; # should match cache line size -$code.=<<___; - movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 - movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 - lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization) - and \$-16,%r10 - - pshufd \$0,%xmm5,%xmm5 # broadcast index - movdqa %xmm1,%xmm4 - movdqa %xmm1,%xmm2 -___ -######################################################################## -# calculate mask by comparing 0..31 to index and save result to stack -# -$code.=<<___; - paddd %xmm0,%xmm1 - pcmpeqd %xmm5,%xmm0 # compare to 1,0 - .byte 0x67 - movdqa %xmm4,%xmm3 -___ -for($k=0;$k<$STRIDE/16-4;$k+=4) { -$code.=<<___; - paddd %xmm1,%xmm2 - pcmpeqd %xmm5,%xmm1 # compare to 3,2 - movdqa %xmm0,`16*($k+0)+112`(%r10) - movdqa %xmm4,%xmm0 - - paddd %xmm2,%xmm3 - pcmpeqd %xmm5,%xmm2 # compare to 5,4 - movdqa %xmm1,`16*($k+1)+112`(%r10) - movdqa %xmm4,%xmm1 - - paddd %xmm3,%xmm0 - pcmpeqd %xmm5,%xmm3 # compare to 7,6 - movdqa %xmm2,`16*($k+2)+112`(%r10) - movdqa %xmm4,%xmm2 - - paddd %xmm0,%xmm1 - pcmpeqd %xmm5,%xmm0 - movdqa %xmm3,`16*($k+3)+112`(%r10) - movdqa %xmm4,%xmm3 -___ -} -$code.=<<___; # last iteration can be optimized - paddd %xmm1,%xmm2 - pcmpeqd %xmm5,%xmm1 - movdqa %xmm0,`16*($k+0)+112`(%r10) - - paddd %xmm2,%xmm3 - .byte 0x67 - pcmpeqd %xmm5,%xmm2 - movdqa %xmm1,`16*($k+1)+112`(%r10) - - pcmpeqd %xmm5,%xmm3 - movdqa %xmm2,`16*($k+2)+112`(%r10) - pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register - - pand `16*($k+1)-128`($bp),%xmm1 - pand `16*($k+2)-128`($bp),%xmm2 - movdqa %xmm3,`16*($k+3)+112`(%r10) - pand `16*($k+3)-128`($bp),%xmm3 - por %xmm2,%xmm0 - por %xmm3,%xmm1 -___ -for($k=0;$k<$STRIDE/16-4;$k+=4) { -$code.=<<___; - movdqa `16*($k+0)-128`($bp),%xmm4 - movdqa `16*($k+1)-128`($bp),%xmm5 - movdqa `16*($k+2)-128`($bp),%xmm2 - pand `16*($k+0)+112`(%r10),%xmm4 - movdqa `16*($k+3)-128`($bp),%xmm3 - pand `16*($k+1)+112`(%r10),%xmm5 - por %xmm4,%xmm0 - pand `16*($k+2)+112`(%r10),%xmm2 - por %xmm5,%xmm1 - pand `16*($k+3)+112`(%r10),%xmm3 - por %xmm2,%xmm0 - por %xmm3,%xmm1 -___ -} -$code.=<<___; - por %xmm1,%xmm0 - pshufd \$0x4e,%xmm0,%xmm1 - por %xmm1,%xmm0 - lea $STRIDE($bp),$bp - movq %xmm0,$m0 # m0=bp[0] - - mov ($n0),$n0 # pull n0[0] value - mov ($ap),%rax - - xor $i,$i # i=0 - xor $j,$j # j=0 - - mov $n0,$m1 - mulq $m0 # ap[0]*bp[0] - mov %rax,$lo0 - mov ($np),%rax - - imulq $lo0,$m1 # "tp[0]"*n0 - mov %rdx,$hi0 - - mulq $m1 # np[0]*m1 - add %rax,$lo0 # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$hi1 - - lea 1($j),$j # j++ - jmp .L1st_enter - -.align 16 -.L1st: - add %rax,$hi1 - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] - mov $lo0,$hi0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - -.L1st_enter: - mulq $m0 # ap[j]*bp[0] - add %rax,$hi0 - mov ($np,$j,8),%rax - adc \$0,%rdx - lea 1($j),$j # j++ - mov %rdx,$lo0 - - mulq $m1 # np[j]*m1 - cmp $num,$j - jne .L1st - - add %rax,$hi1 - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - mov $lo0,$hi0 - - xor %rdx,%rdx - add $hi0,$hi1 - adc \$0,%rdx - mov $hi1,-8(%rsp,$num,8) - mov %rdx,(%rsp,$num,8) # store upmost overflow bit - - lea 1($i),$i # i++ - jmp .Louter -.align 16 -.Louter: - lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) - and \$-16,%rdx - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 -___ -for($k=0;$k<$STRIDE/16;$k+=4) { -$code.=<<___; - movdqa `16*($k+0)-128`($bp),%xmm0 - movdqa `16*($k+1)-128`($bp),%xmm1 - movdqa `16*($k+2)-128`($bp),%xmm2 - movdqa `16*($k+3)-128`($bp),%xmm3 - pand `16*($k+0)-128`(%rdx),%xmm0 - pand `16*($k+1)-128`(%rdx),%xmm1 - por %xmm0,%xmm4 - pand `16*($k+2)-128`(%rdx),%xmm2 - por %xmm1,%xmm5 - pand `16*($k+3)-128`(%rdx),%xmm3 - por %xmm2,%xmm4 - por %xmm3,%xmm5 -___ -} -$code.=<<___; - por %xmm5,%xmm4 - pshufd \$0x4e,%xmm4,%xmm0 - por %xmm4,%xmm0 - lea $STRIDE($bp),$bp - movq %xmm0,$m0 # m0=bp[i] - - xor $j,$j # j=0 - mov $n0,$m1 - mov (%rsp),$lo0 - - mulq $m0 # ap[0]*bp[i] - add %rax,$lo0 # ap[0]*bp[i]+tp[0] - mov ($np),%rax - adc \$0,%rdx - - imulq $lo0,$m1 # tp[0]*n0 - mov %rdx,$hi0 - - mulq $m1 # np[0]*m1 - add %rax,$lo0 # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov 8(%rsp),$lo0 # tp[1] - mov %rdx,$hi1 - - lea 1($j),$j # j++ - jmp .Linner_enter - -.align 16 -.Linner: - add %rax,$hi1 - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] - mov (%rsp,$j,8),$lo0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - -.Linner_enter: - mulq $m0 # ap[j]*bp[i] - add %rax,$hi0 - mov ($np,$j,8),%rax - adc \$0,%rdx - add $hi0,$lo0 # ap[j]*bp[i]+tp[j] - mov %rdx,$hi0 - adc \$0,$hi0 - lea 1($j),$j # j++ - - mulq $m1 # np[j]*m1 - cmp $num,$j - jne .Linner - - add %rax,$hi1 - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j] - mov (%rsp,$j,8),$lo0 - adc \$0,%rdx - mov $hi1,-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$hi1 - - xor %rdx,%rdx - add $hi0,$hi1 - adc \$0,%rdx - add $lo0,$hi1 # pull upmost overflow bit - adc \$0,%rdx - mov $hi1,-8(%rsp,$num,8) - mov %rdx,(%rsp,$num,8) # store upmost overflow bit - - lea 1($i),$i # i++ - cmp $num,$i - jl .Louter - - xor $i,$i # i=0 and clear CF! - mov (%rsp),%rax # tp[0] - lea (%rsp),$ap # borrow ap for tp - mov $num,$j # j=num - jmp .Lsub -.align 16 -.Lsub: sbb ($np,$i,8),%rax - mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 8($ap,$i,8),%rax # tp[i+1] - lea 1($i),$i # i++ - dec $j # doesnn't affect CF! - jnz .Lsub - - sbb \$0,%rax # handle upmost overflow bit - xor $i,$i - and %rax,$ap - not %rax - mov $rp,$np - and %rax,$np - mov $num,$j # j=num - or $np,$ap # ap=borrow?tp:rp -.align 16 -.Lcopy: # copy or in-place refresh - mov ($ap,$i,8),%rax - mov $i,(%rsp,$i,8) # zap temporary vector - mov %rax,($rp,$i,8) # rp[i]=tp[i] - lea 1($i),$i - sub \$1,$j - jnz .Lcopy - - mov 8(%rsp,$num,8),%rsi # restore %rsp - mov \$1,%rax - - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lmul_epilogue: - ret -.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 -___ -{{{ -my @A=("%r10","%r11"); -my @N=("%r13","%rdi"); -$code.=<<___; -.type bn_mul4x_mont_gather5,\@function,6 -.align 16 -bn_mul4x_mont_gather5: -.Lmul4x_enter: - mov ${num}d,${num}d - movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument - lea .Linc(%rip),%r10 - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - -.Lmul4x_alloca: - mov %rsp,%rax - lea 4($num),%r11 - neg %r11 - lea -256(%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)+256) - and \$-1024,%rsp # minimize TLB usage - - mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp -.Lmul4x_body: - mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp - lea 128(%rdx),%r12 # reassign $bp (+size optimization) -___ - $bp="%r12"; - $STRIDE=2**5*8; # 5 is "window size" - $N=$STRIDE/4; # should match cache line size -$code.=<<___; - movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000 - movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002 - lea 32-112(%rsp,$num,8),%r10# place the mask after tp[num+4] (+ICache optimization) - - pshufd \$0,%xmm5,%xmm5 # broadcast index - movdqa %xmm1,%xmm4 - .byte 0x67,0x67 - movdqa %xmm1,%xmm2 -___ -######################################################################## -# calculate mask by comparing 0..31 to index and save result to stack -# -$code.=<<___; - paddd %xmm0,%xmm1 - pcmpeqd %xmm5,%xmm0 # compare to 1,0 - .byte 0x67 - movdqa %xmm4,%xmm3 -___ -for($k=0;$k<$STRIDE/16-4;$k+=4) { -$code.=<<___; - paddd %xmm1,%xmm2 - pcmpeqd %xmm5,%xmm1 # compare to 3,2 - movdqa %xmm0,`16*($k+0)+112`(%r10) - movdqa %xmm4,%xmm0 - - paddd %xmm2,%xmm3 - pcmpeqd %xmm5,%xmm2 # compare to 5,4 - movdqa %xmm1,`16*($k+1)+112`(%r10) - movdqa %xmm4,%xmm1 - - paddd %xmm3,%xmm0 - pcmpeqd %xmm5,%xmm3 # compare to 7,6 - movdqa %xmm2,`16*($k+2)+112`(%r10) - movdqa %xmm4,%xmm2 - - paddd %xmm0,%xmm1 - pcmpeqd %xmm5,%xmm0 - movdqa %xmm3,`16*($k+3)+112`(%r10) - movdqa %xmm4,%xmm3 -___ -} -$code.=<<___; # last iteration can be optimized - paddd %xmm1,%xmm2 - pcmpeqd %xmm5,%xmm1 - movdqa %xmm0,`16*($k+0)+112`(%r10) - - paddd %xmm2,%xmm3 - .byte 0x67 - pcmpeqd %xmm5,%xmm2 - movdqa %xmm1,`16*($k+1)+112`(%r10) - - pcmpeqd %xmm5,%xmm3 - movdqa %xmm2,`16*($k+2)+112`(%r10) - pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register - - pand `16*($k+1)-128`($bp),%xmm1 - pand `16*($k+2)-128`($bp),%xmm2 - movdqa %xmm3,`16*($k+3)+112`(%r10) - pand `16*($k+3)-128`($bp),%xmm3 - por %xmm2,%xmm0 - por %xmm3,%xmm1 -___ -for($k=0;$k<$STRIDE/16-4;$k+=4) { -$code.=<<___; - movdqa `16*($k+0)-128`($bp),%xmm4 - movdqa `16*($k+1)-128`($bp),%xmm5 - movdqa `16*($k+2)-128`($bp),%xmm2 - pand `16*($k+0)+112`(%r10),%xmm4 - movdqa `16*($k+3)-128`($bp),%xmm3 - pand `16*($k+1)+112`(%r10),%xmm5 - por %xmm4,%xmm0 - pand `16*($k+2)+112`(%r10),%xmm2 - por %xmm5,%xmm1 - pand `16*($k+3)+112`(%r10),%xmm3 - por %xmm2,%xmm0 - por %xmm3,%xmm1 -___ -} -$code.=<<___; - por %xmm1,%xmm0 - pshufd \$0x4e,%xmm0,%xmm1 - por %xmm1,%xmm0 - lea $STRIDE($bp),$bp - movq %xmm0,$m0 # m0=bp[0] - - mov ($n0),$n0 # pull n0[0] value - mov ($ap),%rax - - xor $i,$i # i=0 - xor $j,$j # j=0 - - mov $n0,$m1 - mulq $m0 # ap[0]*bp[0] - mov %rax,$A[0] - mov ($np),%rax - - imulq $A[0],$m1 # "tp[0]"*n0 - mov %rdx,$A[1] - - mulq $m1 # np[0]*m1 - add %rax,$A[0] # discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$N[1] - - mulq $m0 - add %rax,$A[1] - mov 8($np),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 - add %rax,$N[1] - mov 16($ap),%rax - adc \$0,%rdx - add $A[1],$N[1] - lea 4($j),$j # j++ - adc \$0,%rdx - mov $N[1],(%rsp) - mov %rdx,$N[0] - jmp .L1st4x -.align 16 -.L1st4x: - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov ($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov 8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-8(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov 8($np,$j,8),%rax - adc \$0,%rdx - lea 4($j),$j # j++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov -16($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - cmp $num,$j - jl .L1st4x - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[0] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - xor $N[1],$N[1] - add $A[0],$N[0] - adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit - - lea 1($i),$i # i++ -.align 4 -.Louter4x: - lea 32+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization) - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 -___ -for($k=0;$k<$STRIDE/16;$k+=4) { -$code.=<<___; - movdqa `16*($k+0)-128`($bp),%xmm0 - movdqa `16*($k+1)-128`($bp),%xmm1 - movdqa `16*($k+2)-128`($bp),%xmm2 - movdqa `16*($k+3)-128`($bp),%xmm3 - pand `16*($k+0)-128`(%rdx),%xmm0 - pand `16*($k+1)-128`(%rdx),%xmm1 - por %xmm0,%xmm4 - pand `16*($k+2)-128`(%rdx),%xmm2 - por %xmm1,%xmm5 - pand `16*($k+3)-128`(%rdx),%xmm3 - por %xmm2,%xmm4 - por %xmm3,%xmm5 -___ -} -$code.=<<___; - por %xmm5,%xmm4 - pshufd \$0x4e,%xmm4,%xmm0 - por %xmm4,%xmm0 - lea $STRIDE($bp),$bp - movq %xmm0,$m0 # m0=bp[i] - - xor $j,$j # j=0 - - mov (%rsp),$A[0] - mov $n0,$m1 - mulq $m0 # ap[0]*bp[i] - add %rax,$A[0] # ap[0]*bp[i]+tp[0] - mov ($np),%rax - adc \$0,%rdx - - imulq $A[0],$m1 # tp[0]*n0 - mov %rdx,$A[1] - - mulq $m1 # np[0]*m1 - add %rax,$A[0] # "$N[0]", discarded - mov 8($ap),%rax - adc \$0,%rdx - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov 8($np),%rax - adc \$0,%rdx - add 8(%rsp),$A[1] # +tp[1] - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov 16($ap),%rax - adc \$0,%rdx - add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] - lea 4($j),$j # j+=2 - adc \$0,%rdx - mov %rdx,$N[0] - jmp .Linner4x -.align 16 -.Linner4x: - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] - adc \$0,%rdx - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov ($np,$j,8),%rax - adc \$0,%rdx - add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov 8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov 8($np,$j,8),%rax - adc \$0,%rdx - add 8(%rsp,$j,8),$A[1] - adc \$0,%rdx - lea 4($j),$j # j++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov -16($ap,$j,8),%rax - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[0],-40(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - cmp $num,$j - jl .Linner4x - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[0] - mov -16($np,$j,8),%rax - adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] - adc \$0,%rdx - mov %rdx,$A[1] - - mulq $m1 # np[j]*m1 - add %rax,$N[0] - mov -8($ap,$j,8),%rax - adc \$0,%rdx - add $A[0],$N[0] - adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[1] - - mulq $m0 # ap[j]*bp[i] - add %rax,$A[1] - mov -8($np,$j,8),%rax - adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] - adc \$0,%rdx - lea 1($i),$i # i++ - mov %rdx,$A[0] - - mulq $m1 # np[j]*m1 - add %rax,$N[1] - mov ($ap),%rax # ap[0] - adc \$0,%rdx - add $A[1],$N[1] - adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] - mov %rdx,$N[0] - - mov $N[1],-16(%rsp,$j,8) # tp[j-1] - - xor $N[1],$N[1] - add $A[0],$N[0] - adc \$0,$N[1] - add (%rsp,$num,8),$N[0] # pull upmost overflow bit - adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit - - cmp $num,$i - jl .Louter4x -___ -{ -my @ri=("%rax","%rdx",$m0,$m1); -$code.=<<___; - mov 16(%rsp,$num,8),$rp # restore $rp - mov 0(%rsp),@ri[0] # tp[0] - pxor %xmm0,%xmm0 - mov 8(%rsp),@ri[1] # tp[1] - shr \$2,$num # num/=4 - lea (%rsp),$ap # borrow ap for tp - xor $i,$i # i=0 and clear CF! - - sub 0($np),@ri[0] - mov 16($ap),@ri[2] # tp[2] - mov 24($ap),@ri[3] # tp[3] - sbb 8($np),@ri[1] - lea -1($num),$j # j=num/4-1 - jmp .Lsub4x -.align 16 -.Lsub4x: - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 16($np,$i,8),@ri[2] - mov 32($ap,$i,8),@ri[0] # tp[i+1] - mov 40($ap,$i,8),@ri[1] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 32($np,$i,8),@ri[0] - mov 48($ap,$i,8),@ri[2] - mov 56($ap,$i,8),@ri[3] - sbb 40($np,$i,8),@ri[1] - lea 4($i),$i # i++ - dec $j # doesnn't affect CF! - jnz .Lsub4x - - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 32($ap,$i,8),@ri[0] # load overflow bit - sbb 16($np,$i,8),@ri[2] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] - - sbb \$0,@ri[0] # handle upmost overflow bit - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$ap - not @ri[0] - mov $rp,$np - and @ri[0],$np - lea -1($num),$j - or $np,$ap # ap=borrow?tp:rp - - movdqu ($ap),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,($rp) - jmp .Lcopy4x -.align 16 -.Lcopy4x: # copy or in-place refresh - movdqu 16($ap,$i),%xmm2 - movdqu 32($ap,$i),%xmm1 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) - movdqa %xmm0,32(%rsp,$i) - movdqu %xmm1,32($rp,$i) - lea 32($i),$i - dec $j - jnz .Lcopy4x - - shl \$2,$num - movdqu 16($ap,$i),%xmm2 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) -___ -} -$code.=<<___; - mov 8(%rsp,$num,8),%rsi # restore %rsp - mov \$1,%rax - - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lmul4x_epilogue: - ret -.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 -___ -}}} - -{ -my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9d") : # Win64 order - ("%rdi","%rsi","%rdx","%ecx"); # Unix order -my $out=$inp; -my $STRIDE=2**5*8; -my $N=$STRIDE/4; - -$code.=<<___; -.globl bn_scatter5 -.type bn_scatter5,\@abi-omnipotent -.align 16 -bn_scatter5: - cmp \$0, $num - jz .Lscatter_epilogue - lea ($tbl,$idx,8),$tbl -.Lscatter: - mov ($inp),%rax - lea 8($inp),$inp - mov %rax,($tbl) - lea 32*8($tbl),$tbl - sub \$1,$num - jnz .Lscatter -.Lscatter_epilogue: - ret -.size bn_scatter5,.-bn_scatter5 - -.globl bn_gather5 -.type bn_gather5,\@abi-omnipotent -.align 16 -bn_gather5: -.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases - # I can't trust assembler to use specific encoding:-( - .byte 0x4c,0x8d,0x14,0x24 # lea (%rsp),%r10 - .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 # sub $0x108,%rsp - lea .Linc(%rip),%rax - and \$-16,%rsp # shouldn't be formally required - - movd $idx,%xmm5 - movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 - movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 - lea 128($tbl),%r11 # size optimization - lea 128(%rsp),%rax # size optimization - - pshufd \$0,%xmm5,%xmm5 # broadcast $idx - movdqa %xmm1,%xmm4 - movdqa %xmm1,%xmm2 -___ -######################################################################## -# calculate mask by comparing 0..31 to $idx and save result to stack -# -for($i=0;$i<$STRIDE/16;$i+=4) { -$code.=<<___; - paddd %xmm0,%xmm1 - pcmpeqd %xmm5,%xmm0 # compare to 1,0 -___ -$code.=<<___ if ($i); - movdqa %xmm3,`16*($i-1)-128`(%rax) -___ -$code.=<<___; - movdqa %xmm4,%xmm3 - - paddd %xmm1,%xmm2 - pcmpeqd %xmm5,%xmm1 # compare to 3,2 - movdqa %xmm0,`16*($i+0)-128`(%rax) - movdqa %xmm4,%xmm0 - - paddd %xmm2,%xmm3 - pcmpeqd %xmm5,%xmm2 # compare to 5,4 - movdqa %xmm1,`16*($i+1)-128`(%rax) - movdqa %xmm4,%xmm1 - - paddd %xmm3,%xmm0 - pcmpeqd %xmm5,%xmm3 # compare to 7,6 - movdqa %xmm2,`16*($i+2)-128`(%rax) - movdqa %xmm4,%xmm2 -___ -} -$code.=<<___; - movdqa %xmm3,`16*($i-1)-128`(%rax) - jmp .Lgather - -.align 32 -.Lgather: - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 -___ -for($i=0;$i<$STRIDE/16;$i+=4) { -$code.=<<___; - movdqa `16*($i+0)-128`(%r11),%xmm0 - movdqa `16*($i+1)-128`(%r11),%xmm1 - movdqa `16*($i+2)-128`(%r11),%xmm2 - pand `16*($i+0)-128`(%rax),%xmm0 - movdqa `16*($i+3)-128`(%r11),%xmm3 - pand `16*($i+1)-128`(%rax),%xmm1 - por %xmm0,%xmm4 - pand `16*($i+2)-128`(%rax),%xmm2 - por %xmm1,%xmm5 - pand `16*($i+3)-128`(%rax),%xmm3 - por %xmm2,%xmm4 - por %xmm3,%xmm5 -___ -} -$code.=<<___; - por %xmm5,%xmm4 - lea $STRIDE(%r11),%r11 - pshufd \$0x4e,%xmm4,%xmm0 - por %xmm4,%xmm0 - movq %xmm0,($out) # m0=bp[0] - lea 8($out),$out - sub \$1,$num - jnz .Lgather - - lea (%r10),%rsp - ret -.LSEH_end_bn_gather5: -.size bn_gather5,.-bn_gather5 -___ -} -$code.=<<___; -.align 64 -.Linc: - .long 0,0, 1,1 - .long 2,2, 2,2 -.asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by " -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type mul_handler,\@abi-omnipotent -.align 16 -mul_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # end of prologue label - cmp %r10,%rbx # context->RipRipRsp - - mov 8(%r11),%r10d # HandlerData[2] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - mov 192($context),%r10 # pull $num - mov 8(%rax,%r10,8),%rax # pull saved stack pointer - - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lcommon_seh_tail: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size mul_handler,.-mul_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_bn_mul_mont_gather5 - .rva .LSEH_end_bn_mul_mont_gather5 - .rva .LSEH_info_bn_mul_mont_gather5 - - .rva .LSEH_begin_bn_mul4x_mont_gather5 - .rva .LSEH_end_bn_mul4x_mont_gather5 - .rva .LSEH_info_bn_mul4x_mont_gather5 - - .rva .LSEH_begin_bn_gather5 - .rva .LSEH_end_bn_gather5 - .rva .LSEH_info_bn_gather5 - -.section .xdata -.align 8 -.LSEH_info_bn_mul_mont_gather5: - .byte 9,0,0,0 - .rva mul_handler - .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[] -.align 8 -.LSEH_info_bn_mul4x_mont_gather5: - .byte 9,0,0,0 - .rva mul_handler - .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] -.align 8 -.LSEH_info_bn_gather5: - .byte 0x01,0x0b,0x03,0x0a - .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108 - .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp), set_frame r10 -.align 8 -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/bn/bn_asm.c b/drivers/builtin_openssl2/crypto/bn/bn_asm.c index 114acf3dc2..03a33cffe5 100644 --- a/drivers/builtin_openssl2/crypto/bn/bn_asm.c +++ b/drivers/builtin_openssl2/crypto/bn/bn_asm.c @@ -489,121 +489,144 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, * c=(c2,c1,c0) */ +# ifdef BN_LLONG /* - * Keep in mind that carrying into high part of multiplication result - * can not overflow, because it cannot be all-ones. + * Keep in mind that additions to multiplication result can not + * overflow, because its high half cannot be all-ones. */ -# ifdef BN_LLONG -# define mul_add_c(a,b,c0,c1,c2) \ - t=(BN_ULLONG)a*b; \ - t1=(BN_ULONG)Lw(t); \ - t2=(BN_ULONG)Hw(t); \ - c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; - -# define mul_add_c2(a,b,c0,c1,c2) \ - t=(BN_ULLONG)a*b; \ - tt=(t+t)&BN_MASK; \ - if (tt < t) c2++; \ - t1=(BN_ULONG)Lw(tt); \ - t2=(BN_ULONG)Hw(tt); \ - c0=(c0+t1)&BN_MASK2; \ - if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; - -# define sqr_add_c(a,i,c0,c1,c2) \ - t=(BN_ULLONG)a[i]*a[i]; \ - t1=(BN_ULONG)Lw(t); \ - t2=(BN_ULONG)Hw(t); \ - c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; +# define mul_add_c(a,b,c0,c1,c2) do { \ + BN_ULONG hi; \ + BN_ULLONG t = (BN_ULLONG)(a)*(b); \ + t += c0; /* no carry */ \ + c0 = (BN_ULONG)Lw(t); \ + hi = (BN_ULONG)Hw(t); \ + c1 = (c1+hi)&BN_MASK2; if (c1 +#endif + +#include "rsaz_exp.h" + +#undef SPARC_T4_MONT +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) +# include "sparc_arch.h" +extern unsigned int OPENSSL_sparcv9cap_P[]; +# define SPARC_T4_MONT #endif /* maximum precomputation table size for *variable* sliding windows */ @@ -476,6 +487,23 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, wstart = bits - 1; /* The top bit of the window */ wend = 0; /* The bottom bit of the window */ +#if 1 /* by Shay Gueron's suggestion */ + j = m->top; /* borrow j */ + if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { + if (bn_wexpand(r, j) == NULL) + goto err; + /* 2^(top*BN_BITS2) - m */ + r->d[0] = (0 - m->d[0]) & BN_MASK2; + for (i = 1; i < j; i++) + r->d[i] = (~m->d[i]) & BN_MASK2; + r->top = j; + /* + * Upper words will be zero if the corresponding words of 'm' were + * 0xfff[...], so decrement r->top accordingly. + */ + bn_correct_top(r); + } else +#endif if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) goto err; for (;;) { @@ -527,6 +555,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (wstart < 0) break; } +#if defined(SPARC_T4_MONT) + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { + j = mont->N.top; /* borrow j */ + val[0]->d[0] = 1; /* borrow val[0] */ + for (i = 1; i < j; i++) + val[0]->d[i] = 0; + val[0]->top = j; + if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx)) + goto err; + } else +#endif if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; ret = 1; @@ -538,6 +577,27 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, return (ret); } +#if defined(SPARC_T4_MONT) +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) +{ + BN_ULONG ret = 0; + int wordpos; + + wordpos = bitpos / BN_BITS2; + bitpos %= BN_BITS2; + if (wordpos >= 0 && wordpos < a->top) { + ret = a->d[wordpos] & BN_MASK2; + if (bitpos) { + ret >>= bitpos; + if (++wordpos < a->top) + ret |= a->d[wordpos] << (BN_BITS2 - bitpos); + } + } + + return ret & BN_MASK2; +} +#endif + /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific * layout so that accessing any of these table values shows the same access @@ -644,6 +704,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, int powerbufLen = 0; unsigned char *powerbuf = NULL; BIGNUM tmp, am; +#if defined(SPARC_T4_MONT) + unsigned int t4 = 0; +#endif bn_check_top(a); bn_check_top(p); @@ -683,21 +746,62 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; } +#ifdef RSAZ_ENABLED + /* + * If the size of the operands allow it, perform the optimized + * RSAZ exponentiation. For further information see + * crypto/bn/rsaz_exp.c and accompanying assembly modules. + */ + if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) + && rsaz_avx2_eligible()) { + if (NULL == bn_wexpand(rr, 16)) + goto err; + RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, + mont->n0[0]); + rr->top = 16; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; + } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { + if (NULL == bn_wexpand(rr, 8)) + goto err; + RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); + rr->top = 8; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; + } +#endif + /* Get the window size to use with size of p. */ window = BN_window_bits_for_ctime_exponent_size(bits); +#if defined(SPARC_T4_MONT) + if (window >= 5 && (top & 15) == 0 && top <= 64 && + (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) == + (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0])) + window = 5; + else +#endif #if defined(OPENSSL_BN_ASM_MONT5) - if (window == 6 && bits <= 1024) - window = 5; /* ~5% improvement of 2048-bit RSA sign */ + if (window >= 5) { + window = 5; /* ~5% improvement for RSA2048 sign, and even + * for RSA4096 */ + /* reserve space for mont->N.d[] copy */ + powerbufLen += top * sizeof(mont->N.d[0]); + } #endif + (void)0; /* * Allocate a buffer large enough to hold all of the pre-computed powers * of am, am itself and tmp. */ numPowers = 1 << window; - powerbufLen = sizeof(m->d[0]) * (top * numPowers + - ((2 * top) > - numPowers ? (2 * top) : numPowers)); + powerbufLen += sizeof(m->d[0]) * (top * numPowers + + ((2 * top) > + numPowers ? (2 * top) : numPowers)); #ifdef alloca if (powerbufLen < 3072) powerbufFree = @@ -727,15 +831,17 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, tmp.flags = am.flags = BN_FLG_STATIC_DATA; /* prepare a^0 in Montgomery domain */ -#if 1 +#if 1 /* by Shay Gueron's suggestion */ + if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { + /* 2^(top*BN_BITS2) - m */ + tmp.d[0] = (0 - m->d[0]) & BN_MASK2; + for (i = 1; i < top; i++) + tmp.d[i] = (~m->d[i]) & BN_MASK2; + tmp.top = top; + } else +#endif if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) goto err; -#else - tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ - for (i = 1; i < top; i++) - tmp.d[i] = (~m->d[i]) & BN_MASK2; - tmp.top = top; -#endif /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a, m) >= 0) { @@ -746,6 +852,138 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } else if (!BN_to_montgomery(&am, a, mont, ctx)) goto err; +#if defined(SPARC_T4_MONT) + if (t4) { + typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + static const bn_pwr5_mont_f pwr5_funcs[4] = { + bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, + bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 + }; + bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1]; + + typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp, + const BN_ULONG *np, const BN_ULONG *n0); + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + static const bn_mul_mont_f mul_funcs[4] = { + bn_mul_mont_t4_8, bn_mul_mont_t4_16, + bn_mul_mont_t4_24, bn_mul_mont_t4_32 + }; + bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1]; + + void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0, int num); + void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0, int num); + void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap, + const void *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num, + void *table, size_t power); + void bn_gather5_t4(BN_ULONG *out, size_t num, + void *table, size_t power); + void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num); + + BN_ULONG *np = mont->N.d, *n0 = mont->n0; + int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less + * than 32 */ + + /* + * BN_to_montgomery can contaminate words above .top [in + * BN_DEBUG[_DEBUG] build]... + */ + for (i = am.top; i < top; i++) + am.d[i] = 0; + for (i = tmp.top; i < top; i++) + tmp.d[i] = 0; + + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0); + bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1); + if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) && + !(*mul_worker) (tmp.d, am.d, am.d, np, n0)) + bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top); + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2); + + for (i = 3; i < 32; i++) { + /* Calculate a^i = a^(i-1) * a */ + if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) && + !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0)) + bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top); + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i); + } + + /* switch to 64-bit domain */ + np = alloca(top * sizeof(BN_ULONG)); + top /= 2; + bn_flip_t4(np, mont->N.d, top); + + bits--; + for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) + wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + bn_gather5_t4(tmp.d, top, powerbuf, wvalue); + + /* + * Scan the exponent one window at a time starting from the most + * significant bits. + */ + while (bits >= 0) { + if (bits < stride) + stride = bits + 1; + bits -= stride; + wvalue = bn_get_bits(p, bits + 1); + + if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) + continue; + /* retry once and fall back */ + if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) + continue; + + bits += stride - 5; + wvalue >>= stride - 5; + wvalue &= 31; + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top, + wvalue); + } + + bn_flip_t4(tmp.d, tmp.d, top); + top *= 2; + /* back to 32-bit domain */ + tmp.top = top; + bn_correct_top(&tmp); + OPENSSL_cleanse(np, top * sizeof(BN_ULONG)); + } else +#endif #if defined(OPENSSL_BN_ASM_MONT5) if (window == 5 && top > 1) { /* @@ -764,8 +1002,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power); void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); + void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, + const void *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + int bn_get_bits5(const BN_ULONG *ap, int off); + int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *not_used, const BN_ULONG *np, + const BN_ULONG *n0, int num); - BN_ULONG *np = mont->N.d, *n0 = mont->n0; + BN_ULONG *n0 = mont->n0, *np; /* * BN_to_montgomery can contaminate words above .top [in @@ -776,6 +1021,12 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, for (i = tmp.top; i < top; i++) tmp.d[i] = 0; + /* + * copy mont->N.d[] to improve cache locality + */ + for (np = am.d + top, i = 0; i < top; i++) + np[i] = mont->N.d[i]; + bn_scatter5(tmp.d, top, powerbuf, 0); bn_scatter5(am.d, am.top, powerbuf, 1); bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); @@ -822,20 +1073,34 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + if (top & 7) + while (bits >= 0) { + for (wvalue = 0, i = 0; i < 5; i++, bits--) + wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, + wvalue); + } else { + while (bits >= 0) { + wvalue = bn_get_bits5(p->d, bits - 4); + bits -= 5; + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + } } + ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); tmp.top = top; bn_correct_top(&tmp); + if (ret) { + if (!BN_copy(rr, &tmp)) + ret = 0; + goto err; /* non-zero ret means it's not error */ + } } else #endif { @@ -901,6 +1166,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } /* Convert the final result from montgomery to standard format */ +#if defined(SPARC_T4_MONT) + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { + am.d[0] = 1; /* borrow am */ + for (i = 1; i < top; i++) + am.d[i] = 0; + if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx)) + goto err; + } else +#endif if (!BN_from_montgomery(rr, &tmp, mont, ctx)) goto err; ret = 1; diff --git a/drivers/builtin_openssl2/crypto/bn/bn_gf2m.c b/drivers/builtin_openssl2/crypto/bn/bn_gf2m.c index 8ad44b4826..2c61da1109 100644 --- a/drivers/builtin_openssl2/crypto/bn/bn_gf2m.c +++ b/drivers/builtin_openssl2/crypto/bn/bn_gf2m.c @@ -450,8 +450,7 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) d0 = p[k] % BN_BITS2; d1 = BN_BITS2 - d0; z[n] ^= (zz << d0); - tmp_ulong = zz >> d1; - if (d0 && tmp_ulong) + if (d0 && (tmp_ulong = zz >> d1)) z[n + 1] ^= tmp_ulong; } diff --git a/drivers/builtin_openssl2/crypto/bn/bn_lcl.h b/drivers/builtin_openssl2/crypto/bn/bn_lcl.h index 904a723497..00f4f09945 100644 --- a/drivers/builtin_openssl2/crypto/bn/bn_lcl.h +++ b/drivers/builtin_openssl2/crypto/bn/bn_lcl.h @@ -204,6 +204,24 @@ extern "C" { # define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)/* 32 */ # define BN_MONT_CTX_SET_SIZE_WORD (64)/* 32 */ +/* + * 2011-02-22 SMS. In various places, a size_t variable or a type cast to + * size_t was used to perform integer-only operations on pointers. This + * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t + * is still only 32 bits. What's needed in these cases is an integer type + * with the same size as a pointer, which size_t is not certain to be. The + * only fix here is VMS-specific. + */ +# if defined(OPENSSL_SYS_VMS) +# if __INITIAL_POINTER_SIZE == 64 +# define PTR_SIZE_INT long long +# else /* __INITIAL_POINTER_SIZE == 64 */ +# define PTR_SIZE_INT int +# endif /* __INITIAL_POINTER_SIZE == 64 [else] */ +# elif !defined(PTR_SIZE_INT) /* defined(OPENSSL_SYS_VMS) */ +# define PTR_SIZE_INT size_t +# endif /* defined(OPENSSL_SYS_VMS) [else] */ + # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) /* * BN_UMULT_HIGH section. @@ -295,6 +313,15 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, : "r"(a), "r"(b)); # endif # endif +# elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) +# if defined(__GNUC__) && __GNUC__>=2 +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("umulh %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif # endif /* cpu */ # endif /* OPENSSL_NO_ASM */ diff --git a/drivers/builtin_openssl2/crypto/bn/bn_prime.pl b/drivers/builtin_openssl2/crypto/bn/bn_prime.pl deleted file mode 100644 index 3fafb6f3e9..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/bn_prime.pl +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/local/bin/perl -# bn_prime.pl - -$num=2048; -$num=$ARGV[0] if ($#ARGV >= 0); - -push(@primes,2); -$p=1; -loop: while ($#primes < $num-1) - { - $p+=2; - $s=int(sqrt($p)); - - for ($i=0; defined($primes[$i]) && $primes[$i]<=$s; $i++) - { - next loop if (($p%$primes[$i]) == 0); - } - push(@primes,$p); - } - -# print <<"EOF"; -# /* Auto generated by bn_prime.pl */ -# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au). -# * All rights reserved. -# * Copyright remains Eric Young's, and as such any Copyright notices in -# * the code are not to be removed. -# * See the COPYRIGHT file in the SSLeay distribution for more details. -# */ -# -# EOF - -print <<\EOF; -/* Auto generated by bn_prime.pl */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -EOF - -for ($i=0; $i <= $#primes; $i++) - { - if ($primes[$i] > 256) - { - $eight=$i; - last; - } - } - -printf "#ifndef EIGHT_BIT\n"; -printf "#define NUMPRIMES %d\n",$num; -printf "typedef unsigned short prime_t;\n"; -printf "#else\n"; -printf "#define NUMPRIMES %d\n",$eight; -printf "typedef unsigned char prime_t;\n"; -printf "#endif\n"; -print "static const prime_t primes[NUMPRIMES]=\n\t{\n\t"; -$init=0; -for ($i=0; $i <= $#primes; $i++) - { - printf "\n#ifndef EIGHT_BIT\n\t" if ($primes[$i] > 256) && !($init++); - printf("\n\t") if (($i%8) == 0) && ($i != 0); - printf("%4d,",$primes[$i]); - } -print "\n#endif\n\t};\n"; - - diff --git a/drivers/builtin_openssl2/crypto/bn/bntest.c b/drivers/builtin_openssl2/crypto/bn/bntest.c deleted file mode 100644 index 6d55049e3d..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/bntest.c +++ /dev/null @@ -1,2137 +0,0 @@ -/* crypto/bn/bntest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * Portions of the attached software ("Contribution") are developed by - * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. - * - * The Contribution is licensed pursuant to the Eric Young open source - * license provided above. - * - * The binary polynomial arithmetic software is originally written by - * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. - * - */ - -/* - * Until the key-gen callbacks are modified to use newer prototypes, we allow - * deprecated functions for openssl-internal code - */ -#ifdef OPENSSL_NO_DEPRECATED -# undef OPENSSL_NO_DEPRECATED -#endif - -#include -#include -#include - -#include "e_os.h" - -#include -#include -#include -#include -#include - -const int num0 = 100; /* number of tests */ -const int num1 = 50; /* additional tests for some functions */ -const int num2 = 5; /* number of tests for slow functions */ - -int test_add(BIO *bp); -int test_sub(BIO *bp); -int test_lshift1(BIO *bp); -int test_lshift(BIO *bp, BN_CTX *ctx, BIGNUM *a_); -int test_rshift1(BIO *bp); -int test_rshift(BIO *bp, BN_CTX *ctx); -int test_div(BIO *bp, BN_CTX *ctx); -int test_div_word(BIO *bp); -int test_div_recp(BIO *bp, BN_CTX *ctx); -int test_mul(BIO *bp); -int test_sqr(BIO *bp, BN_CTX *ctx); -int test_mont(BIO *bp, BN_CTX *ctx); -int test_mod(BIO *bp, BN_CTX *ctx); -int test_mod_mul(BIO *bp, BN_CTX *ctx); -int test_mod_exp(BIO *bp, BN_CTX *ctx); -int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx); -int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx); -int test_exp(BIO *bp, BN_CTX *ctx); -int test_gf2m_add(BIO *bp); -int test_gf2m_mod(BIO *bp); -int test_gf2m_mod_mul(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_sqr(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_inv(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_div(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_exp(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_sqrt(BIO *bp, BN_CTX *ctx); -int test_gf2m_mod_solve_quad(BIO *bp, BN_CTX *ctx); -int test_kron(BIO *bp, BN_CTX *ctx); -int test_sqrt(BIO *bp, BN_CTX *ctx); -int rand_neg(void); -static int results = 0; - -static unsigned char lst[] = - "\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9" - "\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0"; - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -static void message(BIO *out, char *m) -{ - fprintf(stderr, "test %s\n", m); - BIO_puts(out, "print \"test "); - BIO_puts(out, m); - BIO_puts(out, "\\n\"\n"); -} - -int main(int argc, char *argv[]) -{ - BN_CTX *ctx; - BIO *out; - char *outfile = NULL; - - results = 0; - - RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_generate_prime may fail */ - - argc--; - argv++; - while (argc >= 1) { - if (strcmp(*argv, "-results") == 0) - results = 1; - else if (strcmp(*argv, "-out") == 0) { - if (--argc < 1) - break; - outfile = *(++argv); - } - argc--; - argv++; - } - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - - out = BIO_new(BIO_s_file()); - if (out == NULL) - EXIT(1); - if (outfile == NULL) { - BIO_set_fp(out, stdout, BIO_NOCLOSE); - } else { - if (!BIO_write_filename(out, outfile)) { - perror(outfile); - EXIT(1); - } - } - - if (!results) - BIO_puts(out, "obase=16\nibase=16\n"); - - message(out, "BN_add"); - if (!test_add(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_sub"); - if (!test_sub(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift1"); - if (!test_lshift1(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift (fixed)"); - if (!test_lshift(out, ctx, BN_bin2bn(lst, sizeof(lst) - 1, NULL))) - goto err; - (void)BIO_flush(out); - - message(out, "BN_lshift"); - if (!test_lshift(out, ctx, NULL)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_rshift1"); - if (!test_rshift1(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_rshift"); - if (!test_rshift(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_sqr"); - if (!test_sqr(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mul"); - if (!test_mul(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div"); - if (!test_div(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div_word"); - if (!test_div_word(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_div_recp"); - if (!test_div_recp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod"); - if (!test_mod(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_mul"); - if (!test_mod_mul(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mont"); - if (!test_mont(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_exp"); - if (!test_mod_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_exp_mont_consttime"); - if (!test_mod_exp_mont_consttime(out, ctx)) - goto err; - if (!test_mod_exp_mont5(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_exp"); - if (!test_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_kronecker"); - if (!test_kron(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_mod_sqrt"); - if (!test_sqrt(out, ctx)) - goto err; - (void)BIO_flush(out); -#ifndef OPENSSL_NO_EC2M - message(out, "BN_GF2m_add"); - if (!test_gf2m_add(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod"); - if (!test_gf2m_mod(out)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_mul"); - if (!test_gf2m_mod_mul(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_sqr"); - if (!test_gf2m_mod_sqr(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_inv"); - if (!test_gf2m_mod_inv(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_div"); - if (!test_gf2m_mod_div(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_exp"); - if (!test_gf2m_mod_exp(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_sqrt"); - if (!test_gf2m_mod_sqrt(out, ctx)) - goto err; - (void)BIO_flush(out); - - message(out, "BN_GF2m_mod_solve_quad"); - if (!test_gf2m_mod_solve_quad(out, ctx)) - goto err; - (void)BIO_flush(out); -#endif - BN_CTX_free(ctx); - BIO_free(out); - - EXIT(0); - err: - BIO_puts(out, "1\n"); /* make sure the Perl script fed by bc - * notices the failure, see test_bn in - * test/Makefile.ssl */ - (void)BIO_flush(out); - ERR_load_crypto_strings(); - ERR_print_errors_fp(stderr); - EXIT(1); - return (1); -} - -int test_add(BIO *bp) -{ - BIGNUM a, b, c; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - BN_bntest_rand(&a, 512, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(&b, 450 + i, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_add(&c, &a, &b); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " + "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - a.neg = !a.neg; - b.neg = !b.neg; - BN_add(&c, &c, &b); - BN_add(&c, &c, &a); - if (!BN_is_zero(&c)) { - fprintf(stderr, "Add test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - return (1); -} - -int test_sub(BIO *bp) -{ - BIGNUM a, b, c; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 512, 0, 0); - BN_copy(&b, &a); - if (BN_set_bit(&a, i) == 0) - return (0); - BN_add_word(&b, i); - } else { - BN_bntest_rand(&b, 400 + i - num1, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - } - BN_sub(&c, &a, &b); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " - "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_add(&c, &c, &b); - BN_sub(&c, &c, &a); - if (!BN_is_zero(&c)) { - fprintf(stderr, "Subtract test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - return (1); -} - -int test_div(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, e; - int i; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - BN_one(&a); - BN_zero(&b); - - if (BN_div(&d, &c, &a, &b, ctx)) { - fprintf(stderr, "Division by zero succeeded!\n"); - return 0; - } - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 400, 0, 0); - BN_copy(&b, &a); - BN_lshift(&a, &a, i); - BN_add_word(&a, i); - } else - BN_bntest_rand(&b, 50 + 3 * (i - num1), 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_div(&d, &c, &a, &b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &d); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_mul(&e, &d, &b, ctx); - BN_add(&d, &e, &c); - BN_sub(&d, &d, &a); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Division test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - return (1); -} - -static void print_word(BIO *bp, BN_ULONG w) -{ -#ifdef SIXTY_FOUR_BIT - if (sizeof(w) > sizeof(unsigned long)) { - unsigned long h = (unsigned long)(w >> 32), l = (unsigned long)(w); - - if (h) - BIO_printf(bp, "%lX%08lX", h, l); - else - BIO_printf(bp, "%lX", l); - return; - } -#endif - BIO_printf(bp, BN_HEX_FMT1, w); -} - -int test_div_word(BIO *bp) -{ - BIGNUM a, b; - BN_ULONG r, s; - int i; - - BN_init(&a); - BN_init(&b); - - for (i = 0; i < num0; i++) { - do { - BN_bntest_rand(&a, 512, -1, 0); - BN_bntest_rand(&b, BN_BITS2, -1, 0); - } while (BN_is_zero(&b)); - - s = b.d[0]; - BN_copy(&b, &a); - r = BN_div_word(&b, s); - - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - print_word(bp, s); - BIO_puts(bp, " - "); - } - BN_print(bp, &b); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - print_word(bp, s); - BIO_puts(bp, " - "); - } - print_word(bp, r); - BIO_puts(bp, "\n"); - } - BN_mul_word(&b, s); - BN_add_word(&b, r); - BN_sub(&b, &a, &b); - if (!BN_is_zero(&b)) { - fprintf(stderr, "Division (word) test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - return (1); -} - -int test_div_recp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, e; - BN_RECP_CTX recp; - int i; - - BN_RECP_CTX_init(&recp); - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - for (i = 0; i < num0 + num1; i++) { - if (i < num1) { - BN_bntest_rand(&a, 400, 0, 0); - BN_copy(&b, &a); - BN_lshift(&a, &a, i); - BN_add_word(&a, i); - } else - BN_bntest_rand(&b, 50 + 3 * (i - num1), 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_RECP_CTX_set(&recp, &b, ctx); - BN_div_recp(&d, &c, &a, &recp, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " / "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &d); - BIO_puts(bp, "\n"); - - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " % "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_mul(&e, &d, &b, ctx); - BN_add(&d, &e, &c); - BN_sub(&d, &d, &a); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Reciprocal division test failed!\n"); - fprintf(stderr, "a="); - BN_print_fp(stderr, &a); - fprintf(stderr, "\nb="); - BN_print_fp(stderr, &b); - fprintf(stderr, "\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - BN_RECP_CTX_free(&recp); - return (1); -} - -int test_mul(BIO *bp) -{ - BIGNUM a, b, c, d, e; - int i; - BN_CTX *ctx; - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&e); - - for (i = 0; i < num0 + num1; i++) { - if (i <= num1) { - BN_bntest_rand(&a, 100, 0, 0); - BN_bntest_rand(&b, 100, 0, 0); - } else - BN_bntest_rand(&b, i - num1, 0, 0); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_mul(&c, &a, &b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " * "); - BN_print(bp, &b); - BIO_puts(bp, " - "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } - BN_div(&d, &e, &c, &a, ctx); - BN_sub(&d, &d, &b); - if (!BN_is_zero(&d) || !BN_is_zero(&e)) { - fprintf(stderr, "Multiplication test failed!\n"); - return 0; - } - } - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&e); - BN_CTX_free(ctx); - return (1); -} - -int test_sqr(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *c, *d, *e; - int i, ret = 0; - - a = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - if (a == NULL || c == NULL || d == NULL || e == NULL) { - goto err; - } - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 40 + i * 10, 0, 0); - a->neg = rand_neg(); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_div(d, e, c, a, ctx); - BN_sub(d, d, a); - if (!BN_is_zero(d) || !BN_is_zero(e)) { - fprintf(stderr, "Square test failed!\n"); - goto err; - } - } - - /* Regression test for a BN_sqr overflow bug. */ - BN_hex2bn(&a, - "80000000000000008000000000000001" - "FFFFFFFFFFFFFFFE0000000000000000"); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, a, ctx); - if (BN_cmp(c, d)) { - fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " - "different results!\n"); - goto err; - } - - /* Regression test for a BN_sqr overflow bug. */ - BN_hex2bn(&a, - "80000000000000000000000080000001" - "FFFFFFFE000000000000000000000000"); - BN_sqr(c, a, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, a); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, a, ctx); - if (BN_cmp(c, d)) { - fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " - "different results!\n"); - goto err; - } - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (c != NULL) - BN_free(c); - if (d != NULL) - BN_free(d); - if (e != NULL) - BN_free(e); - return ret; -} - -int test_mont(BIO *bp, BN_CTX *ctx) -{ - BIGNUM a, b, c, d, A, B; - BIGNUM n; - int i; - BN_MONT_CTX *mont; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - BN_init(&d); - BN_init(&A); - BN_init(&B); - BN_init(&n); - - mont = BN_MONT_CTX_new(); - if (mont == NULL) - return 0; - - BN_zero(&n); - if (BN_MONT_CTX_set(mont, &n, ctx)) { - fprintf(stderr, "BN_MONT_CTX_set succeeded for zero modulus!\n"); - return 0; - } - - BN_set_word(&n, 16); - if (BN_MONT_CTX_set(mont, &n, ctx)) { - fprintf(stderr, "BN_MONT_CTX_set succeeded for even modulus!\n"); - return 0; - } - - BN_bntest_rand(&a, 100, 0, 0); - BN_bntest_rand(&b, 100, 0, 0); - for (i = 0; i < num2; i++) { - int bits = (200 * (i + 1)) / num2; - - if (bits == 0) - continue; - BN_bntest_rand(&n, bits, 0, 1); - BN_MONT_CTX_set(mont, &n, ctx); - - BN_nnmod(&a, &a, &n, ctx); - BN_nnmod(&b, &b, &n, ctx); - - BN_to_montgomery(&A, &a, mont, ctx); - BN_to_montgomery(&B, &b, mont, ctx); - - BN_mod_mul_montgomery(&c, &A, &B, mont, ctx); - BN_from_montgomery(&A, &c, mont, ctx); - if (bp != NULL) { - if (!results) { -#ifdef undef - fprintf(stderr, "%d * %d %% %d\n", - BN_num_bits(&a), - BN_num_bits(&b), BN_num_bits(mont->N)); -#endif - BN_print(bp, &a); - BIO_puts(bp, " * "); - BN_print(bp, &b); - BIO_puts(bp, " % "); - BN_print(bp, &(mont->N)); - BIO_puts(bp, " - "); - } - BN_print(bp, &A); - BIO_puts(bp, "\n"); - } - BN_mod_mul(&d, &a, &b, &n, ctx); - BN_sub(&d, &d, &A); - if (!BN_is_zero(&d)) { - fprintf(stderr, "Montgomery multiplication test failed!\n"); - return 0; - } - } - BN_MONT_CTX_free(mont); - BN_free(&a); - BN_free(&b); - BN_free(&c); - BN_free(&d); - BN_free(&A); - BN_free(&B); - BN_free(&n); - return (1); -} - -int test_mod(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_bntest_rand(a, 1024, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(b, 450 + i * 10, 0, 0); - a->neg = rand_neg(); - b->neg = rand_neg(); - BN_mod(c, a, b, ctx); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b); - BIO_puts(bp, " - "); - } - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - BN_div(d, e, a, b, ctx); - BN_sub(e, e, c); - if (!BN_is_zero(e)) { - fprintf(stderr, "Modulo test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_mul(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i, j; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_mul(e, a, b, c, ctx)) { - fprintf(stderr, "BN_mod_mul with zero modulus succeeded!\n"); - return 0; - } - - for (j = 0; j < 3; j++) { - BN_bntest_rand(c, 1024, 0, 0); - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 475 + i * 10, 0, 0); - BN_bntest_rand(b, 425 + i * 11, 0, 0); - a->neg = rand_neg(); - b->neg = rand_neg(); - if (!BN_mod_mul(e, a, b, c, ctx)) { - unsigned long l; - - while ((l = ERR_get_error())) - fprintf(stderr, "ERROR:%s\n", ERR_error_string(l, NULL)); - EXIT(1); - } - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - if ((a->neg ^ b->neg) && !BN_is_zero(e)) { - /* - * If (a*b) % c is negative, c must be added in order - * to obtain the normalized remainder (new with - * OpenSSL 0.9.7, previous versions of BN_mod_mul - * could generate negative results) - */ - BIO_puts(bp, " + "); - BN_print(bp, c); - } - BIO_puts(bp, " - "); - } - BN_print(bp, e); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, b, ctx); - BN_sub(d, d, e); - BN_div(a, b, d, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo multiply test failed!\n"); - ERR_print_errors_fp(stderr); - return 0; - } - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_exp(d, a, b, c, ctx)) { - fprintf(stderr, "BN_mod_exp with zero modulus succeeded!\n"); - return 0; - } - - BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (!BN_mod_exp(d, a, b, c, ctx)) - return (0); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_exp(e, a, b, ctx); - BN_sub(e, e, d); - BN_div(a, b, e, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo exponentiation test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_one(a); - BN_one(b); - BN_zero(c); - if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { - fprintf(stderr, "BN_mod_exp_mont_consttime with zero modulus " - "succeeded\n"); - return 0; - } - - BN_set_word(c, 16); - if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { - fprintf(stderr, "BN_mod_exp_mont_consttime with even modulus " - "succeeded\n"); - return 0; - } - - BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (!BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) - return (00); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " % "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_exp(e, a, b, ctx); - BN_sub(e, e, d); - BN_div(a, b, e, c, ctx); - if (!BN_is_zero(b)) { - fprintf(stderr, "Modulo exponentiation test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -/* - * Test constant-time modular exponentiation with 1024-bit inputs, which on - * x86_64 cause a different code branch to be taken. - */ -int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *p, *m, *d, *e; - - BN_MONT_CTX *mont; - - a = BN_new(); - p = BN_new(); - m = BN_new(); - d = BN_new(); - e = BN_new(); - - mont = BN_MONT_CTX_new(); - - BN_bntest_rand(m, 1024, 0, 1); /* must be odd for montgomery */ - /* Zero exponent */ - BN_bntest_rand(a, 1024, 0, 0); - BN_zero(p); - if (!BN_mod_exp_mont_consttime(d, a, p, m, ctx, NULL)) - return 0; - if (!BN_is_one(d)) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* Zero input */ - BN_bntest_rand(p, 1024, 0, 0); - BN_zero(a); - if (!BN_mod_exp_mont_consttime(d, a, p, m, ctx, NULL)) - return 0; - if (!BN_is_zero(d)) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* - * Craft an input whose Montgomery representation is 1, i.e., shorter - * than the modulus m, in order to test the const time precomputation - * scattering/gathering. - */ - BN_one(a); - BN_MONT_CTX_set(mont, m, ctx); - if (!BN_from_montgomery(e, a, mont, ctx)) - return 0; - if (!BN_mod_exp_mont_consttime(d, e, p, m, ctx, NULL)) - return 0; - if (!BN_mod_exp_simple(a, e, p, m, ctx)) - return 0; - if (BN_cmp(a, d) != 0) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - /* Finally, some regular test vectors. */ - BN_bntest_rand(e, 1024, 0, 0); - if (!BN_mod_exp_mont_consttime(d, e, p, m, ctx, NULL)) - return 0; - if (!BN_mod_exp_simple(a, e, p, m, ctx)) - return 0; - if (BN_cmp(a, d) != 0) { - fprintf(stderr, "Modular exponentiation test failed!\n"); - return 0; - } - BN_free(a); - BN_free(p); - BN_free(m); - BN_free(d); - BN_free(e); - return (1); -} - -int test_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *d, *e, *one; - int i; - - a = BN_new(); - b = BN_new(); - d = BN_new(); - e = BN_new(); - one = BN_new(); - BN_one(one); - - for (i = 0; i < num2; i++) { - BN_bntest_rand(a, 20 + i * 5, 0, 0); - BN_bntest_rand(b, 2 + i, 0, 0); - - if (BN_exp(d, a, b, ctx) <= 0) - return (0); - - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ "); - BN_print(bp, b); - BIO_puts(bp, " - "); - } - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - BN_one(e); - for (; !BN_is_zero(b); BN_sub(b, b, one)) - BN_mul(e, e, a, ctx); - BN_sub(e, e, d); - if (!BN_is_zero(e)) { - fprintf(stderr, "Exponentiation test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(d); - BN_free(e); - BN_free(one); - return (1); -} - -#ifndef OPENSSL_NO_EC2M -int test_gf2m_add(BIO *bp) -{ - BIGNUM a, b, c; - int i, ret = 0; - - BN_init(&a); - BN_init(&b); - BN_init(&c); - - for (i = 0; i < num0; i++) { - BN_rand(&a, 512, 0, 0); - BN_copy(&b, BN_value_one()); - a.neg = rand_neg(); - b.neg = rand_neg(); - BN_GF2m_add(&c, &a, &b); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, &a); - BIO_puts(bp, " ^ "); - BN_print(bp, &b); - BIO_puts(bp, " = "); - } - BN_print(bp, &c); - BIO_puts(bp, "\n"); - } -# endif - /* Test that two added values have the correct parity. */ - if ((BN_is_odd(&a) && BN_is_odd(&c)) - || (!BN_is_odd(&a) && !BN_is_odd(&c))) { - fprintf(stderr, "GF(2^m) addition test (a) failed!\n"); - goto err; - } - BN_GF2m_add(&c, &c, &c); - /* Test that c + c = 0. */ - if (!BN_is_zero(&c)) { - fprintf(stderr, "GF(2^m) addition test (b) failed!\n"); - goto err; - } - } - ret = 1; - err: - BN_free(&a); - BN_free(&b); - BN_free(&c); - return ret; -} - -int test_gf2m_mod(BIO *bp) -{ - BIGNUM *a, *b[2], *c, *d, *e; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod(c, a, b[j]); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, " - "); - BN_print(bp, c); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(d, a, c); - BN_GF2m_mod(e, d, b[j]); - /* Test that a + (a mod p) mod p == 0. */ - if (!BN_is_zero(e)) { - fprintf(stderr, "GF(2^m) modulo test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - return ret; -} - -int test_gf2m_mod_mul(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f, *g, *h; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - g = BN_new(); - h = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - BN_bntest_rand(c, 1024, 0, 0); - BN_bntest_rand(d, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_mul(e, a, c, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, " - "); - BN_print(bp, e); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, a, d); - BN_GF2m_mod_mul(g, f, c, b[j], ctx); - BN_GF2m_mod_mul(h, d, c, b[j], ctx); - BN_GF2m_add(f, e, g); - BN_GF2m_add(f, f, h); - /* Test that (a+d)*c = a*c + d*c. */ - if (!BN_is_zero(f)) { - fprintf(stderr, - "GF(2^m) modular multiplication test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - BN_free(g); - BN_free(h); - return ret; -} - -int test_gf2m_mod_sqr(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 1024, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_sqr(c, a, b[j], ctx); - BN_copy(d, a); - BN_GF2m_mod_mul(d, a, d, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ 2 % "); - BN_print(bp, b[j]); - BIO_puts(bp, " = "); - BN_print(bp, c); - BIO_puts(bp, "; a * a = "); - BN_print(bp, d); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(d, c, d); - /* Test that a*a = a^2. */ - if (!BN_is_zero(d)) { - fprintf(stderr, "GF(2^m) modular squaring test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - return ret; -} - -int test_gf2m_mod_inv(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_inv(c, a, b[j], ctx); - BN_GF2m_mod_mul(d, a, c, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " - 1 % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - /* Test that ((1/a)*a) = 1. */ - if (!BN_is_one(d)) { - fprintf(stderr, "GF(2^m) modular inversion test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - return ret; -} - -int test_gf2m_mod_div(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - BN_bntest_rand(c, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_div(d, a, c, b[j], ctx); - BN_GF2m_mod_mul(e, d, c, b[j], ctx); - BN_GF2m_mod_div(f, a, e, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " = "); - BN_print(bp, c); - BIO_puts(bp, " * "); - BN_print(bp, d); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - /* Test that ((a/c)*c)/a = 1. */ - if (!BN_is_one(f)) { - fprintf(stderr, "GF(2^m) modular division test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_exp(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - BN_bntest_rand(c, 512, 0, 0); - BN_bntest_rand(d, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod_exp(e, a, c, b[j], ctx); - BN_GF2m_mod_exp(f, a, d, b[j], ctx); - BN_GF2m_mod_mul(e, e, f, b[j], ctx); - BN_add(f, c, d); - BN_GF2m_mod_exp(f, a, f, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " ^ ("); - BN_print(bp, c); - BIO_puts(bp, " + "); - BN_print(bp, d); - BIO_puts(bp, ") = "); - BN_print(bp, e); - BIO_puts(bp, "; - "); - BN_print(bp, f); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, e, f); - /* Test that a^(c+d)=a^c*a^d. */ - if (!BN_is_zero(f)) { - fprintf(stderr, - "GF(2^m) modular exponentiation test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_sqrt(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e, *f; - int i, j, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - f = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - BN_GF2m_mod(c, a, b[j]); - BN_GF2m_mod_sqrt(d, a, b[j], ctx); - BN_GF2m_mod_sqr(e, d, b[j], ctx); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, d); - BIO_puts(bp, " ^ 2 - "); - BN_print(bp, a); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(f, c, e); - /* Test that d^2 = a, where d = sqrt(a). */ - if (!BN_is_zero(f)) { - fprintf(stderr, "GF(2^m) modular square root test failed!\n"); - goto err; - } - } - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - BN_free(f); - return ret; -} - -int test_gf2m_mod_solve_quad(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b[2], *c, *d, *e; - int i, j, s = 0, t, ret = 0; - int p0[] = { 163, 7, 6, 3, 0, -1 }; - int p1[] = { 193, 15, 0, -1 }; - - a = BN_new(); - b[0] = BN_new(); - b[1] = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - - BN_GF2m_arr2poly(p0, b[0]); - BN_GF2m_arr2poly(p1, b[1]); - - for (i = 0; i < num0; i++) { - BN_bntest_rand(a, 512, 0, 0); - for (j = 0; j < 2; j++) { - t = BN_GF2m_mod_solve_quad(c, a, b[j], ctx); - if (t) { - s++; - BN_GF2m_mod_sqr(d, c, b[j], ctx); - BN_GF2m_add(d, c, d); - BN_GF2m_mod(e, a, b[j]); -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BN_print(bp, c); - BIO_puts(bp, " is root of z^2 + z = "); - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - BN_GF2m_add(e, e, d); - /* - * Test that solution of quadratic c satisfies c^2 + c = a. - */ - if (!BN_is_zero(e)) { - fprintf(stderr, - "GF(2^m) modular solve quadratic test failed!\n"); - goto err; - } - - } else { -# if 0 /* make test uses ouput in bc but bc can't - * handle GF(2^m) arithmetic */ - if (bp != NULL) { - if (!results) { - BIO_puts(bp, "There are no roots of z^2 + z = "); - BN_print(bp, a); - BIO_puts(bp, " % "); - BN_print(bp, b[j]); - BIO_puts(bp, "\n"); - } - } -# endif - } - } - } - if (s == 0) { - fprintf(stderr, - "All %i tests of GF(2^m) modular solve quadratic resulted in no roots;\n", - num0); - fprintf(stderr, - "this is very unlikely and probably indicates an error.\n"); - goto err; - } - ret = 1; - err: - BN_free(a); - BN_free(b[0]); - BN_free(b[1]); - BN_free(c); - BN_free(d); - BN_free(e); - return ret; -} -#endif -static int genprime_cb(int p, int n, BN_GENCB *arg) -{ - char c = '*'; - - if (p == 0) - c = '.'; - if (p == 1) - c = '+'; - if (p == 2) - c = '*'; - if (p == 3) - c = '\n'; - putc(c, stderr); - fflush(stderr); - return 1; -} - -int test_kron(BIO *bp, BN_CTX *ctx) -{ - BN_GENCB cb; - BIGNUM *a, *b, *r, *t; - int i; - int legendre, kronecker; - int ret = 0; - - a = BN_new(); - b = BN_new(); - r = BN_new(); - t = BN_new(); - if (a == NULL || b == NULL || r == NULL || t == NULL) - goto err; - - BN_GENCB_set(&cb, genprime_cb, NULL); - - /* - * We test BN_kronecker(a, b, ctx) just for b odd (Jacobi symbol). In - * this case we know that if b is prime, then BN_kronecker(a, b, ctx) is - * congruent to $a^{(b-1)/2}$, modulo $b$ (Legendre symbol). So we - * generate a random prime b and compare these values for a number of - * random a's. (That is, we run the Solovay-Strassen primality test to - * confirm that b is prime, except that we don't want to test whether b - * is prime but whether BN_kronecker works.) - */ - - if (!BN_generate_prime_ex(b, 512, 0, NULL, NULL, &cb)) - goto err; - b->neg = rand_neg(); - putc('\n', stderr); - - for (i = 0; i < num0; i++) { - if (!BN_bntest_rand(a, 512, 0, 0)) - goto err; - a->neg = rand_neg(); - - /* t := (|b|-1)/2 (note that b is odd) */ - if (!BN_copy(t, b)) - goto err; - t->neg = 0; - if (!BN_sub_word(t, 1)) - goto err; - if (!BN_rshift1(t, t)) - goto err; - /* r := a^t mod b */ - b->neg = 0; - - if (!BN_mod_exp_recp(r, a, t, b, ctx)) - goto err; - b->neg = 1; - - if (BN_is_word(r, 1)) - legendre = 1; - else if (BN_is_zero(r)) - legendre = 0; - else { - if (!BN_add_word(r, 1)) - goto err; - if (0 != BN_ucmp(r, b)) { - fprintf(stderr, "Legendre symbol computation failed\n"); - goto err; - } - legendre = -1; - } - - kronecker = BN_kronecker(a, b, ctx); - if (kronecker < -1) - goto err; - /* we actually need BN_kronecker(a, |b|) */ - if (a->neg && b->neg) - kronecker = -kronecker; - - if (legendre != kronecker) { - fprintf(stderr, "legendre != kronecker; a = "); - BN_print_fp(stderr, a); - fprintf(stderr, ", b = "); - BN_print_fp(stderr, b); - fprintf(stderr, "\n"); - goto err; - } - - putc('.', stderr); - fflush(stderr); - } - - putc('\n', stderr); - fflush(stderr); - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (b != NULL) - BN_free(b); - if (r != NULL) - BN_free(r); - if (t != NULL) - BN_free(t); - return ret; -} - -int test_sqrt(BIO *bp, BN_CTX *ctx) -{ - BN_GENCB cb; - BIGNUM *a, *p, *r; - int i, j; - int ret = 0; - - a = BN_new(); - p = BN_new(); - r = BN_new(); - if (a == NULL || p == NULL || r == NULL) - goto err; - - BN_GENCB_set(&cb, genprime_cb, NULL); - - for (i = 0; i < 16; i++) { - if (i < 8) { - unsigned primes[8] = { 2, 3, 5, 7, 11, 13, 17, 19 }; - - if (!BN_set_word(p, primes[i])) - goto err; - } else { - if (!BN_set_word(a, 32)) - goto err; - if (!BN_set_word(r, 2 * i + 1)) - goto err; - - if (!BN_generate_prime_ex(p, 256, 0, a, r, &cb)) - goto err; - putc('\n', stderr); - } - p->neg = rand_neg(); - - for (j = 0; j < num2; j++) { - /* - * construct 'a' such that it is a square modulo p, but in - * general not a proper square and not reduced modulo p - */ - if (!BN_bntest_rand(r, 256, 0, 3)) - goto err; - if (!BN_nnmod(r, r, p, ctx)) - goto err; - if (!BN_mod_sqr(r, r, p, ctx)) - goto err; - if (!BN_bntest_rand(a, 256, 0, 3)) - goto err; - if (!BN_nnmod(a, a, p, ctx)) - goto err; - if (!BN_mod_sqr(a, a, p, ctx)) - goto err; - if (!BN_mul(a, a, r, ctx)) - goto err; - if (rand_neg()) - if (!BN_sub(a, a, p)) - goto err; - - if (!BN_mod_sqrt(r, a, p, ctx)) - goto err; - if (!BN_mod_sqr(r, r, p, ctx)) - goto err; - - if (!BN_nnmod(a, a, p, ctx)) - goto err; - - if (BN_cmp(a, r) != 0) { - fprintf(stderr, "BN_mod_sqrt failed: a = "); - BN_print_fp(stderr, a); - fprintf(stderr, ", r = "); - BN_print_fp(stderr, r); - fprintf(stderr, ", p = "); - BN_print_fp(stderr, p); - fprintf(stderr, "\n"); - goto err; - } - - putc('.', stderr); - fflush(stderr); - } - - putc('\n', stderr); - fflush(stderr); - } - ret = 1; - err: - if (a != NULL) - BN_free(a); - if (p != NULL) - BN_free(p); - if (r != NULL) - BN_free(r); - return ret; -} - -int test_lshift(BIO *bp, BN_CTX *ctx, BIGNUM *a_) -{ - BIGNUM *a, *b, *c, *d; - int i; - - b = BN_new(); - c = BN_new(); - d = BN_new(); - BN_one(c); - - if (a_) - a = a_; - else { - a = BN_new(); - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - } - for (i = 0; i < num0; i++) { - BN_lshift(b, a, i + 1); - BN_add(c, c, c); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_mul(d, a, c, ctx); - BN_sub(d, d, b); - if (!BN_is_zero(d)) { - fprintf(stderr, "Left shift test failed!\n"); - fprintf(stderr, "a="); - BN_print_fp(stderr, a); - fprintf(stderr, "\nb="); - BN_print_fp(stderr, b); - fprintf(stderr, "\nc="); - BN_print_fp(stderr, c); - fprintf(stderr, "\nd="); - BN_print_fp(stderr, d); - fprintf(stderr, "\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - return (1); -} - -int test_lshift1(BIO *bp) -{ - BIGNUM *a, *b, *c; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_lshift1(b, a); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " * 2"); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_add(c, a, a); - BN_sub(a, b, c); - if (!BN_is_zero(a)) { - fprintf(stderr, "Left shift one test failed!\n"); - return 0; - } - - BN_copy(a, b); - } - BN_free(a); - BN_free(b); - BN_free(c); - return (1); -} - -int test_rshift(BIO *bp, BN_CTX *ctx) -{ - BIGNUM *a, *b, *c, *d, *e; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - d = BN_new(); - e = BN_new(); - BN_one(c); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_rshift(b, a, i + 1); - BN_add(c, c, c); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " / "); - BN_print(bp, c); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_div(d, e, a, c, ctx); - BN_sub(d, d, b); - if (!BN_is_zero(d)) { - fprintf(stderr, "Right shift test failed!\n"); - return 0; - } - } - BN_free(a); - BN_free(b); - BN_free(c); - BN_free(d); - BN_free(e); - return (1); -} - -int test_rshift1(BIO *bp) -{ - BIGNUM *a, *b, *c; - int i; - - a = BN_new(); - b = BN_new(); - c = BN_new(); - - BN_bntest_rand(a, 200, 0, 0); - a->neg = rand_neg(); - for (i = 0; i < num0; i++) { - BN_rshift1(b, a); - if (bp != NULL) { - if (!results) { - BN_print(bp, a); - BIO_puts(bp, " / 2"); - BIO_puts(bp, " - "); - } - BN_print(bp, b); - BIO_puts(bp, "\n"); - } - BN_sub(c, a, b); - BN_sub(c, c, b); - if (!BN_is_zero(c) && !BN_abs_is_word(c, 1)) { - fprintf(stderr, "Right shift one test failed!\n"); - return 0; - } - BN_copy(a, b); - } - BN_free(a); - BN_free(b); - BN_free(c); - return (1); -} - -int rand_neg(void) -{ - static unsigned int neg = 0; - static int sign[8] = { 0, 0, 0, 1, 1, 0, 1, 1 }; - - return (sign[(neg++) % 8]); -} diff --git a/drivers/builtin_openssl2/crypto/bn/divtest.c b/drivers/builtin_openssl2/crypto/bn/divtest.c deleted file mode 100644 index 2590b4581b..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/divtest.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include - -static int Rand(n) -{ - unsigned char x[2]; - RAND_pseudo_bytes(x, 2); - return (x[0] + 2 * x[1]); -} - -static void bug(char *m, BIGNUM *a, BIGNUM *b) -{ - printf("%s!\na=", m); - BN_print_fp(stdout, a); - printf("\nb="); - BN_print_fp(stdout, b); - printf("\n"); - fflush(stdout); -} - -main() -{ - BIGNUM *a = BN_new(), *b = BN_new(), *c = BN_new(), *d = BN_new(), - *C = BN_new(), *D = BN_new(); - BN_RECP_CTX *recp = BN_RECP_CTX_new(); - BN_CTX *ctx = BN_CTX_new(); - - for (;;) { - BN_pseudo_rand(a, Rand(), 0, 0); - BN_pseudo_rand(b, Rand(), 0, 0); - if (BN_is_zero(b)) - continue; - - BN_RECP_CTX_set(recp, b, ctx); - if (BN_div(C, D, a, b, ctx) != 1) - bug("BN_div failed", a, b); - if (BN_div_recp(c, d, a, recp, ctx) != 1) - bug("BN_div_recp failed", a, b); - else if (BN_cmp(c, C) != 0 || BN_cmp(c, C) != 0) - bug("mismatch", a, b); - } -} diff --git a/drivers/builtin_openssl2/crypto/bn/exptest.c b/drivers/builtin_openssl2/crypto/bn/exptest.c deleted file mode 100644 index ac611c2e26..0000000000 --- a/drivers/builtin_openssl2/crypto/bn/exptest.c +++ /dev/null @@ -1,313 +0,0 @@ -/* crypto/bn/exptest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#include -#include -#include -#include - -#define NUM_BITS (BN_BITS*2) - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -/* - * Test that r == 0 in test_exp_mod_zero(). Returns one on success, - * returns zero and prints debug output otherwise. - */ -static int a_is_zero_mod_one(const char *method, const BIGNUM *r, - const BIGNUM *a) { - if (!BN_is_zero(r)) { - fprintf(stderr, "%s failed:\n", method); - fprintf(stderr, "a ** 0 mod 1 = r (should be 0)\n"); - fprintf(stderr, "a = "); - BN_print_fp(stderr, a); - fprintf(stderr, "\nr = "); - BN_print_fp(stderr, r); - fprintf(stderr, "\n"); - return 0; - } - return 1; -} - -/* - * test_exp_mod_zero tests that x**0 mod 1 == 0. It returns zero on success. - */ -static int test_exp_mod_zero() -{ - BIGNUM a, p, m; - BIGNUM r; - BN_ULONG one_word = 1; - BN_CTX *ctx = BN_CTX_new(); - int ret = 1, failed = 0; - - BN_init(&m); - BN_one(&m); - - BN_init(&a); - BN_one(&a); - - BN_init(&p); - BN_zero(&p); - - BN_init(&r); - - if (!BN_rand(&a, 1024, 0, 0)) - goto err; - - if (!BN_mod_exp(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp", &r, &a)) - failed = 1; - - if (!BN_mod_exp_recp(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_recp", &r, &a)) - failed = 1; - - if (!BN_mod_exp_simple(&r, &a, &p, &m, ctx)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_simple", &r, &a)) - failed = 1; - - if (!BN_mod_exp_mont(&r, &a, &p, &m, ctx, NULL)) - goto err; - - if (!a_is_zero_mod_one("BN_mod_exp_mont", &r, &a)) - failed = 1; - - if (!BN_mod_exp_mont_consttime(&r, &a, &p, &m, ctx, NULL)) { - goto err; - } - - if (!a_is_zero_mod_one("BN_mod_exp_mont_consttime", &r, &a)) - failed = 1; - - /* - * A different codepath exists for single word multiplication - * in non-constant-time only. - */ - if (!BN_mod_exp_mont_word(&r, one_word, &p, &m, ctx, NULL)) - goto err; - - if (!BN_is_zero(&r)) { - fprintf(stderr, "BN_mod_exp_mont_word failed:\n"); - fprintf(stderr, "1 ** 0 mod 1 = r (should be 0)\n"); - fprintf(stderr, "r = "); - BN_print_fp(stderr, &r); - fprintf(stderr, "\n"); - return 0; - } - - ret = failed; - - err: - BN_free(&r); - BN_free(&a); - BN_free(&p); - BN_free(&m); - BN_CTX_free(ctx); - - return ret; -} - -int main(int argc, char *argv[]) -{ - BN_CTX *ctx; - BIO *out = NULL; - int i, ret; - unsigned char c; - BIGNUM *r_mont, *r_mont_const, *r_recp, *r_simple, *a, *b, *m; - - RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we - * don't even check its return - * value (which we should) */ - - ERR_load_BN_strings(); - - ctx = BN_CTX_new(); - if (ctx == NULL) - EXIT(1); - r_mont = BN_new(); - r_mont_const = BN_new(); - r_recp = BN_new(); - r_simple = BN_new(); - a = BN_new(); - b = BN_new(); - m = BN_new(); - if ((r_mont == NULL) || (r_recp == NULL) || (a == NULL) || (b == NULL)) - goto err; - - out = BIO_new(BIO_s_file()); - - if (out == NULL) - EXIT(1); - BIO_set_fp(out, stdout, BIO_NOCLOSE); - - for (i = 0; i < 200; i++) { - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(a, NUM_BITS + c, 0, 0); - - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(b, NUM_BITS + c, 0, 0); - - RAND_bytes(&c, 1); - c = (c % BN_BITS) - BN_BITS2; - BN_rand(m, NUM_BITS + c, 0, 1); - - BN_mod(a, a, m, ctx); - BN_mod(b, b, m, ctx); - - ret = BN_mod_exp_mont(r_mont, a, b, m, ctx, NULL); - if (ret <= 0) { - printf("BN_mod_exp_mont() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_recp(r_recp, a, b, m, ctx); - if (ret <= 0) { - printf("BN_mod_exp_recp() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_simple(r_simple, a, b, m, ctx); - if (ret <= 0) { - printf("BN_mod_exp_simple() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - ret = BN_mod_exp_mont_consttime(r_mont_const, a, b, m, ctx, NULL); - if (ret <= 0) { - printf("BN_mod_exp_mont_consttime() problems\n"); - ERR_print_errors(out); - EXIT(1); - } - - if (BN_cmp(r_simple, r_mont) == 0 - && BN_cmp(r_simple, r_recp) == 0 - && BN_cmp(r_simple, r_mont_const) == 0) { - printf("."); - fflush(stdout); - } else { - if (BN_cmp(r_simple, r_mont) != 0) - printf("\nsimple and mont results differ\n"); - if (BN_cmp(r_simple, r_mont_const) != 0) - printf("\nsimple and mont const time results differ\n"); - if (BN_cmp(r_simple, r_recp) != 0) - printf("\nsimple and recp results differ\n"); - - printf("a (%3d) = ", BN_num_bits(a)); - BN_print(out, a); - printf("\nb (%3d) = ", BN_num_bits(b)); - BN_print(out, b); - printf("\nm (%3d) = ", BN_num_bits(m)); - BN_print(out, m); - printf("\nsimple ="); - BN_print(out, r_simple); - printf("\nrecp ="); - BN_print(out, r_recp); - printf("\nmont ="); - BN_print(out, r_mont); - printf("\nmont_ct ="); - BN_print(out, r_mont_const); - printf("\n"); - EXIT(1); - } - } - BN_free(r_mont); - BN_free(r_mont_const); - BN_free(r_recp); - BN_free(r_simple); - BN_free(a); - BN_free(b); - BN_free(m); - BN_CTX_free(ctx); - ERR_remove_thread_state(NULL); - CRYPTO_mem_leaks(out); - BIO_free(out); - printf("\n"); - - if (test_exp_mod_zero() != 0) - goto err; - - printf("done\n"); - - EXIT(0); - err: - ERR_load_crypto_strings(); - ERR_print_errors(out); -#ifdef OPENSSL_SYS_NETWARE - printf("ERROR\n"); -#endif - EXIT(1); - return (1); -} diff --git a/drivers/builtin_openssl2/crypto/bn/rsaz_exp.c b/drivers/builtin_openssl2/crypto/bn/rsaz_exp.c new file mode 100644 index 0000000000..c54c6feb51 --- /dev/null +++ b/drivers/builtin_openssl2/crypto/bn/rsaz_exp.c @@ -0,0 +1,346 @@ +/***************************************************************************** +* * +* Copyright (c) 2012, Intel Corporation * +* * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are * +* met: * +* * +* * Redistributions of source code must retain the above copyright * +* notice, this list of conditions and the following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the * +* distribution. * +* * +* * Neither the name of the Intel Corporation nor the names of its * +* contributors may be used to endorse or promote products derived from * +* this software without specific prior written permission. * +* * +* * +* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * +* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * +* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * +* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +* * +****************************************************************************** +* Developers and authors: * +* Shay Gueron (1, 2), and Vlad Krasnov (1) * +* (1) Intel Corporation, Israel Development Center, Haifa, Israel * +* (2) University of Haifa, Israel * +*****************************************************************************/ + +#include "rsaz_exp.h" + +#ifdef RSAZ_ENABLED + +/* + * See crypto/bn/asm/rsaz-avx2.pl for further details. + */ +void rsaz_1024_norm2red_avx2(void *red, const void *norm); +void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b, + const void *n, BN_ULONG k); +void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k, + int cnt); +void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i); +void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i); +void rsaz_1024_red2norm_avx2(void *norm, const void *red); + +#if defined(__GNUC__) +# define ALIGN64 __attribute__((aligned(64))) +#elif defined(_MSC_VER) +# define ALIGN64 __declspec(align(64)) +#elif defined(__SUNPRO_C) +# define ALIGN64 +# pragma align 64(one,two80) +#else +/* not fatal, might hurt performance a little */ +# define ALIGN64 +#endif + +ALIGN64 static const BN_ULONG one[40] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +ALIGN64 static const BN_ULONG two80[40] = { + 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], + const BN_ULONG base_norm[16], + const BN_ULONG exponent[16], + const BN_ULONG m_norm[16], const BN_ULONG RR[16], + BN_ULONG k0) +{ + unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */ + unsigned char *p_str = storage + (64 - ((size_t)storage % 64)); + unsigned char *a_inv, *m, *result; + unsigned char *table_s = p_str + 320 * 3; + unsigned char *R2 = table_s; /* borrow */ + int index; + int wvalue; + + if ((((size_t)p_str & 4095) + 320) >> 12) { + result = p_str; + a_inv = p_str + 320; + m = p_str + 320 * 2; /* should not cross page */ + } else { + m = p_str; /* should not cross page */ + result = p_str + 320; + a_inv = p_str + 320 * 2; + } + + rsaz_1024_norm2red_avx2(m, m_norm); + rsaz_1024_norm2red_avx2(a_inv, base_norm); + rsaz_1024_norm2red_avx2(R2, RR); + + rsaz_1024_mul_avx2(R2, R2, R2, m, k0); + rsaz_1024_mul_avx2(R2, R2, two80, m, k0); + + /* table[0] = 1 */ + rsaz_1024_mul_avx2(result, R2, one, m, k0); + /* table[1] = a_inv^1 */ + rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); + + rsaz_1024_scatter5_avx2(table_s, result, 0); + rsaz_1024_scatter5_avx2(table_s, a_inv, 1); + + /* table[2] = a_inv^2 */ + rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 2); +#if 0 + /* this is almost 2x smaller and less than 1% slower */ + for (index = 3; index < 32; index++) { + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, index); + } +#else + /* table[4] = a_inv^4 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 4); + /* table[8] = a_inv^8 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 8); + /* table[16] = a_inv^16 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 16); + /* table[17] = a_inv^17 */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 17); + + /* table[3] */ + rsaz_1024_gather5_avx2(result, table_s, 2); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 3); + /* table[6] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 6); + /* table[12] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 12); + /* table[24] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 24); + /* table[25] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 25); + + /* table[5] */ + rsaz_1024_gather5_avx2(result, table_s, 4); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 5); + /* table[10] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 10); + /* table[20] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 20); + /* table[21] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 21); + + /* table[7] */ + rsaz_1024_gather5_avx2(result, table_s, 6); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 7); + /* table[14] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 14); + /* table[28] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 28); + /* table[29] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 29); + + /* table[9] */ + rsaz_1024_gather5_avx2(result, table_s, 8); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 9); + /* table[18] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 18); + /* table[19] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 19); + + /* table[11] */ + rsaz_1024_gather5_avx2(result, table_s, 10); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 11); + /* table[22] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 22); + /* table[23] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 23); + + /* table[13] */ + rsaz_1024_gather5_avx2(result, table_s, 12); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 13); + /* table[26] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 26); + /* table[27] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 27); + + /* table[15] */ + rsaz_1024_gather5_avx2(result, table_s, 14); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 15); + /* table[30] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 30); + /* table[31] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 31); +#endif + + /* load first window */ + p_str = (unsigned char *)exponent; + wvalue = p_str[127] >> 3; + rsaz_1024_gather5_avx2(result, table_s, wvalue); + + index = 1014; + + while (index > -1) { /* loop for the remaining 127 windows */ + + rsaz_1024_sqr_avx2(result, result, m, k0, 5); + + wvalue = *((unsigned short *)&p_str[index / 8]); + wvalue = (wvalue >> (index % 8)) & 31; + index -= 5; + + rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + } + + /* square four times */ + rsaz_1024_sqr_avx2(result, result, m, k0, 4); + + wvalue = p_str[0] & 15; + + rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + + /* from Montgomery */ + rsaz_1024_mul_avx2(result, result, one, m, k0); + + rsaz_1024_red2norm_avx2(result_norm, result); + + OPENSSL_cleanse(storage, sizeof(storage)); +} + +/* + * See crypto/bn/rsaz-x86_64.pl for further details. + */ +void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, + BN_ULONG k); +void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n, + BN_ULONG k, const void *tbl, unsigned int power); +void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl, + const void *n, BN_ULONG k, unsigned int power); +void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k); +void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, + int cnt); +void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); +void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); + +void RSAZ_512_mod_exp(BN_ULONG result[8], + const BN_ULONG base[8], const BN_ULONG exponent[8], + const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) +{ + unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */ + unsigned char *table = storage + (64 - ((size_t)storage % 64)); + BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8); + BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8); + unsigned char *p_str = (unsigned char *)exponent; + int index; + unsigned int wvalue; + + /* table[0] = 1_inv */ + temp[0] = 0 - m[0]; + temp[1] = ~m[1]; + temp[2] = ~m[2]; + temp[3] = ~m[3]; + temp[4] = ~m[4]; + temp[5] = ~m[5]; + temp[6] = ~m[6]; + temp[7] = ~m[7]; + rsaz_512_scatter4(table, temp, 0); + + /* table [1] = a_inv^1 */ + rsaz_512_mul(a_inv, base, RR, m, k0); + rsaz_512_scatter4(table, a_inv, 1); + + /* table [2] = a_inv^2 */ + rsaz_512_sqr(temp, a_inv, m, k0, 1); + rsaz_512_scatter4(table, temp, 2); + + for (index = 3; index < 16; index++) + rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); + + /* load first window */ + wvalue = p_str[63]; + + rsaz_512_gather4(temp, table, wvalue >> 4); + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf); + + for (index = 62; index >= 0; index--) { + wvalue = p_str[index]; + + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4); + + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f); + } + + /* from Montgomery */ + rsaz_512_mul_by_one(result, temp, m, k0); + + OPENSSL_cleanse(storage, sizeof(storage)); +} + +#else + +# if defined(PEDANTIC) || defined(__DECC) || defined(__clang__) +static void *dummy = &dummy; +# endif + +#endif diff --git a/drivers/builtin_openssl2/crypto/bn/rsaz_exp.h b/drivers/builtin_openssl2/crypto/bn/rsaz_exp.h new file mode 100644 index 0000000000..229e181f67 --- /dev/null +++ b/drivers/builtin_openssl2/crypto/bn/rsaz_exp.h @@ -0,0 +1,68 @@ +/***************************************************************************** +* * +* Copyright (c) 2012, Intel Corporation * +* * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are * +* met: * +* * +* * Redistributions of source code must retain the above copyright * +* notice, this list of conditions and the following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the * +* distribution. * +* * +* * Neither the name of the Intel Corporation nor the names of its * +* contributors may be used to endorse or promote products derived from * +* this software without specific prior written permission. * +* * +* * +* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * +* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * +* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * +* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +* * +****************************************************************************** +* Developers and authors: * +* Shay Gueron (1, 2), and Vlad Krasnov (1) * +* (1) Intel Corporation, Israel Development Center, Haifa, Israel * +* (2) University of Haifa, Israel * +*****************************************************************************/ + +#ifndef RSAZ_EXP_H +# define RSAZ_EXP_H + +# undef RSAZ_ENABLED +# if defined(OPENSSL_BN_ASM_MONT) && \ + (defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64)) +# define RSAZ_ENABLED + +# include + +void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], + const BN_ULONG base_norm[16], + const BN_ULONG exponent[16], + const BN_ULONG m_norm[16], const BN_ULONG RR[16], + BN_ULONG k0); +int rsaz_avx2_eligible(); + +void RSAZ_512_mod_exp(BN_ULONG result[8], + const BN_ULONG base_norm[8], const BN_ULONG exponent[8], + const BN_ULONG m_norm[8], BN_ULONG k0, + const BN_ULONG RR[8]); + +# endif + +#endif diff --git a/drivers/builtin_openssl2/crypto/buffer/buf_str.c b/drivers/builtin_openssl2/crypto/buffer/buf_str.c index 233af246e4..fa0d608e76 100644 --- a/drivers/builtin_openssl2/crypto/buffer/buf_str.c +++ b/drivers/builtin_openssl2/crypto/buffer/buf_str.c @@ -61,6 +61,15 @@ #include #include +size_t BUF_strnlen(const char *str, size_t maxlen) +{ + const char *p; + + for (p = str; maxlen-- != 0 && *p != '\0'; ++p) ; + + return p - str; +} + char *BUF_strdup(const char *str) { if (str == NULL) @@ -75,6 +84,8 @@ char *BUF_strndup(const char *str, size_t siz) if (str == NULL) return NULL; + siz = BUF_strnlen(str, siz); + if (siz >= INT_MAX) return NULL; diff --git a/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86.pl b/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86.pl deleted file mode 100644 index c314d62312..0000000000 --- a/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86.pl +++ /dev/null @@ -1,1138 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Copyright (c) 2008 Andy Polyakov -# -# This module may be used under the terms of either the GNU General -# Public License version 2 or later, the GNU Lesser General Public -# License version 2.1 or later, the Mozilla Public License version -# 1.1 or the BSD License. The exact terms of either license are -# distributed along with this module. For further details see -# http://www.openssl.org/~appro/camellia/. -# ==================================================================== - -# Performance in cycles per processed byte (less is better) in -# 'openssl speed ...' benchmark: -# -# AMD K8 Core2 PIII P4 -# -evp camellia-128-ecb 21.5 22.8 27.0 28.9 -# + over gcc 3.4.6 +90/11% +70/10% +53/4% +160/64% -# + over icc 8.0 +48/19% +21/15% +21/17% +55/37% -# -# camellia-128-cbc 17.3 21.1 23.9 25.9 -# -# 128-bit key setup 196 280 256 240 cycles/key -# + over gcc 3.4.6 +30/0% +17/11% +11/0% +63/40% -# + over icc 8.0 +18/3% +10/0% +10/3% +21/10% -# -# Pairs of numbers in "+" rows represent performance improvement over -# compiler generated position-independent code, PIC, and non-PIC -# respectively. PIC results are of greater relevance, as this module -# is position-independent, i.e. suitable for a shared library or PIE. -# Position independence "costs" one register, which is why compilers -# are so close with non-PIC results, they have an extra register to -# spare. CBC results are better than ECB ones thanks to "zero-copy" -# private _x86_* interface, and are ~30-40% better than with compiler -# generated cmll_cbc.o, and reach ~80-90% of x86_64 performance on -# same CPU (where applicable). - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -$OPENSSL=1; - -&asm_init($ARGV[0],"cmll-586.pl",$ARGV[$#ARGV] eq "386"); - -@T=("eax","ebx","ecx","edx"); -$idx="esi"; -$key="edi"; -$Tbl="ebp"; - -# stack frame layout in _x86_Camellia_* routines, frame is allocated -# by caller -$__ra=&DWP(0,"esp"); # return address -$__s0=&DWP(4,"esp"); # s0 backing store -$__s1=&DWP(8,"esp"); # s1 backing store -$__s2=&DWP(12,"esp"); # s2 backing store -$__s3=&DWP(16,"esp"); # s3 backing store -$__end=&DWP(20,"esp"); # pointer to end/start of key schedule - -# stack frame layout in Camellia_[en|crypt] routines, which differs from -# above by 4 and overlaps by pointer to end/start of key schedule -$_end=&DWP(16,"esp"); -$_esp=&DWP(20,"esp"); - -# const unsigned int Camellia_SBOX[4][256]; -# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], -# and [2][] - with [3][]. This is done to optimize code size. -$SBOX1_1110=0; # Camellia_SBOX[0] -$SBOX4_4404=4; # Camellia_SBOX[1] -$SBOX2_0222=2048; # Camellia_SBOX[2] -$SBOX3_3033=2052; # Camellia_SBOX[3] -&static_label("Camellia_SIGMA"); -&static_label("Camellia_SBOX"); - -sub Camellia_Feistel { -my $i=@_[0]; -my $seed=defined(@_[1])?@_[1]:0; -my $scale=$seed<0?-8:8; -my $frame=defined(@_[2])?@_[2]:0; -my $j=($i&1)*2; -my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; - - &xor ($t0,$idx); # t0^=key[0] - &xor ($t1,&DWP($seed+$i*$scale+4,$key)); # t1^=key[1] - &movz ($idx,&HB($t0)); # (t0>>8)&0xff - &mov ($t3,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t3=SBOX3_3033[0] - &movz ($idx,&LB($t0)); # (t0>>0)&0xff - &xor ($t3,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t3^=SBOX4_4404[0] - &shr ($t0,16); - &movz ($idx,&LB($t1)); # (t1>>0)&0xff - &mov ($t2,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t2=SBOX1_1110[1] - &movz ($idx,&HB($t0)); # (t0>>24)&0xff - &xor ($t3,&DWP($SBOX1_1110,$Tbl,$idx,8)); # t3^=SBOX1_1110[0] - &movz ($idx,&HB($t1)); # (t1>>8)&0xff - &xor ($t2,&DWP($SBOX4_4404,$Tbl,$idx,8)); # t2^=SBOX4_4404[1] - &shr ($t1,16); - &movz ($t0,&LB($t0)); # (t0>>16)&0xff - &xor ($t3,&DWP($SBOX2_0222,$Tbl,$t0,8)); # t3^=SBOX2_0222[0] - &movz ($idx,&HB($t1)); # (t1>>24)&0xff - &mov ($t0,&DWP($frame+4*(($j+3)%4),"esp")); # prefetch "s3" - &xor ($t2,$t3); # t2^=t3 - &rotr ($t3,8); # t3=RightRotate(t3,8) - &xor ($t2,&DWP($SBOX2_0222,$Tbl,$idx,8)); # t2^=SBOX2_0222[1] - &movz ($idx,&LB($t1)); # (t1>>16)&0xff - &mov ($t1,&DWP($frame+4*(($j+2)%4),"esp")); # prefetch "s2" - &xor ($t3,$t0); # t3^=s3 - &xor ($t2,&DWP($SBOX3_3033,$Tbl,$idx,8)); # t2^=SBOX3_3033[1] - &mov ($idx,&DWP($seed+($i+1)*$scale,$key)); # prefetch key[i+1] - &xor ($t3,$t2); # t3^=t2 - &mov (&DWP($frame+4*(($j+3)%4),"esp"),$t3); # s3=t3 - &xor ($t2,$t1); # t2^=s2 - &mov (&DWP($frame+4*(($j+2)%4),"esp"),$t2); # s2=t2 -} - -# void Camellia_EncryptBlock_Rounds( -# int grandRounds, -# const Byte plaintext[], -# const KEY_TABLE_TYPE keyTable, -# Byte ciphertext[]) -&function_begin("Camellia_EncryptBlock_Rounds"); - &mov ("eax",&wparam(0)); # load grandRounds - &mov ($idx,&wparam(1)); # load plaintext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &lea ("eax",&DWP(0,$key,"eax")); - &mov ($_esp,"ebx"); # save %esp - &mov ($_end,"eax"); # save keyEnd - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load plaintext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_encrypt"); - - &mov ("esp",$_esp); - &bswap (@T[0]); - &mov ($idx,&wparam(3)); # load ciphertext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write ciphertext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_EncryptBlock_Rounds"); -# V1.x API -&function_begin_B("Camellia_EncryptBlock"); - &mov ("eax",128); - &sub ("eax",&wparam(0)); # load keyBitLength - &mov ("eax",3); - &adc ("eax",0); # keyBitLength==128?3:4 - &mov (&wparam(0),"eax"); - &jmp (&label("Camellia_EncryptBlock_Rounds")); -&function_end_B("Camellia_EncryptBlock"); - -if ($OPENSSL) { -# void Camellia_encrypt( -# const unsigned char *in, -# unsigned char *out, -# const CAMELLIA_KEY *key) -&function_begin("Camellia_encrypt"); - &mov ($idx,&wparam(0)); # load plaintext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - &mov ("eax",&DWP(272,$key)); # load grandRounds counter - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &lea ("eax",&DWP(0,$key,"eax")); - &mov ($_esp,"ebx"); # save %esp - &mov ($_end,"eax"); # save keyEnd - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load plaintext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_encrypt"); - - &mov ("esp",$_esp); - &bswap (@T[0]); - &mov ($idx,&wparam(1)); # load ciphertext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write ciphertext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_encrypt"); -} - -&function_begin_B("_x86_Camellia_encrypt"); - &xor (@T[0],&DWP(0,$key)); # ^=key[0-3] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &mov ($idx,&DWP(16,$key)); # prefetch key[4] - - &mov ($__s0,@T[0]); # save s[0-3] - &mov ($__s1,@T[1]); - &mov ($__s2,@T[2]); - &mov ($__s3,@T[3]); - -&set_label("loop",16); - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16,4); } - - &add ($key,16*4); - &cmp ($key,$__end); - &je (&label("done")); - - # @T[0-1] are preloaded, $idx is preloaded with key[0] - &and ($idx,@T[0]); - &mov (@T[3],$__s3); - &rotl ($idx,1); - &mov (@T[2],@T[3]); - &xor (@T[1],$idx); - &or (@T[2],&DWP(12,$key)); - &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[0],1); - &xor (@T[2],$__s2); - - &mov ($idx,&DWP(4,$key)); - &mov ($__s2,@T[2]); # s2^=s3|key[3]; - &or ($idx,@T[1]); - &and (@T[2],&DWP(8,$key)); - &xor (@T[0],$idx); - &rotl (@T[2],1); - &mov ($__s0,@T[0]); # s0^=s1|key[1]; - &xor (@T[3],@T[2]); - &mov ($idx,&DWP(16,$key)); # prefetch key[4] - &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[2],1); - &jmp (&label("loop")); - -&set_label("done",8); - &mov (@T[2],@T[0]); # SwapHalf - &mov (@T[3],@T[1]); - &mov (@T[0],$__s2); - &mov (@T[1],$__s3); - &xor (@T[0],$idx); # $idx is preloaded with key[0] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &ret (); -&function_end_B("_x86_Camellia_encrypt"); - -# void Camellia_DecryptBlock_Rounds( -# int grandRounds, -# const Byte ciphertext[], -# const KEY_TABLE_TYPE keyTable, -# Byte plaintext[]) -&function_begin("Camellia_DecryptBlock_Rounds"); - &mov ("eax",&wparam(0)); # load grandRounds - &mov ($idx,&wparam(1)); # load ciphertext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &mov (&DWP(4*4,"esp"),$key); # save keyStart - &lea ($key,&DWP(0,$key,"eax")); - &mov (&DWP(5*4,"esp"),"ebx");# save %esp - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load ciphertext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_decrypt"); - - &mov ("esp",&DWP(5*4,"esp")); - &bswap (@T[0]); - &mov ($idx,&wparam(3)); # load plaintext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write plaintext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_DecryptBlock_Rounds"); -# V1.x API -&function_begin_B("Camellia_DecryptBlock"); - &mov ("eax",128); - &sub ("eax",&wparam(0)); # load keyBitLength - &mov ("eax",3); - &adc ("eax",0); # keyBitLength==128?3:4 - &mov (&wparam(0),"eax"); - &jmp (&label("Camellia_DecryptBlock_Rounds")); -&function_end_B("Camellia_DecryptBlock"); - -if ($OPENSSL) { -# void Camellia_decrypt( -# const unsigned char *in, -# unsigned char *out, -# const CAMELLIA_KEY *key) -&function_begin("Camellia_decrypt"); - &mov ($idx,&wparam(0)); # load ciphertext pointer - &mov ($key,&wparam(2)); # load key schedule pointer - - &mov ("ebx","esp"); - &sub ("esp",7*4); # place for s[0-3],keyEnd,esp and ra - &and ("esp",-64); - &mov ("eax",&DWP(272,$key)); # load grandRounds counter - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ("ecx",&DWP(-64-63,$key)); - &sub ("ecx","esp"); - &neg ("ecx"); - &and ("ecx",0x3C0); # modulo 1024, but aligned to cache-line - &sub ("esp","ecx"); - &add ("esp",4); # 4 is reserved for callee's return address - - &shl ("eax",6); - &mov (&DWP(4*4,"esp"),$key); # save keyStart - &lea ($key,&DWP(0,$key,"eax")); - &mov (&DWP(5*4,"esp"),"ebx");# save %esp - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov (@T[0],&DWP(0,$idx)); # load ciphertext - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &bswap (@T[0]); - &mov (@T[3],&DWP(12,$idx)); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &call ("_x86_Camellia_decrypt"); - - &mov ("esp",&DWP(5*4,"esp")); - &bswap (@T[0]); - &mov ($idx,&wparam(1)); # load plaintext pointer - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - &mov (&DWP(0,$idx),@T[0]); # write plaintext - &mov (&DWP(4,$idx),@T[1]); - &mov (&DWP(8,$idx),@T[2]); - &mov (&DWP(12,$idx),@T[3]); -&function_end("Camellia_decrypt"); -} - -&function_begin_B("_x86_Camellia_decrypt"); - &xor (@T[0],&DWP(0,$key)); # ^=key[0-3] - &xor (@T[1],&DWP(4,$key)); - &xor (@T[2],&DWP(8,$key)); - &xor (@T[3],&DWP(12,$key)); - &mov ($idx,&DWP(-8,$key)); # prefetch key[-2] - - &mov ($__s0,@T[0]); # save s[0-3] - &mov ($__s1,@T[1]); - &mov ($__s2,@T[2]); - &mov ($__s3,@T[3]); - -&set_label("loop",16); - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8,4); } - - &sub ($key,16*4); - &cmp ($key,$__end); - &je (&label("done")); - - # @T[0-1] are preloaded, $idx is preloaded with key[2] - &and ($idx,@T[0]); - &mov (@T[3],$__s3); - &rotl ($idx,1); - &mov (@T[2],@T[3]); - &xor (@T[1],$idx); - &or (@T[2],&DWP(4,$key)); - &mov ($__s1,@T[1]); # s1^=LeftRotate(s0&key[0],1); - &xor (@T[2],$__s2); - - &mov ($idx,&DWP(12,$key)); - &mov ($__s2,@T[2]); # s2^=s3|key[3]; - &or ($idx,@T[1]); - &and (@T[2],&DWP(0,$key)); - &xor (@T[0],$idx); - &rotl (@T[2],1); - &mov ($__s0,@T[0]); # s0^=s1|key[1]; - &xor (@T[3],@T[2]); - &mov ($idx,&DWP(-8,$key)); # prefetch key[4] - &mov ($__s3,@T[3]); # s3^=LeftRotate(s2&key[2],1); - &jmp (&label("loop")); - -&set_label("done",8); - &mov (@T[2],@T[0]); # SwapHalf - &mov (@T[3],@T[1]); - &mov (@T[0],$__s2); - &mov (@T[1],$__s3); - &xor (@T[2],$idx); # $idx is preloaded with key[2] - &xor (@T[3],&DWP(12,$key)); - &xor (@T[0],&DWP(0,$key)); - &xor (@T[1],&DWP(4,$key)); - &ret (); -&function_end_B("_x86_Camellia_decrypt"); - -# shld is very slow on Intel P4 family. Even on AMD it limits -# instruction decode rate [because it's VectorPath] and consequently -# performance. PIII, PM and Core[2] seem to be the only ones which -# execute this code ~7% faster... -sub __rotl128 { - my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; - - $rnd *= 2; - if ($rot) { - &mov ($idx,$i0); - &shld ($i0,$i1,$rot); - &shld ($i1,$i2,$rot); - &shld ($i2,$i3,$rot); - &shld ($i3,$idx,$rot); - } - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); -} - -# ... Implementing 128-bit rotate without shld gives >3x performance -# improvement on P4, only ~7% degradation on other Intel CPUs and -# not worse performance on AMD. This is therefore preferred. -sub _rotl128 { - my ($i0,$i1,$i2,$i3,$rot,$rnd,@T)=@_; - - $rnd *= 2; - if ($rot) { - &mov ($Tbl,$i0); - &shl ($i0,$rot); - &mov ($idx,$i1); - &shr ($idx,32-$rot); - &shl ($i1,$rot); - &or ($i0,$idx); - &mov ($idx,$i2); - &shl ($i2,$rot); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &shr ($idx,32-$rot); - &or ($i1,$idx); - &shr ($Tbl,32-$rot); - &mov ($idx,$i3); - &shr ($idx,32-$rot); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &shl ($i3,$rot); - &or ($i2,$idx); - &or ($i3,$Tbl); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); - } else { - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i0 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i1 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i2 eq @T[0]); - &mov (&DWP(-128+4*$rnd++,$key),shift(@T)) if ($i3 eq @T[0]); - } -} - -sub _saveround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - &mov (&DWP($bias+$rnd*8+0,$key),@T[0]); - &mov (&DWP($bias+$rnd*8+4,$key),@T[1]) if ($#T>=1); - &mov (&DWP($bias+$rnd*8+8,$key),@T[2]) if ($#T>=2); - &mov (&DWP($bias+$rnd*8+12,$key),@T[3]) if ($#T>=3); -} - -sub _loadround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - &mov (@T[0],&DWP($bias+$rnd*8+0,$key)); - &mov (@T[1],&DWP($bias+$rnd*8+4,$key)) if ($#T>=1); - &mov (@T[2],&DWP($bias+$rnd*8+8,$key)) if ($#T>=2); - &mov (@T[3],&DWP($bias+$rnd*8+12,$key)) if ($#T>=3); -} - -# void Camellia_Ekeygen( -# const int keyBitLength, -# const Byte *rawKey, -# KEY_TABLE_TYPE keyTable) -&function_begin("Camellia_Ekeygen"); -{ my $step=0; - - &stack_push(4); # place for s[0-3] - - &mov ($Tbl,&wparam(0)); # load arguments - &mov ($idx,&wparam(1)); - &mov ($key,&wparam(2)); - - &mov (@T[0],&DWP(0,$idx)); # load 0-127 bits - &mov (@T[1],&DWP(4,$idx)); - &mov (@T[2],&DWP(8,$idx)); - &mov (@T[3],&DWP(12,$idx)); - - &bswap (@T[0]); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &_saveround (0,$key,@T); # KL<<<0 - - &cmp ($Tbl,128); - &je (&label("1st128")); - - &mov (@T[0],&DWP(16,$idx)); # load 128-191 bits - &mov (@T[1],&DWP(20,$idx)); - &cmp ($Tbl,192); - &je (&label("1st192")); - &mov (@T[2],&DWP(24,$idx)); # load 192-255 bits - &mov (@T[3],&DWP(28,$idx)); - &jmp (&label("1st256")); -&set_label("1st192",4); - &mov (@T[2],@T[0]); - &mov (@T[3],@T[1]); - ¬ (@T[2]); - ¬ (@T[3]); -&set_label("1st256",4); - &bswap (@T[0]); - &bswap (@T[1]); - &bswap (@T[2]); - &bswap (@T[3]); - - &_saveround (4,$key,@T); # temporary storage for KR! - - &xor (@T[0],&DWP(0*8+0,$key)); # KR^KL - &xor (@T[1],&DWP(0*8+4,$key)); - &xor (@T[2],&DWP(1*8+0,$key)); - &xor (@T[3],&DWP(1*8+4,$key)); - -&set_label("1st128",4); - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($idx,&wparam(2)); - &xor (@T[0],&DWP(0*8+0,$idx)); # ^KL - &xor (@T[1],&DWP(0*8+4,$idx)); - &xor (@T[2],&DWP(1*8+0,$idx)); - &xor (@T[3],&DWP(1*8+4,$idx)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[4] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($idx,&wparam(0)); - &cmp ($idx,128); - &jne (&label("2nd256")); - - &mov ($key,&wparam(2)); - &lea ($key,&DWP(128,$key)); # size optimization - - ####### process KA - &_saveround (2,$key,-128,@T); # KA<<<0 - &_rotl128 (@T,15,6,@T); # KA<<<15 - &_rotl128 (@T,15,8,@T); # KA<<<(15+15=30) - &_rotl128 (@T,15,12,@T[0],@T[1]); # KA<<<(30+15=45) - &_rotl128 (@T,15,14,@T); # KA<<<(45+15=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,20,@T); # KA<<<(60+32+2=94) - &_rotl128 (@T,17,24,@T); # KA<<<(94+17=111) - - ####### process KL - &_loadround (0,$key,-128,@T); # load KL - &_rotl128 (@T,15,4,@T); # KL<<<15 - &_rotl128 (@T,30,10,@T); # KL<<<(15+30=45) - &_rotl128 (@T,15,13,@T[2],@T[3]); # KL<<<(45+15=60) - &_rotl128 (@T,17,16,@T); # KL<<<(60+17=77) - &_rotl128 (@T,17,18,@T); # KL<<<(77+17=94) - &_rotl128 (@T,17,22,@T); # KL<<<(94+17=111) - - while (@T[0] ne "eax") # restore order - { unshift (@T,pop(@T)); } - - &mov ("eax",3); # 3 grandRounds - &jmp (&label("done")); - -&set_label("2nd256",16); - &mov ($idx,&wparam(2)); - &_saveround (6,$idx,@T); # temporary storage for KA! - - &xor (@T[0],&DWP(4*8+0,$idx)); # KA^KR - &xor (@T[1],&DWP(4*8+4,$idx)); - &xor (@T[2],&DWP(5*8+0,$idx)); - &xor (@T[3],&DWP(5*8+4,$idx)); - - &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[8] - &mov (&swtmp(0),@T[0]); # save s[0-3] - &mov (&swtmp(1),@T[1]); - &mov (&swtmp(2),@T[2]); - &mov (&swtmp(3),@T[3]); - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - &mov (@T[2],&swtmp(2)); - &mov (@T[3],&swtmp(3)); - - &mov ($key,&wparam(2)); - &lea ($key,&DWP(128,$key)); # size optimization - - ####### process KB - &_saveround (2,$key,-128,@T); # KB<<<0 - &_rotl128 (@T,30,10,@T); # KB<<<30 - &_rotl128 (@T,30,20,@T); # KB<<<(30+30=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,19,32,@T); # KB<<<(60+32+19=111) - - ####### process KR - &_loadround (4,$key,-128,@T); # load KR - &_rotl128 (@T,15,4,@T); # KR<<<15 - &_rotl128 (@T,15,8,@T); # KR<<<(15+15=30) - &_rotl128 (@T,30,18,@T); # KR<<<(30+30=60) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,26,@T); # KR<<<(60+32+2=94) - - ####### process KA - &_loadround (6,$key,-128,@T); # load KA - &_rotl128 (@T,15,6,@T); # KA<<<15 - &_rotl128 (@T,30,14,@T); # KA<<<(15+30=45) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,0,24,@T); # KA<<<(45+32+0=77) - &_rotl128 (@T,17,28,@T); # KA<<<(77+17=94) - - ####### process KL - &_loadround (0,$key,-128,@T); # load KL - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,13,12,@T); # KL<<<(32+13=45) - &_rotl128 (@T,15,16,@T); # KL<<<(45+15=60) - &_rotl128 (@T,17,22,@T); # KL<<<(60+17=77) - push (@T,shift(@T)); # rotl128(@T,32); - &_rotl128 (@T,2,30,@T); # KL<<<(77+32+2=111) - - while (@T[0] ne "eax") # restore order - { unshift (@T,pop(@T)); } - - &mov ("eax",4); # 4 grandRounds -&set_label("done"); - &lea ("edx",&DWP(272-128,$key)); # end of key schedule - &stack_pop(4); -} -&function_end("Camellia_Ekeygen"); - -if ($OPENSSL) { -# int private_Camellia_set_key ( -# const unsigned char *userKey, -# int bits, -# CAMELLIA_KEY *key) -&function_begin_B("private_Camellia_set_key"); - &push ("ebx"); - &mov ("ecx",&wparam(0)); # pull arguments - &mov ("ebx",&wparam(1)); - &mov ("edx",&wparam(2)); - - &mov ("eax",-1); - &test ("ecx","ecx"); - &jz (&label("done")); # userKey==NULL? - &test ("edx","edx"); - &jz (&label("done")); # key==NULL? - - &mov ("eax",-2); - &cmp ("ebx",256); - &je (&label("arg_ok")); # bits==256? - &cmp ("ebx",192); - &je (&label("arg_ok")); # bits==192? - &cmp ("ebx",128); - &jne (&label("done")); # bits!=128? -&set_label("arg_ok",4); - - &push ("edx"); # push arguments - &push ("ecx"); - &push ("ebx"); - &call ("Camellia_Ekeygen"); - &stack_pop(3); - - # eax holds grandRounds and edx points at where to put it - &mov (&DWP(0,"edx"),"eax"); - &xor ("eax","eax"); -&set_label("done",4); - &pop ("ebx"); - &ret (); -&function_end_B("private_Camellia_set_key"); -} - -@SBOX=( -112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, - 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, -134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, -166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, -139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, -223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, - 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, -254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, -170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, - 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, -135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, - 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, -233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, -120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, -114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, - 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); - -sub S1110 { my $i=shift; $i=@SBOX[$i]; return $i<<24|$i<<16|$i<<8; } -sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<<16|$i; } -sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } -sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } - -&set_label("Camellia_SIGMA",64); -&data_word( - 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, - 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c, - 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd, - 0, 0, 0, 0); -&set_label("Camellia_SBOX",64); -# tables are interleaved, remember? -for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } -for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } - -# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const CAMELLIA_KEY *key, -# unsigned char *ivp,const int enc); -{ -# stack frame layout -# -4(%esp) # return address 0(%esp) -# 0(%esp) # s0 4(%esp) -# 4(%esp) # s1 8(%esp) -# 8(%esp) # s2 12(%esp) -# 12(%esp) # s3 16(%esp) -# 16(%esp) # end of key schedule 20(%esp) -# 20(%esp) # %esp backup -my $_inp=&DWP(24,"esp"); #copy of wparam(0) -my $_out=&DWP(28,"esp"); #copy of wparam(1) -my $_len=&DWP(32,"esp"); #copy of wparam(2) -my $_key=&DWP(36,"esp"); #copy of wparam(3) -my $_ivp=&DWP(40,"esp"); #copy of wparam(4) -my $ivec=&DWP(44,"esp"); #ivec[16] -my $_tmp=&DWP(44,"esp"); #volatile variable [yes, aliases with ivec] -my ($s0,$s1,$s2,$s3) = @T; - -&function_begin("Camellia_cbc_encrypt"); - &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len - &cmp ($s2,0); - &je (&label("enc_out")); - - &pushf (); - &cld (); - - &mov ($s0,&wparam(0)); # load inp - &mov ($s1,&wparam(1)); # load out - #&mov ($s2,&wparam(2)); # load len - &mov ($s3,&wparam(3)); # load key - &mov ($Tbl,&wparam(4)); # load ivp - - # allocate aligned stack frame... - &lea ($idx,&DWP(-64,"esp")); - &and ($idx,-64); - - # place stack frame just "above mod 1024" the key schedule - # this ensures that cache associativity of 2 suffices - &lea ($key,&DWP(-64-63,$s3)); - &sub ($key,$idx); - &neg ($key); - &and ($key,0x3C0); # modulo 1024, but aligned to cache-line - &sub ($idx,$key); - - &mov ($key,&wparam(5)); # load enc - - &exch ("esp",$idx); - &add ("esp",4); # reserve for return address! - &mov ($_esp,$idx); # save %esp - - &mov ($_inp,$s0); # save copy of inp - &mov ($_out,$s1); # save copy of out - &mov ($_len,$s2); # save copy of len - &mov ($_key,$s3); # save copy of key - &mov ($_ivp,$Tbl); # save copy of ivp - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($Tbl); - &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); - - &mov ($idx,32); - &set_label("prefetch_sbox",4); - &mov ($s0,&DWP(0,$Tbl)); - &mov ($s1,&DWP(32,$Tbl)); - &mov ($s2,&DWP(64,$Tbl)); - &mov ($s3,&DWP(96,$Tbl)); - &lea ($Tbl,&DWP(128,$Tbl)); - &dec ($idx); - &jnz (&label("prefetch_sbox")); - &mov ($s0,$_key); - &sub ($Tbl,4096); - &mov ($idx,$_inp); - &mov ($s3,&DWP(272,$s0)); # load grandRounds - - &cmp ($key,0); - &je (&label("DECRYPT")); - - &mov ($s2,$_len); - &mov ($key,$_ivp); - &shl ($s3,6); - &lea ($s3,&DWP(0,$s0,$s3)); - &mov ($_end,$s3); - - &test ($s2,0xFFFFFFF0); - &jz (&label("enc_tail")); # short input... - - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - - &set_label("enc_loop",4); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - - &xor ($s0,&DWP(0,$idx)); # xor input data - &xor ($s1,&DWP(4,$idx)); - &xor ($s2,&DWP(8,$idx)); - &bswap ($s0); - &xor ($s3,&DWP(12,$idx)); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_encrypt"); - - &mov ($idx,$_inp); # load inp - &mov ($key,$_out); # load out - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &mov (&DWP(0,$key),$s0); # save output data - &bswap ($s3); - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($s2,$_len); # load len - - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &lea ($s3,&DWP(16,$key)); - &mov ($_out,$s3); # save out - - &sub ($s2,16); - &test ($s2,0xFFFFFFF0); - &mov ($_len,$s2); # save len - &jnz (&label("enc_loop")); - &test ($s2,15); - &jnz (&label("enc_tail")); - &mov ($idx,$_ivp); # load ivp - &mov ($s2,&DWP(8,$key)); # restore last dwords - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$idx),$s0); # save ivec - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - - &mov ("esp",$_esp); - &popf (); - &set_label("enc_out"); - &function_end_A(); - &pushf (); # kludge, never executed - - &set_label("enc_tail",4); - &mov ($s0,$key eq "edi" ? $key : ""); - &mov ($key,$_out); # load out - &push ($s0); # push ivp - &mov ($s1,16); - &sub ($s1,$s2); - &cmp ($key,$idx); # compare with inp - &je (&label("enc_in_place")); - &align (4); - &data_word(0xA4F3F689); # rep movsb # copy input - &jmp (&label("enc_skip_in_place")); - &set_label("enc_in_place"); - &lea ($key,&DWP(0,$key,$s2)); - &set_label("enc_skip_in_place"); - &mov ($s2,$s1); - &xor ($s0,$s0); - &align (4); - &data_word(0xAAF3F689); # rep stosb # zero tail - &pop ($key); # pop ivp - - &mov ($idx,$_out); # output as input - &mov ($s0,&DWP(0,$key)); - &mov ($s1,&DWP(4,$key)); - &mov ($_len,16); # len=16 - &jmp (&label("enc_loop")); # one more spin... - -#----------------------------- DECRYPT -----------------------------# -&set_label("DECRYPT",16); - &shl ($s3,6); - &lea ($s3,&DWP(0,$s0,$s3)); - &mov ($_end,$s0); - &mov ($_key,$s3); - - &cmp ($idx,$_out); - &je (&label("dec_in_place")); # in-place processing... - - &mov ($key,$_ivp); # load ivp - &mov ($_tmp,$key); - - &set_label("dec_loop",4); - &mov ($s0,&DWP(0,$idx)); # read input - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &bswap ($s0); - &mov ($s3,&DWP(12,$idx)); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_decrypt"); - - &mov ($key,$_tmp); # load ivp - &mov ($idx,$_len); # load len - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &xor ($s0,&DWP(0,$key)); # xor iv - &bswap ($s3); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &sub ($idx,16); - &jc (&label("dec_partial")); - &mov ($_len,$idx); # save len - &mov ($idx,$_inp); # load inp - &mov ($key,$_out); # load out - - &mov (&DWP(0,$key),$s0); # write output - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($_tmp,$idx); # save ivp - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &lea ($key,&DWP(16,$key)); - &mov ($_out,$key); # save out - - &jnz (&label("dec_loop")); - &mov ($key,$_tmp); # load temp ivp - &set_label("dec_end"); - &mov ($idx,$_ivp); # load user ivp - &mov ($s0,&DWP(0,$key)); # load iv - &mov ($s1,&DWP(4,$key)); - &mov ($s2,&DWP(8,$key)); - &mov ($s3,&DWP(12,$key)); - &mov (&DWP(0,$idx),$s0); # copy back to user - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - &jmp (&label("dec_out")); - - &set_label("dec_partial",4); - &lea ($key,$ivec); - &mov (&DWP(0,$key),$s0); # dump output to stack - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$idx)); - &mov ($idx eq "esi" ? $idx : "",$key); - &mov ($key eq "edi" ? $key : "",$_out); # load out - &data_word(0xA4F3F689); # rep movsb # copy output - &mov ($key,$_inp); # use inp as temp ivp - &jmp (&label("dec_end")); - - &set_label("dec_in_place",4); - &set_label("dec_in_place_loop"); - &lea ($key,$ivec); - &mov ($s0,&DWP(0,$idx)); # read input - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &mov ($s3,&DWP(12,$idx)); - - &mov (&DWP(0,$key),$s0); # copy to temp - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &bswap ($s0); - &mov (&DWP(12,$key),$s3); - &bswap ($s1); - &mov ($key,$_key); # load key - &bswap ($s2); - &bswap ($s3); - - &call ("_x86_Camellia_decrypt"); - - &mov ($key,$_ivp); # load ivp - &mov ($idx,$_out); # load out - - &bswap ($s0); - &bswap ($s1); - &bswap ($s2); - &xor ($s0,&DWP(0,$key)); # xor iv - &bswap ($s3); - &xor ($s1,&DWP(4,$key)); - &xor ($s2,&DWP(8,$key)); - &xor ($s3,&DWP(12,$key)); - - &mov (&DWP(0,$idx),$s0); # write output - &mov (&DWP(4,$idx),$s1); - &mov (&DWP(8,$idx),$s2); - &mov (&DWP(12,$idx),$s3); - - &lea ($idx,&DWP(16,$idx)); - &mov ($_out,$idx); # save out - - &lea ($idx,$ivec); - &mov ($s0,&DWP(0,$idx)); # read temp - &mov ($s1,&DWP(4,$idx)); - &mov ($s2,&DWP(8,$idx)); - &mov ($s3,&DWP(12,$idx)); - - &mov (&DWP(0,$key),$s0); # copy iv - &mov (&DWP(4,$key),$s1); - &mov (&DWP(8,$key),$s2); - &mov (&DWP(12,$key),$s3); - - &mov ($idx,$_inp); # load inp - - &lea ($idx,&DWP(16,$idx)); - &mov ($_inp,$idx); # save inp - - &mov ($s2,$_len); # load len - &sub ($s2,16); - &jc (&label("dec_in_place_partial")); - &mov ($_len,$s2); # save len - &jnz (&label("dec_in_place_loop")); - &jmp (&label("dec_out")); - - &set_label("dec_in_place_partial",4); - # one can argue if this is actually required... - &mov ($key eq "edi" ? $key : "",$_out); - &lea ($idx eq "esi" ? $idx : "",$ivec); - &lea ($key,&DWP(0,$key,$s2)); - &lea ($idx,&DWP(16,$idx,$s2)); - &neg ($s2 eq "ecx" ? $s2 : ""); - &data_word(0xA4F3F689); # rep movsb # restore tail - - &set_label("dec_out",4); - &mov ("esp",$_esp); - &popf (); -&function_end("Camellia_cbc_encrypt"); -} - -&asciz("Camellia for x86 by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86_64.pl b/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86_64.pl deleted file mode 100644 index 9f4b82fa48..0000000000 --- a/drivers/builtin_openssl2/crypto/camellia/asm/cmll-x86_64.pl +++ /dev/null @@ -1,1081 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Copyright (c) 2008 Andy Polyakov -# -# This module may be used under the terms of either the GNU General -# Public License version 2 or later, the GNU Lesser General Public -# License version 2.1 or later, the Mozilla Public License version -# 1.1 or the BSD License. The exact terms of either license are -# distributed along with this module. For further details see -# http://www.openssl.org/~appro/camellia/. -# ==================================================================== - -# Performance in cycles per processed byte (less is better) in -# 'openssl speed ...' benchmark: -# -# AMD64 Core2 EM64T -# -evp camellia-128-ecb 16.7 21.0 22.7 -# + over gcc 3.4.6 +25% +5% 0% -# -# camellia-128-cbc 15.7 20.4 21.1 -# -# 128-bit key setup 128 216 205 cycles/key -# + over gcc 3.4.6 +54% +39% +15% -# -# Numbers in "+" rows represent performance improvement over compiler -# generated code. Key setup timings are impressive on AMD and Core2 -# thanks to 64-bit operations being covertly deployed. Improvement on -# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it -# apparently emulates some of 64-bit operations in [32-bit] microcode. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; } -sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/; - $r =~ s/%[er]([sd]i)/%\1l/; - $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } - -$t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx"; -@S=("%r8d","%r9d","%r10d","%r11d"); -$i0="%esi"; -$i1="%edi"; -$Tbl="%rbp"; # size optimization -$inp="%r12"; -$out="%r13"; -$key="%r14"; -$keyend="%r15"; -$arg0d=$win64?"%ecx":"%edi"; - -# const unsigned int Camellia_SBOX[4][256]; -# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], -# and [2][] - with [3][]. This is done to minimize code size. -$SBOX1_1110=0; # Camellia_SBOX[0] -$SBOX4_4404=4; # Camellia_SBOX[1] -$SBOX2_0222=2048; # Camellia_SBOX[2] -$SBOX3_3033=2052; # Camellia_SBOX[3] - -sub Camellia_Feistel { -my $i=@_[0]; -my $seed=defined(@_[1])?@_[1]:0; -my $scale=$seed<0?-8:8; -my $j=($i&1)*2; -my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; - -$code.=<<___; - xor $s0,$t0 # t0^=key[0] - xor $s1,$t1 # t1^=key[1] - movz `&hi("$t0")`,$i0 # (t0>>8)&0xff - movz `&lo("$t1")`,$i1 # (t1>>0)&0xff - mov $SBOX3_3033($Tbl,$i0,8),$t3 # t3=SBOX3_3033[0] - mov $SBOX1_1110($Tbl,$i1,8),$t2 # t2=SBOX1_1110[1] - movz `&lo("$t0")`,$i0 # (t0>>0)&0xff - shr \$16,$t0 - movz `&hi("$t1")`,$i1 # (t1>>8)&0xff - xor $SBOX4_4404($Tbl,$i0,8),$t3 # t3^=SBOX4_4404[0] - shr \$16,$t1 - xor $SBOX4_4404($Tbl,$i1,8),$t2 # t2^=SBOX4_4404[1] - movz `&hi("$t0")`,$i0 # (t0>>24)&0xff - movz `&lo("$t1")`,$i1 # (t1>>16)&0xff - xor $SBOX1_1110($Tbl,$i0,8),$t3 # t3^=SBOX1_1110[0] - xor $SBOX3_3033($Tbl,$i1,8),$t2 # t2^=SBOX3_3033[1] - movz `&lo("$t0")`,$i0 # (t0>>16)&0xff - movz `&hi("$t1")`,$i1 # (t1>>24)&0xff - xor $SBOX2_0222($Tbl,$i0,8),$t3 # t3^=SBOX2_0222[0] - xor $SBOX2_0222($Tbl,$i1,8),$t2 # t2^=SBOX2_0222[1] - mov `$seed+($i+1)*$scale`($key),$t1 # prefetch key[i+1] - mov `$seed+($i+1)*$scale+4`($key),$t0 - xor $t3,$t2 # t2^=t3 - ror \$8,$t3 # t3=RightRotate(t3,8) - xor $t2,$s2 - xor $t2,$s3 - xor $t3,$s3 -___ -} - -# void Camellia_EncryptBlock_Rounds( -# int grandRounds, -# const Byte plaintext[], -# const KEY_TABLE_TYPE keyTable, -# Byte ciphertext[]) -$code=<<___; -.text - -# V1.x API -.globl Camellia_EncryptBlock -.type Camellia_EncryptBlock,\@abi-omnipotent -.align 16 -Camellia_EncryptBlock: - movl \$128,%eax - subl $arg0d,%eax - movl \$3,$arg0d - adcl \$0,$arg0d # keyBitLength==128?3:4 - jmp .Lenc_rounds -.size Camellia_EncryptBlock,.-Camellia_EncryptBlock -# V2 -.globl Camellia_EncryptBlock_Rounds -.type Camellia_EncryptBlock_Rounds,\@function,4 -.align 16 -.Lenc_rounds: -Camellia_EncryptBlock_Rounds: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Lenc_prologue: - - #mov %rsi,$inp # put away arguments - mov %rcx,$out - mov %rdx,$key - - shl \$6,%edi # process grandRounds - lea .LCamellia_SBOX(%rip),$Tbl - lea ($key,%rdi),$keyend - - mov 0(%rsi),@S[0] # load plaintext - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - bswap @S[0] - mov 12(%rsi),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_encrypt - - bswap @S[0] - bswap @S[1] - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Lenc_epilogue: - ret -.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds - -.type _x86_64_Camellia_encrypt,\@abi-omnipotent -.align 16 -_x86_64_Camellia_encrypt: - xor 0($key),@S[1] - xor 4($key),@S[0] # ^=key[0-3] - xor 8($key),@S[3] - xor 12($key),@S[2] -.align 16 -.Leloop: - mov 16($key),$t1 # prefetch key[4-5] - mov 20($key),$t0 - -___ - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); } -$code.=<<___; - lea 16*4($key),$key - cmp $keyend,$key - mov 8($key),$t3 # prefetch key[2-3] - mov 12($key),$t2 - je .Ledone - - and @S[0],$t0 - or @S[3],$t3 - rol \$1,$t0 - xor $t3,@S[2] # s2^=s3|key[3]; - xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); - and @S[2],$t2 - or @S[1],$t1 - rol \$1,$t2 - xor $t1,@S[0] # s0^=s1|key[1]; - xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); - jmp .Leloop - -.align 16 -.Ledone: - xor @S[2],$t0 # SwapHalf - xor @S[3],$t1 - xor @S[0],$t2 - xor @S[1],$t3 - - mov $t0,@S[0] - mov $t1,@S[1] - mov $t2,@S[2] - mov $t3,@S[3] - - .byte 0xf3,0xc3 # rep ret -.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt - -# V1.x API -.globl Camellia_DecryptBlock -.type Camellia_DecryptBlock,\@abi-omnipotent -.align 16 -Camellia_DecryptBlock: - movl \$128,%eax - subl $arg0d,%eax - movl \$3,$arg0d - adcl \$0,$arg0d # keyBitLength==128?3:4 - jmp .Ldec_rounds -.size Camellia_DecryptBlock,.-Camellia_DecryptBlock -# V2 -.globl Camellia_DecryptBlock_Rounds -.type Camellia_DecryptBlock_Rounds,\@function,4 -.align 16 -.Ldec_rounds: -Camellia_DecryptBlock_Rounds: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Ldec_prologue: - - #mov %rsi,$inp # put away arguments - mov %rcx,$out - mov %rdx,$keyend - - shl \$6,%edi # process grandRounds - lea .LCamellia_SBOX(%rip),$Tbl - lea ($keyend,%rdi),$key - - mov 0(%rsi),@S[0] # load plaintext - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - bswap @S[0] - mov 12(%rsi),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_decrypt - - bswap @S[0] - bswap @S[1] - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Ldec_epilogue: - ret -.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds - -.type _x86_64_Camellia_decrypt,\@abi-omnipotent -.align 16 -_x86_64_Camellia_decrypt: - xor 0($key),@S[1] - xor 4($key),@S[0] # ^=key[0-3] - xor 8($key),@S[3] - xor 12($key),@S[2] -.align 16 -.Ldloop: - mov -8($key),$t1 # prefetch key[4-5] - mov -4($key),$t0 - -___ - for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); } -$code.=<<___; - lea -16*4($key),$key - cmp $keyend,$key - mov 0($key),$t3 # prefetch key[2-3] - mov 4($key),$t2 - je .Lddone - - and @S[0],$t0 - or @S[3],$t3 - rol \$1,$t0 - xor $t3,@S[2] # s2^=s3|key[3]; - xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); - and @S[2],$t2 - or @S[1],$t1 - rol \$1,$t2 - xor $t1,@S[0] # s0^=s1|key[1]; - xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); - - jmp .Ldloop - -.align 16 -.Lddone: - xor @S[2],$t2 - xor @S[3],$t3 - xor @S[0],$t0 - xor @S[1],$t1 - - mov $t2,@S[0] # SwapHalf - mov $t3,@S[1] - mov $t0,@S[2] - mov $t1,@S[3] - - .byte 0xf3,0xc3 # rep ret -.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt -___ - -sub _saveround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - - if ($#T==3) { - $code.=<<___; - mov @T[1],`$bias+$rnd*8+0`($key) - mov @T[0],`$bias+$rnd*8+4`($key) - mov @T[3],`$bias+$rnd*8+8`($key) - mov @T[2],`$bias+$rnd*8+12`($key) -___ - } else { - $code.=" mov @T[0],`$bias+$rnd*8+0`($key)\n"; - $code.=" mov @T[1],`$bias+$rnd*8+8`($key)\n" if ($#T>=1); - } -} - -sub _loadround { -my ($rnd,$key,@T)=@_; -my $bias=int(@T[0])?shift(@T):0; - -$code.=" mov `$bias+$rnd*8+0`($key),@T[0]\n"; -$code.=" mov `$bias+$rnd*8+8`($key),@T[1]\n" if ($#T>=1); -} - -# shld is very slow on Intel EM64T family. Even on AMD it limits -# instruction decode rate [because it's VectorPath] and consequently -# performance... -sub __rotl128 { -my ($i0,$i1,$rot)=@_; - - if ($rot) { - $code.=<<___; - mov $i0,%r11 - shld \$$rot,$i1,$i0 - shld \$$rot,%r11,$i1 -___ - } -} - -# ... Implementing 128-bit rotate without shld gives 80% better -# performance EM64T, +15% on AMD64 and only ~7% degradation on -# Core2. This is therefore preferred. -sub _rotl128 { -my ($i0,$i1,$rot)=@_; - - if ($rot) { - $code.=<<___; - mov $i0,%r11 - shl \$$rot,$i0 - mov $i1,%r9 - shr \$`64-$rot`,%r9 - shr \$`64-$rot`,%r11 - or %r9,$i0 - shl \$$rot,$i1 - or %r11,$i1 -___ - } -} - -{ my $step=0; - -$code.=<<___; -.globl Camellia_Ekeygen -.type Camellia_Ekeygen,\@function,3 -.align 16 -Camellia_Ekeygen: - push %rbx - push %rbp - push %r13 - push %r14 - push %r15 -.Lkey_prologue: - - mov %rdi,$keyend # put away arguments, keyBitLength - mov %rdx,$out # keyTable - - mov 0(%rsi),@S[0] # load 0-127 bits - mov 4(%rsi),@S[1] - mov 8(%rsi),@S[2] - mov 12(%rsi),@S[3] - - bswap @S[0] - bswap @S[1] - bswap @S[2] - bswap @S[3] -___ - &_saveround (0,$out,@S); # KL<<<0 -$code.=<<___; - cmp \$128,$keyend # check keyBitLength - je .L1st128 - - mov 16(%rsi),@S[0] # load 128-191 bits - mov 20(%rsi),@S[1] - cmp \$192,$keyend - je .L1st192 - mov 24(%rsi),@S[2] # load 192-255 bits - mov 28(%rsi),@S[3] - jmp .L1st256 -.L1st192: - mov @S[0],@S[2] - mov @S[1],@S[3] - not @S[2] - not @S[3] -.L1st256: - bswap @S[0] - bswap @S[1] - bswap @S[2] - bswap @S[3] -___ - &_saveround (4,$out,@S); # temp storage for KR! -$code.=<<___; - xor 0($out),@S[1] # KR^KL - xor 4($out),@S[0] - xor 8($out),@S[3] - xor 12($out),@S[2] - -.L1st128: - lea .LCamellia_SIGMA(%rip),$key - lea .LCamellia_SBOX(%rip),$Tbl - - mov 0($key),$t1 - mov 4($key),$t0 -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); -$code.=<<___; - xor 0($out),@S[1] # ^KL - xor 4($out),@S[0] - xor 8($out),@S[3] - xor 12($out),@S[2] -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); -$code.=<<___; - cmp \$128,$keyend - jne .L2nd256 - - lea 128($out),$out # size optimization - shl \$32,%r8 # @S[0]|| - shl \$32,%r10 # @S[2]|| - or %r9,%r8 # ||@S[1] - or %r11,%r10 # ||@S[3] -___ - &_loadround (0,$out,-128,"%rax","%rbx"); # KL - &_saveround (2,$out,-128,"%r8","%r10"); # KA<<<0 - &_rotl128 ("%rax","%rbx",15); - &_saveround (4,$out,-128,"%rax","%rbx"); # KL<<<15 - &_rotl128 ("%r8","%r10",15); - &_saveround (6,$out,-128,"%r8","%r10"); # KA<<<15 - &_rotl128 ("%r8","%r10",15); # 15+15=30 - &_saveround (8,$out,-128,"%r8","%r10"); # KA<<<30 - &_rotl128 ("%rax","%rbx",30); # 15+30=45 - &_saveround (10,$out,-128,"%rax","%rbx"); # KL<<<45 - &_rotl128 ("%r8","%r10",15); # 30+15=45 - &_saveround (12,$out,-128,"%r8"); # KA<<<45 - &_rotl128 ("%rax","%rbx",15); # 45+15=60 - &_saveround (13,$out,-128,"%rbx"); # KL<<<60 - &_rotl128 ("%r8","%r10",15); # 45+15=60 - &_saveround (14,$out,-128,"%r8","%r10"); # KA<<<60 - &_rotl128 ("%rax","%rbx",17); # 60+17=77 - &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<77 - &_rotl128 ("%rax","%rbx",17); # 77+17=94 - &_saveround (18,$out,-128,"%rax","%rbx"); # KL<<<94 - &_rotl128 ("%r8","%r10",34); # 60+34=94 - &_saveround (20,$out,-128,"%r8","%r10"); # KA<<<94 - &_rotl128 ("%rax","%rbx",17); # 94+17=111 - &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<111 - &_rotl128 ("%r8","%r10",17); # 94+17=111 - &_saveround (24,$out,-128,"%r8","%r10"); # KA<<<111 -$code.=<<___; - mov \$3,%eax - jmp .Ldone -.align 16 -.L2nd256: -___ - &_saveround (6,$out,@S); # temp storage for KA! -$code.=<<___; - xor `4*8+0`($out),@S[1] # KA^KR - xor `4*8+4`($out),@S[0] - xor `5*8+0`($out),@S[3] - xor `5*8+4`($out),@S[2] -___ - &Camellia_Feistel($step++); - &Camellia_Feistel($step++); - - &_loadround (0,$out,"%rax","%rbx"); # KL - &_loadround (4,$out,"%rcx","%rdx"); # KR - &_loadround (6,$out,"%r14","%r15"); # KA -$code.=<<___; - lea 128($out),$out # size optimization - shl \$32,%r8 # @S[0]|| - shl \$32,%r10 # @S[2]|| - or %r9,%r8 # ||@S[1] - or %r11,%r10 # ||@S[3] -___ - &_saveround (2,$out,-128,"%r8","%r10"); # KB<<<0 - &_rotl128 ("%rcx","%rdx",15); - &_saveround (4,$out,-128,"%rcx","%rdx"); # KR<<<15 - &_rotl128 ("%r14","%r15",15); - &_saveround (6,$out,-128,"%r14","%r15"); # KA<<<15 - &_rotl128 ("%rcx","%rdx",15); # 15+15=30 - &_saveround (8,$out,-128,"%rcx","%rdx"); # KR<<<30 - &_rotl128 ("%r8","%r10",30); - &_saveround (10,$out,-128,"%r8","%r10"); # KB<<<30 - &_rotl128 ("%rax","%rbx",45); - &_saveround (12,$out,-128,"%rax","%rbx"); # KL<<<45 - &_rotl128 ("%r14","%r15",30); # 15+30=45 - &_saveround (14,$out,-128,"%r14","%r15"); # KA<<<45 - &_rotl128 ("%rax","%rbx",15); # 45+15=60 - &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<60 - &_rotl128 ("%rcx","%rdx",30); # 30+30=60 - &_saveround (18,$out,-128,"%rcx","%rdx"); # KR<<<60 - &_rotl128 ("%r8","%r10",30); # 30+30=60 - &_saveround (20,$out,-128,"%r8","%r10"); # KB<<<60 - &_rotl128 ("%rax","%rbx",17); # 60+17=77 - &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<77 - &_rotl128 ("%r14","%r15",32); # 45+32=77 - &_saveround (24,$out,-128,"%r14","%r15"); # KA<<<77 - &_rotl128 ("%rcx","%rdx",34); # 60+34=94 - &_saveround (26,$out,-128,"%rcx","%rdx"); # KR<<<94 - &_rotl128 ("%r14","%r15",17); # 77+17=94 - &_saveround (28,$out,-128,"%r14","%r15"); # KA<<<77 - &_rotl128 ("%rax","%rbx",34); # 77+34=111 - &_saveround (30,$out,-128,"%rax","%rbx"); # KL<<<111 - &_rotl128 ("%r8","%r10",51); # 60+51=111 - &_saveround (32,$out,-128,"%r8","%r10"); # KB<<<111 -$code.=<<___; - mov \$4,%eax -.Ldone: - mov 0(%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r13 - mov 24(%rsp),%rbp - mov 32(%rsp),%rbx - lea 40(%rsp),%rsp -.Lkey_epilogue: - ret -.size Camellia_Ekeygen,.-Camellia_Ekeygen -___ -} - -@SBOX=( -112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, - 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, -134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, -166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, -139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, -223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, - 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, -254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, -170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, - 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, -135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, - 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, -233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, -120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, -114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, - 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); - -sub S1110 { my $i=shift; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); } -sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); } -sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); } -sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); } - -$code.=<<___; -.align 64 -.LCamellia_SIGMA: -.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 -.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 -.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 -.long 0, 0, 0, 0 -.LCamellia_SBOX: -___ -# tables are interleaved, remember? -sub data_word { $code.=".long\t".join(',',@_)."\n"; } -for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } -for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } - -# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, -# size_t length, const CAMELLIA_KEY *key, -# unsigned char *ivp,const int enc); -{ -$_key="0(%rsp)"; -$_end="8(%rsp)"; # inp+len&~15 -$_res="16(%rsp)"; # len&15 -$ivec="24(%rsp)"; -$_ivp="40(%rsp)"; -$_rsp="48(%rsp)"; - -$code.=<<___; -.globl Camellia_cbc_encrypt -.type Camellia_cbc_encrypt,\@function,6 -.align 16 -Camellia_cbc_encrypt: - cmp \$0,%rdx - je .Lcbc_abort - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 -.Lcbc_prologue: - - mov %rsp,%rbp - sub \$64,%rsp - and \$-64,%rsp - - # place stack frame just "above mod 1024" the key schedule, - # this ensures that cache associativity suffices - lea -64-63(%rcx),%r10 - sub %rsp,%r10 - neg %r10 - and \$0x3C0,%r10 - sub %r10,%rsp - #add \$8,%rsp # 8 is reserved for callee's ra - - mov %rdi,$inp # inp argument - mov %rsi,$out # out argument - mov %r8,%rbx # ivp argument - mov %rcx,$key # key argument - mov 272(%rcx),${keyend}d # grandRounds - - mov %r8,$_ivp - mov %rbp,$_rsp - -.Lcbc_body: - lea .LCamellia_SBOX(%rip),$Tbl - - mov \$32,%ecx -.align 4 -.Lcbc_prefetch_sbox: - mov 0($Tbl),%rax - mov 32($Tbl),%rsi - mov 64($Tbl),%rdi - mov 96($Tbl),%r11 - lea 128($Tbl),$Tbl - loop .Lcbc_prefetch_sbox - sub \$4096,$Tbl - shl \$6,$keyend - mov %rdx,%rcx # len argument - lea ($key,$keyend),$keyend - - cmp \$0,%r9d # enc argument - je .LCBC_DECRYPT - - and \$-16,%rdx - and \$15,%rcx # length residue - lea ($inp,%rdx),%rdx - mov $key,$_key - mov %rdx,$_end - mov %rcx,$_res - - cmp $inp,%rdx - mov 0(%rbx),@S[0] # load IV - mov 4(%rbx),@S[1] - mov 8(%rbx),@S[2] - mov 12(%rbx),@S[3] - je .Lcbc_enc_tail - jmp .Lcbc_eloop - -.align 16 -.Lcbc_eloop: - xor 0($inp),@S[0] - xor 4($inp),@S[1] - xor 8($inp),@S[2] - bswap @S[0] - xor 12($inp),@S[3] - bswap @S[1] - bswap @S[2] - bswap @S[3] - - call _x86_64_Camellia_encrypt - - mov $_key,$key # "rewind" the key - bswap @S[0] - mov $_end,%rdx - bswap @S[1] - mov $_res,%rcx - bswap @S[2] - mov @S[0],0($out) - bswap @S[3] - mov @S[1],4($out) - mov @S[2],8($out) - lea 16($inp),$inp - mov @S[3],12($out) - cmp %rdx,$inp - lea 16($out),$out - jne .Lcbc_eloop - - cmp \$0,%rcx - jne .Lcbc_enc_tail - - mov $_ivp,$out - mov @S[0],0($out) # write out IV residue - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - jmp .Lcbc_done - -.align 16 -.Lcbc_enc_tail: - xor %rax,%rax - mov %rax,0+$ivec - mov %rax,8+$ivec - mov %rax,$_res - -.Lcbc_enc_pushf: - pushfq - cld - mov $inp,%rsi - lea 8+$ivec,%rdi - .long 0x9066A4F3 # rep movsb - popfq -.Lcbc_enc_popf: - - lea $ivec,$inp - lea 16+$ivec,%rax - mov %rax,$_end - jmp .Lcbc_eloop # one more time - -.align 16 -.LCBC_DECRYPT: - xchg $key,$keyend - add \$15,%rdx - and \$15,%rcx # length residue - and \$-16,%rdx - mov $key,$_key - lea ($inp,%rdx),%rdx - mov %rdx,$_end - mov %rcx,$_res - - mov (%rbx),%rax # load IV - mov 8(%rbx),%rbx - jmp .Lcbc_dloop -.align 16 -.Lcbc_dloop: - mov 0($inp),@S[0] - mov 4($inp),@S[1] - mov 8($inp),@S[2] - bswap @S[0] - mov 12($inp),@S[3] - bswap @S[1] - mov %rax,0+$ivec # save IV to temporary storage - bswap @S[2] - mov %rbx,8+$ivec - bswap @S[3] - - call _x86_64_Camellia_decrypt - - mov $_key,$key # "rewind" the key - mov $_end,%rdx - mov $_res,%rcx - - bswap @S[0] - mov ($inp),%rax # load IV for next iteration - bswap @S[1] - mov 8($inp),%rbx - bswap @S[2] - xor 0+$ivec,@S[0] - bswap @S[3] - xor 4+$ivec,@S[1] - xor 8+$ivec,@S[2] - lea 16($inp),$inp - xor 12+$ivec,@S[3] - cmp %rdx,$inp - je .Lcbc_ddone - - mov @S[0],0($out) - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - lea 16($out),$out - jmp .Lcbc_dloop - -.align 16 -.Lcbc_ddone: - mov $_ivp,%rdx - cmp \$0,%rcx - jne .Lcbc_dec_tail - - mov @S[0],0($out) - mov @S[1],4($out) - mov @S[2],8($out) - mov @S[3],12($out) - - mov %rax,(%rdx) # write out IV residue - mov %rbx,8(%rdx) - jmp .Lcbc_done -.align 16 -.Lcbc_dec_tail: - mov @S[0],0+$ivec - mov @S[1],4+$ivec - mov @S[2],8+$ivec - mov @S[3],12+$ivec - -.Lcbc_dec_pushf: - pushfq - cld - lea 8+$ivec,%rsi - lea ($out),%rdi - .long 0x9066A4F3 # rep movsb - popfq -.Lcbc_dec_popf: - - mov %rax,(%rdx) # write out IV residue - mov %rbx,8(%rdx) - jmp .Lcbc_done - -.align 16 -.Lcbc_done: - mov $_rsp,%rcx - mov 0(%rcx),%r15 - mov 8(%rcx),%r14 - mov 16(%rcx),%r13 - mov 24(%rcx),%r12 - mov 32(%rcx),%rbp - mov 40(%rcx),%rbx - lea 48(%rcx),%rsp -.Lcbc_abort: - ret -.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt - -.asciz "Camellia for x86_64 by " -___ -} - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type common_se_handler,\@abi-omnipotent -.align 16 -common_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - lea -64(%rsp),%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - lea 40(%rax),%rax - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r13 - mov -32(%rax),%r14 - mov -40(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - jmp .Lcommon_seh_exit -.size common_se_handler,.-common_se_handler - -.type cbc_se_handler,\@abi-omnipotent -.align 16 -cbc_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - lea -64(%rsp),%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lcbc_prologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_prologue - jb .Lin_cbc_prologue - - lea .Lcbc_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_body - jb .Lin_cbc_frame_setup - - mov 152($context),%rax # pull context->Rsp - - lea .Lcbc_abort(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lcbc_abort - jae .Lin_cbc_prologue - - # handle pushf/popf in Camellia_cbc_encrypt - lea .Lcbc_enc_pushf(%rip),%r10 - cmp %r10,%rbx # context->Rip<=.Lcbc_enc_pushf - jbe .Lin_cbc_no_flag - lea 8(%rax),%rax - lea .Lcbc_enc_popf(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_enc_popf - jb .Lin_cbc_no_flag - lea -8(%rax),%rax - lea .Lcbc_dec_pushf(%rip),%r10 - cmp %r10,%rbx # context->Rip<=.Lcbc_dec_pushf - jbe .Lin_cbc_no_flag - lea 8(%rax),%rax - lea .Lcbc_dec_popf(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lcbc_dec_popf - jb .Lin_cbc_no_flag - lea -8(%rax),%rax - -.Lin_cbc_no_flag: - mov 48(%rax),%rax # $_rsp - lea 48(%rax),%rax - -.Lin_cbc_frame_setup: - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_cbc_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - -.align 4 -.Lcommon_seh_exit: - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - lea 64(%rsp),%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size cbc_se_handler,.-cbc_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_Camellia_EncryptBlock_Rounds - .rva .LSEH_end_Camellia_EncryptBlock_Rounds - .rva .LSEH_info_Camellia_EncryptBlock_Rounds - - .rva .LSEH_begin_Camellia_DecryptBlock_Rounds - .rva .LSEH_end_Camellia_DecryptBlock_Rounds - .rva .LSEH_info_Camellia_DecryptBlock_Rounds - - .rva .LSEH_begin_Camellia_Ekeygen - .rva .LSEH_end_Camellia_Ekeygen - .rva .LSEH_info_Camellia_Ekeygen - - .rva .LSEH_begin_Camellia_cbc_encrypt - .rva .LSEH_end_Camellia_cbc_encrypt - .rva .LSEH_info_Camellia_cbc_encrypt - -.section .xdata -.align 8 -.LSEH_info_Camellia_EncryptBlock_Rounds: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[] -.LSEH_info_Camellia_DecryptBlock_Rounds: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_Camellia_Ekeygen: - .byte 9,0,0,0 - .rva common_se_handler - .rva .Lkey_prologue,.Lkey_epilogue # HandlerData[] -.LSEH_info_Camellia_cbc_encrypt: - .byte 9,0,0,0 - .rva cbc_se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/cast/asm/cast-586.pl b/drivers/builtin_openssl2/crypto/cast/asm/cast-586.pl deleted file mode 100644 index bf6810d335..0000000000 --- a/drivers/builtin_openssl2/crypto/cast/asm/cast-586.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl - -# define for pentium pro friendly version -$ppro=1; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; - -&asm_init($ARGV[0],"cast-586.pl",$ARGV[$#ARGV] eq "386"); - -$CAST_ROUNDS=16; -$L="edi"; -$R="esi"; -$K="ebp"; -$tmp1="ecx"; -$tmp2="ebx"; -$tmp3="eax"; -$tmp4="edx"; -$S1="CAST_S_table0"; -$S2="CAST_S_table1"; -$S3="CAST_S_table2"; -$S4="CAST_S_table3"; - -@F1=("add","xor","sub"); -@F2=("xor","sub","add"); -@F3=("sub","add","xor"); - -&CAST_encrypt("CAST_encrypt",1); -&CAST_encrypt("CAST_decrypt",0); -&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1); - -&asm_finish(); - -sub CAST_encrypt { - local($name,$enc)=@_; - - local($win_ex)=<<"EOF"; -EXTERN _CAST_S_table0:DWORD -EXTERN _CAST_S_table1:DWORD -EXTERN _CAST_S_table2:DWORD -EXTERN _CAST_S_table3:DWORD -EOF - &main::external_label( - "CAST_S_table0", - "CAST_S_table1", - "CAST_S_table2", - "CAST_S_table3", - ); - - &function_begin_B($name,$win_ex); - - &comment(""); - - &push("ebp"); - &push("ebx"); - &mov($tmp2,&wparam(0)); - &mov($K,&wparam(1)); - &push("esi"); - &push("edi"); - - &comment("Load the 2 words"); - &mov($L,&DWP(0,$tmp2,"",0)); - &mov($R,&DWP(4,$tmp2,"",0)); - - &comment('Get short key flag'); - &mov($tmp3,&DWP(128,$K,"",0)); - if($enc) { - &push($tmp3); - } else { - &or($tmp3,$tmp3); - &jnz(&label('cast_dec_skip')); - } - - &xor($tmp3, $tmp3); - - # encrypting part - - if ($enc) { - &E_CAST( 0,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 1,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 2,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 3,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 4,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 5,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 6,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 7,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 8,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 9,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(10,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(11,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &comment('test short key flag'); - &pop($tmp4); - &or($tmp4,$tmp4); - &jnz(&label('cast_enc_done')); - &E_CAST(12,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(13,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(14,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(15,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - } else { - &E_CAST(15,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(14,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(13,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(12,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &set_label('cast_dec_skip'); - &E_CAST(11,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST(10,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 9,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 8,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 7,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 6,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 5,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 4,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 3,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 2,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 1,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); - &E_CAST( 0,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); - } - - &set_label('cast_enc_done') if $enc; -# Why the nop? - Ben 17/1/99 - &nop(); - &mov($tmp3,&wparam(0)); - &mov(&DWP(4,$tmp3,"",0),$L); - &mov(&DWP(0,$tmp3,"",0),$R); - &function_end($name); -} - -sub E_CAST { - local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_; - # Ri needs to have 16 pre added. - - &comment("round $i"); - &mov( $tmp4, &DWP($i*8,$K,"",1)); - - &mov( $tmp1, &DWP($i*8+4,$K,"",1)); - &$OP1( $tmp4, $R); - - &rotl( $tmp4, &LB($tmp1)); - - if ($ppro) { - &mov( $tmp2, $tmp4); # B - &xor( $tmp1, $tmp1); - - &movb( &LB($tmp1), &HB($tmp4)); # A - &and( $tmp2, 0xff); - - &shr( $tmp4, 16); # - &xor( $tmp3, $tmp3); - } else { - &mov( $tmp2, $tmp4); # B - &movb( &LB($tmp1), &HB($tmp4)); # A # BAD BAD BAD - - &shr( $tmp4, 16); # - &and( $tmp2, 0xff); - } - - &movb( &LB($tmp3), &HB($tmp4)); # C # BAD BAD BAD - &and( $tmp4, 0xff); # D - - &mov( $tmp1, &DWP($S1,"",$tmp1,4)); - &mov( $tmp2, &DWP($S2,"",$tmp2,4)); - - &$OP2( $tmp1, $tmp2); - &mov( $tmp2, &DWP($S3,"",$tmp3,4)); - - &$OP3( $tmp1, $tmp2); - &mov( $tmp2, &DWP($S4,"",$tmp4,4)); - - &$OP1( $tmp1, $tmp2); - # XXX - - &xor( $L, $tmp1); - # XXX -} - diff --git a/drivers/builtin_openssl2/crypto/cast/cast_lcl.h b/drivers/builtin_openssl2/crypto/cast/cast_lcl.h index 7c4ad41b21..b0f08294e3 100644 --- a/drivers/builtin_openssl2/crypto/cast/cast_lcl.h +++ b/drivers/builtin_openssl2/crypto/cast/cast_lcl.h @@ -152,6 +152,8 @@ #if defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER) # define ROTL(a,n) (_lrotl(a,n)) +#elif defined(PEDANTIC) +# define ROTL(a,n) ((((a)<<(n))&0xffffffffL)|((a)>>((32-(n))&31))) #else # define ROTL(a,n) ((((a)<<(n))&0xffffffffL)|((a)>>(32-(n)))) #endif diff --git a/drivers/builtin_openssl2/crypto/cast/casttest.c b/drivers/builtin_openssl2/crypto/cast/casttest.c deleted file mode 100644 index dc31bc6604..0000000000 --- a/drivers/builtin_openssl2/crypto/cast/casttest.c +++ /dev/null @@ -1,241 +0,0 @@ -/* crypto/cast/casttest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#include /* To see if OPENSSL_NO_CAST is defined */ - -#include "../e_os.h" - -#ifdef OPENSSL_NO_CAST -int main(int argc, char *argv[]) -{ - printf("No CAST support\n"); - return (0); -} -#else -# include - -# define FULL_TEST - -static unsigned char k[16] = { - 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, - 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A -}; - -static unsigned char in[8] = - { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF }; - -static int k_len[3] = { 16, 10, 5 }; - -static unsigned char c[3][8] = { - {0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2}, - {0xEB, 0x6A, 0x71, 0x1A, 0x2C, 0x02, 0x27, 0x1B}, - {0x7A, 0xC8, 0x16, 0xD1, 0x6E, 0x9B, 0x30, 0x2E}, -}; - -static unsigned char out[80]; - -static unsigned char in_a[16] = { - 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, - 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A -}; - -static unsigned char in_b[16] = { - 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, - 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A -}; - -static unsigned char c_a[16] = { - 0xEE, 0xA9, 0xD0, 0xA2, 0x49, 0xFD, 0x3B, 0xA6, - 0xB3, 0x43, 0x6F, 0xB8, 0x9D, 0x6D, 0xCA, 0x92 -}; - -static unsigned char c_b[16] = { - 0xB2, 0xC9, 0x5E, 0xB0, 0x0C, 0x31, 0xAD, 0x71, - 0x80, 0xAC, 0x05, 0xB8, 0xE8, 0x3D, 0x69, 0x6E -}; - -# if 0 -char *text = "Hello to all people out there"; - -static unsigned char cfb_key[16] = { - 0xe1, 0xf0, 0xc3, 0xd2, 0xa5, 0xb4, 0x87, 0x96, - 0x69, 0x78, 0x4b, 0x5a, 0x2d, 0x3c, 0x0f, 0x1e, -}; -static unsigned char cfb_iv[80] = - { 0x34, 0x12, 0x78, 0x56, 0xab, 0x90, 0xef, 0xcd }; -static unsigned char cfb_buf1[40], cfb_buf2[40], cfb_tmp[8]; -# define CFB_TEST_SIZE 24 -static unsigned char plain[CFB_TEST_SIZE] = { - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, - 0x69, 0x6d, 0x65, 0x20, 0x66, 0x6f, - 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 -}; - -static unsigned char cfb_cipher64[CFB_TEST_SIZE] = { - 0x59, 0xD8, 0xE2, 0x65, 0x00, 0x58, 0x6C, 0x3F, - 0x2C, 0x17, 0x25, 0xD0, 0x1A, 0x38, 0xB7, 0x2A, - 0x39, 0x61, 0x37, 0xDC, 0x79, 0xFB, 0x9F, 0x45 -/*- 0xF9,0x78,0x32,0xB5,0x42,0x1A,0x6B,0x38, - 0x9A,0x44,0xD6,0x04,0x19,0x43,0xC4,0xD9, - 0x3D,0x1E,0xAE,0x47,0xFC,0xCF,0x29,0x0B,*/ -}; -# endif - -int main(int argc, char *argv[]) -{ -# ifdef FULL_TEST - long l; - CAST_KEY key_b; -# endif - int i, z, err = 0; - CAST_KEY key; - - for (z = 0; z < 3; z++) { - CAST_set_key(&key, k_len[z], k); - - CAST_ecb_encrypt(in, out, &key, CAST_ENCRYPT); - if (memcmp(out, &(c[z][0]), 8) != 0) { - printf("ecb cast error encrypting for keysize %d\n", - k_len[z] * 8); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", c[z][i]); - err = 20; - printf("\n"); - } - - CAST_ecb_encrypt(out, out, &key, CAST_DECRYPT); - if (memcmp(out, in, 8) != 0) { - printf("ecb cast error decrypting for keysize %d\n", - k_len[z] * 8); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", in[i]); - printf("\n"); - err = 3; - } - } - if (err == 0) - printf("ecb cast5 ok\n"); - -# ifdef FULL_TEST - { - unsigned char out_a[16], out_b[16]; - static char *hex = "0123456789ABCDEF"; - - printf("This test will take some time...."); - fflush(stdout); - memcpy(out_a, in_a, sizeof(in_a)); - memcpy(out_b, in_b, sizeof(in_b)); - i = 1; - - for (l = 0; l < 1000000L; l++) { - CAST_set_key(&key_b, 16, out_b); - CAST_ecb_encrypt(&(out_a[0]), &(out_a[0]), &key_b, CAST_ENCRYPT); - CAST_ecb_encrypt(&(out_a[8]), &(out_a[8]), &key_b, CAST_ENCRYPT); - CAST_set_key(&key, 16, out_a); - CAST_ecb_encrypt(&(out_b[0]), &(out_b[0]), &key, CAST_ENCRYPT); - CAST_ecb_encrypt(&(out_b[8]), &(out_b[8]), &key, CAST_ENCRYPT); - if ((l & 0xffff) == 0xffff) { - printf("%c", hex[i & 0x0f]); - fflush(stdout); - i++; - } - } - - if ((memcmp(out_a, c_a, sizeof(c_a)) != 0) || - (memcmp(out_b, c_b, sizeof(c_b)) != 0)) { - printf("\n"); - printf("Error\n"); - - printf("A out ="); - for (i = 0; i < 16; i++) - printf("%02X ", out_a[i]); - printf("\nactual="); - for (i = 0; i < 16; i++) - printf("%02X ", c_a[i]); - printf("\n"); - - printf("B out ="); - for (i = 0; i < 16; i++) - printf("%02X ", out_b[i]); - printf("\nactual="); - for (i = 0; i < 16; i++) - printf("%02X ", c_b[i]); - printf("\n"); - } else - printf(" ok\n"); - } -# endif - - EXIT(err); - return (err); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/cmac/cmac.c b/drivers/builtin_openssl2/crypto/cmac/cmac.c index 774e6dc919..2954b6eb7d 100644 --- a/drivers/builtin_openssl2/crypto/cmac/cmac.c +++ b/drivers/builtin_openssl2/crypto/cmac/cmac.c @@ -160,6 +160,14 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, EVPerr(EVP_F_CMAC_INIT, EVP_R_DISABLED_FOR_FIPS); return 0; } + + /* Switch to FIPS cipher implementation if possible */ + if (cipher != NULL) { + const EVP_CIPHER *fcipher; + fcipher = FIPS_get_cipherbynid(EVP_CIPHER_nid(cipher)); + if (fcipher != NULL) + cipher = fcipher; + } /* * Other algorithm blocking will be done in FIPS_cmac_init, via * FIPS_cipherinit(). diff --git a/drivers/builtin_openssl2/crypto/cms/cms_asn1.c b/drivers/builtin_openssl2/crypto/cms/cms_asn1.c index f9f267afdc..81a3407f12 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_asn1.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_asn1.c @@ -97,6 +97,8 @@ static int cms_si_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, EVP_PKEY_free(si->pkey); if (si->signer) X509_free(si->signer); + if (si->pctx) + EVP_MD_CTX_cleanup(&si->mctx); } return 1; } @@ -164,10 +166,21 @@ ASN1_CHOICE(CMS_KeyAgreeRecipientIdentifier) = { ASN1_IMP(CMS_KeyAgreeRecipientIdentifier, d.rKeyId, CMS_RecipientKeyIdentifier, 0) } ASN1_CHOICE_END(CMS_KeyAgreeRecipientIdentifier) -ASN1_SEQUENCE(CMS_RecipientEncryptedKey) = { +static int cms_rek_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) +{ + CMS_RecipientEncryptedKey *rek = (CMS_RecipientEncryptedKey *)*pval; + if (operation == ASN1_OP_FREE_POST) { + if (rek->pkey) + EVP_PKEY_free(rek->pkey); + } + return 1; +} + +ASN1_SEQUENCE_cb(CMS_RecipientEncryptedKey, cms_rek_cb) = { ASN1_SIMPLE(CMS_RecipientEncryptedKey, rid, CMS_KeyAgreeRecipientIdentifier), ASN1_SIMPLE(CMS_RecipientEncryptedKey, encryptedKey, ASN1_OCTET_STRING) -} ASN1_SEQUENCE_END(CMS_RecipientEncryptedKey) +} ASN1_SEQUENCE_END_cb(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey) ASN1_SEQUENCE(CMS_OriginatorPublicKey) = { ASN1_SIMPLE(CMS_OriginatorPublicKey, algorithm, X509_ALGOR), @@ -180,13 +193,29 @@ ASN1_CHOICE(CMS_OriginatorIdentifierOrKey) = { ASN1_IMP(CMS_OriginatorIdentifierOrKey, d.originatorKey, CMS_OriginatorPublicKey, 1) } ASN1_CHOICE_END(CMS_OriginatorIdentifierOrKey) -ASN1_SEQUENCE(CMS_KeyAgreeRecipientInfo) = { +static int cms_kari_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) +{ + CMS_KeyAgreeRecipientInfo *kari = (CMS_KeyAgreeRecipientInfo *)*pval; + if (operation == ASN1_OP_NEW_POST) { + EVP_CIPHER_CTX_init(&kari->ctx); + EVP_CIPHER_CTX_set_flags(&kari->ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); + kari->pctx = NULL; + } else if (operation == ASN1_OP_FREE_POST) { + if (kari->pctx) + EVP_PKEY_CTX_free(kari->pctx); + EVP_CIPHER_CTX_cleanup(&kari->ctx); + } + return 1; +} + +ASN1_SEQUENCE_cb(CMS_KeyAgreeRecipientInfo, cms_kari_cb) = { ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, version, LONG), ASN1_EXP(CMS_KeyAgreeRecipientInfo, originator, CMS_OriginatorIdentifierOrKey, 0), ASN1_EXP_OPT(CMS_KeyAgreeRecipientInfo, ukm, ASN1_OCTET_STRING, 1), ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), ASN1_SEQUENCE_OF(CMS_KeyAgreeRecipientInfo, recipientEncryptedKeys, CMS_RecipientEncryptedKey) -} ASN1_SEQUENCE_END(CMS_KeyAgreeRecipientInfo) +} ASN1_SEQUENCE_END_cb(CMS_KeyAgreeRecipientInfo, CMS_KeyAgreeRecipientInfo) ASN1_SEQUENCE(CMS_KEKIdentifier) = { ASN1_SIMPLE(CMS_KEKIdentifier, keyIdentifier, ASN1_OCTET_STRING), @@ -225,6 +254,8 @@ static int cms_ri_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, EVP_PKEY_free(ktri->pkey); if (ktri->recip) X509_free(ktri->recip); + if (ktri->pctx) + EVP_PKEY_CTX_free(ktri->pctx); } else if (ri->type == CMS_RECIPINFO_KEK) { CMS_KEKRecipientInfo *kekri = ri->d.kekri; if (kekri->key) { @@ -379,3 +410,50 @@ ASN1_SEQUENCE(CMS_Receipt) = { ASN1_SIMPLE(CMS_Receipt, signedContentIdentifier, ASN1_OCTET_STRING), ASN1_SIMPLE(CMS_Receipt, originatorSignatureValue, ASN1_OCTET_STRING) } ASN1_SEQUENCE_END(CMS_Receipt) + +/* + * Utilities to encode the CMS_SharedInfo structure used during key + * derivation. + */ + +typedef struct { + X509_ALGOR *keyInfo; + ASN1_OCTET_STRING *entityUInfo; + ASN1_OCTET_STRING *suppPubInfo; +} CMS_SharedInfo; + +ASN1_SEQUENCE(CMS_SharedInfo) = { + ASN1_SIMPLE(CMS_SharedInfo, keyInfo, X509_ALGOR), + ASN1_EXP_OPT(CMS_SharedInfo, entityUInfo, ASN1_OCTET_STRING, 0), + ASN1_EXP_OPT(CMS_SharedInfo, suppPubInfo, ASN1_OCTET_STRING, 2), +} ASN1_SEQUENCE_END(CMS_SharedInfo) + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen) +{ + union { + CMS_SharedInfo *pecsi; + ASN1_VALUE *a; + } intsi = { + NULL + }; + + ASN1_OCTET_STRING oklen; + unsigned char kl[4]; + CMS_SharedInfo ecsi; + + keylen <<= 3; + kl[0] = (keylen >> 24) & 0xff; + kl[1] = (keylen >> 16) & 0xff; + kl[2] = (keylen >> 8) & 0xff; + kl[3] = keylen & 0xff; + oklen.length = 4; + oklen.data = kl; + oklen.type = V_ASN1_OCTET_STRING; + oklen.flags = 0; + ecsi.keyInfo = kekalg; + ecsi.entityUInfo = ukm; + ecsi.suppPubInfo = &oklen; + intsi.pecsi = &ecsi; + return ASN1_item_i2d(intsi.a, pder, ASN1_ITEM_rptr(CMS_SharedInfo)); +} diff --git a/drivers/builtin_openssl2/crypto/cms/cms_env.c b/drivers/builtin_openssl2/crypto/cms/cms_env.c index 1c3046ce92..93c06cb00a 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_env.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_env.c @@ -100,6 +100,36 @@ static CMS_EnvelopedData *cms_enveloped_data_init(CMS_ContentInfo *cms) return cms_get0_enveloped(cms); } +int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd) +{ + EVP_PKEY *pkey; + int i; + if (ri->type == CMS_RECIPINFO_TRANS) + pkey = ri->d.ktri->pkey; + else if (ri->type == CMS_RECIPINFO_AGREE) { + EVP_PKEY_CTX *pctx = ri->d.kari->pctx; + if (!pctx) + return 0; + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!pkey) + return 0; + } else + return 0; + if (!pkey->ameth || !pkey->ameth->pkey_ctrl) + return 1; + i = pkey->ameth->pkey_ctrl(pkey, ASN1_PKEY_CTRL_CMS_ENVELOPE, cmd, ri); + if (i == -2) { + CMSerr(CMS_F_CMS_ENV_ASN1_CTRL, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + if (i <= 0) { + CMSerr(CMS_F_CMS_ENV_ASN1_CTRL, CMS_R_CTRL_FAILURE); + return 0; + } + return 1; +} + STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms) { CMS_EnvelopedData *env; @@ -114,6 +144,15 @@ int CMS_RecipientInfo_type(CMS_RecipientInfo *ri) return ri->type; } +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri) +{ + if (ri->type == CMS_RECIPINFO_TRANS) + return ri->d.ktri->pctx; + else if (ri->type == CMS_RECIPINFO_AGREE) + return ri->d.kari->pctx; + return NULL; +} + CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher) { CMS_ContentInfo *cms; @@ -137,19 +176,63 @@ CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher) /* Key Transport Recipient Info (KTRI) routines */ +/* Initialise a ktri based on passed certificate and key */ + +static int cms_RecipientInfo_ktri_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags) +{ + CMS_KeyTransRecipientInfo *ktri; + int idtype; + + ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo); + if (!ri->d.ktri) + return 0; + ri->type = CMS_RECIPINFO_TRANS; + + ktri = ri->d.ktri; + + if (flags & CMS_USE_KEYID) { + ktri->version = 2; + idtype = CMS_RECIPINFO_KEYIDENTIFIER; + } else { + ktri->version = 0; + idtype = CMS_RECIPINFO_ISSUER_SERIAL; + } + + /* + * Not a typo: RecipientIdentifier and SignerIdentifier are the same + * structure. + */ + + if (!cms_set1_SignerIdentifier(ktri->rid, recip, idtype)) + return 0; + + CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509); + CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY); + ktri->pkey = pk; + ktri->recip = recip; + + if (flags & CMS_KEY_PARAM) { + ktri->pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!ktri->pctx) + return 0; + if (EVP_PKEY_encrypt_init(ktri->pctx) <= 0) + return 0; + } else if (!cms_env_asn1_ctrl(ri, 0)) + return 0; + return 1; +} + /* - * Add a recipient certificate. For now only handle key transport. If we ever - * handle key agreement will need updating. + * Add a recipient certificate using appropriate type of RecipientInfo */ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, X509 *recip, unsigned int flags) { CMS_RecipientInfo *ri = NULL; - CMS_KeyTransRecipientInfo *ktri; CMS_EnvelopedData *env; EVP_PKEY *pk = NULL; - int i, type; env = cms_get0_enveloped(cms); if (!env) goto err; @@ -159,59 +242,36 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, if (!ri) goto merr; - /* Initialize and add key transport recipient info */ - - ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo); - if (!ri->d.ktri) - goto merr; - ri->type = CMS_RECIPINFO_TRANS; - - ktri = ri->d.ktri; - - X509_check_purpose(recip, -1, -1); pk = X509_get_pubkey(recip); if (!pk) { CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_ERROR_GETTING_PUBLIC_KEY); goto err; } - CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509); - ktri->pkey = pk; - ktri->recip = recip; - if (flags & CMS_USE_KEYID) { - ktri->version = 2; - if (env->version < 2) - env->version = 2; - type = CMS_RECIPINFO_KEYIDENTIFIER; - } else { - ktri->version = 0; - type = CMS_RECIPINFO_ISSUER_SERIAL; - } + switch (cms_pkey_get_ri_type(pk)) { - /* - * Not a typo: RecipientIdentifier and SignerIdentifier are the same - * structure. - */ + case CMS_RECIPINFO_TRANS: + if (!cms_RecipientInfo_ktri_init(ri, recip, pk, flags)) + goto err; + break; - if (!cms_set1_SignerIdentifier(ktri->rid, recip, type)) + case CMS_RECIPINFO_AGREE: + if (!cms_RecipientInfo_kari_init(ri, recip, pk, flags)) + goto err; + break; + + default: + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); goto err; - if (pk->ameth && pk->ameth->pkey_ctrl) { - i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_ENVELOPE, 0, ri); - if (i == -2) { - CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, - CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); - goto err; - } - if (i <= 0) { - CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_CTRL_FAILURE); - goto err; - } } if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri)) goto merr; + EVP_PKEY_free(pk); + return ri; merr: @@ -219,6 +279,8 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, err: if (ri) M_ASN1_free_of(ri, CMS_RecipientInfo); + if (pk) + EVP_PKEY_free(pk); return NULL; } @@ -288,7 +350,7 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, { CMS_KeyTransRecipientInfo *ktri; CMS_EncryptedContentInfo *ec; - EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY_CTX *pctx; unsigned char *ek = NULL; size_t eklen; @@ -301,12 +363,19 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, ktri = ri->d.ktri; ec = cms->d.envelopedData->encryptedContentInfo; - pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); - if (!pctx) - return 0; + pctx = ktri->pctx; - if (EVP_PKEY_encrypt_init(pctx) <= 0) - goto err; + if (pctx) { + if (!cms_env_asn1_ctrl(ri, 0)) + goto err; + } else { + pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!pctx) + return 0; + + if (EVP_PKEY_encrypt_init(pctx) <= 0) + goto err; + } if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_ENCRYPT, EVP_PKEY_CTRL_CMS_ENCRYPT, 0, ri) <= 0) { @@ -333,8 +402,10 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, ret = 1; err: - if (pctx) + if (pctx) { EVP_PKEY_CTX_free(pctx); + ktri->pctx = NULL; + } if (ek) OPENSSL_free(ek); return ret; @@ -347,7 +418,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) { CMS_KeyTransRecipientInfo *ktri = ri->d.ktri; - EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY *pkey = ktri->pkey; unsigned char *ek = NULL; size_t eklen; int ret = 0; @@ -359,20 +430,23 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, return 0; } - pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); - if (!pctx) + ktri->pctx = EVP_PKEY_CTX_new(pkey, NULL); + if (!ktri->pctx) return 0; - if (EVP_PKEY_decrypt_init(pctx) <= 0) + if (EVP_PKEY_decrypt_init(ktri->pctx) <= 0) goto err; - if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_DECRYPT, + if (!cms_env_asn1_ctrl(ri, 1)) + goto err; + + if (EVP_PKEY_CTX_ctrl(ktri->pctx, -1, EVP_PKEY_OP_DECRYPT, EVP_PKEY_CTRL_CMS_DECRYPT, 0, ri) <= 0) { CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CTRL_ERROR); goto err; } - if (EVP_PKEY_decrypt(pctx, NULL, &eklen, + if (EVP_PKEY_decrypt(ktri->pctx, NULL, &eklen, ktri->encryptedKey->data, ktri->encryptedKey->length) <= 0) goto err; @@ -384,7 +458,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, goto err; } - if (EVP_PKEY_decrypt(pctx, ek, &eklen, + if (EVP_PKEY_decrypt(ktri->pctx, ek, &eklen, ktri->encryptedKey->data, ktri->encryptedKey->length) <= 0) { CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CMS_LIB); @@ -402,8 +476,10 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, ec->keylen = eklen; err: - if (pctx) - EVP_PKEY_CTX_free(pctx); + if (ktri->pctx) { + EVP_PKEY_CTX_free(ktri->pctx); + ktri->pctx = NULL; + } if (!ret && ek) OPENSSL_free(ek); @@ -745,12 +821,99 @@ int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) } } +int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) +{ + switch (ri->type) { + case CMS_RECIPINFO_TRANS: + return cms_RecipientInfo_ktri_encrypt(cms, ri); + + case CMS_RECIPINFO_AGREE: + return cms_RecipientInfo_kari_encrypt(cms, ri); + + case CMS_RECIPINFO_KEK: + return cms_RecipientInfo_kekri_encrypt(cms, ri); + break; + + case CMS_RECIPINFO_PASS: + return cms_RecipientInfo_pwri_crypt(cms, ri, 1); + break; + + default: + CMSerr(CMS_F_CMS_RECIPIENTINFO_ENCRYPT, + CMS_R_UNSUPPORTED_RECIPIENT_TYPE); + return 0; + } +} + +/* Check structures and fixup version numbers (if necessary) */ + +static void cms_env_set_originfo_version(CMS_EnvelopedData *env) +{ + CMS_OriginatorInfo *org = env->originatorInfo; + int i; + if (org == NULL) + return; + for (i = 0; i < sk_CMS_CertificateChoices_num(org->certificates); i++) { + CMS_CertificateChoices *cch; + cch = sk_CMS_CertificateChoices_value(org->certificates, i); + if (cch->type == CMS_CERTCHOICE_OTHER) { + env->version = 4; + return; + } else if (cch->type == CMS_CERTCHOICE_V2ACERT) { + if (env->version < 3) + env->version = 3; + } + } + + for (i = 0; i < sk_CMS_RevocationInfoChoice_num(org->crls); i++) { + CMS_RevocationInfoChoice *rch; + rch = sk_CMS_RevocationInfoChoice_value(org->crls, i); + if (rch->type == CMS_REVCHOICE_OTHER) { + env->version = 4; + return; + } + } +} + +static void cms_env_set_version(CMS_EnvelopedData *env) +{ + int i; + CMS_RecipientInfo *ri; + + /* + * Can't set version higher than 4 so if 4 or more already nothing to do. + */ + if (env->version >= 4) + return; + + cms_env_set_originfo_version(env); + + if (env->version >= 3) + return; + + for (i = 0; i < sk_CMS_RecipientInfo_num(env->recipientInfos); i++) { + ri = sk_CMS_RecipientInfo_value(env->recipientInfos, i); + if (ri->type == CMS_RECIPINFO_PASS || ri->type == CMS_RECIPINFO_OTHER) { + env->version = 3; + return; + } else if (ri->type != CMS_RECIPINFO_TRANS + || ri->d.ktri->version != 0) { + env->version = 2; + } + } + if (env->version == 2) + return; + if (env->originatorInfo || env->unprotectedAttrs) + env->version = 2; + env->version = 0; +} + BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) { CMS_EncryptedContentInfo *ec; STACK_OF(CMS_RecipientInfo) *rinfos; CMS_RecipientInfo *ri; - int i, r, ok = 0; + int i, ok = 0; BIO *ret; /* Get BIO first to set up key */ @@ -769,32 +932,13 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) for (i = 0; i < sk_CMS_RecipientInfo_num(rinfos); i++) { ri = sk_CMS_RecipientInfo_value(rinfos, i); - - switch (ri->type) { - case CMS_RECIPINFO_TRANS: - r = cms_RecipientInfo_ktri_encrypt(cms, ri); - break; - - case CMS_RECIPINFO_KEK: - r = cms_RecipientInfo_kekri_encrypt(cms, ri); - break; - - case CMS_RECIPINFO_PASS: - r = cms_RecipientInfo_pwri_crypt(cms, ri, 1); - break; - - default: - CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, - CMS_R_UNSUPPORTED_RECIPIENT_TYPE); - goto err; - } - - if (r <= 0) { + if (CMS_RecipientInfo_encrypt(cms, ri) <= 0) { CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, CMS_R_ERROR_SETTING_RECIPIENTINFO); goto err; } } + cms_env_set_version(cms->d.envelopedData); ok = 1; @@ -812,3 +956,19 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) return NULL; } + +/* + * Get RecipientInfo type (if any) supported by a key (public or private). To + * retain compatibility with previous behaviour if the ctrl value isn't + * supported we assume key transport. + */ +int cms_pkey_get_ri_type(EVP_PKEY *pk) +{ + if (pk->ameth && pk->ameth->pkey_ctrl) { + int i, r; + i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_RI_TYPE, 0, &r); + if (i > 0) + return r; + } + return CMS_RECIPINFO_TRANS; +} diff --git a/drivers/builtin_openssl2/crypto/cms/cms_err.c b/drivers/builtin_openssl2/crypto/cms/cms_err.c index faf2fccd3b..15572ea348 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_err.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_err.c @@ -1,6 +1,6 @@ /* crypto/cms/cms_err.c */ /* ==================================================================== - * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -110,6 +110,7 @@ static ERR_STRING_DATA CMS_str_functs[] = { {ERR_FUNC(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO), "cms_EnvelopedData_init_bio"}, {ERR_FUNC(CMS_F_CMS_ENVELOPED_DATA_INIT), "CMS_ENVELOPED_DATA_INIT"}, + {ERR_FUNC(CMS_F_CMS_ENV_ASN1_CTRL), "cms_env_asn1_ctrl"}, {ERR_FUNC(CMS_F_CMS_FINAL), "CMS_final"}, {ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES), "CMS_GET0_CERTIFICATE_CHOICES"}, @@ -124,6 +125,17 @@ static ERR_STRING_DATA CMS_str_functs[] = { "CMS_ReceiptRequest_create0"}, {ERR_FUNC(CMS_F_CMS_RECEIPT_VERIFY), "cms_Receipt_verify"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_DECRYPT), "CMS_RecipientInfo_decrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_ENCRYPT), "CMS_RecipientInfo_encrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT), + "cms_RecipientInfo_kari_encrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG), + "CMS_RecipientInfo_kari_get0_alg"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID), + "CMS_RecipientInfo_kari_get0_orig_id"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS), + "CMS_RecipientInfo_kari_get0_reks"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP), + "CMS_RecipientInfo_kari_orig_id_cmp"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT), "CMS_RECIPIENTINFO_KEKRI_DECRYPT"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT), @@ -150,6 +162,9 @@ static ERR_STRING_DATA CMS_str_functs[] = { "CMS_RecipientInfo_set0_password"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY), "CMS_RecipientInfo_set0_pkey"}, + {ERR_FUNC(CMS_F_CMS_SD_ASN1_CTRL), "CMS_SD_ASN1_CTRL"}, + {ERR_FUNC(CMS_F_CMS_SET1_IAS), "cms_set1_ias"}, + {ERR_FUNC(CMS_F_CMS_SET1_KEYID), "cms_set1_keyid"}, {ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"}, {ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"}, {ERR_FUNC(CMS_F_CMS_SIGN), "CMS_sign"}, @@ -221,6 +236,7 @@ static ERR_STRING_DATA CMS_str_reasons[] = { {ERR_REASON(CMS_R_NOT_A_SIGNED_RECEIPT), "not a signed receipt"}, {ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA), "not encrypted data"}, {ERR_REASON(CMS_R_NOT_KEK), "not kek"}, + {ERR_REASON(CMS_R_NOT_KEY_AGREEMENT), "not key agreement"}, {ERR_REASON(CMS_R_NOT_KEY_TRANSPORT), "not key transport"}, {ERR_REASON(CMS_R_NOT_PWRI), "not pwri"}, {ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE), diff --git a/drivers/builtin_openssl2/crypto/cms/cms_kari.c b/drivers/builtin_openssl2/crypto/cms/cms_kari.c new file mode 100644 index 0000000000..2cfcdb29cd --- /dev/null +++ b/drivers/builtin_openssl2/crypto/cms/cms_kari.c @@ -0,0 +1,465 @@ +/* crypto/cms/cms_kari.c */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" +#include "asn1_locl.h" + +DECLARE_ASN1_ITEM(CMS_KeyAgreeRecipientInfo) +DECLARE_ASN1_ITEM(CMS_RecipientEncryptedKey) +DECLARE_ASN1_ITEM(CMS_OriginatorPublicKey) +DECLARE_ASN1_ITEM(CMS_RecipientKeyIdentifier) + +/* Key Agreement Recipient Info (KARI) routines */ + +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm) +{ + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG, + CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + if (palg) + *palg = ri->d.kari->keyEncryptionAlgorithm; + if (pukm) + *pukm = ri->d.kari->ukm; + return 1; +} + +/* Retrieve recipient encrypted keys from a kari */ + +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri) +{ + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS, + CMS_R_NOT_KEY_AGREEMENT); + return NULL; + } + return ri->d.kari->recipientEncryptedKeys; +} + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno) +{ + CMS_OriginatorIdentifierOrKey *oik; + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID, + CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + oik = ri->d.kari->originator; + if (issuer) + *issuer = NULL; + if (sno) + *sno = NULL; + if (keyid) + *keyid = NULL; + if (pubalg) + *pubalg = NULL; + if (pubkey) + *pubkey = NULL; + if (oik->type == CMS_OIK_ISSUER_SERIAL) { + if (issuer) + *issuer = oik->d.issuerAndSerialNumber->issuer; + if (sno) + *sno = oik->d.issuerAndSerialNumber->serialNumber; + } else if (oik->type == CMS_OIK_KEYIDENTIFIER) { + if (keyid) + *keyid = oik->d.subjectKeyIdentifier; + } else if (oik->type == CMS_OIK_PUBKEY) { + if (pubalg) + *pubalg = oik->d.originatorKey->algorithm; + if (pubkey) + *pubkey = oik->d.originatorKey->publicKey; + } else + return 0; + return 1; +} + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert) +{ + CMS_OriginatorIdentifierOrKey *oik; + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP, + CMS_R_NOT_KEY_AGREEMENT); + return -2; + } + oik = ri->d.kari->originator; + if (oik->type == CMS_OIK_ISSUER_SERIAL) + return cms_ias_cert_cmp(oik->d.issuerAndSerialNumber, cert); + else if (oik->type == CMS_OIK_KEYIDENTIFIER) + return cms_keyid_cert_cmp(oik->d.subjectKeyIdentifier, cert); + return -1; +} + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno) +{ + CMS_KeyAgreeRecipientIdentifier *rid = rek->rid; + if (rid->type == CMS_REK_ISSUER_SERIAL) { + if (issuer) + *issuer = rid->d.issuerAndSerialNumber->issuer; + if (sno) + *sno = rid->d.issuerAndSerialNumber->serialNumber; + if (keyid) + *keyid = NULL; + if (tm) + *tm = NULL; + if (other) + *other = NULL; + } else if (rid->type == CMS_REK_KEYIDENTIFIER) { + if (keyid) + *keyid = rid->d.rKeyId->subjectKeyIdentifier; + if (tm) + *tm = rid->d.rKeyId->date; + if (other) + *other = rid->d.rKeyId->other; + if (issuer) + *issuer = NULL; + if (sno) + *sno = NULL; + } else + return 0; + return 1; +} + +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert) +{ + CMS_KeyAgreeRecipientIdentifier *rid = rek->rid; + if (rid->type == CMS_REK_ISSUER_SERIAL) + return cms_ias_cert_cmp(rid->d.issuerAndSerialNumber, cert); + else if (rid->type == CMS_REK_KEYIDENTIFIER) + return cms_keyid_cert_cmp(rid->d.rKeyId->subjectKeyIdentifier, cert); + else + return -1; +} + +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk) +{ + EVP_PKEY_CTX *pctx; + CMS_KeyAgreeRecipientInfo *kari = ri->d.kari; + if (kari->pctx) { + EVP_PKEY_CTX_free(kari->pctx); + kari->pctx = NULL; + } + if (!pk) + return 1; + pctx = EVP_PKEY_CTX_new(pk, NULL); + if (!pctx || !EVP_PKEY_derive_init(pctx)) + goto err; + kari->pctx = pctx; + return 1; + err: + if (pctx) + EVP_PKEY_CTX_free(pctx); + return 0; +} + +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri) +{ + if (ri->type == CMS_RECIPINFO_AGREE) + return &ri->d.kari->ctx; + return NULL; +} + +/* + * Derive KEK and decrypt/encrypt with it to produce either the original CEK + * or the encrypted CEK. + */ + +static int cms_kek_cipher(unsigned char **pout, size_t *poutlen, + const unsigned char *in, size_t inlen, + CMS_KeyAgreeRecipientInfo *kari, int enc) +{ + /* Key encryption key */ + unsigned char kek[EVP_MAX_KEY_LENGTH]; + size_t keklen; + int rv = 0; + unsigned char *out = NULL; + int outlen; + keklen = EVP_CIPHER_CTX_key_length(&kari->ctx); + if (keklen > EVP_MAX_KEY_LENGTH) + return 0; + /* Derive KEK */ + if (EVP_PKEY_derive(kari->pctx, kek, &keklen) <= 0) + goto err; + /* Set KEK in context */ + if (!EVP_CipherInit_ex(&kari->ctx, NULL, NULL, kek, NULL, enc)) + goto err; + /* obtain output length of ciphered key */ + if (!EVP_CipherUpdate(&kari->ctx, NULL, &outlen, in, inlen)) + goto err; + out = OPENSSL_malloc(outlen); + if (!out) + goto err; + if (!EVP_CipherUpdate(&kari->ctx, out, &outlen, in, inlen)) + goto err; + *pout = out; + *poutlen = (size_t)outlen; + rv = 1; + + err: + OPENSSL_cleanse(kek, keklen); + if (!rv && out) + OPENSSL_free(out); + EVP_CIPHER_CTX_cleanup(&kari->ctx); + EVP_PKEY_CTX_free(kari->pctx); + kari->pctx = NULL; + return rv; +} + +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek) +{ + int rv = 0; + unsigned char *enckey = NULL, *cek = NULL; + size_t enckeylen; + size_t ceklen; + CMS_EncryptedContentInfo *ec; + enckeylen = rek->encryptedKey->length; + enckey = rek->encryptedKey->data; + /* Setup all parameters to derive KEK */ + if (!cms_env_asn1_ctrl(ri, 1)) + goto err; + /* Attempt to decrypt CEK */ + if (!cms_kek_cipher(&cek, &ceklen, enckey, enckeylen, ri->d.kari, 0)) + goto err; + ec = cms->d.envelopedData->encryptedContentInfo; + if (ec->key) { + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + } + ec->key = cek; + ec->keylen = ceklen; + cek = NULL; + rv = 1; + err: + if (cek) + OPENSSL_free(cek); + return rv; +} + +/* Create ephemeral key and initialise context based on it */ +static int cms_kari_create_ephemeral_key(CMS_KeyAgreeRecipientInfo *kari, + EVP_PKEY *pk) +{ + EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY *ekey = NULL; + int rv = 0; + pctx = EVP_PKEY_CTX_new(pk, NULL); + if (!pctx) + goto err; + if (EVP_PKEY_keygen_init(pctx) <= 0) + goto err; + if (EVP_PKEY_keygen(pctx, &ekey) <= 0) + goto err; + EVP_PKEY_CTX_free(pctx); + pctx = EVP_PKEY_CTX_new(ekey, NULL); + if (!pctx) + goto err; + if (EVP_PKEY_derive_init(pctx) <= 0) + goto err; + kari->pctx = pctx; + rv = 1; + err: + if (!rv && pctx) + EVP_PKEY_CTX_free(pctx); + if (ekey) + EVP_PKEY_free(ekey); + return rv; +} + +/* Initialise a ktri based on passed certificate and key */ + +int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags) +{ + CMS_KeyAgreeRecipientInfo *kari; + CMS_RecipientEncryptedKey *rek = NULL; + + ri->d.kari = M_ASN1_new_of(CMS_KeyAgreeRecipientInfo); + if (!ri->d.kari) + return 0; + ri->type = CMS_RECIPINFO_AGREE; + + kari = ri->d.kari; + kari->version = 3; + + rek = M_ASN1_new_of(CMS_RecipientEncryptedKey); + if (!sk_CMS_RecipientEncryptedKey_push(kari->recipientEncryptedKeys, rek)) { + M_ASN1_free_of(rek, CMS_RecipientEncryptedKey); + return 0; + } + + if (flags & CMS_USE_KEYID) { + rek->rid->type = CMS_REK_KEYIDENTIFIER; + rek->rid->d.rKeyId = M_ASN1_new_of(CMS_RecipientKeyIdentifier); + if (rek->rid->d.rKeyId == NULL) + return 0; + if (!cms_set1_keyid(&rek->rid->d.rKeyId->subjectKeyIdentifier, recip)) + return 0; + } else { + rek->rid->type = CMS_REK_ISSUER_SERIAL; + if (!cms_set1_ias(&rek->rid->d.issuerAndSerialNumber, recip)) + return 0; + } + + /* Create ephemeral key */ + if (!cms_kari_create_ephemeral_key(kari, pk)) + return 0; + + CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY); + rek->pkey = pk; + return 1; +} + +static int cms_wrap_init(CMS_KeyAgreeRecipientInfo *kari, + const EVP_CIPHER *cipher) +{ + EVP_CIPHER_CTX *ctx = &kari->ctx; + const EVP_CIPHER *kekcipher; + int keylen = EVP_CIPHER_key_length(cipher); + /* If a suitable wrap algorithm is already set nothing to do */ + kekcipher = EVP_CIPHER_CTX_cipher(ctx); + + if (kekcipher) { + if (EVP_CIPHER_CTX_mode(ctx) != EVP_CIPH_WRAP_MODE) + return 0; + return 1; + } + /* + * Pick a cipher based on content encryption cipher. If it is DES3 use + * DES3 wrap otherwise use AES wrap similar to key size. + */ + if (EVP_CIPHER_type(cipher) == NID_des_ede3_cbc) + kekcipher = EVP_des_ede3_wrap(); + else if (keylen <= 16) + kekcipher = EVP_aes_128_wrap(); + else if (keylen <= 24) + kekcipher = EVP_aes_192_wrap(); + else + kekcipher = EVP_aes_256_wrap(); + return EVP_EncryptInit_ex(ctx, kekcipher, NULL, NULL, NULL); +} + +/* Encrypt content key in key agreement recipient info */ + +int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) +{ + CMS_KeyAgreeRecipientInfo *kari; + CMS_EncryptedContentInfo *ec; + CMS_RecipientEncryptedKey *rek; + STACK_OF(CMS_RecipientEncryptedKey) *reks; + int i; + + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT, CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + kari = ri->d.kari; + reks = kari->recipientEncryptedKeys; + ec = cms->d.envelopedData->encryptedContentInfo; + /* Initialise wrap algorithm parameters */ + if (!cms_wrap_init(kari, ec->cipher)) + return 0; + /* + * If no orignator key set up initialise for ephemeral key the public key + * ASN1 structure will set the actual public key value. + */ + if (kari->originator->type == -1) { + CMS_OriginatorIdentifierOrKey *oik = kari->originator; + oik->type = CMS_OIK_PUBKEY; + oik->d.originatorKey = M_ASN1_new_of(CMS_OriginatorPublicKey); + if (!oik->d.originatorKey) + return 0; + } + /* Initialise KDF algorithm */ + if (!cms_env_asn1_ctrl(ri, 0)) + return 0; + /* For each rek, derive KEK, encrypt CEK */ + for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) { + unsigned char *enckey; + size_t enckeylen; + rek = sk_CMS_RecipientEncryptedKey_value(reks, i); + if (EVP_PKEY_derive_set_peer(kari->pctx, rek->pkey) <= 0) + return 0; + if (!cms_kek_cipher(&enckey, &enckeylen, ec->key, ec->keylen, + kari, 1)) + return 0; + ASN1_STRING_set0(rek->encryptedKey, enckey, enckeylen); + } + + return 1; + +} diff --git a/drivers/builtin_openssl2/crypto/cms/cms_lcl.h b/drivers/builtin_openssl2/crypto/cms/cms_lcl.h index 4f4c4c7434..20f2c25f5a 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_lcl.h +++ b/drivers/builtin_openssl2/crypto/cms/cms_lcl.h @@ -84,11 +84,9 @@ typedef struct CMS_KeyTransRecipientInfo_st CMS_KeyTransRecipientInfo; typedef struct CMS_OriginatorPublicKey_st CMS_OriginatorPublicKey; typedef struct CMS_OriginatorIdentifierOrKey_st CMS_OriginatorIdentifierOrKey; typedef struct CMS_KeyAgreeRecipientInfo_st CMS_KeyAgreeRecipientInfo; -typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; typedef struct CMS_RecipientKeyIdentifier_st CMS_RecipientKeyIdentifier; typedef struct CMS_KeyAgreeRecipientIdentifier_st CMS_KeyAgreeRecipientIdentifier; -typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; typedef struct CMS_KEKIdentifier_st CMS_KEKIdentifier; typedef struct CMS_KEKRecipientInfo_st CMS_KEKRecipientInfo; typedef struct CMS_PasswordRecipientInfo_st CMS_PasswordRecipientInfo; @@ -138,6 +136,9 @@ struct CMS_SignerInfo_st { /* Signing certificate and key */ X509 *signer; EVP_PKEY *pkey; + /* Digest and public key context for alternative parameters */ + EVP_MD_CTX mctx; + EVP_PKEY_CTX *pctx; }; struct CMS_SignerIdentifier_st { @@ -194,6 +195,8 @@ struct CMS_KeyTransRecipientInfo_st { /* Recipient Key and cert */ X509 *recip; EVP_PKEY *pkey; + /* Public key context for this operation */ + EVP_PKEY_CTX *pctx; }; struct CMS_KeyAgreeRecipientInfo_st { @@ -202,6 +205,10 @@ struct CMS_KeyAgreeRecipientInfo_st { ASN1_OCTET_STRING *ukm; X509_ALGOR *keyEncryptionAlgorithm; STACK_OF(CMS_RecipientEncryptedKey) *recipientEncryptedKeys; + /* Public key context associated with current operation */ + EVP_PKEY_CTX *pctx; + /* Cipher context for CEK wrapping */ + EVP_CIPHER_CTX ctx; }; struct CMS_OriginatorIdentifierOrKey_st { @@ -221,6 +228,8 @@ struct CMS_OriginatorPublicKey_st { struct CMS_RecipientEncryptedKey_st { CMS_KeyAgreeRecipientIdentifier *rid; ASN1_OCTET_STRING *encryptedKey; + /* Public key associated with this recipient */ + EVP_PKEY *pkey; }; struct CMS_KeyAgreeRecipientIdentifier_st { @@ -394,6 +403,13 @@ DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber) # define CMS_RECIPINFO_ISSUER_SERIAL 0 # define CMS_RECIPINFO_KEYIDENTIFIER 1 +# define CMS_REK_ISSUER_SERIAL 0 +# define CMS_REK_KEYIDENTIFIER 1 + +# define CMS_OIK_ISSUER_SERIAL 0 +# define CMS_OIK_KEYIDENTIFIER 1 +# define CMS_OIK_PUBKEY 2 + BIO *cms_content_bio(CMS_ContentInfo *cms); CMS_ContentInfo *cms_Data_create(void); @@ -420,6 +436,11 @@ BIO *cms_DigestAlgorithm_init_bio(X509_ALGOR *digestAlgorithm); int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain, X509_ALGOR *mdalg); +int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 *cert); +int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert); +int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert); +int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert); + BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec); BIO *cms_EncryptedData_init_bio(CMS_ContentInfo *cms); int cms_EncryptedContent_init(CMS_EncryptedContentInfo *ec, @@ -432,6 +453,13 @@ ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si); BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms); CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms); +int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd); +int cms_pkey_get_ri_type(EVP_PKEY *pk); +/* KARI routines */ +int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags); +int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri); /* PWRI routines */ int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, diff --git a/drivers/builtin_openssl2/crypto/cms/cms_lib.c b/drivers/builtin_openssl2/crypto/cms/cms_lib.c index e938461680..d6cb60d02d 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_lib.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_lib.c @@ -53,7 +53,7 @@ */ #include -#include +#include #include #include #include @@ -593,3 +593,60 @@ STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms) } return crls; } + +int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 *cert) +{ + int ret; + ret = X509_NAME_cmp(ias->issuer, X509_get_issuer_name(cert)); + if (ret) + return ret; + return ASN1_INTEGER_cmp(ias->serialNumber, X509_get_serialNumber(cert)); +} + +int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert) +{ + X509_check_purpose(cert, -1, -1); + if (!cert->skid) + return -1; + return ASN1_OCTET_STRING_cmp(keyid, cert->skid); +} + +int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert) +{ + CMS_IssuerAndSerialNumber *ias; + ias = M_ASN1_new_of(CMS_IssuerAndSerialNumber); + if (!ias) + goto err; + if (!X509_NAME_set(&ias->issuer, X509_get_issuer_name(cert))) + goto err; + if (!ASN1_STRING_copy(ias->serialNumber, X509_get_serialNumber(cert))) + goto err; + if (*pias) + M_ASN1_free_of(*pias, CMS_IssuerAndSerialNumber); + *pias = ias; + return 1; + err: + if (ias) + M_ASN1_free_of(ias, CMS_IssuerAndSerialNumber); + CMSerr(CMS_F_CMS_SET1_IAS, ERR_R_MALLOC_FAILURE); + return 0; +} + +int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert) +{ + ASN1_OCTET_STRING *keyid = NULL; + X509_check_purpose(cert, -1, -1); + if (!cert->skid) { + CMSerr(CMS_F_CMS_SET1_KEYID, CMS_R_CERTIFICATE_HAS_NO_KEYID); + return 0; + } + keyid = ASN1_STRING_dup(cert->skid); + if (!keyid) { + CMSerr(CMS_F_CMS_SET1_KEYID, ERR_R_MALLOC_FAILURE); + return 0; + } + if (*pkeyid) + ASN1_OCTET_STRING_free(*pkeyid); + *pkeyid = keyid; + return 1; +} diff --git a/drivers/builtin_openssl2/crypto/cms/cms_sd.c b/drivers/builtin_openssl2/crypto/cms/cms_sd.c index 6daa2627db..a41aca8e12 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_sd.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_sd.c @@ -55,6 +55,7 @@ #include "cryptlib.h" #include #include +#include #include #include #include @@ -197,27 +198,13 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type) { switch (type) { case CMS_SIGNERINFO_ISSUER_SERIAL: - sid->d.issuerAndSerialNumber = - M_ASN1_new_of(CMS_IssuerAndSerialNumber); - if (!sid->d.issuerAndSerialNumber) - goto merr; - if (!X509_NAME_set(&sid->d.issuerAndSerialNumber->issuer, - X509_get_issuer_name(cert))) - goto merr; - if (!ASN1_STRING_copy(sid->d.issuerAndSerialNumber->serialNumber, - X509_get_serialNumber(cert))) - goto merr; + if (!cms_set1_ias(&sid->d.issuerAndSerialNumber, cert)) + return 0; break; case CMS_SIGNERINFO_KEYIDENTIFIER: - if (!cert->skid) { - CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, - CMS_R_CERTIFICATE_HAS_NO_KEYID); + if (!cms_set1_keyid(&sid->d.subjectKeyIdentifier, cert)) return 0; - } - sid->d.subjectKeyIdentifier = ASN1_STRING_dup(cert->skid); - if (!sid->d.subjectKeyIdentifier) - goto merr; break; default: @@ -228,11 +215,6 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type) sid->type = type; return 1; - - merr: - CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, ERR_R_MALLOC_FAILURE); - return 0; - } int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, @@ -255,23 +237,32 @@ int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, int cms_SignerIdentifier_cert_cmp(CMS_SignerIdentifier *sid, X509 *cert) { - int ret; - if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) { - ret = X509_NAME_cmp(sid->d.issuerAndSerialNumber->issuer, - X509_get_issuer_name(cert)); - if (ret) - return ret; - return ASN1_INTEGER_cmp(sid->d.issuerAndSerialNumber->serialNumber, - X509_get_serialNumber(cert)); - } else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) { - X509_check_purpose(cert, -1, -1); - if (!cert->skid) - return -1; - return ASN1_OCTET_STRING_cmp(sid->d.subjectKeyIdentifier, cert->skid); - } else + if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) + return cms_ias_cert_cmp(sid->d.issuerAndSerialNumber, cert); + else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) + return cms_keyid_cert_cmp(sid->d.subjectKeyIdentifier, cert); + else return -1; } +static int cms_sd_asn1_ctrl(CMS_SignerInfo *si, int cmd) +{ + EVP_PKEY *pkey = si->pkey; + int i; + if (!pkey->ameth || !pkey->ameth->pkey_ctrl) + return 1; + i = pkey->ameth->pkey_ctrl(pkey, ASN1_PKEY_CTRL_CMS_SIGN, cmd, si); + if (i == -2) { + CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + if (i <= 0) { + CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_CTRL_FAILURE); + return 0; + } + return 1; +} + CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, X509 *signer, EVP_PKEY *pk, const EVP_MD *md, unsigned int flags) @@ -298,6 +289,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, si->pkey = pk; si->signer = signer; + EVP_MD_CTX_init(&si->mctx); + si->pctx = NULL; if (flags & CMS_USE_KEYID) { si->version = 3; @@ -350,19 +343,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, } } - if (pk->ameth && pk->ameth->pkey_ctrl) { - i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_SIGN, 0, si); - if (i == -2) { - CMSerr(CMS_F_CMS_ADD1_SIGNER, - CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); - goto err; - } - if (i <= 0) { - CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_CTRL_FAILURE); - goto err; - } - } - + if (!(flags & CMS_KEY_PARAM) && !cms_sd_asn1_ctrl(si, 0)) + goto err; if (!(flags & CMS_NOATTR)) { /* * Initialialize signed attributes strutucture so other attributes @@ -386,7 +368,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, if (flags & CMS_REUSE_DIGEST) { if (!cms_copy_messageDigest(cms, si)) goto err; - if (!(flags & CMS_PARTIAL) && !CMS_SignerInfo_sign(si)) + if (!(flags & (CMS_PARTIAL | CMS_KEY_PARAM)) && + !CMS_SignerInfo_sign(si)) goto err; } } @@ -397,6 +380,20 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, goto merr; } + if (flags & CMS_KEY_PARAM) { + if (flags & CMS_NOATTR) { + si->pctx = EVP_PKEY_CTX_new(si->pkey, NULL); + if (!si->pctx) + goto err; + if (EVP_PKEY_sign_init(si->pctx) <= 0) + goto err; + if (EVP_PKEY_CTX_set_signature_md(si->pctx, md) <= 0) + goto err; + } else if (EVP_DigestSignInit(&si->mctx, &si->pctx, md, NULL, pk) <= + 0) + goto err; + } + if (!sd->signerInfos) sd->signerInfos = sk_CMS_SignerInfo_new_null(); if (!sd->signerInfos || !sk_CMS_SignerInfo_push(sd->signerInfos, si)) @@ -443,6 +440,16 @@ static int cms_add1_signingTime(CMS_SignerInfo *si, ASN1_TIME *t) } +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si) +{ + return si->pctx; +} + +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si) +{ + return &si->mctx; +} + STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms) { CMS_SignedData *sd; @@ -561,11 +568,17 @@ void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, *psig = si->signatureAlgorithm; } +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si) +{ + return si->signature; +} + static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, CMS_SignerInfo *si, BIO *chain) { EVP_MD_CTX mctx; int r = 0; + EVP_PKEY_CTX *pctx = NULL; EVP_MD_CTX_init(&mctx); if (!si->pkey) { @@ -575,6 +588,9 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) goto err; + /* Set SignerInfo algortihm details if we used custom parametsr */ + if (si->pctx && !cms_sd_asn1_ctrl(si, 0)) + goto err; /* * If any signed attributes calculate and add messageDigest attribute @@ -596,6 +612,23 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, goto err; if (!CMS_SignerInfo_sign(si)) goto err; + } else if (si->pctx) { + unsigned char *sig; + size_t siglen; + unsigned char md[EVP_MAX_MD_SIZE]; + unsigned int mdlen; + pctx = si->pctx; + if (!EVP_DigestFinal_ex(&mctx, md, &mdlen)) + goto err; + siglen = EVP_PKEY_size(si->pkey); + sig = OPENSSL_malloc(siglen); + if (!sig) { + CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + if (EVP_PKEY_sign(pctx, sig, &siglen, md, mdlen) <= 0) + goto err; + ASN1_STRING_set0(si->signature, sig, siglen); } else { unsigned char *sig; unsigned int siglen; @@ -616,6 +649,8 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, err: EVP_MD_CTX_cleanup(&mctx); + if (pctx) + EVP_PKEY_CTX_free(pctx); return r; } @@ -637,7 +672,7 @@ int cms_SignedData_final(CMS_ContentInfo *cms, BIO *chain) int CMS_SignerInfo_sign(CMS_SignerInfo *si) { - EVP_MD_CTX mctx; + EVP_MD_CTX *mctx = &si->mctx; EVP_PKEY_CTX *pctx; unsigned char *abuf = NULL; int alen; @@ -648,15 +683,18 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) if (md == NULL) return 0; - EVP_MD_CTX_init(&mctx); - if (CMS_signed_get_attr_by_NID(si, NID_pkcs9_signingTime, -1) < 0) { if (!cms_add1_signingTime(si, NULL)) goto err; } - if (EVP_DigestSignInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) - goto err; + if (si->pctx) + pctx = si->pctx; + else { + EVP_MD_CTX_init(mctx); + if (EVP_DigestSignInit(mctx, &pctx, md, NULL, si->pkey) <= 0) + goto err; + } if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, EVP_PKEY_CTRL_CMS_SIGN, 0, si) <= 0) { @@ -668,15 +706,15 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) ASN1_ITEM_rptr(CMS_Attributes_Sign)); if (!abuf) goto err; - if (EVP_DigestSignUpdate(&mctx, abuf, alen) <= 0) + if (EVP_DigestSignUpdate(mctx, abuf, alen) <= 0) goto err; - if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0) + if (EVP_DigestSignFinal(mctx, NULL, &siglen) <= 0) goto err; OPENSSL_free(abuf); abuf = OPENSSL_malloc(siglen); if (!abuf) goto err; - if (EVP_DigestSignFinal(&mctx, abuf, &siglen) <= 0) + if (EVP_DigestSignFinal(mctx, abuf, &siglen) <= 0) goto err; if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, @@ -685,7 +723,7 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) goto err; } - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); ASN1_STRING_set0(si->signature, abuf, siglen); @@ -694,15 +732,14 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) err: if (abuf) OPENSSL_free(abuf); - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); return 0; } int CMS_SignerInfo_verify(CMS_SignerInfo *si) { - EVP_MD_CTX mctx; - EVP_PKEY_CTX *pctx; + EVP_MD_CTX *mctx = &si->mctx; unsigned char *abuf = NULL; int alen, r = -1; const EVP_MD *md = NULL; @@ -715,26 +752,29 @@ int CMS_SignerInfo_verify(CMS_SignerInfo *si) md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm); if (md == NULL) return -1; - EVP_MD_CTX_init(&mctx); - if (EVP_DigestVerifyInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) + EVP_MD_CTX_init(mctx); + if (EVP_DigestVerifyInit(mctx, &si->pctx, md, NULL, si->pkey) <= 0) + goto err; + + if (!cms_sd_asn1_ctrl(si, 1)) goto err; alen = ASN1_item_i2d((ASN1_VALUE *)si->signedAttrs, &abuf, ASN1_ITEM_rptr(CMS_Attributes_Verify)); if (!abuf) goto err; - r = EVP_DigestVerifyUpdate(&mctx, abuf, alen); + r = EVP_DigestVerifyUpdate(mctx, abuf, alen); OPENSSL_free(abuf); if (r <= 0) { r = -1; goto err; } - r = EVP_DigestVerifyFinal(&mctx, + r = EVP_DigestVerifyFinal(mctx, si->signature->data, si->signature->length); if (r <= 0) CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY, CMS_R_VERIFICATION_FAILURE); err: - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); return r; } @@ -773,7 +813,10 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) { ASN1_OCTET_STRING *os = NULL; EVP_MD_CTX mctx; + EVP_PKEY_CTX *pkctx = NULL; int r = -1; + unsigned char mval[EVP_MAX_MD_SIZE]; + unsigned int mlen; EVP_MD_CTX_init(&mctx); /* If we have any signed attributes look for messageDigest value */ if (CMS_signed_get_attr_count(si) >= 0) { @@ -790,16 +833,15 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) goto err; + if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_UNABLE_TO_FINALIZE_CONTEXT); + goto err; + } + /* If messageDigest found compare it */ if (os) { - unsigned char mval[EVP_MAX_MD_SIZE]; - unsigned int mlen; - if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) { - CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, - CMS_R_UNABLE_TO_FINALIZE_CONTEXT); - goto err; - } if (mlen != (unsigned int)os->length) { CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH); @@ -813,8 +855,19 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) } else r = 1; } else { - r = EVP_VerifyFinal(&mctx, si->signature->data, - si->signature->length, si->pkey); + const EVP_MD *md = EVP_MD_CTX_md(&mctx); + pkctx = EVP_PKEY_CTX_new(si->pkey, NULL); + if (pkctx == NULL) + goto err; + if (EVP_PKEY_verify_init(pkctx) <= 0) + goto err; + if (EVP_PKEY_CTX_set_signature_md(pkctx, md) <= 0) + goto err; + si->pctx = pkctx; + if (!cms_sd_asn1_ctrl(si, 1)) + goto err; + r = EVP_PKEY_verify(pkctx, si->signature->data, + si->signature->length, mval, mlen); if (r <= 0) { CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, CMS_R_VERIFICATION_FAILURE); @@ -823,6 +876,8 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) } err: + if (pkctx) + EVP_PKEY_CTX_free(pkctx); EVP_MD_CTX_cleanup(&mctx); return r; diff --git a/drivers/builtin_openssl2/crypto/cms/cms_smime.c b/drivers/builtin_openssl2/crypto/cms/cms_smime.c index f45693ac20..07e3472e10 100644 --- a/drivers/builtin_openssl2/crypto/cms/cms_smime.c +++ b/drivers/builtin_openssl2/crypto/cms/cms_smime.c @@ -59,6 +59,7 @@ #include #include #include "cms_lcl.h" +#include "asn1_locl.h" static int cms_copy_content(BIO *out, BIO *in, unsigned int flags) { @@ -373,7 +374,7 @@ int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, tmpin = BIO_new_mem_buf(ptr, len); if (tmpin == NULL) { CMSerr(CMS_F_CMS_VERIFY, ERR_R_MALLOC_FAILURE); - return 0; + goto err2; } } else tmpin = dcont; @@ -404,6 +405,7 @@ int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, else BIO_free_all(cmsbio); + err2: if (cms_certs) sk_X509_pop_free(cms_certs, X509_free); if (crls) @@ -567,25 +569,63 @@ CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *data, return NULL; } +static int cms_kari_set1_pkey(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, + EVP_PKEY *pk, X509 *cert) +{ + int i; + STACK_OF(CMS_RecipientEncryptedKey) *reks; + CMS_RecipientEncryptedKey *rek; + reks = CMS_RecipientInfo_kari_get0_reks(ri); + if (!cert) + return 0; + for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) { + int rv; + rek = sk_CMS_RecipientEncryptedKey_value(reks, i); + if (CMS_RecipientEncryptedKey_cert_cmp(rek, cert)) + continue; + CMS_RecipientInfo_kari_set0_pkey(ri, pk); + rv = CMS_RecipientInfo_kari_decrypt(cms, ri, rek); + CMS_RecipientInfo_kari_set0_pkey(ri, NULL); + if (rv > 0) + return 1; + return -1; + } + return 0; +} + int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) { STACK_OF(CMS_RecipientInfo) *ris; CMS_RecipientInfo *ri; - int i, r; - int debug = 0, ri_match = 0; + int i, r, ri_type; + int debug = 0, match_ri = 0; ris = CMS_get0_RecipientInfos(cms); if (ris) debug = cms->d.envelopedData->encryptedContentInfo->debug; + ri_type = cms_pkey_get_ri_type(pk); + if (ri_type == CMS_RECIPINFO_NONE) { + CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) { ri = sk_CMS_RecipientInfo_value(ris, i); - if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS) + if (CMS_RecipientInfo_type(ri) != ri_type) continue; - ri_match = 1; + match_ri = 1; + if (ri_type == CMS_RECIPINFO_AGREE) { + r = cms_kari_set1_pkey(cms, ri, pk, cert); + if (r > 0) + return 1; + if (r < 0) + return 0; + } /* * If we have a cert try matching RecipientInfo otherwise try them * all. */ - if (!cert || (CMS_RecipientInfo_ktri_cert_cmp(ri, cert) == 0)) { + else if (!cert || !CMS_RecipientInfo_ktri_cert_cmp(ri, cert)) { CMS_RecipientInfo_set0_pkey(ri, pk); r = CMS_RecipientInfo_decrypt(cms, ri); CMS_RecipientInfo_set0_pkey(ri, NULL); @@ -613,7 +653,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) } } /* If no cert and not debugging always return success */ - if (ri_match && !cert && !debug) { + if (match_ri && !cert && !debug) { ERR_clear_error(); return 1; } diff --git a/drivers/builtin_openssl2/crypto/conf/keysets.pl b/drivers/builtin_openssl2/crypto/conf/keysets.pl deleted file mode 100644 index 50ed67fa52..0000000000 --- a/drivers/builtin_openssl2/crypto/conf/keysets.pl +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/local/bin/perl - -$NUMBER=0x01; -$UPPER=0x02; -$LOWER=0x04; -$UNDER=0x100; -$PUNCTUATION=0x200; -$WS=0x10; -$ESC=0x20; -$QUOTE=0x40; -$DQUOTE=0x400; -$COMMENT=0x80; -$FCOMMENT=0x800; -$EOF=0x08; -$HIGHBIT=0x1000; - -foreach (0 .. 255) - { - $v=0; - $c=sprintf("%c",$_); - $v|=$NUMBER if ($c =~ /[0-9]/); - $v|=$UPPER if ($c =~ /[A-Z]/); - $v|=$LOWER if ($c =~ /[a-z]/); - $v|=$UNDER if ($c =~ /_/); - $v|=$PUNCTUATION if ($c =~ /[!\.%&\*\+,\/;\?\@\^\~\|-]/); - $v|=$WS if ($c =~ /[ \t\r\n]/); - $v|=$ESC if ($c =~ /\\/); - $v|=$QUOTE if ($c =~ /['`"]/); # for emacs: "`'}/) - $v|=$COMMENT if ($c =~ /\#/); - $v|=$EOF if ($c =~ /\0/); - $v|=$HIGHBIT if ($c =~/[\x80-\xff]/); - - push(@V_def,$v); - } - -foreach (0 .. 255) - { - $v=0; - $c=sprintf("%c",$_); - $v|=$NUMBER if ($c =~ /[0-9]/); - $v|=$UPPER if ($c =~ /[A-Z]/); - $v|=$LOWER if ($c =~ /[a-z]/); - $v|=$UNDER if ($c =~ /_/); - $v|=$PUNCTUATION if ($c =~ /[!\.%&\*\+,\/;\?\@\^\~\|-]/); - $v|=$WS if ($c =~ /[ \t\r\n]/); - $v|=$DQUOTE if ($c =~ /["]/); # for emacs: "}/) - $v|=$FCOMMENT if ($c =~ /;/); - $v|=$EOF if ($c =~ /\0/); - $v|=$HIGHBIT if ($c =~/[\x80-\xff]/); - - push(@V_w32,$v); - } - -print <<"EOF"; -/* crypto/conf/conf_def.h */ -/* Copyright (C) 1995-1998 Eric Young (eay\@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay\@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh\@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay\@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh\@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* THIS FILE WAS AUTOMAGICALLY GENERATED! - Please modify and use keysets.pl to regenerate it. */ - -#define CONF_NUMBER $NUMBER -#define CONF_UPPER $UPPER -#define CONF_LOWER $LOWER -#define CONF_UNDER $UNDER -#define CONF_PUNCTUATION $PUNCTUATION -#define CONF_WS $WS -#define CONF_ESC $ESC -#define CONF_QUOTE $QUOTE -#define CONF_DQUOTE $DQUOTE -#define CONF_COMMENT $COMMENT -#define CONF_FCOMMENT $FCOMMENT -#define CONF_EOF $EOF -#define CONF_HIGHBIT $HIGHBIT -#define CONF_ALPHA (CONF_UPPER|CONF_LOWER) -#define CONF_ALPHA_NUMERIC (CONF_ALPHA|CONF_NUMBER|CONF_UNDER) -#define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \\ - CONF_PUNCTUATION) - -#define KEYTYPES(c) ((unsigned short *)((c)->meth_data)) -#ifndef CHARSET_EBCDIC -#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC) -#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\ - (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE) -#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE) -#define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT) - -#else /*CHARSET_EBCDIC*/ - -#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT) -#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT) -#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF) -#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC) -#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER) -#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS) -#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC) -#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\ - (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT) -#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE) -#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE) -#define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT) -#endif /*CHARSET_EBCDIC*/ - -EOF - -print "static unsigned short CONF_type_default[256]={"; - -for ($i=0; $i<256; $i++) - { - print "\n\t" if ($i % 8) == 0; - printf "0x%04X,",$V_def[$i]; - } - -print "\n\t};\n\n"; - -print "static unsigned short CONF_type_win32[256]={"; - -for ($i=0; $i<256; $i++) - { - print "\n\t" if ($i % 8) == 0; - printf "0x%04X,",$V_w32[$i]; - } - -print "\n\t};\n\n"; diff --git a/drivers/builtin_openssl2/crypto/conf/test.c b/drivers/builtin_openssl2/crypto/conf/test.c deleted file mode 100644 index cc1efcca85..0000000000 --- a/drivers/builtin_openssl2/crypto/conf/test.c +++ /dev/null @@ -1,97 +0,0 @@ -/* crypto/conf/test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#include - -main() -{ - LHASH *conf; - long eline; - char *s, *s2; - -#ifdef USE_WIN32 - CONF_set_default_method(CONF_WIN32); -#endif - conf = CONF_load(NULL, "ssleay.cnf", &eline); - if (conf == NULL) { - ERR_load_crypto_strings(); - printf("unable to load configuration, line %ld\n", eline); - ERR_print_errors_fp(stderr); - exit(1); - } - lh_stats(conf, stdout); - lh_node_stats(conf, stdout); - lh_node_usage_stats(conf, stdout); - - s = CONF_get_string(conf, NULL, "init2"); - printf("init2=%s\n", (s == NULL) ? "NULL" : s); - - s = CONF_get_string(conf, NULL, "cipher1"); - printf("cipher1=%s\n", (s == NULL) ? "NULL" : s); - - s = CONF_get_string(conf, "s_client", "cipher1"); - printf("s_client:cipher1=%s\n", (s == NULL) ? "NULL" : s); - - printf("---------------------------- DUMP ------------------------\n"); - CONF_dump_fp(conf, stdout); - - exit(0); -} diff --git a/drivers/builtin_openssl2/crypto/cryptlib.c b/drivers/builtin_openssl2/crypto/cryptlib.c index eccee72311..1925428f5e 100644 --- a/drivers/builtin_openssl2/crypto/cryptlib.c +++ b/drivers/builtin_openssl2/crypto/cryptlib.c @@ -653,7 +653,7 @@ const char *CRYPTO_get_lock_name(int type) defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) -unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[4]; unsigned long *OPENSSL_ia32cap_loc(void) { if (sizeof(long) == 4) @@ -663,6 +663,9 @@ unsigned long *OPENSSL_ia32cap_loc(void) * is 32-bit. */ OPENSSL_ia32cap_P[1] = 0; + + OPENSSL_ia32cap_P[2] = 0; + return (unsigned long *)OPENSSL_ia32cap_P; } @@ -676,7 +679,7 @@ typedef unsigned long long IA32CAP; void OPENSSL_cpuid_setup(void) { static int trigger = 0; - IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(unsigned int *); IA32CAP vec; char *env; @@ -694,9 +697,23 @@ void OPENSSL_cpuid_setup(void) vec = strtoul(env + off, NULL, 0); # endif if (off) - vec = OPENSSL_ia32_cpuid() & ~vec; + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P) & ~vec; + else if (env[0] == ':') + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); + + OPENSSL_ia32cap_P[2] = 0; + if ((env = strchr(env, ':'))) { + unsigned int vecx; + env++; + off = (env[0] == '~') ? 1 : 0; + vecx = strtoul(env + off, NULL, 0); + if (off) + OPENSSL_ia32cap_P[2] &= ~vecx; + else + OPENSSL_ia32cap_P[2] = vecx; + } } else - vec = OPENSSL_ia32_cpuid(); + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); /* * |(1<<10) sets a reserved bit to signal that variable @@ -706,6 +723,8 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10); OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); } +# else +unsigned int OPENSSL_ia32cap_P[4]; # endif #else @@ -857,8 +876,12 @@ void OPENSSL_showfatal(const char *fmta, ...) if ((h = GetStdHandle(STD_ERROR_HANDLE)) != NULL && GetFileType(h) != FILE_TYPE_UNKNOWN) { /* must be console application */ + int len; + DWORD out; + va_start(ap, fmta); - vfprintf(stderr, fmta, ap); + len = _vsnprintf((char *)buf, sizeof(buf), fmta, ap); + WriteFile(h, buf, len < 0 ? sizeof(buf) : (DWORD) len, &out, NULL); va_end(ap); return; } @@ -981,7 +1004,9 @@ void OpenSSLDie(const char *file, int line, const char *assertion) /* * Win32 abort() customarily shows a dialog, but we just did that... */ +# if !defined(_WIN32_WCE) raise(SIGABRT); +# endif _exit(3); #endif } @@ -991,11 +1016,11 @@ void *OPENSSL_stderr(void) return stderr; } -int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len) +int CRYPTO_memcmp(const volatile void *in_a, const volatile void *in_b, size_t len) { size_t i; - const unsigned char *a = in_a; - const unsigned char *b = in_b; + const volatile unsigned char *a = in_a; + const volatile unsigned char *b = in_b; unsigned char x = 0; for (i = 0; i < len; i++) diff --git a/drivers/builtin_openssl2/crypto/crypto-lib.com b/drivers/builtin_openssl2/crypto/crypto-lib.com index a136f4b0b6..1423cac288 100644 --- a/drivers/builtin_openssl2/crypto/crypto-lib.com +++ b/drivers/builtin_openssl2/crypto/crypto-lib.com @@ -214,7 +214,7 @@ $! The contents of these variables are copied from the LIBOBJ variable in the $! corresponding Makefile from each corresponding subdirectory, with .o stripped $! and spaces replaced with commas. $ LIB_ = "cryptlib,mem,mem_dbg,cversion,ex_data,cpt_err,ebcdic,"+ - - "uid,o_time,o_str,o_dir,o_fips.c,o_init,fips_ers,mem_clr" + "uid,o_time,o_str,o_dir,o_fips,o_init,fips_ers,mem_clr" $ LIB_OBJECTS = "o_names,obj_dat,obj_lib,obj_err,obj_xref" $ LIB_MD2 = "md2_dgst,md2_one" $ LIB_MD4 = "md4_dgst,md4_one" @@ -231,17 +231,19 @@ $ LIB_DES = "set_key,ecb_enc,cbc_enc,"+ - "des_enc,fcrypt_b,"+ - "fcrypt,xcbc_enc,rpc_enc,cbc_cksm,"+ - "ede_cbcm_enc,des_old,des_old2,read2pwd" +$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ - + "aes_core,aes_cbc" $ LIB_RC2 = "rc2_ecb,rc2_skey,rc2_cbc,rc2cfb64,rc2ofb64" $ LIB_RC4 = "rc4_enc,rc4_skey,rc4_utl" $ LIB_RC5 = "rc5_skey,rc5_ecb,rc5_enc,rc5cfb64,rc5ofb64" $ LIB_IDEA = "i_cbc,i_cfb64,i_ofb64,i_ecb,i_skey" $ LIB_BF = "bf_skey,bf_ecb,bf_enc,bf_cfb64,bf_ofb64" $ LIB_CAST = "c_skey,c_ecb,c_enc,c_cfb64,c_ofb64" -$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,cmll_utl,"+ - - "camellia,cmll_misc,cmll_cbc" +$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,"+ - + "cmll_utl,camellia,cmll_misc,cmll_cbc" $ LIB_SEED = "seed,seed_ecb,seed_cbc,seed_cfb,seed_ofb" $ LIB_MODES = "cbc128,ctr128,cts128,cfb128,ofb128,gcm128,"+ - - "ccm128,xts128" + "ccm128,xts128,wrap128" $ LIB_BN_ASM = "[.asm]vms.mar,vms-helper" $ IF F$TRNLNM("OPENSSL_NO_ASM") .OR. ARCH .NES. "VAX" THEN - LIB_BN_ASM = "bn_asm" @@ -263,8 +265,8 @@ $ LIB_DSA = "dsa_gen,dsa_key,dsa_lib,dsa_asn1,dsa_vrf,dsa_sign,"+ - "dsa_err,dsa_ossl,dsa_depr,dsa_ameth,dsa_pmeth,dsa_prn" $ LIB_ECDSA = "ecs_lib,ecs_asn1,ecs_ossl,ecs_sign,ecs_vrf,ecs_err" $ LIB_DH = "dh_asn1,dh_gen,dh_key,dh_lib,dh_check,dh_err,dh_depr,"+ - - "dh_ameth,dh_pmeth,dh_prn" -$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err" + "dh_ameth,dh_pmeth,dh_prn,dh_rfc5114,dh_kdf" +$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err,ech_kdf" $ LIB_DSO = "dso_dl,dso_dlfcn,dso_err,dso_lib,dso_null,"+ - "dso_openssl,dso_win32,dso_vms,dso_beos" $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ - @@ -272,9 +274,7 @@ $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ - "tb_rsa,tb_dsa,tb_ecdsa,tb_dh,tb_ecdh,tb_rand,tb_store,"+ - "tb_cipher,tb_digest,tb_pkmeth,tb_asnmth,"+ - "eng_openssl,eng_cnf,eng_dyn,eng_cryptodev,"+ - - "eng_rsax,eng_rdrand" -$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ - - "aes_core,aes_cbc" + "eng_rdrand" $ LIB_BUFFER = "buffer,buf_str,buf_err" $ LIB_BIO = "bio_lib,bio_cb,bio_err,"+ - "bss_mem,bss_null,bss_fd,"+ - @@ -298,8 +298,8 @@ $ LIB_EVP_2 = "m_null,m_md2,m_md4,m_md5,m_sha,m_sha1,m_wp," + - "bio_md,bio_b64,bio_enc,evp_err,e_null,"+ - "c_all,c_allc,c_alld,evp_lib,bio_ok,"+- "evp_pkey,evp_pbe,p5_crpt,p5_crpt2" -$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,evp_fips,"+ - - "e_aes_cbc_hmac_sha1,e_rc4_hmac_md5" +$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,"+ - + "e_aes_cbc_hmac_sha1,e_aes_cbc_hmac_sha256,e_rc4_hmac_md5" $ LIB_ASN1 = "a_object,a_bitstr,a_utctm,a_gentm,a_time,a_int,a_octet,"+ - "a_print,a_type,a_set,a_dup,a_d2i_fp,a_i2d_fp,"+ - "a_enum,a_utf8,a_sign,a_digest,a_verify,a_mbstr,a_strex,"+ - @@ -326,7 +326,7 @@ $ LIB_X509V3 = "v3_bcons,v3_bitst,v3_conf,v3_extku,v3_ia5,v3_lib,"+ - "v3_int,v3_enum,v3_sxnet,v3_cpols,v3_crld,v3_purp,v3_info,"+ - "v3_ocsp,v3_akeya,v3_pmaps,v3_pcons,v3_ncons,v3_pcia,v3_pci,"+ - "pcy_cache,pcy_node,pcy_data,pcy_map,pcy_tree,pcy_lib,"+ - - "v3_asid,v3_addr" + "v3_asid,v3_addr,v3_scts" $ LIB_CONF = "conf_err,conf_lib,conf_api,conf_def,conf_mod,conf_mall,conf_sap" $ LIB_TXT_DB = "txt_db" $ LIB_PKCS7 = "pk7_asn1,pk7_lib,pkcs7err,pk7_doit,pk7_smime,pk7_attr,"+ - @@ -343,7 +343,7 @@ $ LIB_UI = "ui_err,ui_lib,ui_openssl,ui_util"+LIB_UI_COMPAT $ LIB_KRB5 = "krb5_asn" $ LIB_CMS = "cms_lib,cms_asn1,cms_att,cms_io,cms_smime,cms_err,"+ - "cms_sd,cms_dd,cms_cd,cms_env,cms_enc,cms_ess,"+ - - "cms_pwri" + "cms_pwri,cms_kari" $ LIB_PQUEUE = "pqueue" $ LIB_TS = "ts_err,ts_req_utils,ts_req_print,ts_rsp_utils,ts_rsp_print,"+ - "ts_rsp_sign,ts_rsp_verify,ts_verify_ctx,ts_lib,ts_conf,"+ - @@ -1141,7 +1141,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" $ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," +$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," $ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS $ ENDIF $! diff --git a/drivers/builtin_openssl2/crypto/cversion.c b/drivers/builtin_openssl2/crypto/cversion.c index 9e6f50d781..bfff6995c1 100644 --- a/drivers/builtin_openssl2/crypto/cversion.c +++ b/drivers/builtin_openssl2/crypto/cversion.c @@ -68,7 +68,11 @@ const char *SSLeay_version(int t) return OPENSSL_VERSION_TEXT; if (t == SSLEAY_BUILT_ON) { #ifdef DATE +# ifdef OPENSSL_USE_BUILD_DATE return (DATE); +# else + return ("built on: reproducible build, date unspecified"); +# endif #else return ("built on: date not available"); #endif diff --git a/drivers/builtin_openssl2/crypto/des/asm/crypt586.pl b/drivers/builtin_openssl2/crypto/des/asm/crypt586.pl deleted file mode 100644 index e36f7d44bd..0000000000 --- a/drivers/builtin_openssl2/crypto/des/asm/crypt586.pl +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/local/bin/perl -# -# The inner loop instruction sequence and the IP/FP modifications are from -# Svend Olaf Mikkelsen -# I've added the stuff needed for crypt() but I've not worried about making -# things perfect. -# - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"crypt586.pl"); - -$L="edi"; -$R="esi"; - -&external_label("DES_SPtrans"); -&fcrypt_body("fcrypt_body"); -&asm_finish(); - -sub fcrypt_body - { - local($name,$do_ip)=@_; - - &function_begin($name); - - &comment(""); - &comment("Load the 2 words"); - $trans="ebp"; - - &xor( $L, $L); - &xor( $R, $R); - - # PIC-ification:-) - &picmeup("edx","DES_SPtrans"); - #if ($cpp) { &picmeup("edx","DES_SPtrans"); } - #else { &lea("edx",&DWP("DES_SPtrans")); } - &push("edx"); # becomes &swtmp(1) - # - &mov($trans,&wparam(1)); # reloaded with DES_SPtrans in D_ENCRYPT - - &push(&DWC(25)); # add a variable - - &set_label("start"); - for ($i=0; $i<16; $i+=2) - { - &comment(""); - &comment("Round $i"); - &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx"); - - &comment(""); - &comment("Round ".sprintf("%d",$i+1)); - &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx"); - } - &mov("ebx", &swtmp(0)); - &mov("eax", $L); - &dec("ebx"); - &mov($L, $R); - &mov($R, "eax"); - &mov(&swtmp(0), "ebx"); - &jnz(&label("start")); - - &comment(""); - &comment("FP"); - &mov("edx",&wparam(0)); - - &FP_new($R,$L,"eax",3); - &mov(&DWP(0,"edx","",0),"eax"); - &mov(&DWP(4,"edx","",0),$L); - - &add("esp",8); # remove variables - - &function_end($name); - } - -sub D_ENCRYPT - { - local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t)=@_; - - &mov( $u, &wparam(2)); # 2 - &mov( $t, $R); - &shr( $t, 16); # 1 - &mov( $tmp2, &wparam(3)); # 2 - &xor( $t, $R); # 1 - - &and( $u, $t); # 2 - &and( $t, $tmp2); # 2 - - &mov( $tmp1, $u); - &shl( $tmp1, 16); # 1 - &mov( $tmp2, $t); - &shl( $tmp2, 16); # 1 - &xor( $u, $tmp1); # 2 - &xor( $t, $tmp2); # 2 - &mov( $tmp1, &DWP(&n2a($S*4),$trans,"",0)); # 2 - &xor( $u, $tmp1); - &mov( $tmp2, &DWP(&n2a(($S+1)*4),$trans,"",0)); # 2 - &xor( $u, $R); - &xor( $t, $R); - &xor( $t, $tmp2); - - &and( $u, "0xfcfcfcfc" ); # 2 - &xor( $tmp1, $tmp1); # 1 - &and( $t, "0xcfcfcfcf" ); # 2 - &xor( $tmp2, $tmp2); - &movb( &LB($tmp1), &LB($u) ); - &movb( &LB($tmp2), &HB($u) ); - &rotr( $t, 4 ); - &mov( $trans, &swtmp(1)); - &xor( $L, &DWP(" ",$trans,$tmp1,0)); - &movb( &LB($tmp1), &LB($t) ); - &xor( $L, &DWP("0x200",$trans,$tmp2,0)); - &movb( &LB($tmp2), &HB($t) ); - &shr( $u, 16); - &xor( $L, &DWP("0x100",$trans,$tmp1,0)); - &movb( &LB($tmp1), &HB($u) ); - &shr( $t, 16); - &xor( $L, &DWP("0x300",$trans,$tmp2,0)); - &movb( &LB($tmp2), &HB($t) ); - &and( $u, "0xff" ); - &and( $t, "0xff" ); - &mov( $tmp1, &DWP("0x600",$trans,$tmp1,0)); - &xor( $L, $tmp1); - &mov( $tmp1, &DWP("0x700",$trans,$tmp2,0)); - &xor( $L, $tmp1); - &mov( $tmp1, &DWP("0x400",$trans,$u,0)); - &xor( $L, $tmp1); - &mov( $tmp1, &DWP("0x500",$trans,$t,0)); - &xor( $L, $tmp1); - &mov( $trans, &wparam(1)); - } - -sub n2a - { - sprintf("%d",$_[0]); - } - -# now has a side affect of rotating $a by $shift -sub R_PERM_OP - { - local($a,$b,$tt,$shift,$mask,$last)=@_; - - &rotl( $a, $shift ) if ($shift != 0); - &mov( $tt, $a ); - &xor( $a, $b ); - &and( $a, $mask ); - if ($notlast eq $b) - { - &xor( $b, $a ); - &xor( $tt, $a ); - } - else - { - &xor( $tt, $a ); - &xor( $b, $a ); - } - &comment(""); - } - -sub IP_new - { - local($l,$r,$tt,$lr)=@_; - - &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); - &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); - &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); - &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); - &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); - - if ($lr != 3) - { - if (($lr-3) < 0) - { &rotr($tt, 3-$lr); } - else { &rotl($tt, $lr-3); } - } - if ($lr != 2) - { - if (($lr-2) < 0) - { &rotr($r, 2-$lr); } - else { &rotl($r, $lr-2); } - } - } - -sub FP_new - { - local($l,$r,$tt,$lr)=@_; - - if ($lr != 2) - { - if (($lr-2) < 0) - { &rotl($r, 2-$lr); } - else { &rotr($r, $lr-2); } - } - if ($lr != 3) - { - if (($lr-3) < 0) - { &rotl($l, 3-$lr); } - else { &rotr($l, $lr-3); } - } - - &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); - &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); - &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); - &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); - &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); - &rotr($tt , 4); - } - diff --git a/drivers/builtin_openssl2/crypto/des/asm/des-586.pl b/drivers/builtin_openssl2/crypto/des/asm/des-586.pl deleted file mode 100644 index 5b5f39cebd..0000000000 --- a/drivers/builtin_openssl2/crypto/des/asm/des-586.pl +++ /dev/null @@ -1,453 +0,0 @@ -#!/usr/local/bin/perl -# -# The inner loop instruction sequence and the IP/FP modifications are from -# Svend Olaf Mikkelsen -# - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; -require "desboth.pl"; - -# base code is in microsft -# op dest, source -# format. -# - -&asm_init($ARGV[0],"des-586.pl"); - -$L="edi"; -$R="esi"; -$trans="ebp"; -$small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV)); -# one can discuss setting this variable to 1 unconditionally, as -# the folded loop is only 3% slower than unrolled, but >7 times smaller - -&public_label("DES_SPtrans"); - -&DES_encrypt_internal(); -&DES_decrypt_internal(); -&DES_encrypt("DES_encrypt1",1); -&DES_encrypt("DES_encrypt2",0); -&DES_encrypt3("DES_encrypt3",1); -&DES_encrypt3("DES_decrypt3",0); -&cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1); -&cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5); -&DES_SPtrans(); - -&asm_finish(); - -sub DES_encrypt_internal() - { - &function_begin_B("_x86_DES_encrypt"); - - if ($small_footprint) - { - &lea("edx",&DWP(128,"ecx")); - &push("edx"); - &push("ecx"); - &set_label("eloop"); - &D_ENCRYPT(0,$L,$R,0,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment(""); - &D_ENCRYPT(1,$R,$L,2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment(""); - &add("ecx",16); - &cmp("ecx",&swtmp(1)); - &mov(&swtmp(0),"ecx"); - &jb(&label("eloop")); - &add("esp",8); - } - else - { - &push("ecx"); - for ($i=0; $i<16; $i+=2) - { - &comment("Round $i"); - &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment("Round ".sprintf("%d",$i+1)); - &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - } - &add("esp",4); - } - &ret(); - - &function_end_B("_x86_DES_encrypt"); - } - -sub DES_decrypt_internal() - { - &function_begin_B("_x86_DES_decrypt"); - - if ($small_footprint) - { - &push("ecx"); - &lea("ecx",&DWP(128,"ecx")); - &push("ecx"); - &set_label("dloop"); - &D_ENCRYPT(0,$L,$R,-2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment(""); - &D_ENCRYPT(1,$R,$L,-4,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment(""); - &sub("ecx",16); - &cmp("ecx",&swtmp(1)); - &mov(&swtmp(0),"ecx"); - &ja(&label("dloop")); - &add("esp",8); - } - else - { - &push("ecx"); - for ($i=15; $i>0; $i-=2) - { - &comment("Round $i"); - &D_ENCRYPT(15-$i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - &comment("Round ".sprintf("%d",$i-1)); - &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); - } - &add("esp",4); - } - &ret(); - - &function_end_B("_x86_DES_decrypt"); - } - -sub DES_encrypt - { - local($name,$do_ip)=@_; - - &function_begin_B($name); - - &push("esi"); - &push("edi"); - - &comment(""); - &comment("Load the 2 words"); - - if ($do_ip) - { - &mov($R,&wparam(0)); - &xor( "ecx", "ecx" ); - - &push("ebx"); - &push("ebp"); - - &mov("eax",&DWP(0,$R,"",0)); - &mov("ebx",&wparam(2)); # get encrypt flag - &mov($L,&DWP(4,$R,"",0)); - &comment(""); - &comment("IP"); - &IP_new("eax",$L,$R,3); - } - else - { - &mov("eax",&wparam(0)); - &xor( "ecx", "ecx" ); - - &push("ebx"); - &push("ebp"); - - &mov($R,&DWP(0,"eax","",0)); - &mov("ebx",&wparam(2)); # get encrypt flag - &rotl($R,3); - &mov($L,&DWP(4,"eax","",0)); - &rotl($L,3); - } - - # PIC-ification:-) - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop($trans); - &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); - - &mov( "ecx", &wparam(1) ); - - &cmp("ebx","0"); - &je(&label("decrypt")); - &call("_x86_DES_encrypt"); - &jmp(&label("done")); - &set_label("decrypt"); - &call("_x86_DES_decrypt"); - &set_label("done"); - - if ($do_ip) - { - &comment(""); - &comment("FP"); - &mov("edx",&wparam(0)); - &FP_new($L,$R,"eax",3); - - &mov(&DWP(0,"edx","",0),"eax"); - &mov(&DWP(4,"edx","",0),$R); - } - else - { - &comment(""); - &comment("Fixup"); - &rotr($L,3); # r - &mov("eax",&wparam(0)); - &rotr($R,3); # l - &mov(&DWP(0,"eax","",0),$L); - &mov(&DWP(4,"eax","",0),$R); - } - - &pop("ebp"); - &pop("ebx"); - &pop("edi"); - &pop("esi"); - &ret(); - - &function_end_B($name); - } - -sub D_ENCRYPT - { - local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t,$wp1)=@_; - - &mov( $u, &DWP(&n2a($S*4),$tmp2,"",0)); - &xor( $tmp1, $tmp1); - &mov( $t, &DWP(&n2a(($S+1)*4),$tmp2,"",0)); - &xor( $u, $R); - &xor( $tmp2, $tmp2); - &xor( $t, $R); - &and( $u, "0xfcfcfcfc" ); - &and( $t, "0xcfcfcfcf" ); - &movb( &LB($tmp1), &LB($u) ); - &movb( &LB($tmp2), &HB($u) ); - &rotr( $t, 4 ); - &xor( $L, &DWP(" ",$trans,$tmp1,0)); - &movb( &LB($tmp1), &LB($t) ); - &xor( $L, &DWP("0x200",$trans,$tmp2,0)); - &movb( &LB($tmp2), &HB($t) ); - &shr( $u, 16); - &xor( $L, &DWP("0x100",$trans,$tmp1,0)); - &movb( &LB($tmp1), &HB($u) ); - &shr( $t, 16); - &xor( $L, &DWP("0x300",$trans,$tmp2,0)); - &movb( &LB($tmp2), &HB($t) ); - &and( $u, "0xff" ); - &and( $t, "0xff" ); - &xor( $L, &DWP("0x600",$trans,$tmp1,0)); - &xor( $L, &DWP("0x700",$trans,$tmp2,0)); - &mov( $tmp2, $wp1 ); - &xor( $L, &DWP("0x400",$trans,$u,0)); - &xor( $L, &DWP("0x500",$trans,$t,0)); - } - -sub n2a - { - sprintf("%d",$_[0]); - } - -# now has a side affect of rotating $a by $shift -sub R_PERM_OP - { - local($a,$b,$tt,$shift,$mask,$last)=@_; - - &rotl( $a, $shift ) if ($shift != 0); - &mov( $tt, $a ); - &xor( $a, $b ); - &and( $a, $mask ); - # This can never succeed, and besides it is difficult to see what the - # idea was - Ben 13 Feb 99 - if (!$last eq $b) - { - &xor( $b, $a ); - &xor( $tt, $a ); - } - else - { - &xor( $tt, $a ); - &xor( $b, $a ); - } - &comment(""); - } - -sub IP_new - { - local($l,$r,$tt,$lr)=@_; - - &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); - &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); - &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); - &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); - &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); - - if ($lr != 3) - { - if (($lr-3) < 0) - { &rotr($tt, 3-$lr); } - else { &rotl($tt, $lr-3); } - } - if ($lr != 2) - { - if (($lr-2) < 0) - { &rotr($r, 2-$lr); } - else { &rotl($r, $lr-2); } - } - } - -sub FP_new - { - local($l,$r,$tt,$lr)=@_; - - if ($lr != 2) - { - if (($lr-2) < 0) - { &rotl($r, 2-$lr); } - else { &rotr($r, $lr-2); } - } - if ($lr != 3) - { - if (($lr-3) < 0) - { &rotl($l, 3-$lr); } - else { &rotr($l, $lr-3); } - } - - &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); - &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); - &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); - &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); - &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); - &rotr($tt , 4); - } - -sub DES_SPtrans - { - &set_label("DES_SPtrans",64); - &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); - &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); - &data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802); - &data_word(0x02000802, 0x02000000, 0x00000000, 0x00080002); - &data_word(0x00080000, 0x00000002, 0x02000800, 0x00080800); - &data_word(0x02080802, 0x02080000, 0x00000802, 0x02000800); - &data_word(0x00000002, 0x00000800, 0x00080800, 0x02080002); - &data_word(0x00000800, 0x02000802, 0x02080002, 0x00000000); - &data_word(0x00000000, 0x02080802, 0x02000800, 0x00080002); - &data_word(0x02080800, 0x00080000, 0x00000802, 0x02000800); - &data_word(0x02080002, 0x00000800, 0x00080800, 0x02000002); - &data_word(0x00080802, 0x00000002, 0x02000002, 0x02080000); - &data_word(0x02080802, 0x00080800, 0x02080000, 0x02000802); - &data_word(0x02000000, 0x00000802, 0x00080002, 0x00000000); - &data_word(0x00080000, 0x02000000, 0x02000802, 0x02080800); - &data_word(0x00000002, 0x02080002, 0x00000800, 0x00080802); - # nibble 1 - &data_word(0x40108010, 0x00000000, 0x00108000, 0x40100000); - &data_word(0x40000010, 0x00008010, 0x40008000, 0x00108000); - &data_word(0x00008000, 0x40100010, 0x00000010, 0x40008000); - &data_word(0x00100010, 0x40108000, 0x40100000, 0x00000010); - &data_word(0x00100000, 0x40008010, 0x40100010, 0x00008000); - &data_word(0x00108010, 0x40000000, 0x00000000, 0x00100010); - &data_word(0x40008010, 0x00108010, 0x40108000, 0x40000010); - &data_word(0x40000000, 0x00100000, 0x00008010, 0x40108010); - &data_word(0x00100010, 0x40108000, 0x40008000, 0x00108010); - &data_word(0x40108010, 0x00100010, 0x40000010, 0x00000000); - &data_word(0x40000000, 0x00008010, 0x00100000, 0x40100010); - &data_word(0x00008000, 0x40000000, 0x00108010, 0x40008010); - &data_word(0x40108000, 0x00008000, 0x00000000, 0x40000010); - &data_word(0x00000010, 0x40108010, 0x00108000, 0x40100000); - &data_word(0x40100010, 0x00100000, 0x00008010, 0x40008000); - &data_word(0x40008010, 0x00000010, 0x40100000, 0x00108000); - # nibble 2 - &data_word(0x04000001, 0x04040100, 0x00000100, 0x04000101); - &data_word(0x00040001, 0x04000000, 0x04000101, 0x00040100); - &data_word(0x04000100, 0x00040000, 0x04040000, 0x00000001); - &data_word(0x04040101, 0x00000101, 0x00000001, 0x04040001); - &data_word(0x00000000, 0x00040001, 0x04040100, 0x00000100); - &data_word(0x00000101, 0x04040101, 0x00040000, 0x04000001); - &data_word(0x04040001, 0x04000100, 0x00040101, 0x04040000); - &data_word(0x00040100, 0x00000000, 0x04000000, 0x00040101); - &data_word(0x04040100, 0x00000100, 0x00000001, 0x00040000); - &data_word(0x00000101, 0x00040001, 0x04040000, 0x04000101); - &data_word(0x00000000, 0x04040100, 0x00040100, 0x04040001); - &data_word(0x00040001, 0x04000000, 0x04040101, 0x00000001); - &data_word(0x00040101, 0x04000001, 0x04000000, 0x04040101); - &data_word(0x00040000, 0x04000100, 0x04000101, 0x00040100); - &data_word(0x04000100, 0x00000000, 0x04040001, 0x00000101); - &data_word(0x04000001, 0x00040101, 0x00000100, 0x04040000); - # nibble 3 - &data_word(0x00401008, 0x10001000, 0x00000008, 0x10401008); - &data_word(0x00000000, 0x10400000, 0x10001008, 0x00400008); - &data_word(0x10401000, 0x10000008, 0x10000000, 0x00001008); - &data_word(0x10000008, 0x00401008, 0x00400000, 0x10000000); - &data_word(0x10400008, 0x00401000, 0x00001000, 0x00000008); - &data_word(0x00401000, 0x10001008, 0x10400000, 0x00001000); - &data_word(0x00001008, 0x00000000, 0x00400008, 0x10401000); - &data_word(0x10001000, 0x10400008, 0x10401008, 0x00400000); - &data_word(0x10400008, 0x00001008, 0x00400000, 0x10000008); - &data_word(0x00401000, 0x10001000, 0x00000008, 0x10400000); - &data_word(0x10001008, 0x00000000, 0x00001000, 0x00400008); - &data_word(0x00000000, 0x10400008, 0x10401000, 0x00001000); - &data_word(0x10000000, 0x10401008, 0x00401008, 0x00400000); - &data_word(0x10401008, 0x00000008, 0x10001000, 0x00401008); - &data_word(0x00400008, 0x00401000, 0x10400000, 0x10001008); - &data_word(0x00001008, 0x10000000, 0x10000008, 0x10401000); - # nibble 4 - &data_word(0x08000000, 0x00010000, 0x00000400, 0x08010420); - &data_word(0x08010020, 0x08000400, 0x00010420, 0x08010000); - &data_word(0x00010000, 0x00000020, 0x08000020, 0x00010400); - &data_word(0x08000420, 0x08010020, 0x08010400, 0x00000000); - &data_word(0x00010400, 0x08000000, 0x00010020, 0x00000420); - &data_word(0x08000400, 0x00010420, 0x00000000, 0x08000020); - &data_word(0x00000020, 0x08000420, 0x08010420, 0x00010020); - &data_word(0x08010000, 0x00000400, 0x00000420, 0x08010400); - &data_word(0x08010400, 0x08000420, 0x00010020, 0x08010000); - &data_word(0x00010000, 0x00000020, 0x08000020, 0x08000400); - &data_word(0x08000000, 0x00010400, 0x08010420, 0x00000000); - &data_word(0x00010420, 0x08000000, 0x00000400, 0x00010020); - &data_word(0x08000420, 0x00000400, 0x00000000, 0x08010420); - &data_word(0x08010020, 0x08010400, 0x00000420, 0x00010000); - &data_word(0x00010400, 0x08010020, 0x08000400, 0x00000420); - &data_word(0x00000020, 0x00010420, 0x08010000, 0x08000020); - # nibble 5 - &data_word(0x80000040, 0x00200040, 0x00000000, 0x80202000); - &data_word(0x00200040, 0x00002000, 0x80002040, 0x00200000); - &data_word(0x00002040, 0x80202040, 0x00202000, 0x80000000); - &data_word(0x80002000, 0x80000040, 0x80200000, 0x00202040); - &data_word(0x00200000, 0x80002040, 0x80200040, 0x00000000); - &data_word(0x00002000, 0x00000040, 0x80202000, 0x80200040); - &data_word(0x80202040, 0x80200000, 0x80000000, 0x00002040); - &data_word(0x00000040, 0x00202000, 0x00202040, 0x80002000); - &data_word(0x00002040, 0x80000000, 0x80002000, 0x00202040); - &data_word(0x80202000, 0x00200040, 0x00000000, 0x80002000); - &data_word(0x80000000, 0x00002000, 0x80200040, 0x00200000); - &data_word(0x00200040, 0x80202040, 0x00202000, 0x00000040); - &data_word(0x80202040, 0x00202000, 0x00200000, 0x80002040); - &data_word(0x80000040, 0x80200000, 0x00202040, 0x00000000); - &data_word(0x00002000, 0x80000040, 0x80002040, 0x80202000); - &data_word(0x80200000, 0x00002040, 0x00000040, 0x80200040); - # nibble 6 - &data_word(0x00004000, 0x00000200, 0x01000200, 0x01000004); - &data_word(0x01004204, 0x00004004, 0x00004200, 0x00000000); - &data_word(0x01000000, 0x01000204, 0x00000204, 0x01004000); - &data_word(0x00000004, 0x01004200, 0x01004000, 0x00000204); - &data_word(0x01000204, 0x00004000, 0x00004004, 0x01004204); - &data_word(0x00000000, 0x01000200, 0x01000004, 0x00004200); - &data_word(0x01004004, 0x00004204, 0x01004200, 0x00000004); - &data_word(0x00004204, 0x01004004, 0x00000200, 0x01000000); - &data_word(0x00004204, 0x01004000, 0x01004004, 0x00000204); - &data_word(0x00004000, 0x00000200, 0x01000000, 0x01004004); - &data_word(0x01000204, 0x00004204, 0x00004200, 0x00000000); - &data_word(0x00000200, 0x01000004, 0x00000004, 0x01000200); - &data_word(0x00000000, 0x01000204, 0x01000200, 0x00004200); - &data_word(0x00000204, 0x00004000, 0x01004204, 0x01000000); - &data_word(0x01004200, 0x00000004, 0x00004004, 0x01004204); - &data_word(0x01000004, 0x01004200, 0x01004000, 0x00004004); - # nibble 7 - &data_word(0x20800080, 0x20820000, 0x00020080, 0x00000000); - &data_word(0x20020000, 0x00800080, 0x20800000, 0x20820080); - &data_word(0x00000080, 0x20000000, 0x00820000, 0x00020080); - &data_word(0x00820080, 0x20020080, 0x20000080, 0x20800000); - &data_word(0x00020000, 0x00820080, 0x00800080, 0x20020000); - &data_word(0x20820080, 0x20000080, 0x00000000, 0x00820000); - &data_word(0x20000000, 0x00800000, 0x20020080, 0x20800080); - &data_word(0x00800000, 0x00020000, 0x20820000, 0x00000080); - &data_word(0x00800000, 0x00020000, 0x20000080, 0x20820080); - &data_word(0x00020080, 0x20000000, 0x00000000, 0x00820000); - &data_word(0x20800080, 0x20020080, 0x20020000, 0x00800080); - &data_word(0x20820000, 0x00000080, 0x00800080, 0x20020000); - &data_word(0x20820080, 0x00800000, 0x20800000, 0x20000080); - &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); - &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); - &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); - } diff --git a/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 b/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 index 3280595478..dda08e126d 100644 --- a/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 +++ b/drivers/builtin_openssl2/crypto/des/asm/des_enc.m4 @@ -46,6 +46,8 @@ .ident "des_enc.m4 2.1" .file "des_enc-sparc.S" +#include + #if defined(__SUNPRO_C) && defined(__sparcv9) # define ABI64 /* They've said -xarch=v9 at command line */ #elif defined(__GNUC__) && defined(__arch64__) diff --git a/drivers/builtin_openssl2/crypto/des/asm/desboth.pl b/drivers/builtin_openssl2/crypto/des/asm/desboth.pl deleted file mode 100644 index eec00886e4..0000000000 --- a/drivers/builtin_openssl2/crypto/des/asm/desboth.pl +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/local/bin/perl - -$L="edi"; -$R="esi"; - -sub DES_encrypt3 - { - local($name,$enc)=@_; - - &function_begin_B($name,""); - &push("ebx"); - &mov("ebx",&wparam(0)); - - &push("ebp"); - &push("esi"); - - &push("edi"); - - &comment(""); - &comment("Load the data words"); - &mov($L,&DWP(0,"ebx","",0)); - &mov($R,&DWP(4,"ebx","",0)); - &stack_push(3); - - &comment(""); - &comment("IP"); - &IP_new($L,$R,"edx",0); - - # put them back - - if ($enc) - { - &mov(&DWP(4,"ebx","",0),$R); - &mov("eax",&wparam(1)); - &mov(&DWP(0,"ebx","",0),"edx"); - &mov("edi",&wparam(2)); - &mov("esi",&wparam(3)); - } - else - { - &mov(&DWP(4,"ebx","",0),$R); - &mov("esi",&wparam(1)); - &mov(&DWP(0,"ebx","",0),"edx"); - &mov("edi",&wparam(2)); - &mov("eax",&wparam(3)); - } - &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); - &mov(&swtmp(1), "eax"); - &mov(&swtmp(0), "ebx"); - &call("DES_encrypt2"); - &mov(&swtmp(2), (DWC(($enc)?"0":"1"))); - &mov(&swtmp(1), "edi"); - &mov(&swtmp(0), "ebx"); - &call("DES_encrypt2"); - &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); - &mov(&swtmp(1), "esi"); - &mov(&swtmp(0), "ebx"); - &call("DES_encrypt2"); - - &stack_pop(3); - &mov($L,&DWP(0,"ebx","",0)); - &mov($R,&DWP(4,"ebx","",0)); - - &comment(""); - &comment("FP"); - &FP_new($L,$R,"eax",0); - - &mov(&DWP(0,"ebx","",0),"eax"); - &mov(&DWP(4,"ebx","",0),$R); - - &pop("edi"); - &pop("esi"); - &pop("ebp"); - &pop("ebx"); - &ret(); - &function_end_B($name); - } - - diff --git a/drivers/builtin_openssl2/crypto/des/des.pod b/drivers/builtin_openssl2/crypto/des/des.pod deleted file mode 100644 index bf479e83d2..0000000000 --- a/drivers/builtin_openssl2/crypto/des/des.pod +++ /dev/null @@ -1,217 +0,0 @@ -=pod - -=head1 NAME - -des - encrypt or decrypt data using Data Encryption Standard - -=head1 SYNOPSIS - -B -( -B<-e> -| -B<-E> -) | ( -B<-d> -| -B<-D> -) | ( -B<->[B][B] -) | -[ -B<-b3hfs> -] [ -B<-k> -I -] -] [ -B<-u>[I] -[ -I -[ -I -] ] - -=head1 NOTE - -This page describes the B stand-alone program, not the B -command. - -=head1 DESCRIPTION - -B -encrypts and decrypts data using the -Data Encryption Standard algorithm. -One of -B<-e>, B<-E> -(for encrypt) or -B<-d>, B<-D> -(for decrypt) must be specified. -It is also possible to use -B<-c> -or -B<-C> -in conjunction or instead of the a encrypt/decrypt option to generate -a 16 character hexadecimal checksum, generated via the -I. - -Two standard encryption modes are supported by the -B -program, Cipher Block Chaining (the default) and Electronic Code Book -(specified with -B<-b>). - -The key used for the DES -algorithm is obtained by prompting the user unless the -B<-k> -I -option is given. -If the key is an argument to the -B -command, it is potentially visible to users executing -ps(1) -or a derivative. To minimise this possibility, -B -takes care to destroy the key argument immediately upon entry. -If your shell keeps a history file be careful to make sure it is not -world readable. - -Since this program attempts to maintain compatibility with sunOS's -des(1) command, there are 2 different methods used to convert the user -supplied key to a des key. -Whenever and one or more of -B<-E>, B<-D>, B<-C> -or -B<-3> -options are used, the key conversion procedure will not be compatible -with the sunOS des(1) version but will use all the user supplied -character to generate the des key. -B -command reads from standard input unless -I -is specified and writes to standard output unless -I -is given. - -=head1 OPTIONS - -=over 4 - -=item B<-b> - -Select ECB -(eight bytes at a time) encryption mode. - -=item B<-3> - -Encrypt using triple encryption. -By default triple cbc encryption is used but if the -B<-b> -option is used then triple ECB encryption is performed. -If the key is less than 8 characters long, the flag has no effect. - -=item B<-e> - -Encrypt data using an 8 byte key in a manner compatible with sunOS -des(1). - -=item B<-E> - -Encrypt data using a key of nearly unlimited length (1024 bytes). -This will product a more secure encryption. - -=item B<-d> - -Decrypt data that was encrypted with the B<-e> option. - -=item B<-D> - -Decrypt data that was encrypted with the B<-E> option. - -=item B<-c> - -Generate a 16 character hexadecimal cbc checksum and output this to -stderr. -If a filename was specified after the -B<-c> -option, the checksum is output to that file. -The checksum is generated using a key generated in a sunOS compatible -manner. - -=item B<-C> - -A cbc checksum is generated in the same manner as described for the -B<-c> -option but the DES key is generated in the same manner as used for the -B<-E> -and -B<-D> -options - -=item B<-f> - -Does nothing - allowed for compatibility with sunOS des(1) command. - -=item B<-s> - -Does nothing - allowed for compatibility with sunOS des(1) command. - -=item B<-k> I - -Use the encryption -I -specified. - -=item B<-h> - -The -I -is assumed to be a 16 character hexadecimal number. -If the -B<-3> -option is used the key is assumed to be a 32 character hexadecimal -number. - -=item B<-u> - -This flag is used to read and write uuencoded files. If decrypting, -the input file is assumed to contain uuencoded, DES encrypted data. -If encrypting, the characters following the B<-u> are used as the name of -the uuencoded file to embed in the begin line of the uuencoded -output. If there is no name specified after the B<-u>, the name text.des -will be embedded in the header. - -=head1 SEE ALSO - -ps(1), -L - -=head1 BUGS - -The problem with using the -B<-e> -option is the short key length. -It would be better to use a real 56-bit key rather than an -ASCII-based 56-bit pattern. Knowing that the key was derived from ASCII -radically reduces the time necessary for a brute-force cryptographic attack. -My attempt to remove this problem is to add an alternative text-key to -DES-key function. This alternative function (accessed via -B<-E>, B<-D>, B<-S> -and -B<-3>) -uses DES to help generate the key. - -Be carefully when using the B<-u> option. Doing B I will -not decrypt filename (the B<-u> option will gobble the B<-d> option). - -The VMS operating system operates in a world where files are always a -multiple of 512 bytes. This causes problems when encrypted data is -send from Unix to VMS since a 88 byte file will suddenly be padded -with 424 null bytes. To get around this problem, use the B<-u> option -to uuencode the data before it is send to the VMS system. - -=head1 AUTHOR - -Eric Young (eay@cryptsoft.com) - -=cut diff --git a/drivers/builtin_openssl2/crypto/des/des_locl.h b/drivers/builtin_openssl2/crypto/des/des_locl.h index 1a8e41dd29..23ea9d32a7 100644 --- a/drivers/builtin_openssl2/crypto/des/des_locl.h +++ b/drivers/builtin_openssl2/crypto/des/des_locl.h @@ -162,8 +162,10 @@ } \ } -# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC) +# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) # define ROTATE(a,n) (_lrotr(a,n)) +# elif defined(__ICC) +# define ROTATE(a,n) (_rotr(a,n)) # elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE(a,n) ({ register unsigned int ret; \ diff --git a/drivers/builtin_openssl2/crypto/des/destest.c b/drivers/builtin_openssl2/crypto/des/destest.c deleted file mode 100644 index c6be342038..0000000000 --- a/drivers/builtin_openssl2/crypto/des/destest.c +++ /dev/null @@ -1,929 +0,0 @@ -/* crypto/des/destest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include - -#include -#if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_WIN16) || defined(OPENSSL_SYS_WINDOWS) -# ifndef OPENSSL_SYS_MSDOS -# define OPENSSL_SYS_MSDOS -# endif -#endif - -#ifndef OPENSSL_SYS_MSDOS -# if !defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_VMS_DECC) -# include OPENSSL_UNISTD -# endif -#else -# include -#endif -#include - -#ifdef OPENSSL_NO_DES -int main(int argc, char *argv[]) -{ - printf("No DES support\n"); - return (0); -} -#else -# include - -# define crypt(c,s) (DES_crypt((c),(s))) - -/* tisk tisk - the test keys don't all have odd parity :-( */ -/* test data */ -# define NUM_TESTS 34 -static unsigned char key_data[NUM_TESTS][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}, - {0x7C, 0xA1, 0x10, 0x45, 0x4A, 0x1A, 0x6E, 0x57}, - {0x01, 0x31, 0xD9, 0x61, 0x9D, 0xC1, 0x37, 0x6E}, - {0x07, 0xA1, 0x13, 0x3E, 0x4A, 0x0B, 0x26, 0x86}, - {0x38, 0x49, 0x67, 0x4C, 0x26, 0x02, 0x31, 0x9E}, - {0x04, 0xB9, 0x15, 0xBA, 0x43, 0xFE, 0xB5, 0xB6}, - {0x01, 0x13, 0xB9, 0x70, 0xFD, 0x34, 0xF2, 0xCE}, - {0x01, 0x70, 0xF1, 0x75, 0x46, 0x8F, 0xB5, 0xE6}, - {0x43, 0x29, 0x7F, 0xAD, 0x38, 0xE3, 0x73, 0xFE}, - {0x07, 0xA7, 0x13, 0x70, 0x45, 0xDA, 0x2A, 0x16}, - {0x04, 0x68, 0x91, 0x04, 0xC2, 0xFD, 0x3B, 0x2F}, - {0x37, 0xD0, 0x6B, 0xB5, 0x16, 0xCB, 0x75, 0x46}, - {0x1F, 0x08, 0x26, 0x0D, 0x1A, 0xC2, 0x46, 0x5E}, - {0x58, 0x40, 0x23, 0x64, 0x1A, 0xBA, 0x61, 0x76}, - {0x02, 0x58, 0x16, 0x16, 0x46, 0x29, 0xB0, 0x07}, - {0x49, 0x79, 0x3E, 0xBC, 0x79, 0xB3, 0x25, 0x8F}, - {0x4F, 0xB0, 0x5E, 0x15, 0x15, 0xAB, 0x73, 0xA7}, - {0x49, 0xE9, 0x5D, 0x6D, 0x4C, 0xA2, 0x29, 0xBF}, - {0x01, 0x83, 0x10, 0xDC, 0x40, 0x9B, 0x26, 0xD6}, - {0x1C, 0x58, 0x7F, 0x1C, 0x13, 0x92, 0x4F, 0xEF}, - {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01}, - {0x1F, 0x1F, 0x1F, 0x1F, 0x0E, 0x0E, 0x0E, 0x0E}, - {0xE0, 0xFE, 0xE0, 0xFE, 0xF1, 0xFE, 0xF1, 0xFE}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10} -}; - -static unsigned char plain_data[NUM_TESTS][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0xA1, 0xD6, 0xD0, 0x39, 0x77, 0x67, 0x42}, - {0x5C, 0xD5, 0x4C, 0xA8, 0x3D, 0xEF, 0x57, 0xDA}, - {0x02, 0x48, 0xD4, 0x38, 0x06, 0xF6, 0x71, 0x72}, - {0x51, 0x45, 0x4B, 0x58, 0x2D, 0xDF, 0x44, 0x0A}, - {0x42, 0xFD, 0x44, 0x30, 0x59, 0x57, 0x7F, 0xA2}, - {0x05, 0x9B, 0x5E, 0x08, 0x51, 0xCF, 0x14, 0x3A}, - {0x07, 0x56, 0xD8, 0xE0, 0x77, 0x47, 0x61, 0xD2}, - {0x76, 0x25, 0x14, 0xB8, 0x29, 0xBF, 0x48, 0x6A}, - {0x3B, 0xDD, 0x11, 0x90, 0x49, 0x37, 0x28, 0x02}, - {0x26, 0x95, 0x5F, 0x68, 0x35, 0xAF, 0x60, 0x9A}, - {0x16, 0x4D, 0x5E, 0x40, 0x4F, 0x27, 0x52, 0x32}, - {0x6B, 0x05, 0x6E, 0x18, 0x75, 0x9F, 0x5C, 0xCA}, - {0x00, 0x4B, 0xD6, 0xEF, 0x09, 0x17, 0x60, 0x62}, - {0x48, 0x0D, 0x39, 0x00, 0x6E, 0xE7, 0x62, 0xF2}, - {0x43, 0x75, 0x40, 0xC8, 0x69, 0x8F, 0x3C, 0xFA}, - {0x07, 0x2D, 0x43, 0xA0, 0x77, 0x07, 0x52, 0x92}, - {0x02, 0xFE, 0x55, 0x77, 0x81, 0x17, 0xF1, 0x2A}, - {0x1D, 0x9D, 0x5C, 0x50, 0x18, 0xF7, 0x28, 0xC2}, - {0x30, 0x55, 0x32, 0x28, 0x6D, 0x6F, 0x29, 0x5A}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} -}; - -static unsigned char cipher_data[NUM_TESTS][8] = { - {0x8C, 0xA6, 0x4D, 0xE9, 0xC1, 0xB1, 0x23, 0xA7}, - {0x73, 0x59, 0xB2, 0x16, 0x3E, 0x4E, 0xDC, 0x58}, - {0x95, 0x8E, 0x6E, 0x62, 0x7A, 0x05, 0x55, 0x7B}, - {0xF4, 0x03, 0x79, 0xAB, 0x9E, 0x0E, 0xC5, 0x33}, - {0x17, 0x66, 0x8D, 0xFC, 0x72, 0x92, 0x53, 0x2D}, - {0x8A, 0x5A, 0xE1, 0xF8, 0x1A, 0xB8, 0xF2, 0xDD}, - {0x8C, 0xA6, 0x4D, 0xE9, 0xC1, 0xB1, 0x23, 0xA7}, - {0xED, 0x39, 0xD9, 0x50, 0xFA, 0x74, 0xBC, 0xC4}, - {0x69, 0x0F, 0x5B, 0x0D, 0x9A, 0x26, 0x93, 0x9B}, - {0x7A, 0x38, 0x9D, 0x10, 0x35, 0x4B, 0xD2, 0x71}, - {0x86, 0x8E, 0xBB, 0x51, 0xCA, 0xB4, 0x59, 0x9A}, - {0x71, 0x78, 0x87, 0x6E, 0x01, 0xF1, 0x9B, 0x2A}, - {0xAF, 0x37, 0xFB, 0x42, 0x1F, 0x8C, 0x40, 0x95}, - {0x86, 0xA5, 0x60, 0xF1, 0x0E, 0xC6, 0xD8, 0x5B}, - {0x0C, 0xD3, 0xDA, 0x02, 0x00, 0x21, 0xDC, 0x09}, - {0xEA, 0x67, 0x6B, 0x2C, 0xB7, 0xDB, 0x2B, 0x7A}, - {0xDF, 0xD6, 0x4A, 0x81, 0x5C, 0xAF, 0x1A, 0x0F}, - {0x5C, 0x51, 0x3C, 0x9C, 0x48, 0x86, 0xC0, 0x88}, - {0x0A, 0x2A, 0xEE, 0xAE, 0x3F, 0xF4, 0xAB, 0x77}, - {0xEF, 0x1B, 0xF0, 0x3E, 0x5D, 0xFA, 0x57, 0x5A}, - {0x88, 0xBF, 0x0D, 0xB6, 0xD7, 0x0D, 0xEE, 0x56}, - {0xA1, 0xF9, 0x91, 0x55, 0x41, 0x02, 0x0B, 0x56}, - {0x6F, 0xBF, 0x1C, 0xAF, 0xCF, 0xFD, 0x05, 0x56}, - {0x2F, 0x22, 0xE4, 0x9B, 0xAB, 0x7C, 0xA1, 0xAC}, - {0x5A, 0x6B, 0x61, 0x2C, 0xC2, 0x6C, 0xCE, 0x4A}, - {0x5F, 0x4C, 0x03, 0x8E, 0xD1, 0x2B, 0x2E, 0x41}, - {0x63, 0xFA, 0xC0, 0xD0, 0x34, 0xD9, 0xF7, 0x93}, - {0x61, 0x7B, 0x3A, 0x0C, 0xE8, 0xF0, 0x71, 0x00}, - {0xDB, 0x95, 0x86, 0x05, 0xF8, 0xC8, 0xC6, 0x06}, - {0xED, 0xBF, 0xD1, 0xC6, 0x6C, 0x29, 0xCC, 0xC7}, - {0x35, 0x55, 0x50, 0xB2, 0x15, 0x0E, 0x24, 0x51}, - {0xCA, 0xAA, 0xAF, 0x4D, 0xEA, 0xF1, 0xDB, 0xAE}, - {0xD5, 0xD4, 0x4F, 0xF7, 0x20, 0x68, 0x3D, 0x0D}, - {0x2A, 0x2B, 0xB0, 0x08, 0xDF, 0x97, 0xC2, 0xF2} -}; - -static unsigned char cipher_ecb2[NUM_TESTS - 1][8] = { - {0x92, 0x95, 0xB5, 0x9B, 0xB3, 0x84, 0x73, 0x6E}, - {0x19, 0x9E, 0x9D, 0x6D, 0xF3, 0x9A, 0xA8, 0x16}, - {0x2A, 0x4B, 0x4D, 0x24, 0x52, 0x43, 0x84, 0x27}, - {0x35, 0x84, 0x3C, 0x01, 0x9D, 0x18, 0xC5, 0xB6}, - {0x4A, 0x5B, 0x2F, 0x42, 0xAA, 0x77, 0x19, 0x25}, - {0xA0, 0x6B, 0xA9, 0xB8, 0xCA, 0x5B, 0x17, 0x8A}, - {0xAB, 0x9D, 0xB7, 0xFB, 0xED, 0x95, 0xF2, 0x74}, - {0x3D, 0x25, 0x6C, 0x23, 0xA7, 0x25, 0x2F, 0xD6}, - {0xB7, 0x6F, 0xAB, 0x4F, 0xBD, 0xBD, 0xB7, 0x67}, - {0x8F, 0x68, 0x27, 0xD6, 0x9C, 0xF4, 0x1A, 0x10}, - {0x82, 0x57, 0xA1, 0xD6, 0x50, 0x5E, 0x81, 0x85}, - {0xA2, 0x0F, 0x0A, 0xCD, 0x80, 0x89, 0x7D, 0xFA}, - {0xCD, 0x2A, 0x53, 0x3A, 0xDB, 0x0D, 0x7E, 0xF3}, - {0xD2, 0xC2, 0xBE, 0x27, 0xE8, 0x1B, 0x68, 0xE3}, - {0xE9, 0x24, 0xCF, 0x4F, 0x89, 0x3C, 0x5B, 0x0A}, - {0xA7, 0x18, 0xC3, 0x9F, 0xFA, 0x9F, 0xD7, 0x69}, - {0x77, 0x2C, 0x79, 0xB1, 0xD2, 0x31, 0x7E, 0xB1}, - {0x49, 0xAB, 0x92, 0x7F, 0xD0, 0x22, 0x00, 0xB7}, - {0xCE, 0x1C, 0x6C, 0x7D, 0x85, 0xE3, 0x4A, 0x6F}, - {0xBE, 0x91, 0xD6, 0xE1, 0x27, 0xB2, 0xE9, 0x87}, - {0x70, 0x28, 0xAE, 0x8F, 0xD1, 0xF5, 0x74, 0x1A}, - {0xAA, 0x37, 0x80, 0xBB, 0xF3, 0x22, 0x1D, 0xDE}, - {0xA6, 0xC4, 0xD2, 0x5E, 0x28, 0x93, 0xAC, 0xB3}, - {0x22, 0x07, 0x81, 0x5A, 0xE4, 0xB7, 0x1A, 0xAD}, - {0xDC, 0xCE, 0x05, 0xE7, 0x07, 0xBD, 0xF5, 0x84}, - {0x26, 0x1D, 0x39, 0x2C, 0xB3, 0xBA, 0xA5, 0x85}, - {0xB4, 0xF7, 0x0F, 0x72, 0xFB, 0x04, 0xF0, 0xDC}, - {0x95, 0xBA, 0xA9, 0x4E, 0x87, 0x36, 0xF2, 0x89}, - {0xD4, 0x07, 0x3A, 0xF1, 0x5A, 0x17, 0x82, 0x0E}, - {0xEF, 0x6F, 0xAF, 0xA7, 0x66, 0x1A, 0x7E, 0x89}, - {0xC1, 0x97, 0xF5, 0x58, 0x74, 0x8A, 0x20, 0xE7}, - {0x43, 0x34, 0xCF, 0xDA, 0x22, 0xC4, 0x86, 0xC8}, - {0x08, 0xD7, 0xB4, 0xFB, 0x62, 0x9D, 0x08, 0x85} -}; - -static unsigned char cbc_key[8] = - { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }; -static unsigned char cbc2_key[8] = - { 0xf1, 0xe0, 0xd3, 0xc2, 0xb5, 0xa4, 0x97, 0x86 }; -static unsigned char cbc3_key[8] = - { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }; -static unsigned char cbc_iv[8] = - { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }; -/* - * Changed the following text constant to binary so it will work on ebcdic - * machines :-) - */ -/* static char cbc_data[40]="7654321 Now is the time for \0001"; */ -static unsigned char cbc_data[40] = { - 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, - 0x4E, 0x6F, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, - 0x68, 0x65, 0x20, 0x74, 0x69, 0x6D, 0x65, 0x20, - 0x66, 0x6F, 0x72, 0x20, 0x00, 0x31, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; - -static unsigned char cbc_ok[32] = { - 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, - 0xac, 0xd8, 0xae, 0xfd, 0xdf, 0xd8, 0xa1, 0xeb, - 0x46, 0x8e, 0x91, 0x15, 0x78, 0x88, 0xba, 0x68, - 0x1d, 0x26, 0x93, 0x97, 0xf7, 0xfe, 0x62, 0xb4 -}; - -# ifdef SCREW_THE_PARITY -# error "SCREW_THE_PARITY is not ment to be defined." -# error "Original vectors are preserved for reference only." -static unsigned char cbc2_key[8] = - { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87 }; -static unsigned char xcbc_ok[32] = { - 0x86, 0x74, 0x81, 0x0D, 0x61, 0xA4, 0xA5, 0x48, - 0xB9, 0x93, 0x03, 0xE1, 0xB8, 0xBB, 0xBD, 0xBD, - 0x64, 0x30, 0x0B, 0xB9, 0x06, 0x65, 0x81, 0x76, - 0x04, 0x1D, 0x77, 0x62, 0x17, 0xCA, 0x2B, 0xD2, -}; -# else -static unsigned char xcbc_ok[32] = { - 0x84, 0x6B, 0x29, 0x14, 0x85, 0x1E, 0x9A, 0x29, - 0x54, 0x73, 0x2F, 0x8A, 0xA0, 0xA6, 0x11, 0xC1, - 0x15, 0xCD, 0xC2, 0xD7, 0x95, 0x1B, 0x10, 0x53, - 0xA6, 0x3C, 0x5E, 0x03, 0xB2, 0x1A, 0xA3, 0xC4, -}; -# endif - -static unsigned char cbc3_ok[32] = { - 0x3F, 0xE3, 0x01, 0xC9, 0x62, 0xAC, 0x01, 0xD0, - 0x22, 0x13, 0x76, 0x3C, 0x1C, 0xBD, 0x4C, 0xDC, - 0x79, 0x96, 0x57, 0xC0, 0x64, 0xEC, 0xF5, 0xD4, - 0x1C, 0x67, 0x38, 0x12, 0xCF, 0xDE, 0x96, 0x75 -}; - -static unsigned char pcbc_ok[32] = { - 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, - 0x6d, 0xec, 0xb4, 0x70, 0xa0, 0xe5, 0x6b, 0x15, - 0xae, 0xa6, 0xbf, 0x61, 0xed, 0x7d, 0x9c, 0x9f, - 0xf7, 0x17, 0x46, 0x3b, 0x8a, 0xb3, 0xcc, 0x88 -}; - -static unsigned char cfb_key[8] = - { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }; -static unsigned char cfb_iv[8] = - { 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }; -static unsigned char cfb_buf1[40], cfb_buf2[40], cfb_tmp[8]; -static unsigned char plain[24] = { - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, - 0x69, 0x6d, 0x65, 0x20, 0x66, 0x6f, - 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 -}; - -static unsigned char cfb_cipher8[24] = { - 0xf3, 0x1f, 0xda, 0x07, 0x01, 0x14, 0x62, 0xee, 0x18, 0x7f, 0x43, 0xd8, - 0x0a, 0x7c, 0xd9, 0xb5, 0xb0, 0xd2, 0x90, 0xda, 0x6e, 0x5b, 0x9a, 0x87 -}; - -static unsigned char cfb_cipher16[24] = { - 0xF3, 0x09, 0x87, 0x87, 0x7F, 0x57, 0xF7, 0x3C, 0x36, 0xB6, 0xDB, 0x70, - 0xD8, 0xD5, 0x34, 0x19, 0xD3, 0x86, 0xB2, 0x23, 0xB7, 0xB2, 0xAD, 0x1B -}; - -static unsigned char cfb_cipher32[24] = { - 0xF3, 0x09, 0x62, 0x49, 0xA4, 0xDF, 0xA4, 0x9F, 0x33, 0xDC, 0x7B, 0xAD, - 0x4C, 0xC8, 0x9F, 0x64, 0xE4, 0x53, 0xE5, 0xEC, 0x67, 0x20, 0xDA, 0xB6 -}; - -static unsigned char cfb_cipher48[24] = { - 0xF3, 0x09, 0x62, 0x49, 0xC7, 0xF4, 0x30, 0xB5, 0x15, 0xEC, 0xBB, 0x85, - 0x97, 0x5A, 0x13, 0x8C, 0x68, 0x60, 0xE2, 0x38, 0x34, 0x3C, 0xDC, 0x1F -}; - -static unsigned char cfb_cipher64[24] = { - 0xF3, 0x09, 0x62, 0x49, 0xC7, 0xF4, 0x6E, 0x51, 0xA6, 0x9E, 0x83, 0x9B, - 0x1A, 0x92, 0xF7, 0x84, 0x03, 0x46, 0x71, 0x33, 0x89, 0x8E, 0xA6, 0x22 -}; - -static unsigned char ofb_key[8] = - { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }; -static unsigned char ofb_iv[8] = - { 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }; -static unsigned char ofb_buf1[24], ofb_buf2[24], ofb_tmp[8]; -static unsigned char ofb_cipher[24] = { - 0xf3, 0x09, 0x62, 0x49, 0xc7, 0xf4, 0x6e, 0x51, - 0x35, 0xf2, 0x4a, 0x24, 0x2e, 0xeb, 0x3d, 0x3f, - 0x3d, 0x6d, 0x5b, 0xe3, 0x25, 0x5a, 0xf8, 0xc3 -}; - -# if 0 -static DES_LONG cbc_cksum_ret = 0xB462FEF7L; -# else -static DES_LONG cbc_cksum_ret = 0xF7FE62B4L; -# endif -static unsigned char cbc_cksum_data[8] = - { 0x1D, 0x26, 0x93, 0x97, 0xf7, 0xfe, 0x62, 0xb4 }; - -static char *pt(unsigned char *p); -static int cfb_test(int bits, unsigned char *cfb_cipher); -static int cfb64_test(unsigned char *cfb_cipher); -static int ede_cfb64_test(unsigned char *cfb_cipher); -int main(int argc, char *argv[]) -{ - int j, err = 0; - unsigned int i; - des_cblock in, out, outin, iv3, iv2; - des_key_schedule ks, ks2, ks3; - unsigned char cbc_in[40]; - unsigned char cbc_out[40]; - DES_LONG cs; - unsigned char cret[8]; -# ifdef _CRAY - struct { - int a:32; - int b:32; - } lqret[2]; -# else - DES_LONG lqret[4]; -# endif - int num; - char *str; - -# ifndef OPENSSL_NO_DESCBCM - printf("Doing cbcm\n"); - if ((j = DES_set_key_checked(&cbc_key, &ks)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - if ((j = DES_set_key_checked(&cbc2_key, &ks2)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - if ((j = DES_set_key_checked(&cbc3_key, &ks3)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - memset(cbc_out, 0, 40); - memset(cbc_in, 0, 40); - i = strlen((char *)cbc_data) + 1; - /* i=((i+7)/8)*8; */ - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - memset(iv2, '\0', sizeof iv2); - - DES_ede3_cbcm_encrypt(cbc_data, cbc_out, 16L, &ks, &ks2, &ks3, &iv3, &iv2, - DES_ENCRYPT); - DES_ede3_cbcm_encrypt(&cbc_data[16], &cbc_out[16], i - 16, &ks, &ks2, - &ks3, &iv3, &iv2, DES_ENCRYPT); -/*- if (memcmp(cbc_out,cbc3_ok, - (unsigned int)(strlen((char *)cbc_data)+1+7)/8*8) != 0) - { - printf("des_ede3_cbc_encrypt encrypt error\n"); - err=1; - } -*/ - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - memset(iv2, '\0', sizeof iv2); - DES_ede3_cbcm_encrypt(cbc_out, cbc_in, i, &ks, &ks2, &ks3, &iv3, &iv2, - DES_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen((char *)cbc_data) + 1) != 0) { - unsigned int n; - - printf("des_ede3_cbcm_encrypt decrypt error\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc_data[n]); - printf("\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc_in[n]); - printf("\n"); - err = 1; - } -# endif - - printf("Doing ecb\n"); - for (i = 0; i < NUM_TESTS; i++) { - DES_set_key_unchecked(&key_data[i], &ks); - memcpy(in, plain_data[i], 8); - memset(out, 0, 8); - memset(outin, 0, 8); - des_ecb_encrypt(&in, &out, ks, DES_ENCRYPT); - des_ecb_encrypt(&out, &outin, ks, DES_DECRYPT); - - if (memcmp(out, cipher_data[i], 8) != 0) { - printf("Encryption error %2d\nk=%s p=%s o=%s act=%s\n", - i + 1, pt(key_data[i]), pt(in), pt(cipher_data[i]), - pt(out)); - err = 1; - } - if (memcmp(in, outin, 8) != 0) { - printf("Decryption error %2d\nk=%s p=%s o=%s act=%s\n", - i + 1, pt(key_data[i]), pt(out), pt(in), pt(outin)); - err = 1; - } - } - -# ifndef LIBDES_LIT - printf("Doing ede ecb\n"); - for (i = 0; i < (NUM_TESTS - 2); i++) { - DES_set_key_unchecked(&key_data[i], &ks); - DES_set_key_unchecked(&key_data[i + 1], &ks2); - DES_set_key_unchecked(&key_data[i + 2], &ks3); - memcpy(in, plain_data[i], 8); - memset(out, 0, 8); - memset(outin, 0, 8); - des_ecb2_encrypt(&in, &out, ks, ks2, DES_ENCRYPT); - des_ecb2_encrypt(&out, &outin, ks, ks2, DES_DECRYPT); - - if (memcmp(out, cipher_ecb2[i], 8) != 0) { - printf("Encryption error %2d\nk=%s p=%s o=%s act=%s\n", - i + 1, pt(key_data[i]), pt(in), pt(cipher_ecb2[i]), - pt(out)); - err = 1; - } - if (memcmp(in, outin, 8) != 0) { - printf("Decryption error %2d\nk=%s p=%s o=%s act=%s\n", - i + 1, pt(key_data[i]), pt(out), pt(in), pt(outin)); - err = 1; - } - } -# endif - - printf("Doing cbc\n"); - if ((j = DES_set_key_checked(&cbc_key, &ks)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - memset(cbc_out, 0, 40); - memset(cbc_in, 0, 40); - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - des_ncbc_encrypt(cbc_data, cbc_out, strlen((char *)cbc_data) + 1, ks, - &iv3, DES_ENCRYPT); - if (memcmp(cbc_out, cbc_ok, 32) != 0) { - printf("cbc_encrypt encrypt error\n"); - err = 1; - } - - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - des_ncbc_encrypt(cbc_out, cbc_in, strlen((char *)cbc_data) + 1, ks, - &iv3, DES_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen((char *)cbc_data)) != 0) { - printf("cbc_encrypt decrypt error\n"); - err = 1; - } -# ifndef LIBDES_LIT - printf("Doing desx cbc\n"); - if ((j = DES_set_key_checked(&cbc_key, &ks)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - memset(cbc_out, 0, 40); - memset(cbc_in, 0, 40); - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - des_xcbc_encrypt(cbc_data, cbc_out, strlen((char *)cbc_data) + 1, ks, - &iv3, &cbc2_key, &cbc3_key, DES_ENCRYPT); - if (memcmp(cbc_out, xcbc_ok, 32) != 0) { - printf("des_xcbc_encrypt encrypt error\n"); - err = 1; - } - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - des_xcbc_encrypt(cbc_out, cbc_in, strlen((char *)cbc_data) + 1, ks, - &iv3, &cbc2_key, &cbc3_key, DES_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen((char *)cbc_data) + 1) != 0) { - printf("des_xcbc_encrypt decrypt error\n"); - err = 1; - } -# endif - - printf("Doing ede cbc\n"); - if ((j = DES_set_key_checked(&cbc_key, &ks)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - if ((j = DES_set_key_checked(&cbc2_key, &ks2)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - if ((j = DES_set_key_checked(&cbc3_key, &ks3)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - memset(cbc_out, 0, 40); - memset(cbc_in, 0, 40); - i = strlen((char *)cbc_data) + 1; - /* i=((i+7)/8)*8; */ - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - - des_ede3_cbc_encrypt(cbc_data, cbc_out, 16L, ks, ks2, ks3, &iv3, - DES_ENCRYPT); - des_ede3_cbc_encrypt(&(cbc_data[16]), &(cbc_out[16]), i - 16, ks, ks2, - ks3, &iv3, DES_ENCRYPT); - if (memcmp - (cbc_out, cbc3_ok, - (unsigned int)(strlen((char *)cbc_data) + 1 + 7) / 8 * 8) != 0) { - unsigned int n; - - printf("des_ede3_cbc_encrypt encrypt error\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc_out[n]); - printf("\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc3_ok[n]); - printf("\n"); - err = 1; - } - - memcpy(iv3, cbc_iv, sizeof(cbc_iv)); - des_ede3_cbc_encrypt(cbc_out, cbc_in, i, ks, ks2, ks3, &iv3, DES_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen((char *)cbc_data) + 1) != 0) { - unsigned int n; - - printf("des_ede3_cbc_encrypt decrypt error\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc_data[n]); - printf("\n"); - for (n = 0; n < i; ++n) - printf(" %02x", cbc_in[n]); - printf("\n"); - err = 1; - } -# ifndef LIBDES_LIT - printf("Doing pcbc\n"); - if ((j = DES_set_key_checked(&cbc_key, &ks)) != 0) { - printf("Key error %d\n", j); - err = 1; - } - memset(cbc_out, 0, 40); - memset(cbc_in, 0, 40); - des_pcbc_encrypt(cbc_data, cbc_out, strlen((char *)cbc_data) + 1, ks, - &cbc_iv, DES_ENCRYPT); - if (memcmp(cbc_out, pcbc_ok, 32) != 0) { - printf("pcbc_encrypt encrypt error\n"); - err = 1; - } - des_pcbc_encrypt(cbc_out, cbc_in, strlen((char *)cbc_data) + 1, ks, - &cbc_iv, DES_DECRYPT); - if (memcmp(cbc_in, cbc_data, strlen((char *)cbc_data) + 1) != 0) { - printf("pcbc_encrypt decrypt error\n"); - err = 1; - } - - printf("Doing "); - printf("cfb8 "); - err += cfb_test(8, cfb_cipher8); - printf("cfb16 "); - err += cfb_test(16, cfb_cipher16); - printf("cfb32 "); - err += cfb_test(32, cfb_cipher32); - printf("cfb48 "); - err += cfb_test(48, cfb_cipher48); - printf("cfb64 "); - err += cfb_test(64, cfb_cipher64); - - printf("cfb64() "); - err += cfb64_test(cfb_cipher64); - - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - for (i = 0; i < sizeof(plain); i++) - des_cfb_encrypt(&(plain[i]), &(cfb_buf1[i]), - 8, 1, ks, &cfb_tmp, DES_ENCRYPT); - if (memcmp(cfb_cipher8, cfb_buf1, sizeof(plain)) != 0) { - printf("cfb_encrypt small encrypt error\n"); - err = 1; - } - - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - for (i = 0; i < sizeof(plain); i++) - des_cfb_encrypt(&(cfb_buf1[i]), &(cfb_buf2[i]), - 8, 1, ks, &cfb_tmp, DES_DECRYPT); - if (memcmp(plain, cfb_buf2, sizeof(plain)) != 0) { - printf("cfb_encrypt small decrypt error\n"); - err = 1; - } - - printf("ede_cfb64() "); - err += ede_cfb64_test(cfb_cipher64); - - printf("done\n"); - - printf("Doing ofb\n"); - DES_set_key_checked(&ofb_key, &ks); - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - des_ofb_encrypt(plain, ofb_buf1, 64, sizeof(plain) / 8, ks, &ofb_tmp); - if (memcmp(ofb_cipher, ofb_buf1, sizeof(ofb_buf1)) != 0) { - printf("ofb_encrypt encrypt error\n"); - printf("%02X %02X %02X %02X %02X %02X %02X %02X\n", - ofb_buf1[8 + 0], ofb_buf1[8 + 1], ofb_buf1[8 + 2], - ofb_buf1[8 + 3], ofb_buf1[8 + 4], ofb_buf1[8 + 5], - ofb_buf1[8 + 6], ofb_buf1[8 + 7]); - printf("%02X %02X %02X %02X %02X %02X %02X %02X\n", ofb_buf1[8 + 0], - ofb_cipher[8 + 1], ofb_cipher[8 + 2], ofb_cipher[8 + 3], - ofb_buf1[8 + 4], ofb_cipher[8 + 5], ofb_cipher[8 + 6], - ofb_cipher[8 + 7]); - err = 1; - } - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - des_ofb_encrypt(ofb_buf1, ofb_buf2, 64, sizeof(ofb_buf1) / 8, ks, - &ofb_tmp); - if (memcmp(plain, ofb_buf2, sizeof(ofb_buf2)) != 0) { - printf("ofb_encrypt decrypt error\n"); - printf("%02X %02X %02X %02X %02X %02X %02X %02X\n", - ofb_buf2[8 + 0], ofb_buf2[8 + 1], ofb_buf2[8 + 2], - ofb_buf2[8 + 3], ofb_buf2[8 + 4], ofb_buf2[8 + 5], - ofb_buf2[8 + 6], ofb_buf2[8 + 7]); - printf("%02X %02X %02X %02X %02X %02X %02X %02X\n", plain[8 + 0], - plain[8 + 1], plain[8 + 2], plain[8 + 3], plain[8 + 4], - plain[8 + 5], plain[8 + 6], plain[8 + 7]); - err = 1; - } - - printf("Doing ofb64\n"); - DES_set_key_checked(&ofb_key, &ks); - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - memset(ofb_buf1, 0, sizeof(ofb_buf1)); - memset(ofb_buf2, 0, sizeof(ofb_buf1)); - num = 0; - for (i = 0; i < sizeof(plain); i++) { - des_ofb64_encrypt(&(plain[i]), &(ofb_buf1[i]), 1, ks, &ofb_tmp, &num); - } - if (memcmp(ofb_cipher, ofb_buf1, sizeof(ofb_buf1)) != 0) { - printf("ofb64_encrypt encrypt error\n"); - err = 1; - } - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - num = 0; - des_ofb64_encrypt(ofb_buf1, ofb_buf2, sizeof(ofb_buf1), ks, &ofb_tmp, - &num); - if (memcmp(plain, ofb_buf2, sizeof(ofb_buf2)) != 0) { - printf("ofb64_encrypt decrypt error\n"); - err = 1; - } - - printf("Doing ede_ofb64\n"); - DES_set_key_checked(&ofb_key, &ks); - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - memset(ofb_buf1, 0, sizeof(ofb_buf1)); - memset(ofb_buf2, 0, sizeof(ofb_buf1)); - num = 0; - for (i = 0; i < sizeof(plain); i++) { - des_ede3_ofb64_encrypt(&(plain[i]), &(ofb_buf1[i]), 1, ks, ks, - ks, &ofb_tmp, &num); - } - if (memcmp(ofb_cipher, ofb_buf1, sizeof(ofb_buf1)) != 0) { - printf("ede_ofb64_encrypt encrypt error\n"); - err = 1; - } - memcpy(ofb_tmp, ofb_iv, sizeof(ofb_iv)); - num = 0; - des_ede3_ofb64_encrypt(ofb_buf1, ofb_buf2, sizeof(ofb_buf1), ks, ks, ks, - &ofb_tmp, &num); - if (memcmp(plain, ofb_buf2, sizeof(ofb_buf2)) != 0) { - printf("ede_ofb64_encrypt decrypt error\n"); - err = 1; - } - - printf("Doing cbc_cksum\n"); - DES_set_key_checked(&cbc_key, &ks); - cs = des_cbc_cksum(cbc_data, &cret, strlen((char *)cbc_data), ks, - &cbc_iv); - if (cs != cbc_cksum_ret) { - printf("bad return value (%08lX), should be %08lX\n", - (unsigned long)cs, (unsigned long)cbc_cksum_ret); - err = 1; - } - if (memcmp(cret, cbc_cksum_data, 8) != 0) { - printf("bad cbc_cksum block returned\n"); - err = 1; - } - - printf("Doing quad_cksum\n"); - cs = des_quad_cksum(cbc_data, (des_cblock *)lqret, - (long)strlen((char *)cbc_data), 2, - (des_cblock *)cbc_iv); - if (cs != 0x70d7a63aL) { - printf("quad_cksum error, ret %08lx should be 70d7a63a\n", - (unsigned long)cs); - err = 1; - } -# ifdef _CRAY - if (lqret[0].a != 0x327eba8dL) { - printf("quad_cksum error, out[0] %08lx is not %08lx\n", - (unsigned long)lqret[0].a, 0x327eba8dUL); - err = 1; - } - if (lqret[0].b != 0x201a49ccL) { - printf("quad_cksum error, out[1] %08lx is not %08lx\n", - (unsigned long)lqret[0].b, 0x201a49ccUL); - err = 1; - } - if (lqret[1].a != 0x70d7a63aL) { - printf("quad_cksum error, out[2] %08lx is not %08lx\n", - (unsigned long)lqret[1].a, 0x70d7a63aUL); - err = 1; - } - if (lqret[1].b != 0x501c2c26L) { - printf("quad_cksum error, out[3] %08lx is not %08lx\n", - (unsigned long)lqret[1].b, 0x501c2c26UL); - err = 1; - } -# else - if (lqret[0] != 0x327eba8dL) { - printf("quad_cksum error, out[0] %08lx is not %08lx\n", - (unsigned long)lqret[0], 0x327eba8dUL); - err = 1; - } - if (lqret[1] != 0x201a49ccL) { - printf("quad_cksum error, out[1] %08lx is not %08lx\n", - (unsigned long)lqret[1], 0x201a49ccUL); - err = 1; - } - if (lqret[2] != 0x70d7a63aL) { - printf("quad_cksum error, out[2] %08lx is not %08lx\n", - (unsigned long)lqret[2], 0x70d7a63aUL); - err = 1; - } - if (lqret[3] != 0x501c2c26L) { - printf("quad_cksum error, out[3] %08lx is not %08lx\n", - (unsigned long)lqret[3], 0x501c2c26UL); - err = 1; - } -# endif -# endif - - printf("input word alignment test"); - for (i = 0; i < 4; i++) { - printf(" %d", i); - des_ncbc_encrypt(&(cbc_out[i]), cbc_in, - strlen((char *)cbc_data) + 1, ks, - &cbc_iv, DES_ENCRYPT); - } - printf("\noutput word alignment test"); - for (i = 0; i < 4; i++) { - printf(" %d", i); - des_ncbc_encrypt(cbc_out, &(cbc_in[i]), - strlen((char *)cbc_data) + 1, ks, - &cbc_iv, DES_ENCRYPT); - } - printf("\n"); - printf("fast crypt test "); - str = crypt("testing", "ef"); - if (strcmp("efGnQx2725bI2", str) != 0) { - printf("fast crypt error, %s should be efGnQx2725bI2\n", str); - err = 1; - } - str = crypt("bca76;23", "yA"); - if (strcmp("yA1Rp/1hZXIJk", str) != 0) { - printf("fast crypt error, %s should be yA1Rp/1hZXIJk\n", str); - err = 1; - } -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - printf("\n"); - return (err); -} - -static char *pt(unsigned char *p) -{ - static char bufs[10][20]; - static int bnum = 0; - char *ret; - int i; - static char *f = "0123456789ABCDEF"; - - ret = &(bufs[bnum++][0]); - bnum %= 10; - for (i = 0; i < 8; i++) { - ret[i * 2] = f[(p[i] >> 4) & 0xf]; - ret[i * 2 + 1] = f[p[i] & 0xf]; - } - ret[16] = '\0'; - return (ret); -} - -# ifndef LIBDES_LIT - -static int cfb_test(int bits, unsigned char *cfb_cipher) -{ - des_key_schedule ks; - int i, err = 0; - - DES_set_key_checked(&cfb_key, &ks); - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - des_cfb_encrypt(plain, cfb_buf1, bits, sizeof(plain), ks, &cfb_tmp, - DES_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, sizeof(plain)) != 0) { - err = 1; - printf("cfb_encrypt encrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - des_cfb_encrypt(cfb_buf1, cfb_buf2, bits, sizeof(plain), ks, &cfb_tmp, - DES_DECRYPT); - if (memcmp(plain, cfb_buf2, sizeof(plain)) != 0) { - err = 1; - printf("cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - return (err); -} - -static int cfb64_test(unsigned char *cfb_cipher) -{ - des_key_schedule ks; - int err = 0, i, n; - - DES_set_key_checked(&cfb_key, &ks); - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - n = 0; - des_cfb64_encrypt(plain, cfb_buf1, 12, ks, &cfb_tmp, &n, DES_ENCRYPT); - des_cfb64_encrypt(&(plain[12]), &(cfb_buf1[12]), sizeof(plain) - 12, ks, - &cfb_tmp, &n, DES_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, sizeof(plain)) != 0) { - err = 1; - printf("cfb_encrypt encrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - n = 0; - des_cfb64_encrypt(cfb_buf1, cfb_buf2, 17, ks, &cfb_tmp, &n, DES_DECRYPT); - des_cfb64_encrypt(&(cfb_buf1[17]), &(cfb_buf2[17]), - sizeof(plain) - 17, ks, &cfb_tmp, &n, DES_DECRYPT); - if (memcmp(plain, cfb_buf2, sizeof(plain)) != 0) { - err = 1; - printf("cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf2[i]))); - } - return (err); -} - -static int ede_cfb64_test(unsigned char *cfb_cipher) -{ - des_key_schedule ks; - int err = 0, i, n; - - DES_set_key_checked(&cfb_key, &ks); - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - n = 0; - des_ede3_cfb64_encrypt(plain, cfb_buf1, 12, ks, ks, ks, &cfb_tmp, &n, - DES_ENCRYPT); - des_ede3_cfb64_encrypt(&(plain[12]), &(cfb_buf1[12]), - sizeof(plain) - 12, ks, ks, ks, - &cfb_tmp, &n, DES_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, sizeof(plain)) != 0) { - err = 1; - printf("ede_cfb_encrypt encrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, sizeof(cfb_iv)); - n = 0; - des_ede3_cfb64_encrypt(cfb_buf1, cfb_buf2, (long)17, ks, ks, ks, - &cfb_tmp, &n, DES_DECRYPT); - des_ede3_cfb64_encrypt(&(cfb_buf1[17]), &(cfb_buf2[17]), - sizeof(plain) - 17, ks, ks, ks, - &cfb_tmp, &n, DES_DECRYPT); - if (memcmp(plain, cfb_buf2, sizeof(plain)) != 0) { - err = 1; - printf("ede_cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf2[i]))); - } - return (err); -} - -# endif -#endif diff --git a/drivers/builtin_openssl2/crypto/des/read_pwd.c b/drivers/builtin_openssl2/crypto/des/read_pwd.c index 16ba0a9eb2..514a7063b4 100644 --- a/drivers/builtin_openssl2/crypto/des/read_pwd.c +++ b/drivers/builtin_openssl2/crypto/des/read_pwd.c @@ -172,7 +172,7 @@ # include #endif -#if defined(OPENSSL_SYS_MSDOS) && !defined(__CYGWIN32__) && !defined(OPENSSL_SYS_WINCE) +#if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WINCE) # include # define fgets(a,b,c) noecho_fgets(a,b,c) #endif diff --git a/drivers/builtin_openssl2/crypto/dh/dh_ameth.c b/drivers/builtin_openssl2/crypto/dh/dh_ameth.c index 873eb2e22d..ac72468bd1 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_ameth.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_ameth.c @@ -63,6 +63,31 @@ #include #include #include "asn1_locl.h" +#ifndef OPENSSL_NO_CMS +# include +#endif + +extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth; + +/* + * i2d/d2i like DH parameter functions which use the appropriate routine for + * PKCS#3 DH or X9.42 DH. + */ + +static DH *d2i_dhp(const EVP_PKEY *pkey, const unsigned char **pp, + long length) +{ + if (pkey->ameth == &dhx_asn1_meth) + return d2i_DHxparams(NULL, pp, length); + return d2i_DHparams(NULL, pp, length); +} + +static int i2d_dhp(const EVP_PKEY *pkey, const DH *a, unsigned char **pp) +{ + if (pkey->ameth == &dhx_asn1_meth) + return i2d_DHxparams(a, pp); + return i2d_DHparams(a, pp); +} static void int_dh_free(EVP_PKEY *pkey) { @@ -94,7 +119,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) pm = pstr->data; pmlen = pstr->length; - if (!(dh = d2i_DHparams(NULL, &pm, pmlen))) { + if (!(dh = d2i_dhp(pkey, &pm, pmlen))) { DHerr(DH_F_DH_PUB_DECODE, DH_R_DECODE_ERROR); goto err; } @@ -111,7 +136,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) } ASN1_INTEGER_free(public_key); - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); return 1; err: @@ -139,7 +164,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE); goto err; } - str->length = i2d_DHparams(dh, &str->data); + str->length = i2d_dhp(pkey, dh, &str->data); if (str->length <= 0) { DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE); goto err; @@ -159,7 +184,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) goto err; } - if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(EVP_PKEY_DH), + if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(pkey->ameth->pkey_id), ptype, str, penc, penclen)) return 1; @@ -204,7 +229,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) pstr = pval; pm = pstr->data; pmlen = pstr->length; - if (!(dh = d2i_DHparams(NULL, &pm, pmlen))) + if (!(dh = d2i_dhp(pkey, &pm, pmlen))) goto decerr; /* We have parameters now set private key */ if (!(dh->priv_key = ASN1_INTEGER_to_BN(privkey, NULL))) { @@ -215,7 +240,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) if (!DH_generate_key(dh)) goto dherr; - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); ASN1_STRING_clear_free(privkey); @@ -243,7 +268,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) goto err; } - params->length = i2d_DHparams(pkey->pkey.dh, ¶ms->data); + params->length = i2d_dhp(pkey, pkey->pkey.dh, ¶ms->data); if (params->length <= 0) { DHerr(DH_F_DH_PRIV_ENCODE, ERR_R_MALLOC_FAILURE); goto err; @@ -263,7 +288,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) ASN1_STRING_clear_free(prkey); prkey = NULL; - if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(NID_dhKeyAgreement), 0, + if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(pkey->ameth->pkey_id), 0, V_ASN1_SEQUENCE, params, dp, dplen)) goto err; @@ -292,17 +317,17 @@ static int dh_param_decode(EVP_PKEY *pkey, const unsigned char **pder, int derlen) { DH *dh; - if (!(dh = d2i_DHparams(NULL, pder, derlen))) { + if (!(dh = d2i_dhp(pkey, pder, derlen))) { DHerr(DH_F_DH_PARAM_DECODE, ERR_R_DH_LIB); return 0; } - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); return 1; } static int dh_param_encode(const EVP_PKEY *pkey, unsigned char **pder) { - return i2d_DHparams(pkey->pkey.dh, pder); + return i2d_dhp(pkey, pkey->pkey.dh, pder); } static int do_dh_print(BIO *bp, const DH *x, int indent, @@ -334,15 +359,18 @@ static int do_dh_print(BIO *bp, const DH *x, int indent, } update_buflen(x->g, &buf_len); + update_buflen(x->q, &buf_len); + update_buflen(x->j, &buf_len); + update_buflen(x->counter, &buf_len); update_buflen(pub_key, &buf_len); update_buflen(priv_key, &buf_len); if (ptype == 2) - ktype = "PKCS#3 DH Private-Key"; + ktype = "DH Private-Key"; else if (ptype == 1) - ktype = "PKCS#3 DH Public-Key"; + ktype = "DH Public-Key"; else - ktype = "PKCS#3 DH Parameters"; + ktype = "DH Parameters"; m = OPENSSL_malloc(buf_len + 10); if (m == NULL) { @@ -364,6 +392,29 @@ static int do_dh_print(BIO *bp, const DH *x, int indent, goto err; if (!ASN1_bn_print(bp, "generator:", x->g, m, indent)) goto err; + if (x->q && !ASN1_bn_print(bp, "subgroup order:", x->q, m, indent)) + goto err; + if (x->j && !ASN1_bn_print(bp, "subgroup factor:", x->j, m, indent)) + goto err; + if (x->seed) { + int i; + BIO_indent(bp, indent, 128); + BIO_puts(bp, "seed:"); + for (i = 0; i < x->seedlen; i++) { + if ((i % 15) == 0) { + if (BIO_puts(bp, "\n") <= 0 + || !BIO_indent(bp, indent + 4, 128)) + goto err; + } + if (BIO_printf(bp, "%02x%s", x->seed[i], + ((i + 1) == x->seedlen) ? "" : ":") <= 0) + goto err; + } + if (BIO_write(bp, "\n", 1) <= 0) + return (0); + } + if (x->counter && !ASN1_bn_print(bp, "counter:", x->counter, m, indent)) + goto err; if (x->length != 0) { BIO_indent(bp, indent, 128); if (BIO_printf(bp, "recommended-private-length: %d bits\n", @@ -396,29 +447,76 @@ static int dh_cmp_parameters(const EVP_PKEY *a, const EVP_PKEY *b) if (BN_cmp(a->pkey.dh->p, b->pkey.dh->p) || BN_cmp(a->pkey.dh->g, b->pkey.dh->g)) return 0; - else - return 1; + else if (a->ameth == &dhx_asn1_meth) { + if (BN_cmp(a->pkey.dh->q, b->pkey.dh->q)) + return 0; + } + return 1; } -static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) +static int int_dh_bn_cpy(BIGNUM **dst, const BIGNUM *src) { BIGNUM *a; + if (src) { + a = BN_dup(src); + if (!a) + return 0; + } else + a = NULL; + if (*dst) + BN_free(*dst); + *dst = a; + return 1; +} - if ((a = BN_dup(from->pkey.dh->p)) == NULL) +static int int_dh_param_copy(DH *to, const DH *from, int is_x942) +{ + if (is_x942 == -1) + is_x942 = ! !from->q; + if (!int_dh_bn_cpy(&to->p, from->p)) return 0; - if (to->pkey.dh->p != NULL) - BN_free(to->pkey.dh->p); - to->pkey.dh->p = a; - - if ((a = BN_dup(from->pkey.dh->g)) == NULL) + if (!int_dh_bn_cpy(&to->g, from->g)) return 0; - if (to->pkey.dh->g != NULL) - BN_free(to->pkey.dh->g); - to->pkey.dh->g = a; - + if (is_x942) { + if (!int_dh_bn_cpy(&to->q, from->q)) + return 0; + if (!int_dh_bn_cpy(&to->j, from->j)) + return 0; + if (to->seed) { + OPENSSL_free(to->seed); + to->seed = NULL; + to->seedlen = 0; + } + if (from->seed) { + to->seed = BUF_memdup(from->seed, from->seedlen); + if (!to->seed) + return 0; + to->seedlen = from->seedlen; + } + } else + to->length = from->length; return 1; } +DH *DHparams_dup(DH *dh) +{ + DH *ret; + ret = DH_new(); + if (!ret) + return NULL; + if (!int_dh_param_copy(ret, dh, -1)) { + DH_free(ret); + return NULL; + } + return ret; +} + +static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) +{ + return int_dh_param_copy(to->pkey.dh, from->pkey.dh, + from->ameth == &dhx_asn1_meth); +} + static int dh_missing_parameters(const EVP_PKEY *a) { if (!a->pkey.dh->p || !a->pkey.dh->g) @@ -459,6 +557,33 @@ int DHparams_print(BIO *bp, const DH *x) return do_dh_print(bp, x, 4, NULL, 0); } +#ifndef OPENSSL_NO_CMS +static int dh_cms_decrypt(CMS_RecipientInfo *ri); +static int dh_cms_encrypt(CMS_RecipientInfo *ri); +#endif + +static int dh_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) +{ + switch (op) { +#ifndef OPENSSL_NO_CMS + + case ASN1_PKEY_CTRL_CMS_ENVELOPE: + if (arg1 == 1) + return dh_cms_decrypt(arg2); + else if (arg1 == 0) + return dh_cms_encrypt(arg2); + return -2; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_AGREE; + return 1; +#endif + default: + return -2; + } + +} + const EVP_PKEY_ASN1_METHOD dh_asn1_meth = { EVP_PKEY_DH, EVP_PKEY_DH, @@ -490,3 +615,343 @@ const EVP_PKEY_ASN1_METHOD dh_asn1_meth = { int_dh_free, 0 }; + +const EVP_PKEY_ASN1_METHOD dhx_asn1_meth = { + EVP_PKEY_DHX, + EVP_PKEY_DHX, + 0, + + "X9.42 DH", + "OpenSSL X9.42 DH method", + + dh_pub_decode, + dh_pub_encode, + dh_pub_cmp, + dh_public_print, + + dh_priv_decode, + dh_priv_encode, + dh_private_print, + + int_dh_size, + dh_bits, + + dh_param_decode, + dh_param_encode, + dh_missing_parameters, + dh_copy_parameters, + dh_cmp_parameters, + dh_param_print, + 0, + + int_dh_free, + dh_pkey_ctrl +}; + +#ifndef OPENSSL_NO_CMS + +static int dh_cms_set_peerkey(EVP_PKEY_CTX *pctx, + X509_ALGOR *alg, ASN1_BIT_STRING *pubkey) +{ + ASN1_OBJECT *aoid; + int atype; + void *aval; + ASN1_INTEGER *public_key = NULL; + int rv = 0; + EVP_PKEY *pkpeer = NULL, *pk = NULL; + DH *dhpeer = NULL; + const unsigned char *p; + int plen; + + X509_ALGOR_get0(&aoid, &atype, &aval, alg); + if (OBJ_obj2nid(aoid) != NID_dhpublicnumber) + goto err; + /* Only absent parameters allowed in RFC XXXX */ + if (atype != V_ASN1_UNDEF && atype == V_ASN1_NULL) + goto err; + + pk = EVP_PKEY_CTX_get0_pkey(pctx); + if (!pk) + goto err; + if (pk->type != EVP_PKEY_DHX) + goto err; + /* Get parameters from parent key */ + dhpeer = DHparams_dup(pk->pkey.dh); + /* We have parameters now set public key */ + plen = ASN1_STRING_length(pubkey); + p = ASN1_STRING_data(pubkey); + if (!p || !plen) + goto err; + + if (!(public_key = d2i_ASN1_INTEGER(NULL, &p, plen))) { + DHerr(DH_F_DH_CMS_SET_PEERKEY, DH_R_DECODE_ERROR); + goto err; + } + + /* We have parameters now set public key */ + if (!(dhpeer->pub_key = ASN1_INTEGER_to_BN(public_key, NULL))) { + DHerr(DH_F_DH_CMS_SET_PEERKEY, DH_R_BN_DECODE_ERROR); + goto err; + } + + pkpeer = EVP_PKEY_new(); + if (!pkpeer) + goto err; + EVP_PKEY_assign(pkpeer, pk->ameth->pkey_id, dhpeer); + dhpeer = NULL; + if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0) + rv = 1; + err: + if (public_key) + ASN1_INTEGER_free(public_key); + if (pkpeer) + EVP_PKEY_free(pkpeer); + if (dhpeer) + DH_free(dhpeer); + return rv; +} + +static int dh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri) +{ + int rv = 0; + + X509_ALGOR *alg, *kekalg = NULL; + ASN1_OCTET_STRING *ukm; + const unsigned char *p; + unsigned char *dukm = NULL; + size_t dukmlen = 0; + int keylen, plen; + const EVP_CIPHER *kekcipher; + EVP_CIPHER_CTX *kekctx; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm)) + goto err; + + /* + * For DH we only have one OID permissible. If ever any more get defined + * we will need something cleverer. + */ + if (OBJ_obj2nid(alg->algorithm) != NID_id_smime_alg_ESDH) { + DHerr(DH_F_DH_CMS_SET_SHARED_INFO, DH_R_KDF_PARAMETER_ERROR); + goto err; + } + + if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, EVP_PKEY_DH_KDF_X9_42) <= 0) + goto err; + + if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, EVP_sha1()) <= 0) + goto err; + + if (alg->parameter->type != V_ASN1_SEQUENCE) + goto err; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + kekalg = d2i_X509_ALGOR(NULL, &p, plen); + if (!kekalg) + goto err; + kekctx = CMS_RecipientInfo_kari_get0_ctx(ri); + if (!kekctx) + goto err; + kekcipher = EVP_get_cipherbyobj(kekalg->algorithm); + if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE) + goto err; + if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL)) + goto err; + if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0) + goto err; + + keylen = EVP_CIPHER_CTX_key_length(kekctx); + if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0) + goto err; + /* Use OBJ_nid2obj to ensure we use built in OID that isn't freed */ + if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx, + OBJ_nid2obj(EVP_CIPHER_type(kekcipher))) + <= 0) + goto err; + + if (ukm) { + dukmlen = ASN1_STRING_length(ukm); + dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen); + if (!dukm) + goto err; + } + + if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0) + goto err; + dukm = NULL; + + rv = 1; + err: + if (kekalg) + X509_ALGOR_free(kekalg); + if (dukm) + OPENSSL_free(dukm); + return rv; +} + +static int dh_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* See if we need to set peer key */ + if (!EVP_PKEY_CTX_get0_peerkey(pctx)) { + X509_ALGOR *alg; + ASN1_BIT_STRING *pubkey; + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &alg, &pubkey, + NULL, NULL, NULL)) + return 0; + if (!alg || !pubkey) + return 0; + if (!dh_cms_set_peerkey(pctx, alg, pubkey)) { + DHerr(DH_F_DH_CMS_DECRYPT, DH_R_PEER_KEY_ERROR); + return 0; + } + } + /* Set DH derivation parameters and initialise unwrap context */ + if (!dh_cms_set_shared_info(pctx, ri)) { + DHerr(DH_F_DH_CMS_DECRYPT, DH_R_SHARED_INFO_ERROR); + return 0; + } + return 1; +} + +static int dh_cms_encrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + EVP_PKEY *pkey; + EVP_CIPHER_CTX *ctx; + int keylen; + X509_ALGOR *talg, *wrap_alg = NULL; + ASN1_OBJECT *aoid; + ASN1_BIT_STRING *pubkey; + ASN1_STRING *wrap_str; + ASN1_OCTET_STRING *ukm; + unsigned char *penc = NULL, *dukm = NULL; + int penclen; + size_t dukmlen = 0; + int rv = 0; + int kdf_type, wrap_nid; + const EVP_MD *kdf_md; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* Get ephemeral key */ + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey, + NULL, NULL, NULL)) + goto err; + X509_ALGOR_get0(&aoid, NULL, NULL, talg); + /* Is everything uninitialised? */ + if (aoid == OBJ_nid2obj(NID_undef)) { + ASN1_INTEGER *pubk; + pubk = BN_to_ASN1_INTEGER(pkey->pkey.dh->pub_key, NULL); + if (!pubk) + goto err; + /* Set the key */ + + penclen = i2d_ASN1_INTEGER(pubk, &penc); + ASN1_INTEGER_free(pubk); + if (penclen <= 0) + goto err; + ASN1_STRING_set0(pubkey, penc, penclen); + pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07); + pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT; + + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_dhpublicnumber), + V_ASN1_UNDEF, NULL); + } + + /* See if custom paraneters set */ + kdf_type = EVP_PKEY_CTX_get_dh_kdf_type(pctx); + if (kdf_type <= 0) + goto err; + if (!EVP_PKEY_CTX_get_dh_kdf_md(pctx, &kdf_md)) + goto err; + + if (kdf_type == EVP_PKEY_DH_KDF_NONE) { + kdf_type = EVP_PKEY_DH_KDF_X9_42; + if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, kdf_type) <= 0) + goto err; + } else if (kdf_type != EVP_PKEY_DH_KDF_X9_42) + /* Unknown KDF */ + goto err; + if (kdf_md == NULL) { + /* Only SHA1 supported */ + kdf_md = EVP_sha1(); + if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, kdf_md) <= 0) + goto err; + } else if (EVP_MD_type(kdf_md) != NID_sha1) + /* Unsupported digest */ + goto err; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm)) + goto err; + + /* Get wrap NID */ + ctx = CMS_RecipientInfo_kari_get0_ctx(ri); + wrap_nid = EVP_CIPHER_CTX_type(ctx); + if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx, OBJ_nid2obj(wrap_nid)) <= 0) + goto err; + keylen = EVP_CIPHER_CTX_key_length(ctx); + + /* Package wrap algorithm in an AlgorithmIdentifier */ + + wrap_alg = X509_ALGOR_new(); + if (!wrap_alg) + goto err; + wrap_alg->algorithm = OBJ_nid2obj(wrap_nid); + wrap_alg->parameter = ASN1_TYPE_new(); + if (!wrap_alg->parameter) + goto err; + if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0) + goto err; + if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) { + ASN1_TYPE_free(wrap_alg->parameter); + wrap_alg->parameter = NULL; + } + + if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + if (ukm) { + dukmlen = ASN1_STRING_length(ukm); + dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen); + if (!dukm) + goto err; + } + + if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0) + goto err; + dukm = NULL; + + /* + * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter + * of another AlgorithmIdentifier. + */ + penc = NULL; + penclen = i2d_X509_ALGOR(wrap_alg, &penc); + if (!penc || !penclen) + goto err; + wrap_str = ASN1_STRING_new(); + if (!wrap_str) + goto err; + ASN1_STRING_set0(wrap_str, penc, penclen); + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_id_smime_alg_ESDH), + V_ASN1_SEQUENCE, wrap_str); + + rv = 1; + + err: + if (penc) + OPENSSL_free(penc); + if (wrap_alg) + X509_ALGOR_free(wrap_alg); + return rv; +} + +#endif diff --git a/drivers/builtin_openssl2/crypto/dh/dh_asn1.c b/drivers/builtin_openssl2/crypto/dh/dh_asn1.c index e6ee3cfc66..f470214399 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_asn1.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_asn1.c @@ -89,7 +89,101 @@ ASN1_SEQUENCE_cb(DHparams, dh_cb) = { IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(DH, DHparams, DHparams) -DH *DHparams_dup(DH *dh) +/* + * Internal only structures for handling X9.42 DH: this gets translated to or + * from a DH structure straight away. + */ + +typedef struct { + ASN1_BIT_STRING *seed; + BIGNUM *counter; +} int_dhvparams; + +typedef struct { + BIGNUM *p; + BIGNUM *q; + BIGNUM *g; + BIGNUM *j; + int_dhvparams *vparams; +} int_dhx942_dh; + +ASN1_SEQUENCE(DHvparams) = { + ASN1_SIMPLE(int_dhvparams, seed, ASN1_BIT_STRING), + ASN1_SIMPLE(int_dhvparams, counter, BIGNUM) +} ASN1_SEQUENCE_END_name(int_dhvparams, DHvparams) + +ASN1_SEQUENCE(DHxparams) = { + ASN1_SIMPLE(int_dhx942_dh, p, BIGNUM), + ASN1_SIMPLE(int_dhx942_dh, g, BIGNUM), + ASN1_SIMPLE(int_dhx942_dh, q, BIGNUM), + ASN1_OPT(int_dhx942_dh, j, BIGNUM), + ASN1_OPT(int_dhx942_dh, vparams, DHvparams), +} ASN1_SEQUENCE_END_name(int_dhx942_dh, DHxparams) + +int_dhx942_dh *d2i_int_dhx(int_dhx942_dh **a, + const unsigned char **pp, long length); +int i2d_int_dhx(const int_dhx942_dh *a, unsigned char **pp); + +IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(int_dhx942_dh, DHxparams, int_dhx) + +/* Application leve function: read in X9.42 DH parameters into DH structure */ + +DH *d2i_DHxparams(DH **a, const unsigned char **pp, long length) { - return ASN1_item_dup(ASN1_ITEM_rptr(DHparams), dh); + int_dhx942_dh *dhx = NULL; + DH *dh = NULL; + dh = DH_new(); + if (!dh) + return NULL; + dhx = d2i_int_dhx(NULL, pp, length); + if (!dhx) { + DH_free(dh); + return NULL; + } + + if (a) { + if (*a) + DH_free(*a); + *a = dh; + } + + dh->p = dhx->p; + dh->q = dhx->q; + dh->g = dhx->g; + dh->j = dhx->j; + + if (dhx->vparams) { + dh->seed = dhx->vparams->seed->data; + dh->seedlen = dhx->vparams->seed->length; + dh->counter = dhx->vparams->counter; + dhx->vparams->seed->data = NULL; + ASN1_BIT_STRING_free(dhx->vparams->seed); + OPENSSL_free(dhx->vparams); + dhx->vparams = NULL; + } + + OPENSSL_free(dhx); + return dh; +} + +int i2d_DHxparams(const DH *dh, unsigned char **pp) +{ + int_dhx942_dh dhx; + int_dhvparams dhv; + ASN1_BIT_STRING bs; + dhx.p = dh->p; + dhx.g = dh->g; + dhx.q = dh->q; + dhx.j = dh->j; + if (dh->counter && dh->seed && dh->seedlen > 0) { + bs.flags = ASN1_STRING_FLAG_BITS_LEFT; + bs.data = dh->seed; + bs.length = dh->seedlen; + dhv.seed = &bs; + dhv.counter = dh->counter; + dhx.vparams = &dhv; + } else + dhx.vparams = NULL; + + return i2d_int_dhx(&dhx, pp); } diff --git a/drivers/builtin_openssl2/crypto/dh/dh_check.c b/drivers/builtin_openssl2/crypto/dh/dh_check.c index c39ed97ba6..0277041114 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_check.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_check.c @@ -76,17 +76,43 @@ int DH_check(const DH *dh, int *ret) int ok = 0; BN_CTX *ctx = NULL; BN_ULONG l; - BIGNUM *q = NULL; + BIGNUM *t1 = NULL, *t2 = NULL; *ret = 0; ctx = BN_CTX_new(); if (ctx == NULL) goto err; - q = BN_new(); - if (q == NULL) + BN_CTX_start(ctx); + t1 = BN_CTX_get(ctx); + if (t1 == NULL) goto err; + t2 = BN_CTX_get(ctx); + if (t2 == NULL) + goto err; + + if (dh->q) { + if (BN_cmp(dh->g, BN_value_one()) <= 0) + *ret |= DH_NOT_SUITABLE_GENERATOR; + else if (BN_cmp(dh->g, dh->p) >= 0) + *ret |= DH_NOT_SUITABLE_GENERATOR; + else { + /* Check g^q == 1 mod p */ + if (!BN_mod_exp(t1, dh->g, dh->q, dh->p, ctx)) + goto err; + if (!BN_is_one(t1)) + *ret |= DH_NOT_SUITABLE_GENERATOR; + } + if (!BN_is_prime_ex(dh->q, BN_prime_checks, ctx, NULL)) + *ret |= DH_CHECK_Q_NOT_PRIME; + /* Check p == 1 mod q i.e. q divides p - 1 */ + if (!BN_div(t1, t2, dh->p, dh->q, ctx)) + goto err; + if (!BN_is_one(t2)) + *ret |= DH_CHECK_INVALID_Q_VALUE; + if (dh->j && BN_cmp(dh->j, t1)) + *ret |= DH_CHECK_INVALID_J_VALUE; - if (BN_is_word(dh->g, DH_GENERATOR_2)) { + } else if (BN_is_word(dh->g, DH_GENERATOR_2)) { l = BN_mod_word(dh->p, 24); if (l != 11) *ret |= DH_NOT_SUITABLE_GENERATOR; @@ -107,41 +133,55 @@ int DH_check(const DH *dh, int *ret) if (!BN_is_prime_ex(dh->p, BN_prime_checks, ctx, NULL)) *ret |= DH_CHECK_P_NOT_PRIME; - else { - if (!BN_rshift1(q, dh->p)) + else if (!dh->q) { + if (!BN_rshift1(t1, dh->p)) goto err; - if (!BN_is_prime_ex(q, BN_prime_checks, ctx, NULL)) + if (!BN_is_prime_ex(t1, BN_prime_checks, ctx, NULL)) *ret |= DH_CHECK_P_NOT_SAFE_PRIME; } ok = 1; err: - if (ctx != NULL) + if (ctx != NULL) { + BN_CTX_end(ctx); BN_CTX_free(ctx); - if (q != NULL) - BN_free(q); + } return (ok); } int DH_check_pub_key(const DH *dh, const BIGNUM *pub_key, int *ret) { int ok = 0; - BIGNUM *q = NULL; + BIGNUM *tmp = NULL; + BN_CTX *ctx = NULL; *ret = 0; - q = BN_new(); - if (q == NULL) + ctx = BN_CTX_new(); + if (ctx == NULL) + goto err; + BN_CTX_start(ctx); + tmp = BN_CTX_get(ctx); + if (tmp == NULL || !BN_set_word(tmp, 1)) goto err; - BN_set_word(q, 1); - if (BN_cmp(pub_key, q) <= 0) + if (BN_cmp(pub_key, tmp) <= 0) *ret |= DH_CHECK_PUBKEY_TOO_SMALL; - BN_copy(q, dh->p); - BN_sub_word(q, 1); - if (BN_cmp(pub_key, q) >= 0) + if (BN_copy(tmp, dh->p) == NULL || !BN_sub_word(tmp, 1)) + goto err; + if (BN_cmp(pub_key, tmp) >= 0) *ret |= DH_CHECK_PUBKEY_TOO_LARGE; + if (dh->q != NULL) { + /* Check pub_key^q == 1 mod p */ + if (!BN_mod_exp(tmp, pub_key, dh->q, dh->p, ctx)) + goto err; + if (!BN_is_one(tmp)) + *ret |= DH_CHECK_PUBKEY_INVALID; + } + ok = 1; err: - if (q != NULL) - BN_free(q); + if (ctx != NULL) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } return (ok); } diff --git a/drivers/builtin_openssl2/crypto/dh/dh_err.c b/drivers/builtin_openssl2/crypto/dh/dh_err.c index 6ed5eb7890..b890cca817 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_err.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_err.c @@ -1,6 +1,6 @@ /* crypto/dh/dh_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,6 +73,9 @@ static ERR_STRING_DATA DH_str_functs[] = { {ERR_FUNC(DH_F_COMPUTE_KEY), "COMPUTE_KEY"}, {ERR_FUNC(DH_F_DHPARAMS_PRINT_FP), "DHparams_print_fp"}, {ERR_FUNC(DH_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"}, + {ERR_FUNC(DH_F_DH_CMS_DECRYPT), "DH_CMS_DECRYPT"}, + {ERR_FUNC(DH_F_DH_CMS_SET_PEERKEY), "DH_CMS_SET_PEERKEY"}, + {ERR_FUNC(DH_F_DH_CMS_SET_SHARED_INFO), "DH_CMS_SET_SHARED_INFO"}, {ERR_FUNC(DH_F_DH_COMPUTE_KEY), "DH_compute_key"}, {ERR_FUNC(DH_F_DH_GENERATE_KEY), "DH_generate_key"}, {ERR_FUNC(DH_F_DH_GENERATE_PARAMETERS_EX), "DH_generate_parameters_ex"}, @@ -96,6 +99,7 @@ static ERR_STRING_DATA DH_str_reasons[] = { {ERR_REASON(DH_R_BN_ERROR), "bn error"}, {ERR_REASON(DH_R_DECODE_ERROR), "decode error"}, {ERR_REASON(DH_R_INVALID_PUBKEY), "invalid public key"}, + {ERR_REASON(DH_R_KDF_PARAMETER_ERROR), "kdf parameter error"}, {ERR_REASON(DH_R_KEYS_NOT_SET), "keys not set"}, {ERR_REASON(DH_R_KEY_SIZE_TOO_SMALL), "key size too small"}, {ERR_REASON(DH_R_MODULUS_TOO_LARGE), "modulus too large"}, @@ -103,6 +107,8 @@ static ERR_STRING_DATA DH_str_reasons[] = { {ERR_REASON(DH_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(DH_R_NO_PRIVATE_VALUE), "no private value"}, {ERR_REASON(DH_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"}, + {ERR_REASON(DH_R_PEER_KEY_ERROR), "peer key error"}, + {ERR_REASON(DH_R_SHARED_INFO_ERROR), "shared info error"}, {0, NULL} }; diff --git a/drivers/builtin_openssl2/crypto/dh/dh_kdf.c b/drivers/builtin_openssl2/crypto/dh/dh_kdf.c new file mode 100644 index 0000000000..a882cb286e --- /dev/null +++ b/drivers/builtin_openssl2/crypto/dh/dh_kdf.c @@ -0,0 +1,187 @@ +/* crypto/dh/dh_kdf.c */ +/* + * Written by Stephen Henson for the OpenSSL project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include +#include + +/* Key derivation from X9.42/RFC2631 */ + +#define DH_KDF_MAX (1L << 30) + +/* Skip past an ASN1 structure: for OBJECT skip content octets too */ + +static int skip_asn1(unsigned char **pp, long *plen, int exptag) +{ + const unsigned char *q = *pp; + int i, tag, xclass; + long tmplen; + i = ASN1_get_object(&q, &tmplen, &tag, &xclass, *plen); + if (i & 0x80) + return 0; + if (tag != exptag || xclass != V_ASN1_UNIVERSAL) + return 0; + if (tag == V_ASN1_OBJECT) + q += tmplen; + *plen -= q - *pp; + *pp = (unsigned char *)q; + return 1; +} + +/* + * Encode the DH shared info structure, return an offset to the counter value + * so we can update the structure without reencoding it. + */ + +static int dh_sharedinfo_encode(unsigned char **pder, unsigned char **pctr, + ASN1_OBJECT *key_oid, size_t outlen, + const unsigned char *ukm, size_t ukmlen) +{ + unsigned char *p; + int derlen; + long tlen; + /* "magic" value to check offset is sane */ + static unsigned char ctr[4] = { 0xF3, 0x17, 0x22, 0x53 }; + X509_ALGOR atmp; + ASN1_OCTET_STRING ctr_oct, ukm_oct, *pukm_oct; + ASN1_TYPE ctr_atype; + if (ukmlen > DH_KDF_MAX || outlen > DH_KDF_MAX) + return 0; + ctr_oct.data = ctr; + ctr_oct.length = 4; + ctr_oct.flags = 0; + ctr_oct.type = V_ASN1_OCTET_STRING; + ctr_atype.type = V_ASN1_OCTET_STRING; + ctr_atype.value.octet_string = &ctr_oct; + atmp.algorithm = key_oid; + atmp.parameter = &ctr_atype; + if (ukm) { + ukm_oct.type = V_ASN1_OCTET_STRING; + ukm_oct.flags = 0; + ukm_oct.data = (unsigned char *)ukm; + ukm_oct.length = ukmlen; + pukm_oct = &ukm_oct; + } else + pukm_oct = NULL; + derlen = CMS_SharedInfo_encode(pder, &atmp, pukm_oct, outlen); + if (derlen <= 0) + return 0; + p = *pder; + tlen = derlen; + if (!skip_asn1(&p, &tlen, V_ASN1_SEQUENCE)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_SEQUENCE)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_OBJECT)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_OCTET_STRING)) + return 0; + if (CRYPTO_memcmp(p, ctr, 4)) + return 0; + *pctr = p; + return derlen; +} + +int DH_KDF_X9_42(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + ASN1_OBJECT *key_oid, + const unsigned char *ukm, size_t ukmlen, const EVP_MD *md) +{ + EVP_MD_CTX mctx; + int rv = 0; + unsigned int i; + size_t mdlen; + unsigned char *der = NULL, *ctr; + int derlen; + if (Zlen > DH_KDF_MAX) + return 0; + mdlen = EVP_MD_size(md); + EVP_MD_CTX_init(&mctx); + derlen = dh_sharedinfo_encode(&der, &ctr, key_oid, outlen, ukm, ukmlen); + if (derlen == 0) + goto err; + for (i = 1;; i++) { + unsigned char mtmp[EVP_MAX_MD_SIZE]; + EVP_DigestInit_ex(&mctx, md, NULL); + if (!EVP_DigestUpdate(&mctx, Z, Zlen)) + goto err; + ctr[3] = i & 0xFF; + ctr[2] = (i >> 8) & 0xFF; + ctr[1] = (i >> 16) & 0xFF; + ctr[0] = (i >> 24) & 0xFF; + if (!EVP_DigestUpdate(&mctx, der, derlen)) + goto err; + if (outlen >= mdlen) { + if (!EVP_DigestFinal(&mctx, out, NULL)) + goto err; + outlen -= mdlen; + if (outlen == 0) + break; + out += mdlen; + } else { + if (!EVP_DigestFinal(&mctx, mtmp, NULL)) + goto err; + memcpy(out, mtmp, outlen); + OPENSSL_cleanse(mtmp, mdlen); + break; + } + } + rv = 1; + err: + if (der) + OPENSSL_free(der); + EVP_MD_CTX_cleanup(&mctx); + return rv; +} diff --git a/drivers/builtin_openssl2/crypto/dh/dh_key.c b/drivers/builtin_openssl2/crypto/dh/dh_key.c index 9e1d8e5822..1d80fb2c5f 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_key.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_key.c @@ -94,6 +94,20 @@ int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh) return dh->meth->compute_key(key, pub_key, dh); } +int DH_compute_key_padded(unsigned char *key, const BIGNUM *pub_key, DH *dh) +{ + int rv, pad; + rv = dh->meth->compute_key(key, pub_key, dh); + if (rv <= 0) + return rv; + pad = BN_num_bytes(dh->p) - rv; + if (pad > 0) { + memmove(key + pad, key, rv); + memset(key, 0, pad); + } + return rv + pad; +} + static DH_METHOD dh_ossl = { "OpenSSL DH Method", generate_key, diff --git a/drivers/builtin_openssl2/crypto/dh/dh_pmeth.c b/drivers/builtin_openssl2/crypto/dh/dh_pmeth.c index 65bc3882c7..b58e3fa86f 100644 --- a/drivers/builtin_openssl2/crypto/dh/dh_pmeth.c +++ b/drivers/builtin_openssl2/crypto/dh/dh_pmeth.c @@ -63,6 +63,10 @@ #include #include #include +#ifndef OPENSSL_NO_DSA +# include +#endif +#include #include "evp_locl.h" /* DH pkey context structure */ @@ -72,9 +76,23 @@ typedef struct { int prime_len; int generator; int use_dsa; + int subprime_len; + /* message digest used for parameter generation */ + const EVP_MD *md; + int rfc5114_param; /* Keygen callback info */ int gentmp[2]; - /* message digest */ + /* KDF (if any) to use for DH */ + char kdf_type; + /* OID to use for KDF */ + ASN1_OBJECT *kdf_oid; + /* Message digest to use for key derivation */ + const EVP_MD *kdf_md; + /* User key material */ + unsigned char *kdf_ukm; + size_t kdf_ukmlen; + /* KDF output length */ + size_t kdf_outlen; } DH_PKEY_CTX; static int pkey_dh_init(EVP_PKEY_CTX *ctx) @@ -84,8 +102,18 @@ static int pkey_dh_init(EVP_PKEY_CTX *ctx) if (!dctx) return 0; dctx->prime_len = 1024; + dctx->subprime_len = -1; dctx->generator = 2; dctx->use_dsa = 0; + dctx->md = NULL; + dctx->rfc5114_param = 0; + + dctx->kdf_type = EVP_PKEY_DH_KDF_NONE; + dctx->kdf_oid = NULL; + dctx->kdf_md = NULL; + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = 0; + dctx->kdf_outlen = 0; ctx->data = dctx; ctx->keygen_info = dctx->gentmp; @@ -102,16 +130,35 @@ static int pkey_dh_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) sctx = src->data; dctx = dst->data; dctx->prime_len = sctx->prime_len; + dctx->subprime_len = sctx->subprime_len; dctx->generator = sctx->generator; dctx->use_dsa = sctx->use_dsa; + dctx->md = sctx->md; + dctx->rfc5114_param = sctx->rfc5114_param; + + dctx->kdf_type = sctx->kdf_type; + dctx->kdf_oid = OBJ_dup(sctx->kdf_oid); + if (!dctx->kdf_oid) + return 0; + dctx->kdf_md = sctx->kdf_md; + if (dctx->kdf_ukm) { + dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen); + dctx->kdf_ukmlen = sctx->kdf_ukmlen; + } + dctx->kdf_outlen = sctx->kdf_outlen; return 1; } static void pkey_dh_cleanup(EVP_PKEY_CTX *ctx) { DH_PKEY_CTX *dctx = ctx->data; - if (dctx) + if (dctx) { + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + if (dctx->kdf_oid) + ASN1_OBJECT_free(dctx->kdf_oid); OPENSSL_free(dctx); + } } static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) @@ -124,14 +171,89 @@ static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->prime_len = p1; return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN: + if (dctx->use_dsa == 0) + return -2; + dctx->subprime_len = p1; + return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR: + if (dctx->use_dsa) + return -2; dctx->generator = p1; return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_TYPE: +#ifdef OPENSSL_NO_DSA + if (p1 != 0) + return -2; +#else + if (p1 < 0 || p1 > 2) + return -2; +#endif + dctx->use_dsa = p1; + return 1; + + case EVP_PKEY_CTRL_DH_RFC5114: + if (p1 < 1 || p1 > 3) + return -2; + dctx->rfc5114_param = p1; + return 1; + case EVP_PKEY_CTRL_PEER_KEY: /* Default behaviour is OK */ return 1; + case EVP_PKEY_CTRL_DH_KDF_TYPE: + if (p1 == -2) + return dctx->kdf_type; + if (p1 != EVP_PKEY_DH_KDF_NONE && p1 != EVP_PKEY_DH_KDF_X9_42) + return -2; + dctx->kdf_type = p1; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_MD: + dctx->kdf_md = p2; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_MD: + *(const EVP_MD **)p2 = dctx->kdf_md; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_OUTLEN: + if (p1 <= 0) + return -2; + dctx->kdf_outlen = (size_t)p1; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN: + *(int *)p2 = dctx->kdf_outlen; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_UKM: + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + dctx->kdf_ukm = p2; + if (p2) + dctx->kdf_ukmlen = p1; + else + dctx->kdf_ukmlen = 0; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_UKM: + *(unsigned char **)p2 = dctx->kdf_ukm; + return dctx->kdf_ukmlen; + + case EVP_PKEY_CTRL_DH_KDF_OID: + if (dctx->kdf_oid) + ASN1_OBJECT_free(dctx->kdf_oid); + dctx->kdf_oid = p2; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_OID: + *(ASN1_OBJECT **)p2 = dctx->kdf_oid; + return 1; + default: return -2; @@ -146,30 +268,139 @@ static int pkey_dh_ctrl_str(EVP_PKEY_CTX *ctx, len = atoi(value); return EVP_PKEY_CTX_set_dh_paramgen_prime_len(ctx, len); } + if (!strcmp(type, "dh_rfc5114")) { + DH_PKEY_CTX *dctx = ctx->data; + int len; + len = atoi(value); + if (len < 0 || len > 3) + return -2; + dctx->rfc5114_param = len; + return 1; + } if (!strcmp(type, "dh_paramgen_generator")) { int len; len = atoi(value); return EVP_PKEY_CTX_set_dh_paramgen_generator(ctx, len); } + if (!strcmp(type, "dh_paramgen_subprime_len")) { + int len; + len = atoi(value); + return EVP_PKEY_CTX_set_dh_paramgen_subprime_len(ctx, len); + } + if (!strcmp(type, "dh_paramgen_type")) { + int typ; + typ = atoi(value); + return EVP_PKEY_CTX_set_dh_paramgen_type(ctx, typ); + } return -2; } +#ifndef OPENSSL_NO_DSA + +extern int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, + const EVP_MD *evpmd, + const unsigned char *seed_in, size_t seed_len, + unsigned char *seed_out, int *counter_ret, + unsigned long *h_ret, BN_GENCB *cb); + +extern int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, + const unsigned char *seed_in, + size_t seed_len, int idx, + unsigned char *seed_out, int *counter_ret, + unsigned long *h_ret, BN_GENCB *cb); + +static DSA *dsa_dh_generate(DH_PKEY_CTX *dctx, BN_GENCB *pcb) +{ + DSA *ret; + int rv = 0; + int prime_len = dctx->prime_len; + int subprime_len = dctx->subprime_len; + const EVP_MD *md = dctx->md; + if (dctx->use_dsa > 2) + return NULL; + ret = DSA_new(); + if (!ret) + return NULL; + if (subprime_len == -1) { + if (prime_len >= 2048) + subprime_len = 256; + else + subprime_len = 160; + } + if (md == NULL) { + if (prime_len >= 2048) + md = EVP_sha256(); + else + md = EVP_sha1(); + } + if (dctx->use_dsa == 1) + rv = dsa_builtin_paramgen(ret, prime_len, subprime_len, md, + NULL, 0, NULL, NULL, NULL, pcb); + else if (dctx->use_dsa == 2) + rv = dsa_builtin_paramgen2(ret, prime_len, subprime_len, md, + NULL, 0, -1, NULL, NULL, NULL, pcb); + if (rv <= 0) { + DSA_free(ret); + return NULL; + } + return ret; +} + +#endif + static int pkey_dh_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { DH *dh = NULL; DH_PKEY_CTX *dctx = ctx->data; BN_GENCB *pcb, cb; int ret; + if (dctx->rfc5114_param) { + switch (dctx->rfc5114_param) { + case 1: + dh = DH_get_1024_160(); + break; + + case 2: + dh = DH_get_2048_224(); + break; + + case 3: + dh = DH_get_2048_256(); + break; + + default: + return -2; + } + EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh); + return 1; + } + if (ctx->pkey_gencb) { pcb = &cb; evp_pkey_set_cb_translate(pcb, ctx); } else pcb = NULL; +#ifndef OPENSSL_NO_DSA + if (dctx->use_dsa) { + DSA *dsa_dh; + dsa_dh = dsa_dh_generate(dctx, pcb); + if (!dsa_dh) + return 0; + dh = DSA_dup_DH(dsa_dh); + DSA_free(dsa_dh); + if (!dh) + return 0; + EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh); + return 1; + } +#endif dh = DH_new(); if (!dh) return 0; ret = DH_generate_parameters_ex(dh, dctx->prime_len, dctx->generator, pcb); + if (ret) EVP_PKEY_assign_DH(pkey, dh); else @@ -187,7 +418,7 @@ static int pkey_dh_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) dh = DH_new(); if (!dh) return 0; - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, ctx->pmeth->pkey_id, dh); /* Note: if error return, pkey is freed by parent routine */ if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) return 0; @@ -198,21 +429,96 @@ static int pkey_dh_derive(EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen) { int ret; + DH *dh; + DH_PKEY_CTX *dctx = ctx->data; + BIGNUM *dhpub; if (!ctx->pkey || !ctx->peerkey) { DHerr(DH_F_PKEY_DH_DERIVE, DH_R_KEYS_NOT_SET); return 0; } - ret = DH_compute_key(key, ctx->peerkey->pkey.dh->pub_key, - ctx->pkey->pkey.dh); - if (ret < 0) + dh = ctx->pkey->pkey.dh; + dhpub = ctx->peerkey->pkey.dh->pub_key; + if (dctx->kdf_type == EVP_PKEY_DH_KDF_NONE) { + if (key == NULL) { + *keylen = DH_size(dh); + return 1; + } + ret = DH_compute_key(key, dhpub, dh); + if (ret < 0) + return ret; + *keylen = ret; + return 1; + } else if (dctx->kdf_type == EVP_PKEY_DH_KDF_X9_42) { + unsigned char *Z = NULL; + size_t Zlen = 0; + if (!dctx->kdf_outlen || !dctx->kdf_oid) + return 0; + if (key == NULL) { + *keylen = dctx->kdf_outlen; + return 1; + } + if (*keylen != dctx->kdf_outlen) + return 0; + ret = 0; + Zlen = DH_size(dh); + Z = OPENSSL_malloc(Zlen); + if (!Z) { + goto err; + } + if (DH_compute_key_padded(Z, dhpub, dh) <= 0) + goto err; + if (!DH_KDF_X9_42(key, *keylen, Z, Zlen, dctx->kdf_oid, + dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md)) + goto err; + *keylen = dctx->kdf_outlen; + ret = 1; + err: + if (Z) { + OPENSSL_cleanse(Z, Zlen); + OPENSSL_free(Z); + } return ret; - *keylen = ret; + } return 1; } const EVP_PKEY_METHOD dh_pkey_meth = { EVP_PKEY_DH, - EVP_PKEY_FLAG_AUTOARGLEN, + 0, + pkey_dh_init, + pkey_dh_copy, + pkey_dh_cleanup, + + 0, + pkey_dh_paramgen, + + 0, + pkey_dh_keygen, + + 0, + 0, + + 0, + 0, + + 0, 0, + + 0, 0, 0, 0, + + 0, 0, + + 0, 0, + + 0, + pkey_dh_derive, + + pkey_dh_ctrl, + pkey_dh_ctrl_str +}; + +const EVP_PKEY_METHOD dhx_pkey_meth = { + EVP_PKEY_DHX, + 0, pkey_dh_init, pkey_dh_copy, pkey_dh_cleanup, diff --git a/drivers/builtin_openssl2/crypto/dh/dh_rfc5114.c b/drivers/builtin_openssl2/crypto/dh/dh_rfc5114.c new file mode 100644 index 0000000000..e96e2aa3fc --- /dev/null +++ b/drivers/builtin_openssl2/crypto/dh/dh_rfc5114.c @@ -0,0 +1,285 @@ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project + * 2011. + */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include +#include "cryptlib.h" +#include +#include + +/* DH parameters from RFC5114 */ + +#if BN_BITS2 == 64 +static const BN_ULONG dh1024_160_p[] = { + 0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL, + 0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL, + 0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL, + 0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL, + 0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL, + 0xB10B8F96A080E01DULL +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL, + 0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL, + 0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL, + 0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL, + 0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL, + 0xA4D1CBD5C3FD3412ULL +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL +}; + +static const BN_ULONG dh2048_224_p[] = { + 0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL, + 0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL, + 0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL, + 0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL, + 0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL, + 0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL, + 0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL, + 0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL, + 0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL, + 0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL, + 0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL, + 0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL, + 0x566AFBB48D6C0191ULL, 0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL, + 0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL, + 0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL, + 0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL, + 0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL, + 0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL, + 0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL, + 0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL, + 0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL, + 0x00000000801C0D34ULL +}; + +static const BN_ULONG dh2048_256_p[] = { + 0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL, + 0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL, + 0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL, + 0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL, + 0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL, + 0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL, + 0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL, + 0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL, + 0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL, + 0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL, + 0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL, + 0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL, + 0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL, + 0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL, + 0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL, + 0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL, + 0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL, + 0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL, + 0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL, + 0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL, + 0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL +}; + +static const BN_ULONG dh2048_256_q[] = { + 0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL, + 0x8CF83642A709A097ULL +}; + +#elif BN_BITS2 == 32 + +static const BN_ULONG dh1024_160_p[] = { + 0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF, + 0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA, + 0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C, + 0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286, + 0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E, + 0xA080E01D, 0xB10B8F96 +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59, + 0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08, + 0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F, + 0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E, + 0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442, + 0xC3FD3412, 0xA4D1CBD5 +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87 +}; + +static const BN_ULONG dh2048_224_p[] = { + 0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2, + 0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E, + 0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1, + 0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A, + 0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714, + 0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0, + 0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0, + 0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152, + 0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40, + 0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5, + 0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1, + 0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE, + 0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F, + 0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137, + 0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3, + 0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2, + 0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0, + 0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001, + 0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01, + 0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B, + 0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE, + 0x801C0D34 +}; + +static const BN_ULONG dh2048_256_p[] = { + 0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227, + 0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A, + 0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79, + 0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5, + 0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267, + 0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF, + 0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF, + 0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64, + 0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45, + 0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608, + 0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148, + 0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428, + 0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15, + 0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73, + 0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1, + 0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982, + 0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5, + 0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8, + 0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A, + 0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F, + 0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B +}; + +static const BN_ULONG dh2048_256_q[] = { + 0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976, + 0xA709A097, 0x8CF83642 +}; + +#else +# error "unsupported BN_BITS2" +#endif + +/* Macro to make a BIGNUM from static data */ + +#define make_dh_bn(x) static const BIGNUM _bignum_##x = { (BN_ULONG *) x, \ + sizeof(x)/sizeof(BN_ULONG),\ + sizeof(x)/sizeof(BN_ULONG),\ + 0, BN_FLG_STATIC_DATA } + +/* + * Macro to make a DH structure from BIGNUM data. NB: although just copying + * the BIGNUM static pointers would be more efficient we can't as they get + * wiped using BN_clear_free() when DH_free() is called. + */ + +#define make_dh(x) \ +DH * DH_get_##x(void) \ + { \ + DH *dh; \ + make_dh_bn(dh##x##_p); \ + make_dh_bn(dh##x##_q); \ + make_dh_bn(dh##x##_g); \ + dh = DH_new(); \ + if (!dh) \ + return NULL; \ + dh->p = BN_dup(&_bignum_dh##x##_p); \ + dh->g = BN_dup(&_bignum_dh##x##_g); \ + dh->q = BN_dup(&_bignum_dh##x##_q); \ + if (!dh->p || !dh->q || !dh->g) \ + { \ + DH_free(dh); \ + return NULL; \ + } \ + return dh; \ + } + +make_dh(1024_160) +make_dh(2048_224) +make_dh(2048_256) diff --git a/drivers/builtin_openssl2/crypto/dh/dhtest.c b/drivers/builtin_openssl2/crypto/dh/dhtest.c deleted file mode 100644 index 5a4ee9a35d..0000000000 --- a/drivers/builtin_openssl2/crypto/dh/dhtest.c +++ /dev/null @@ -1,241 +0,0 @@ -/* crypto/dh/dhtest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * Until the key-gen callbacks are modified to use newer prototypes, we allow - * deprecated functions for openssl-internal code - */ -#ifdef OPENSSL_NO_DEPRECATED -# undef OPENSSL_NO_DEPRECATED -#endif - -#include -#include -#include - -#include "../e_os.h" - -#include -#include -#include -#include -#include - -#ifdef OPENSSL_NO_DH -int main(int argc, char *argv[]) -{ - printf("No DH support\n"); - return (0); -} -#else -# include - -# ifdef OPENSSL_SYS_WIN16 -# define MS_CALLBACK _far _loadds -# else -# define MS_CALLBACK -# endif - -static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg); - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -int main(int argc, char *argv[]) -{ - BN_GENCB _cb; - DH *a; - DH *b = NULL; - char buf[12]; - unsigned char *abuf = NULL, *bbuf = NULL; - int i, alen, blen, aout, bout, ret = 1; - BIO *out; - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - -# ifdef OPENSSL_SYS_WIN32 - CRYPTO_malloc_init(); -# endif - - RAND_seed(rnd_seed, sizeof rnd_seed); - - out = BIO_new(BIO_s_file()); - if (out == NULL) - EXIT(1); - BIO_set_fp(out, stdout, BIO_NOCLOSE); - - BN_GENCB_set(&_cb, &cb, out); - if (((a = DH_new()) == NULL) || !DH_generate_parameters_ex(a, 64, - DH_GENERATOR_5, - &_cb)) - goto err; - - if (!DH_check(a, &i)) - goto err; - if (i & DH_CHECK_P_NOT_PRIME) - BIO_puts(out, "p value is not prime\n"); - if (i & DH_CHECK_P_NOT_SAFE_PRIME) - BIO_puts(out, "p value is not a safe prime\n"); - if (i & DH_UNABLE_TO_CHECK_GENERATOR) - BIO_puts(out, "unable to check the generator value\n"); - if (i & DH_NOT_SUITABLE_GENERATOR) - BIO_puts(out, "the g value is not a generator\n"); - - BIO_puts(out, "\np ="); - BN_print(out, a->p); - BIO_puts(out, "\ng ="); - BN_print(out, a->g); - BIO_puts(out, "\n"); - - b = DH_new(); - if (b == NULL) - goto err; - - b->p = BN_dup(a->p); - b->g = BN_dup(a->g); - if ((b->p == NULL) || (b->g == NULL)) - goto err; - - /* Set a to run with normal modexp and b to use constant time */ - a->flags &= ~DH_FLAG_NO_EXP_CONSTTIME; - b->flags |= DH_FLAG_NO_EXP_CONSTTIME; - - if (!DH_generate_key(a)) - goto err; - BIO_puts(out, "pri 1="); - BN_print(out, a->priv_key); - BIO_puts(out, "\npub 1="); - BN_print(out, a->pub_key); - BIO_puts(out, "\n"); - - if (!DH_generate_key(b)) - goto err; - BIO_puts(out, "pri 2="); - BN_print(out, b->priv_key); - BIO_puts(out, "\npub 2="); - BN_print(out, b->pub_key); - BIO_puts(out, "\n"); - - alen = DH_size(a); - abuf = (unsigned char *)OPENSSL_malloc(alen); - aout = DH_compute_key(abuf, b->pub_key, a); - - BIO_puts(out, "key1 ="); - for (i = 0; i < aout; i++) { - sprintf(buf, "%02X", abuf[i]); - BIO_puts(out, buf); - } - BIO_puts(out, "\n"); - - blen = DH_size(b); - bbuf = (unsigned char *)OPENSSL_malloc(blen); - bout = DH_compute_key(bbuf, a->pub_key, b); - - BIO_puts(out, "key2 ="); - for (i = 0; i < bout; i++) { - sprintf(buf, "%02X", bbuf[i]); - BIO_puts(out, buf); - } - BIO_puts(out, "\n"); - if ((aout < 4) || (bout != aout) || (memcmp(abuf, bbuf, aout) != 0)) { - fprintf(stderr, "Error in DH routines\n"); - ret = 1; - } else - ret = 0; - err: - ERR_print_errors_fp(stderr); - - if (abuf != NULL) - OPENSSL_free(abuf); - if (bbuf != NULL) - OPENSSL_free(bbuf); - if (b != NULL) - DH_free(b); - if (a != NULL) - DH_free(a); - BIO_free(out); -# ifdef OPENSSL_SYS_NETWARE - if (ret) - printf("ERROR: %d\n", ret); -# endif - EXIT(ret); - return (ret); -} - -static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg) -{ - char c = '*'; - - if (p == 0) - c = '.'; - if (p == 1) - c = '+'; - if (p == 2) - c = '*'; - if (p == 3) - c = '\n'; - BIO_write(arg->arg, &c, 1); - (void)BIO_flush(arg->arg); -# ifdef LINT - p = n; -# endif - return 1; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_ameth.c b/drivers/builtin_openssl2/crypto/dsa/dsa_ameth.c index f5443e304b..cc83d6e6ad 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_ameth.c +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_ameth.c @@ -269,7 +269,7 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) goto done; decerr: - DSAerr(DSA_F_DSA_PRIV_DECODE, EVP_R_DECODE_ERROR); + DSAerr(DSA_F_DSA_PRIV_DECODE, DSA_R_DECODE_ERROR); dsaerr: DSA_free(dsa); done: @@ -602,10 +602,14 @@ static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0); } return 1; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_NONE; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 2; default: diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_err.c b/drivers/builtin_openssl2/crypto/dsa/dsa_err.c index 746f5dfe6d..f5ddc66b8a 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_err.c +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_err.c @@ -1,6 +1,6 @@ /* crypto/dsa/dsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -74,6 +74,7 @@ static ERR_STRING_DATA DSA_str_functs[] = { {ERR_FUNC(DSA_F_DO_DSA_PRINT), "DO_DSA_PRINT"}, {ERR_FUNC(DSA_F_DSAPARAMS_PRINT), "DSAparams_print"}, {ERR_FUNC(DSA_F_DSAPARAMS_PRINT_FP), "DSAparams_print_fp"}, + {ERR_FUNC(DSA_F_DSA_BUILTIN_PARAMGEN2), "DSA_BUILTIN_PARAMGEN2"}, {ERR_FUNC(DSA_F_DSA_DO_SIGN), "DSA_do_sign"}, {ERR_FUNC(DSA_F_DSA_DO_VERIFY), "DSA_do_verify"}, {ERR_FUNC(DSA_F_DSA_GENERATE_KEY), "DSA_generate_key"}, @@ -107,12 +108,14 @@ static ERR_STRING_DATA DSA_str_reasons[] = { "data too large for key size"}, {ERR_REASON(DSA_R_DECODE_ERROR), "decode error"}, {ERR_REASON(DSA_R_INVALID_DIGEST_TYPE), "invalid digest type"}, + {ERR_REASON(DSA_R_INVALID_PARAMETERS), "invalid parameters"}, {ERR_REASON(DSA_R_MISSING_PARAMETERS), "missing parameters"}, {ERR_REASON(DSA_R_MODULUS_TOO_LARGE), "modulus too large"}, {ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES), "need new setup values"}, {ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD), "non fips dsa method"}, {ERR_REASON(DSA_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"}, + {ERR_REASON(DSA_R_Q_NOT_PRIME), "q not prime"}, {0, NULL} }; diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_gen.c b/drivers/builtin_openssl2/crypto/dsa/dsa_gen.c index 34c6113c45..15f3bb4f3f 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_gen.c +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_gen.c @@ -86,6 +86,8 @@ # include "dsa_locl.h" # ifdef OPENSSL_FIPS +/* Workaround bug in prototype */ +# define fips_dsa_builtin_paramgen2 fips_dsa_paramgen_bad # include # endif @@ -376,4 +378,371 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, BN_MONT_CTX_free(mont); return ok; } + +# ifdef OPENSSL_FIPS +# undef fips_dsa_builtin_paramgen2 +extern int fips_dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, + const unsigned char *seed_in, + size_t seed_len, int idx, + unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb); +# endif + +/* + * This is a parameter generation algorithm for the DSA2 algorithm as + * described in FIPS 186-3. + */ + +int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, const unsigned char *seed_in, + size_t seed_len, int idx, unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb) +{ + int ok = -1; + unsigned char *seed = NULL, *seed_tmp = NULL; + unsigned char md[EVP_MAX_MD_SIZE]; + int mdsize; + BIGNUM *r0, *W, *X, *c, *test; + BIGNUM *g = NULL, *q = NULL, *p = NULL; + BN_MONT_CTX *mont = NULL; + int i, k, n = 0, m = 0, qsize = N >> 3; + int counter = 0; + int r = 0; + BN_CTX *ctx = NULL; + EVP_MD_CTX mctx; + unsigned int h = 2; + +# ifdef OPENSSL_FIPS + + if (FIPS_mode()) + return fips_dsa_builtin_paramgen2(ret, L, N, evpmd, + seed_in, seed_len, idx, + seed_out, counter_ret, h_ret, cb); +# endif + + EVP_MD_CTX_init(&mctx); + + if (evpmd == NULL) { + if (N == 160) + evpmd = EVP_sha1(); + else if (N == 224) + evpmd = EVP_sha224(); + else + evpmd = EVP_sha256(); + } + + mdsize = EVP_MD_size(evpmd); + /* If unverificable g generation only don't need seed */ + if (!ret->p || !ret->q || idx >= 0) { + if (seed_len == 0) + seed_len = mdsize; + + seed = OPENSSL_malloc(seed_len); + + if (seed_out) + seed_tmp = seed_out; + else + seed_tmp = OPENSSL_malloc(seed_len); + + if (!seed || !seed_tmp) + goto err; + + if (seed_in) + memcpy(seed, seed_in, seed_len); + + } + + if ((ctx = BN_CTX_new()) == NULL) + goto err; + + if ((mont = BN_MONT_CTX_new()) == NULL) + goto err; + + BN_CTX_start(ctx); + r0 = BN_CTX_get(ctx); + g = BN_CTX_get(ctx); + W = BN_CTX_get(ctx); + X = BN_CTX_get(ctx); + c = BN_CTX_get(ctx); + test = BN_CTX_get(ctx); + + /* if p, q already supplied generate g only */ + if (ret->p && ret->q) { + p = ret->p; + q = ret->q; + if (idx >= 0) + memcpy(seed_tmp, seed, seed_len); + goto g_only; + } else { + p = BN_CTX_get(ctx); + q = BN_CTX_get(ctx); + } + + if (!BN_lshift(test, BN_value_one(), L - 1)) + goto err; + for (;;) { + for (;;) { /* find q */ + unsigned char *pmd; + /* step 1 */ + if (!BN_GENCB_call(cb, 0, m++)) + goto err; + + if (!seed_in) { + if (RAND_pseudo_bytes(seed, seed_len) < 0) + goto err; + } + /* step 2 */ + if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL)) + goto err; + /* Take least significant bits of md */ + if (mdsize > qsize) + pmd = md + mdsize - qsize; + else + pmd = md; + + if (mdsize < qsize) + memset(md + mdsize, 0, qsize - mdsize); + + /* step 3 */ + pmd[0] |= 0x80; + pmd[qsize - 1] |= 0x01; + if (!BN_bin2bn(pmd, qsize, q)) + goto err; + + /* step 4 */ + r = BN_is_prime_fasttest_ex(q, DSS_prime_checks, ctx, + seed_in ? 1 : 0, cb); + if (r > 0) + break; + if (r != 0) + goto err; + /* Provided seed didn't produce a prime: error */ + if (seed_in) { + ok = 0; + DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_Q_NOT_PRIME); + goto err; + } + + /* do a callback call */ + /* step 5 */ + } + /* Copy seed to seed_out before we mess with it */ + if (seed_out) + memcpy(seed_out, seed, seed_len); + + if (!BN_GENCB_call(cb, 2, 0)) + goto err; + if (!BN_GENCB_call(cb, 3, 0)) + goto err; + + /* step 6 */ + counter = 0; + /* "offset = 1" */ + + n = (L - 1) / (mdsize << 3); + + for (;;) { + if ((counter != 0) && !BN_GENCB_call(cb, 0, counter)) + goto err; + + /* step 7 */ + BN_zero(W); + /* now 'buf' contains "SEED + offset - 1" */ + for (k = 0; k <= n; k++) { + /* + * obtain "SEED + offset + k" by incrementing: + */ + for (i = seed_len - 1; i >= 0; i--) { + seed[i]++; + if (seed[i] != 0) + break; + } + + if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL)) + goto err; + + /* step 8 */ + if (!BN_bin2bn(md, mdsize, r0)) + goto err; + if (!BN_lshift(r0, r0, (mdsize << 3) * k)) + goto err; + if (!BN_add(W, W, r0)) + goto err; + } + + /* more of step 8 */ + if (!BN_mask_bits(W, L - 1)) + goto err; + if (!BN_copy(X, W)) + goto err; + if (!BN_add(X, X, test)) + goto err; + + /* step 9 */ + if (!BN_lshift1(r0, q)) + goto err; + if (!BN_mod(c, X, r0, ctx)) + goto err; + if (!BN_sub(r0, c, BN_value_one())) + goto err; + if (!BN_sub(p, X, r0)) + goto err; + + /* step 10 */ + if (BN_cmp(p, test) >= 0) { + /* step 11 */ + r = BN_is_prime_fasttest_ex(p, DSS_prime_checks, ctx, 1, cb); + if (r > 0) + goto end; /* found it */ + if (r != 0) + goto err; + } + + /* step 13 */ + counter++; + /* "offset = offset + n + 1" */ + + /* step 14 */ + if (counter >= (int)(4 * L)) + break; + } + if (seed_in) { + ok = 0; + DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_INVALID_PARAMETERS); + goto err; + } + } + end: + if (!BN_GENCB_call(cb, 2, 1)) + goto err; + + g_only: + + /* We now need to generate g */ + /* Set r0=(p-1)/q */ + if (!BN_sub(test, p, BN_value_one())) + goto err; + if (!BN_div(r0, NULL, test, q, ctx)) + goto err; + + if (idx < 0) { + if (!BN_set_word(test, h)) + goto err; + } else + h = 1; + if (!BN_MONT_CTX_set(mont, p, ctx)) + goto err; + + for (;;) { + static const unsigned char ggen[4] = { 0x67, 0x67, 0x65, 0x6e }; + if (idx >= 0) { + md[0] = idx & 0xff; + md[1] = (h >> 8) & 0xff; + md[2] = h & 0xff; + if (!EVP_DigestInit_ex(&mctx, evpmd, NULL)) + goto err; + if (!EVP_DigestUpdate(&mctx, seed_tmp, seed_len)) + goto err; + if (!EVP_DigestUpdate(&mctx, ggen, sizeof(ggen))) + goto err; + if (!EVP_DigestUpdate(&mctx, md, 3)) + goto err; + if (!EVP_DigestFinal_ex(&mctx, md, NULL)) + goto err; + if (!BN_bin2bn(md, mdsize, test)) + goto err; + } + /* g=test^r0%p */ + if (!BN_mod_exp_mont(g, test, r0, p, ctx, mont)) + goto err; + if (!BN_is_one(g)) + break; + if (idx < 0 && !BN_add(test, test, BN_value_one())) + goto err; + h++; + if (idx >= 0 && h > 0xffff) + goto err; + } + + if (!BN_GENCB_call(cb, 3, 1)) + goto err; + + ok = 1; + err: + if (ok == 1) { + if (p != ret->p) { + if (ret->p) + BN_free(ret->p); + ret->p = BN_dup(p); + } + if (q != ret->q) { + if (ret->q) + BN_free(ret->q); + ret->q = BN_dup(q); + } + if (ret->g) + BN_free(ret->g); + ret->g = BN_dup(g); + if (ret->p == NULL || ret->q == NULL || ret->g == NULL) { + ok = -1; + goto err; + } + if (counter_ret != NULL) + *counter_ret = counter; + if (h_ret != NULL) + *h_ret = h; + } + if (seed) + OPENSSL_free(seed); + if (seed_out != seed_tmp) + OPENSSL_free(seed_tmp); + if (ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if (mont != NULL) + BN_MONT_CTX_free(mont); + EVP_MD_CTX_cleanup(&mctx); + return ok; +} + +int dsa_paramgen_check_g(DSA *dsa) +{ + BN_CTX *ctx; + BIGNUM *tmp; + BN_MONT_CTX *mont = NULL; + int rv = -1; + ctx = BN_CTX_new(); + if (!ctx) + return -1; + BN_CTX_start(ctx); + if (BN_cmp(dsa->g, BN_value_one()) <= 0) + return 0; + if (BN_cmp(dsa->g, dsa->p) >= 0) + return 0; + tmp = BN_CTX_get(ctx); + if (!tmp) + goto err; + if ((mont = BN_MONT_CTX_new()) == NULL) + goto err; + if (!BN_MONT_CTX_set(mont, dsa->p, ctx)) + goto err; + /* Work out g^q mod p */ + if (!BN_mod_exp_mont(tmp, dsa->g, dsa->q, dsa->p, ctx, mont)) + goto err; + if (!BN_cmp(tmp, BN_value_one())) + rv = 1; + else + rv = 0; + err: + BN_CTX_end(ctx); + if (mont) + BN_MONT_CTX_free(mont); + BN_CTX_free(ctx); + return rv; + +} #endif diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_locl.h b/drivers/builtin_openssl2/crypto/dsa/dsa_locl.h index f32ee964d0..9c23c3ef90 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_locl.h +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_locl.h @@ -59,3 +59,11 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, size_t seed_len, unsigned char *seed_out, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); + +int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, const unsigned char *seed_in, + size_t seed_len, int idx, unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb); + +int dsa_paramgen_check_g(DSA *dsa); diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_ossl.c b/drivers/builtin_openssl2/crypto/dsa/dsa_ossl.c index 9a3772e00d..efc4f1b6ae 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_ossl.c +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_ossl.c @@ -398,11 +398,7 @@ static int dsa_do_verify(const unsigned char *dgst, int dgst_len, ret = (BN_ucmp(&u1, sig->r) == 0); err: - /* - * XXX: surely this is wrong - if ret is 0, it just didn't verify; there - * is no error in BN. Test should be ret == -1 (Ben) - */ - if (ret != 1) + if (ret < 0) DSAerr(DSA_F_DSA_DO_VERIFY, ERR_R_BN_LIB); if (ctx != NULL) BN_CTX_free(ctx); diff --git a/drivers/builtin_openssl2/crypto/dsa/dsa_pmeth.c b/drivers/builtin_openssl2/crypto/dsa/dsa_pmeth.c index 0d480f6a70..42b8bb0862 100644 --- a/drivers/builtin_openssl2/crypto/dsa/dsa_pmeth.c +++ b/drivers/builtin_openssl2/crypto/dsa/dsa_pmeth.c @@ -197,6 +197,10 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = dctx->md; + return 1; + case EVP_PKEY_CTRL_DIGESTINIT: case EVP_PKEY_CTRL_PKCS7_SIGN: case EVP_PKEY_CTRL_CMS_SIGN: diff --git a/drivers/builtin_openssl2/crypto/dsa/dsatest.c b/drivers/builtin_openssl2/crypto/dsa/dsatest.c deleted file mode 100644 index 8a224a8876..0000000000 --- a/drivers/builtin_openssl2/crypto/dsa/dsatest.c +++ /dev/null @@ -1,268 +0,0 @@ -/* crypto/dsa/dsatest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * Until the key-gen callbacks are modified to use newer prototypes, we allow - * deprecated functions for openssl-internal code - */ -#ifdef OPENSSL_NO_DEPRECATED -# undef OPENSSL_NO_DEPRECATED -#endif - -#include -#include -#include -#include -#include - -#include "../e_os.h" - -#include -#include -#include -#include -#include - -#ifdef OPENSSL_NO_DSA -int main(int argc, char *argv[]) -{ - printf("No DSA support\n"); - return (0); -} -#else -# include - -# ifdef OPENSSL_SYS_WIN16 -# define MS_CALLBACK _far _loadds -# else -# define MS_CALLBACK -# endif - -static int MS_CALLBACK dsa_cb(int p, int n, BN_GENCB *arg); - -/* - * seed, out_p, out_q, out_g are taken from the updated Appendix 5 to FIPS - * PUB 186 and also appear in Appendix 5 to FIPS PIB 186-1 - */ -static unsigned char seed[20] = { - 0xd5, 0x01, 0x4e, 0x4b, 0x60, 0xef, 0x2b, 0xa8, 0xb6, 0x21, 0x1b, 0x40, - 0x62, 0xba, 0x32, 0x24, 0xe0, 0x42, 0x7d, 0xd3, -}; - -static unsigned char out_p[] = { - 0x8d, 0xf2, 0xa4, 0x94, 0x49, 0x22, 0x76, 0xaa, - 0x3d, 0x25, 0x75, 0x9b, 0xb0, 0x68, 0x69, 0xcb, - 0xea, 0xc0, 0xd8, 0x3a, 0xfb, 0x8d, 0x0c, 0xf7, - 0xcb, 0xb8, 0x32, 0x4f, 0x0d, 0x78, 0x82, 0xe5, - 0xd0, 0x76, 0x2f, 0xc5, 0xb7, 0x21, 0x0e, 0xaf, - 0xc2, 0xe9, 0xad, 0xac, 0x32, 0xab, 0x7a, 0xac, - 0x49, 0x69, 0x3d, 0xfb, 0xf8, 0x37, 0x24, 0xc2, - 0xec, 0x07, 0x36, 0xee, 0x31, 0xc8, 0x02, 0x91, -}; - -static unsigned char out_q[] = { - 0xc7, 0x73, 0x21, 0x8c, 0x73, 0x7e, 0xc8, 0xee, - 0x99, 0x3b, 0x4f, 0x2d, 0xed, 0x30, 0xf4, 0x8e, - 0xda, 0xce, 0x91, 0x5f, -}; - -static unsigned char out_g[] = { - 0x62, 0x6d, 0x02, 0x78, 0x39, 0xea, 0x0a, 0x13, - 0x41, 0x31, 0x63, 0xa5, 0x5b, 0x4c, 0xb5, 0x00, - 0x29, 0x9d, 0x55, 0x22, 0x95, 0x6c, 0xef, 0xcb, - 0x3b, 0xff, 0x10, 0xf3, 0x99, 0xce, 0x2c, 0x2e, - 0x71, 0xcb, 0x9d, 0xe5, 0xfa, 0x24, 0xba, 0xbf, - 0x58, 0xe5, 0xb7, 0x95, 0x21, 0x92, 0x5c, 0x9c, - 0xc4, 0x2e, 0x9f, 0x6f, 0x46, 0x4b, 0x08, 0x8c, - 0xc5, 0x72, 0xaf, 0x53, 0xe6, 0xd7, 0x88, 0x02, -}; - -static const unsigned char str1[] = "12345678901234567890"; - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -static BIO *bio_err = NULL; - -int main(int argc, char **argv) -{ - BN_GENCB cb; - DSA *dsa = NULL; - int counter, ret = 0, i, j; - unsigned char buf[256]; - unsigned long h; - unsigned char sig[256]; - unsigned int siglen; - - if (bio_err == NULL) - bio_err = BIO_new_fp(stderr, BIO_NOCLOSE); - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - ERR_load_crypto_strings(); - RAND_seed(rnd_seed, sizeof rnd_seed); - - BIO_printf(bio_err, "test generation of DSA parameters\n"); - - BN_GENCB_set(&cb, dsa_cb, bio_err); - if (((dsa = DSA_new()) == NULL) || !DSA_generate_parameters_ex(dsa, 512, - seed, 20, - &counter, - &h, &cb)) - goto end; - - BIO_printf(bio_err, "seed\n"); - for (i = 0; i < 20; i += 4) { - BIO_printf(bio_err, "%02X%02X%02X%02X ", - seed[i], seed[i + 1], seed[i + 2], seed[i + 3]); - } - BIO_printf(bio_err, "\ncounter=%d h=%ld\n", counter, h); - - DSA_print(bio_err, dsa, 0); - if (counter != 105) { - BIO_printf(bio_err, "counter should be 105\n"); - goto end; - } - if (h != 2) { - BIO_printf(bio_err, "h should be 2\n"); - goto end; - } - - i = BN_bn2bin(dsa->q, buf); - j = sizeof(out_q); - if ((i != j) || (memcmp(buf, out_q, i) != 0)) { - BIO_printf(bio_err, "q value is wrong\n"); - goto end; - } - - i = BN_bn2bin(dsa->p, buf); - j = sizeof(out_p); - if ((i != j) || (memcmp(buf, out_p, i) != 0)) { - BIO_printf(bio_err, "p value is wrong\n"); - goto end; - } - - i = BN_bn2bin(dsa->g, buf); - j = sizeof(out_g); - if ((i != j) || (memcmp(buf, out_g, i) != 0)) { - BIO_printf(bio_err, "g value is wrong\n"); - goto end; - } - - dsa->flags |= DSA_FLAG_NO_EXP_CONSTTIME; - DSA_generate_key(dsa); - DSA_sign(0, str1, 20, sig, &siglen, dsa); - if (DSA_verify(0, str1, 20, sig, siglen, dsa) == 1) - ret = 1; - - dsa->flags &= ~DSA_FLAG_NO_EXP_CONSTTIME; - DSA_generate_key(dsa); - DSA_sign(0, str1, 20, sig, &siglen, dsa); - if (DSA_verify(0, str1, 20, sig, siglen, dsa) == 1) - ret = 1; - - end: - if (!ret) - ERR_print_errors(bio_err); - if (dsa != NULL) - DSA_free(dsa); - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - ERR_free_strings(); - CRYPTO_mem_leaks(bio_err); - if (bio_err != NULL) { - BIO_free(bio_err); - bio_err = NULL; - } -# ifdef OPENSSL_SYS_NETWARE - if (!ret) - printf("ERROR\n"); -# endif - EXIT(!ret); - return (0); -} - -static int MS_CALLBACK dsa_cb(int p, int n, BN_GENCB *arg) -{ - char c = '*'; - static int ok = 0, num = 0; - - if (p == 0) { - c = '.'; - num++; - }; - if (p == 1) - c = '+'; - if (p == 2) { - c = '*'; - ok++; - } - if (p == 3) - c = '\n'; - BIO_write(arg->arg, &c, 1); - (void)BIO_flush(arg->arg); - - if (!ok && (p == 0) && (num > 1)) { - BIO_printf((BIO *)arg, "error in dsatest\n"); - return 0; - } - return 1; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/dso/dso_lib.c b/drivers/builtin_openssl2/crypto/dso/dso_lib.c index 3312450eae..2beb7c1ba5 100644 --- a/drivers/builtin_openssl2/crypto/dso/dso_lib.c +++ b/drivers/builtin_openssl2/crypto/dso/dso_lib.c @@ -122,6 +122,7 @@ DSO *DSO_new_method(DSO_METHOD *meth) ret->meth = meth; ret->references = 1; if ((ret->meth->init != NULL) && !ret->meth->init(ret)) { + sk_void_free(ret->meth_data); OPENSSL_free(ret); ret = NULL; } diff --git a/drivers/builtin_openssl2/crypto/dso/dso_win32.c b/drivers/builtin_openssl2/crypto/dso/dso_win32.c index eaf1831aa9..706e754a3f 100644 --- a/drivers/builtin_openssl2/crypto/dso/dso_win32.c +++ b/drivers/builtin_openssl2/crypto/dso/dso_win32.c @@ -633,7 +633,6 @@ static int win32_pathbyaddr(void *addr, char *path, int sz) CREATETOOLHELP32SNAPSHOT create_snap; CLOSETOOLHELP32SNAPSHOT close_snap; MODULE32 module_first, module_next; - int len; if (addr == NULL) { union { @@ -694,25 +693,29 @@ static int win32_pathbyaddr(void *addr, char *path, int sz) return WideCharToMultiByte(CP_ACP, 0, me32.szExePath, -1, path, sz, NULL, NULL); # else - len = (int)wcslen(me32.szExePath); - if (sz <= 0) - return len + 1; - if (len >= sz) - len = sz - 1; - for (i = 0; i < len; i++) - path[i] = (char)me32.szExePath[i]; - path[len++] = 0; - return len; + { + int i, len = (int)wcslen(me32.szExePath); + if (sz <= 0) + return len + 1; + if (len >= sz) + len = sz - 1; + for (i = 0; i < len; i++) + path[i] = (char)me32.szExePath[i]; + path[len++] = 0; + return len; + } # endif # else - len = (int)strlen(me32.szExePath); - if (sz <= 0) - return len + 1; - if (len >= sz) - len = sz - 1; - memcpy(path, me32.szExePath, len); - path[len++] = 0; - return len; + { + int len = (int)strlen(me32.szExePath); + if (sz <= 0) + return len + 1; + if (len >= sz) + len = sz - 1; + memcpy(path, me32.szExePath, len); + path[len++] = 0; + return len; + } # endif } } while ((*module_next) (hModuleSnap, &me32)); diff --git a/drivers/builtin_openssl2/crypto/ebcdic.c b/drivers/builtin_openssl2/crypto/ebcdic.c index 4b7652c0ec..fd6df92b46 100644 --- a/drivers/builtin_openssl2/crypto/ebcdic.c +++ b/drivers/builtin_openssl2/crypto/ebcdic.c @@ -3,7 +3,7 @@ #ifndef CHARSET_EBCDIC # include -# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX) +# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX) || defined(__clang__) static void *dummy = &dummy; # endif diff --git a/drivers/builtin_openssl2/crypto/ec/ec2_smpl.c b/drivers/builtin_openssl2/crypto/ec/ec2_smpl.c index 077c7fc8dd..5b27b91fcc 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec2_smpl.c +++ b/drivers/builtin_openssl2/crypto/ec/ec2_smpl.c @@ -746,6 +746,7 @@ int ec_GF2m_simple_make_affine(const EC_GROUP *group, EC_POINT *point, goto err; if (!BN_one(&point->Z)) goto err; + point->Z_is_one = 1; ret = 1; diff --git a/drivers/builtin_openssl2/crypto/ec/ec_ameth.c b/drivers/builtin_openssl2/crypto/ec/ec_ameth.c index 5cefb5ad30..83e208cfe4 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_ameth.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_ameth.c @@ -64,8 +64,12 @@ #ifndef OPENSSL_NO_CMS # include #endif +#include #include "asn1_locl.h" +static int ecdh_cms_decrypt(CMS_RecipientInfo *ri); +static int ecdh_cms_encrypt(CMS_RecipientInfo *ri); + static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) { const EC_GROUP *group; @@ -580,10 +584,21 @@ static int ec_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0); } return 1; + + case ASN1_PKEY_CTRL_CMS_ENVELOPE: + if (arg1 == 1) + return ecdh_cms_decrypt(arg2); + else if (arg1 == 0) + return ecdh_cms_encrypt(arg2); + return -2; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_AGREE; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 2; default: @@ -625,3 +640,326 @@ const EVP_PKEY_ASN1_METHOD eckey_asn1_meth = { old_ec_priv_decode, old_ec_priv_encode }; + +#ifndef OPENSSL_NO_CMS + +static int ecdh_cms_set_peerkey(EVP_PKEY_CTX *pctx, + X509_ALGOR *alg, ASN1_BIT_STRING *pubkey) +{ + ASN1_OBJECT *aoid; + int atype; + void *aval; + int rv = 0; + EVP_PKEY *pkpeer = NULL; + EC_KEY *ecpeer = NULL; + const unsigned char *p; + int plen; + X509_ALGOR_get0(&aoid, &atype, &aval, alg); + if (OBJ_obj2nid(aoid) != NID_X9_62_id_ecPublicKey) + goto err; + /* If absent parameters get group from main key */ + if (atype == V_ASN1_UNDEF || atype == V_ASN1_NULL) { + const EC_GROUP *grp; + EVP_PKEY *pk; + pk = EVP_PKEY_CTX_get0_pkey(pctx); + if (!pk) + goto err; + grp = EC_KEY_get0_group(pk->pkey.ec); + ecpeer = EC_KEY_new(); + if (!ecpeer) + goto err; + if (!EC_KEY_set_group(ecpeer, grp)) + goto err; + } else { + ecpeer = eckey_type2param(atype, aval); + if (!ecpeer) + goto err; + } + /* We have parameters now set public key */ + plen = ASN1_STRING_length(pubkey); + p = ASN1_STRING_data(pubkey); + if (!p || !plen) + goto err; + if (!o2i_ECPublicKey(&ecpeer, &p, plen)) + goto err; + pkpeer = EVP_PKEY_new(); + if (!pkpeer) + goto err; + EVP_PKEY_set1_EC_KEY(pkpeer, ecpeer); + if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0) + rv = 1; + err: + if (ecpeer) + EC_KEY_free(ecpeer); + if (pkpeer) + EVP_PKEY_free(pkpeer); + return rv; +} + +/* Set KDF parameters based on KDF NID */ +static int ecdh_cms_set_kdf_param(EVP_PKEY_CTX *pctx, int eckdf_nid) +{ + int kdf_nid, kdfmd_nid, cofactor; + const EVP_MD *kdf_md; + if (eckdf_nid == NID_undef) + return 0; + + /* Lookup KDF type, cofactor mode and digest */ + if (!OBJ_find_sigid_algs(eckdf_nid, &kdfmd_nid, &kdf_nid)) + return 0; + + if (kdf_nid == NID_dh_std_kdf) + cofactor = 0; + else if (kdf_nid == NID_dh_cofactor_kdf) + cofactor = 1; + else + return 0; + + if (EVP_PKEY_CTX_set_ecdh_cofactor_mode(pctx, cofactor) <= 0) + return 0; + + if (EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, EVP_PKEY_ECDH_KDF_X9_62) <= 0) + return 0; + + kdf_md = EVP_get_digestbynid(kdfmd_nid); + if (!kdf_md) + return 0; + + if (EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, kdf_md) <= 0) + return 0; + return 1; +} + +static int ecdh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri) +{ + int rv = 0; + + X509_ALGOR *alg, *kekalg = NULL; + ASN1_OCTET_STRING *ukm; + const unsigned char *p; + unsigned char *der = NULL; + int plen, keylen; + const EVP_CIPHER *kekcipher; + EVP_CIPHER_CTX *kekctx; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm)) + return 0; + + if (!ecdh_cms_set_kdf_param(pctx, OBJ_obj2nid(alg->algorithm))) { + ECerr(EC_F_ECDH_CMS_SET_SHARED_INFO, EC_R_KDF_PARAMETER_ERROR); + return 0; + } + + if (alg->parameter->type != V_ASN1_SEQUENCE) + return 0; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + kekalg = d2i_X509_ALGOR(NULL, &p, plen); + if (!kekalg) + goto err; + kekctx = CMS_RecipientInfo_kari_get0_ctx(ri); + if (!kekctx) + goto err; + kekcipher = EVP_get_cipherbyobj(kekalg->algorithm); + if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE) + goto err; + if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL)) + goto err; + if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0) + goto err; + + keylen = EVP_CIPHER_CTX_key_length(kekctx); + if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + plen = CMS_SharedInfo_encode(&der, kekalg, ukm, keylen); + + if (!plen) + goto err; + + if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, der, plen) <= 0) + goto err; + der = NULL; + + rv = 1; + err: + if (kekalg) + X509_ALGOR_free(kekalg); + if (der) + OPENSSL_free(der); + return rv; +} + +static int ecdh_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* See if we need to set peer key */ + if (!EVP_PKEY_CTX_get0_peerkey(pctx)) { + X509_ALGOR *alg; + ASN1_BIT_STRING *pubkey; + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &alg, &pubkey, + NULL, NULL, NULL)) + return 0; + if (!alg || !pubkey) + return 0; + if (!ecdh_cms_set_peerkey(pctx, alg, pubkey)) { + ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_PEER_KEY_ERROR); + return 0; + } + } + /* Set ECDH derivation parameters and initialise unwrap context */ + if (!ecdh_cms_set_shared_info(pctx, ri)) { + ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_SHARED_INFO_ERROR); + return 0; + } + return 1; +} + +static int ecdh_cms_encrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + EVP_PKEY *pkey; + EVP_CIPHER_CTX *ctx; + int keylen; + X509_ALGOR *talg, *wrap_alg = NULL; + ASN1_OBJECT *aoid; + ASN1_BIT_STRING *pubkey; + ASN1_STRING *wrap_str; + ASN1_OCTET_STRING *ukm; + unsigned char *penc = NULL; + int penclen; + int rv = 0; + int ecdh_nid, kdf_type, kdf_nid, wrap_nid; + const EVP_MD *kdf_md; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* Get ephemeral key */ + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey, + NULL, NULL, NULL)) + goto err; + X509_ALGOR_get0(&aoid, NULL, NULL, talg); + /* Is everything uninitialised? */ + if (aoid == OBJ_nid2obj(NID_undef)) { + + EC_KEY *eckey = pkey->pkey.ec; + /* Set the key */ + unsigned char *p; + + penclen = i2o_ECPublicKey(eckey, NULL); + if (penclen <= 0) + goto err; + penc = OPENSSL_malloc(penclen); + if (!penc) + goto err; + p = penc; + penclen = i2o_ECPublicKey(eckey, &p); + if (penclen <= 0) + goto err; + ASN1_STRING_set0(pubkey, penc, penclen); + pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07); + pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT; + + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_X9_62_id_ecPublicKey), + V_ASN1_UNDEF, NULL); + } + + /* See if custom paraneters set */ + kdf_type = EVP_PKEY_CTX_get_ecdh_kdf_type(pctx); + if (kdf_type <= 0) + goto err; + if (!EVP_PKEY_CTX_get_ecdh_kdf_md(pctx, &kdf_md)) + goto err; + ecdh_nid = EVP_PKEY_CTX_get_ecdh_cofactor_mode(pctx); + if (ecdh_nid < 0) + goto err; + else if (ecdh_nid == 0) + ecdh_nid = NID_dh_std_kdf; + else if (ecdh_nid == 1) + ecdh_nid = NID_dh_cofactor_kdf; + + if (kdf_type == EVP_PKEY_ECDH_KDF_NONE) { + kdf_type = EVP_PKEY_ECDH_KDF_X9_62; + if (EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, kdf_type) <= 0) + goto err; + } else + /* Uknown KDF */ + goto err; + if (kdf_md == NULL) { + /* Fixme later for better MD */ + kdf_md = EVP_sha1(); + if (EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, kdf_md) <= 0) + goto err; + } + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm)) + goto err; + + /* Lookup NID for KDF+cofactor+digest */ + + if (!OBJ_find_sigid_by_algs(&kdf_nid, EVP_MD_type(kdf_md), ecdh_nid)) + goto err; + /* Get wrap NID */ + ctx = CMS_RecipientInfo_kari_get0_ctx(ri); + wrap_nid = EVP_CIPHER_CTX_type(ctx); + keylen = EVP_CIPHER_CTX_key_length(ctx); + + /* Package wrap algorithm in an AlgorithmIdentifier */ + + wrap_alg = X509_ALGOR_new(); + if (!wrap_alg) + goto err; + wrap_alg->algorithm = OBJ_nid2obj(wrap_nid); + wrap_alg->parameter = ASN1_TYPE_new(); + if (!wrap_alg->parameter) + goto err; + if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0) + goto err; + if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) { + ASN1_TYPE_free(wrap_alg->parameter); + wrap_alg->parameter = NULL; + } + + if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + penclen = CMS_SharedInfo_encode(&penc, wrap_alg, ukm, keylen); + + if (!penclen) + goto err; + + if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, penc, penclen) <= 0) + goto err; + penc = NULL; + + /* + * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter + * of another AlgorithmIdentifier. + */ + penclen = i2d_X509_ALGOR(wrap_alg, &penc); + if (!penc || !penclen) + goto err; + wrap_str = ASN1_STRING_new(); + if (!wrap_str) + goto err; + ASN1_STRING_set0(wrap_str, penc, penclen); + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(kdf_nid), V_ASN1_SEQUENCE, wrap_str); + + rv = 1; + + err: + if (penc) + OPENSSL_free(penc); + if (wrap_alg) + X509_ALGOR_free(wrap_alg); + return rv; +} + +#endif diff --git a/drivers/builtin_openssl2/crypto/ec/ec_curve.c b/drivers/builtin_openssl2/crypto/ec/ec_curve.c index e4bcb5dd81..6dbe9d8258 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_curve.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_curve.c @@ -69,11 +69,16 @@ * */ +#include #include "ec_lcl.h" #include #include #include +#ifdef OPENSSL_FIPS +# include +#endif + typedef struct { int field_type, /* either NID_X9_62_prime_field or * NID_X9_62_characteristic_two_field */ @@ -2282,6 +2287,554 @@ static const struct { #endif +/* + * These curves were added by Annie Yousar + * For the definition of RFC 5639 curves see + * http://www.ietf.org/rfc/rfc5639.txt These curves are generated verifiable + * at random, nevertheless the seed is omitted as parameter because the + * generation mechanism is different from those defined in ANSI X9.62. + */ + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 20 * 6]; +} _EC_brainpoolP160r1 = { + { + NID_X9_62_prime_field, 0, 20, 1 + }, + { + /* no seed */ + /* p */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F, + /* a */ + 0x34, 0x0E, 0x7B, 0xE2, 0xA2, 0x80, 0xEB, 0x74, 0xE2, 0xBE, 0x61, 0xBA, + 0xDA, 0x74, 0x5D, 0x97, 0xE8, 0xF7, 0xC3, 0x00, + /* b */ + 0x1E, 0x58, 0x9A, 0x85, 0x95, 0x42, 0x34, 0x12, 0x13, 0x4F, 0xAA, 0x2D, + 0xBD, 0xEC, 0x95, 0xC8, 0xD8, 0x67, 0x5E, 0x58, + /* x */ + 0xBE, 0xD5, 0xAF, 0x16, 0xEA, 0x3F, 0x6A, 0x4F, 0x62, 0x93, 0x8C, 0x46, + 0x31, 0xEB, 0x5A, 0xF7, 0xBD, 0xBC, 0xDB, 0xC3, + /* y */ + 0x16, 0x67, 0xCB, 0x47, 0x7A, 0x1A, 0x8E, 0xC3, 0x38, 0xF9, 0x47, 0x41, + 0x66, 0x9C, 0x97, 0x63, 0x16, 0xDA, 0x63, 0x21, + /* order */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91, + 0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 20 * 6]; +} _EC_brainpoolP160t1 = { + { + NID_X9_62_prime_field, 0, 20, 1 + }, + { + /* no seed */ + /* p */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F, + /* a */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0C, + /* b */ + 0x7A, 0x55, 0x6B, 0x6D, 0xAE, 0x53, 0x5B, 0x7B, 0x51, 0xED, 0x2C, 0x4D, + 0x7D, 0xAA, 0x7A, 0x0B, 0x5C, 0x55, 0xF3, 0x80, + /* x */ + 0xB1, 0x99, 0xB1, 0x3B, 0x9B, 0x34, 0xEF, 0xC1, 0x39, 0x7E, 0x64, 0xBA, + 0xEB, 0x05, 0xAC, 0xC2, 0x65, 0xFF, 0x23, 0x78, + /* y */ + 0xAD, 0xD6, 0x71, 0x8B, 0x7C, 0x7C, 0x19, 0x61, 0xF0, 0x99, 0x1B, 0x84, + 0x24, 0x43, 0x77, 0x21, 0x52, 0xC9, 0xE0, 0xAD, + /* order */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91, + 0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 24 * 6]; +} _EC_brainpoolP192r1 = { + { + NID_X9_62_prime_field, 0, 24, 1 + }, + { + /* no seed */ + /* p */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97, + /* a */ + 0x6A, 0x91, 0x17, 0x40, 0x76, 0xB1, 0xE0, 0xE1, 0x9C, 0x39, 0xC0, 0x31, + 0xFE, 0x86, 0x85, 0xC1, 0xCA, 0xE0, 0x40, 0xE5, 0xC6, 0x9A, 0x28, 0xEF, + /* b */ + 0x46, 0x9A, 0x28, 0xEF, 0x7C, 0x28, 0xCC, 0xA3, 0xDC, 0x72, 0x1D, 0x04, + 0x4F, 0x44, 0x96, 0xBC, 0xCA, 0x7E, 0xF4, 0x14, 0x6F, 0xBF, 0x25, 0xC9, + /* x */ + 0xC0, 0xA0, 0x64, 0x7E, 0xAA, 0xB6, 0xA4, 0x87, 0x53, 0xB0, 0x33, 0xC5, + 0x6C, 0xB0, 0xF0, 0x90, 0x0A, 0x2F, 0x5C, 0x48, 0x53, 0x37, 0x5F, 0xD6, + /* y */ + 0x14, 0xB6, 0x90, 0x86, 0x6A, 0xBD, 0x5B, 0xB8, 0x8B, 0x5F, 0x48, 0x28, + 0xC1, 0x49, 0x00, 0x02, 0xE6, 0x77, 0x3F, 0xA2, 0xFA, 0x29, 0x9B, 0x8F, + /* order */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F, + 0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 24 * 6]; +} _EC_brainpoolP192t1 = { + { + NID_X9_62_prime_field, 0, 24, 1 + }, + { + /* no seed */ + /* p */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97, + /* a */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x94, + /* b */ + 0x13, 0xD5, 0x6F, 0xFA, 0xEC, 0x78, 0x68, 0x1E, 0x68, 0xF9, 0xDE, 0xB4, + 0x3B, 0x35, 0xBE, 0xC2, 0xFB, 0x68, 0x54, 0x2E, 0x27, 0x89, 0x7B, 0x79, + /* x */ + 0x3A, 0xE9, 0xE5, 0x8C, 0x82, 0xF6, 0x3C, 0x30, 0x28, 0x2E, 0x1F, 0xE7, + 0xBB, 0xF4, 0x3F, 0xA7, 0x2C, 0x44, 0x6A, 0xF6, 0xF4, 0x61, 0x81, 0x29, + /* y */ + 0x09, 0x7E, 0x2C, 0x56, 0x67, 0xC2, 0x22, 0x3A, 0x90, 0x2A, 0xB5, 0xCA, + 0x44, 0x9D, 0x00, 0x84, 0xB7, 0xE5, 0xB3, 0xDE, 0x7C, 0xCC, 0x01, 0xC9, + /* order */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F, + 0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 28 * 6]; +} _EC_brainpoolP224r1 = { + { + NID_X9_62_prime_field, 0, 28, 1 + }, + { + /* no seed */ + /* p */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 0xFF, + /* a */ + 0x68, 0xA5, 0xE6, 0x2C, 0xA9, 0xCE, 0x6C, 0x1C, 0x29, 0x98, 0x03, 0xA6, + 0xC1, 0x53, 0x0B, 0x51, 0x4E, 0x18, 0x2A, 0xD8, 0xB0, 0x04, 0x2A, 0x59, + 0xCA, 0xD2, 0x9F, 0x43, + /* b */ + 0x25, 0x80, 0xF6, 0x3C, 0xCF, 0xE4, 0x41, 0x38, 0x87, 0x07, 0x13, 0xB1, + 0xA9, 0x23, 0x69, 0xE3, 0x3E, 0x21, 0x35, 0xD2, 0x66, 0xDB, 0xB3, 0x72, + 0x38, 0x6C, 0x40, 0x0B, + /* x */ + 0x0D, 0x90, 0x29, 0xAD, 0x2C, 0x7E, 0x5C, 0xF4, 0x34, 0x08, 0x23, 0xB2, + 0xA8, 0x7D, 0xC6, 0x8C, 0x9E, 0x4C, 0xE3, 0x17, 0x4C, 0x1E, 0x6E, 0xFD, + 0xEE, 0x12, 0xC0, 0x7D, + /* y */ + 0x58, 0xAA, 0x56, 0xF7, 0x72, 0xC0, 0x72, 0x6F, 0x24, 0xC6, 0xB8, 0x9E, + 0x4E, 0xCD, 0xAC, 0x24, 0x35, 0x4B, 0x9E, 0x99, 0xCA, 0xA3, 0xF6, 0xD3, + 0x76, 0x14, 0x02, 0xCD, + /* order */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3, + 0xA5, 0xA7, 0x93, 0x9F + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 28 * 6]; +} _EC_brainpoolP224t1 = { + { + NID_X9_62_prime_field, 0, 28, 1 + }, + { + /* no seed */ + /* p */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 0xFF, + /* a */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 0xFC, + /* b */ + 0x4B, 0x33, 0x7D, 0x93, 0x41, 0x04, 0xCD, 0x7B, 0xEF, 0x27, 0x1B, 0xF6, + 0x0C, 0xED, 0x1E, 0xD2, 0x0D, 0xA1, 0x4C, 0x08, 0xB3, 0xBB, 0x64, 0xF1, + 0x8A, 0x60, 0x88, 0x8D, + /* x */ + 0x6A, 0xB1, 0xE3, 0x44, 0xCE, 0x25, 0xFF, 0x38, 0x96, 0x42, 0x4E, 0x7F, + 0xFE, 0x14, 0x76, 0x2E, 0xCB, 0x49, 0xF8, 0x92, 0x8A, 0xC0, 0xC7, 0x60, + 0x29, 0xB4, 0xD5, 0x80, + /* y */ + 0x03, 0x74, 0xE9, 0xF5, 0x14, 0x3E, 0x56, 0x8C, 0xD2, 0x3F, 0x3F, 0x4D, + 0x7C, 0x0D, 0x4B, 0x1E, 0x41, 0xC8, 0xCC, 0x0D, 0x1C, 0x6A, 0xBD, 0x5F, + 0x1A, 0x46, 0xDB, 0x4C, + /* order */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3, + 0xA5, 0xA7, 0x93, 0x9F + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 32 * 6]; +} _EC_brainpoolP256r1 = { + { + NID_X9_62_prime_field, 0, 32, 1 + }, + { + /* no seed */ + /* p */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77, + /* a */ + 0x7D, 0x5A, 0x09, 0x75, 0xFC, 0x2C, 0x30, 0x57, 0xEE, 0xF6, 0x75, 0x30, + 0x41, 0x7A, 0xFF, 0xE7, 0xFB, 0x80, 0x55, 0xC1, 0x26, 0xDC, 0x5C, 0x6C, + 0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9, + /* b */ + 0x26, 0xDC, 0x5C, 0x6C, 0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9, + 0xBB, 0xD7, 0x7C, 0xBF, 0x95, 0x84, 0x16, 0x29, 0x5C, 0xF7, 0xE1, 0xCE, + 0x6B, 0xCC, 0xDC, 0x18, 0xFF, 0x8C, 0x07, 0xB6, + /* x */ + 0x8B, 0xD2, 0xAE, 0xB9, 0xCB, 0x7E, 0x57, 0xCB, 0x2C, 0x4B, 0x48, 0x2F, + 0xFC, 0x81, 0xB7, 0xAF, 0xB9, 0xDE, 0x27, 0xE1, 0xE3, 0xBD, 0x23, 0xC2, + 0x3A, 0x44, 0x53, 0xBD, 0x9A, 0xCE, 0x32, 0x62, + /* y */ + 0x54, 0x7E, 0xF8, 0x35, 0xC3, 0xDA, 0xC4, 0xFD, 0x97, 0xF8, 0x46, 0x1A, + 0x14, 0x61, 0x1D, 0xC9, 0xC2, 0x77, 0x45, 0x13, 0x2D, 0xED, 0x8E, 0x54, + 0x5C, 0x1D, 0x54, 0xC7, 0x2F, 0x04, 0x69, 0x97, + /* order */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7, + 0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 32 * 6]; +} _EC_brainpoolP256t1 = { + { + NID_X9_62_prime_field, 0, 32, 1 + }, + { + /* no seed */ + /* p */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77, + /* a */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x74, + /* b */ + 0x66, 0x2C, 0x61, 0xC4, 0x30, 0xD8, 0x4E, 0xA4, 0xFE, 0x66, 0xA7, 0x73, + 0x3D, 0x0B, 0x76, 0xB7, 0xBF, 0x93, 0xEB, 0xC4, 0xAF, 0x2F, 0x49, 0x25, + 0x6A, 0xE5, 0x81, 0x01, 0xFE, 0xE9, 0x2B, 0x04, + /* x */ + 0xA3, 0xE8, 0xEB, 0x3C, 0xC1, 0xCF, 0xE7, 0xB7, 0x73, 0x22, 0x13, 0xB2, + 0x3A, 0x65, 0x61, 0x49, 0xAF, 0xA1, 0x42, 0xC4, 0x7A, 0xAF, 0xBC, 0x2B, + 0x79, 0xA1, 0x91, 0x56, 0x2E, 0x13, 0x05, 0xF4, + /* y */ + 0x2D, 0x99, 0x6C, 0x82, 0x34, 0x39, 0xC5, 0x6D, 0x7F, 0x7B, 0x22, 0xE1, + 0x46, 0x44, 0x41, 0x7E, 0x69, 0xBC, 0xB6, 0xDE, 0x39, 0xD0, 0x27, 0x00, + 0x1D, 0xAB, 0xE8, 0xF3, 0x5B, 0x25, 0xC9, 0xBE, + /* order */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7, + 0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 40 * 6]; +} _EC_brainpoolP320r1 = { + { + NID_X9_62_prime_field, 0, 40, 1 + }, + { + /* no seed */ + /* p */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x27, + /* a */ + 0x3E, 0xE3, 0x0B, 0x56, 0x8F, 0xBA, 0xB0, 0xF8, 0x83, 0xCC, 0xEB, 0xD4, + 0x6D, 0x3F, 0x3B, 0xB8, 0xA2, 0xA7, 0x35, 0x13, 0xF5, 0xEB, 0x79, 0xDA, + 0x66, 0x19, 0x0E, 0xB0, 0x85, 0xFF, 0xA9, 0xF4, 0x92, 0xF3, 0x75, 0xA9, + 0x7D, 0x86, 0x0E, 0xB4, + /* b */ + 0x52, 0x08, 0x83, 0x94, 0x9D, 0xFD, 0xBC, 0x42, 0xD3, 0xAD, 0x19, 0x86, + 0x40, 0x68, 0x8A, 0x6F, 0xE1, 0x3F, 0x41, 0x34, 0x95, 0x54, 0xB4, 0x9A, + 0xCC, 0x31, 0xDC, 0xCD, 0x88, 0x45, 0x39, 0x81, 0x6F, 0x5E, 0xB4, 0xAC, + 0x8F, 0xB1, 0xF1, 0xA6, + /* x */ + 0x43, 0xBD, 0x7E, 0x9A, 0xFB, 0x53, 0xD8, 0xB8, 0x52, 0x89, 0xBC, 0xC4, + 0x8E, 0xE5, 0xBF, 0xE6, 0xF2, 0x01, 0x37, 0xD1, 0x0A, 0x08, 0x7E, 0xB6, + 0xE7, 0x87, 0x1E, 0x2A, 0x10, 0xA5, 0x99, 0xC7, 0x10, 0xAF, 0x8D, 0x0D, + 0x39, 0xE2, 0x06, 0x11, + /* y */ + 0x14, 0xFD, 0xD0, 0x55, 0x45, 0xEC, 0x1C, 0xC8, 0xAB, 0x40, 0x93, 0x24, + 0x7F, 0x77, 0x27, 0x5E, 0x07, 0x43, 0xFF, 0xED, 0x11, 0x71, 0x82, 0xEA, + 0xA9, 0xC7, 0x78, 0x77, 0xAA, 0xAC, 0x6A, 0xC7, 0xD3, 0x52, 0x45, 0xD1, + 0x69, 0x2E, 0x8E, 0xE1, + /* order */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA5, 0xB6, 0x8F, 0x12, 0xA3, + 0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B, + 0x44, 0xC5, 0x93, 0x11 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 40 * 6]; +} _EC_brainpoolP320t1 = { + { + NID_X9_62_prime_field, 0, 40, 1 + }, + { + /* no seed */ + /* p */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x27, + /* a */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x24, + /* b */ + 0xA7, 0xF5, 0x61, 0xE0, 0x38, 0xEB, 0x1E, 0xD5, 0x60, 0xB3, 0xD1, 0x47, + 0xDB, 0x78, 0x20, 0x13, 0x06, 0x4C, 0x19, 0xF2, 0x7E, 0xD2, 0x7C, 0x67, + 0x80, 0xAA, 0xF7, 0x7F, 0xB8, 0xA5, 0x47, 0xCE, 0xB5, 0xB4, 0xFE, 0xF4, + 0x22, 0x34, 0x03, 0x53, + /* x */ + 0x92, 0x5B, 0xE9, 0xFB, 0x01, 0xAF, 0xC6, 0xFB, 0x4D, 0x3E, 0x7D, 0x49, + 0x90, 0x01, 0x0F, 0x81, 0x34, 0x08, 0xAB, 0x10, 0x6C, 0x4F, 0x09, 0xCB, + 0x7E, 0xE0, 0x78, 0x68, 0xCC, 0x13, 0x6F, 0xFF, 0x33, 0x57, 0xF6, 0x24, + 0xA2, 0x1B, 0xED, 0x52, + /* y */ + 0x63, 0xBA, 0x3A, 0x7A, 0x27, 0x48, 0x3E, 0xBF, 0x66, 0x71, 0xDB, 0xEF, + 0x7A, 0xBB, 0x30, 0xEB, 0xEE, 0x08, 0x4E, 0x58, 0xA0, 0xB0, 0x77, 0xAD, + 0x42, 0xA5, 0xA0, 0x98, 0x9D, 0x1E, 0xE7, 0x1B, 0x1B, 0x9B, 0xC0, 0x45, + 0x5F, 0xB0, 0xD2, 0xC3, + /* order */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA5, 0xB6, 0x8F, 0x12, 0xA3, + 0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B, + 0x44, 0xC5, 0x93, 0x11 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 48 * 6]; +} _EC_brainpoolP384r1 = { + { + NID_X9_62_prime_field, 0, 48, 1 + }, + { + /* no seed */ + /* p */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53, + /* a */ + 0x7B, 0xC3, 0x82, 0xC6, 0x3D, 0x8C, 0x15, 0x0C, 0x3C, 0x72, 0x08, 0x0A, + 0xCE, 0x05, 0xAF, 0xA0, 0xC2, 0xBE, 0xA2, 0x8E, 0x4F, 0xB2, 0x27, 0x87, + 0x13, 0x91, 0x65, 0xEF, 0xBA, 0x91, 0xF9, 0x0F, 0x8A, 0xA5, 0x81, 0x4A, + 0x50, 0x3A, 0xD4, 0xEB, 0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26, + /* b */ + 0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26, 0x8B, 0x39, 0xB5, 0x54, + 0x16, 0xF0, 0x44, 0x7C, 0x2F, 0xB7, 0x7D, 0xE1, 0x07, 0xDC, 0xD2, 0xA6, + 0x2E, 0x88, 0x0E, 0xA5, 0x3E, 0xEB, 0x62, 0xD5, 0x7C, 0xB4, 0x39, 0x02, + 0x95, 0xDB, 0xC9, 0x94, 0x3A, 0xB7, 0x86, 0x96, 0xFA, 0x50, 0x4C, 0x11, + /* x */ + 0x1D, 0x1C, 0x64, 0xF0, 0x68, 0xCF, 0x45, 0xFF, 0xA2, 0xA6, 0x3A, 0x81, + 0xB7, 0xC1, 0x3F, 0x6B, 0x88, 0x47, 0xA3, 0xE7, 0x7E, 0xF1, 0x4F, 0xE3, + 0xDB, 0x7F, 0xCA, 0xFE, 0x0C, 0xBD, 0x10, 0xE8, 0xE8, 0x26, 0xE0, 0x34, + 0x36, 0xD6, 0x46, 0xAA, 0xEF, 0x87, 0xB2, 0xE2, 0x47, 0xD4, 0xAF, 0x1E, + /* y */ + 0x8A, 0xBE, 0x1D, 0x75, 0x20, 0xF9, 0xC2, 0xA4, 0x5C, 0xB1, 0xEB, 0x8E, + 0x95, 0xCF, 0xD5, 0x52, 0x62, 0xB7, 0x0B, 0x29, 0xFE, 0xEC, 0x58, 0x64, + 0xE1, 0x9C, 0x05, 0x4F, 0xF9, 0x91, 0x29, 0x28, 0x0E, 0x46, 0x46, 0x21, + 0x77, 0x91, 0x81, 0x11, 0x42, 0x82, 0x03, 0x41, 0x26, 0x3C, 0x53, 0x15, + /* order */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3, + 0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF, + 0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 48 * 6]; +} _EC_brainpoolP384t1 = { + { + NID_X9_62_prime_field, 0, 48, 1 + }, + { + /* no seed */ + /* p */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53, + /* a */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x50, + /* b */ + 0x7F, 0x51, 0x9E, 0xAD, 0xA7, 0xBD, 0xA8, 0x1B, 0xD8, 0x26, 0xDB, 0xA6, + 0x47, 0x91, 0x0F, 0x8C, 0x4B, 0x93, 0x46, 0xED, 0x8C, 0xCD, 0xC6, 0x4E, + 0x4B, 0x1A, 0xBD, 0x11, 0x75, 0x6D, 0xCE, 0x1D, 0x20, 0x74, 0xAA, 0x26, + 0x3B, 0x88, 0x80, 0x5C, 0xED, 0x70, 0x35, 0x5A, 0x33, 0xB4, 0x71, 0xEE, + /* x */ + 0x18, 0xDE, 0x98, 0xB0, 0x2D, 0xB9, 0xA3, 0x06, 0xF2, 0xAF, 0xCD, 0x72, + 0x35, 0xF7, 0x2A, 0x81, 0x9B, 0x80, 0xAB, 0x12, 0xEB, 0xD6, 0x53, 0x17, + 0x24, 0x76, 0xFE, 0xCD, 0x46, 0x2A, 0xAB, 0xFF, 0xC4, 0xFF, 0x19, 0x1B, + 0x94, 0x6A, 0x5F, 0x54, 0xD8, 0xD0, 0xAA, 0x2F, 0x41, 0x88, 0x08, 0xCC, + /* y */ + 0x25, 0xAB, 0x05, 0x69, 0x62, 0xD3, 0x06, 0x51, 0xA1, 0x14, 0xAF, 0xD2, + 0x75, 0x5A, 0xD3, 0x36, 0x74, 0x7F, 0x93, 0x47, 0x5B, 0x7A, 0x1F, 0xCA, + 0x3B, 0x88, 0xF2, 0xB6, 0xA2, 0x08, 0xCC, 0xFE, 0x46, 0x94, 0x08, 0x58, + 0x4D, 0xC2, 0xB2, 0x91, 0x26, 0x75, 0xBF, 0x5B, 0x9E, 0x58, 0x29, 0x28, + /* order */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3, + 0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF, + 0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 64 * 6]; +} _EC_brainpoolP512r1 = { + { + NID_X9_62_prime_field, 0, 64, 1 + }, + { + /* no seed */ + /* p */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF3, + /* a */ + 0x78, 0x30, 0xA3, 0x31, 0x8B, 0x60, 0x3B, 0x89, 0xE2, 0x32, 0x71, 0x45, + 0xAC, 0x23, 0x4C, 0xC5, 0x94, 0xCB, 0xDD, 0x8D, 0x3D, 0xF9, 0x16, 0x10, + 0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC, 0x2D, 0xED, 0x5D, 0x5A, + 0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9, 0x8B, 0x9A, 0xC8, 0xB5, + 0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9, 0xE7, 0xC1, 0xAC, 0x4D, + 0x77, 0xFC, 0x94, 0xCA, + /* b */ + 0x3D, 0xF9, 0x16, 0x10, 0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC, + 0x2D, 0xED, 0x5D, 0x5A, 0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9, + 0x8B, 0x9A, 0xC8, 0xB5, 0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9, + 0xE7, 0xC1, 0xAC, 0x4D, 0x77, 0xFC, 0x94, 0xCA, 0xDC, 0x08, 0x3E, 0x67, + 0x98, 0x40, 0x50, 0xB7, 0x5E, 0xBA, 0xE5, 0xDD, 0x28, 0x09, 0xBD, 0x63, + 0x80, 0x16, 0xF7, 0x23, + /* x */ + 0x81, 0xAE, 0xE4, 0xBD, 0xD8, 0x2E, 0xD9, 0x64, 0x5A, 0x21, 0x32, 0x2E, + 0x9C, 0x4C, 0x6A, 0x93, 0x85, 0xED, 0x9F, 0x70, 0xB5, 0xD9, 0x16, 0xC1, + 0xB4, 0x3B, 0x62, 0xEE, 0xF4, 0xD0, 0x09, 0x8E, 0xFF, 0x3B, 0x1F, 0x78, + 0xE2, 0xD0, 0xD4, 0x8D, 0x50, 0xD1, 0x68, 0x7B, 0x93, 0xB9, 0x7D, 0x5F, + 0x7C, 0x6D, 0x50, 0x47, 0x40, 0x6A, 0x5E, 0x68, 0x8B, 0x35, 0x22, 0x09, + 0xBC, 0xB9, 0xF8, 0x22, + /* y */ + 0x7D, 0xDE, 0x38, 0x5D, 0x56, 0x63, 0x32, 0xEC, 0xC0, 0xEA, 0xBF, 0xA9, + 0xCF, 0x78, 0x22, 0xFD, 0xF2, 0x09, 0xF7, 0x00, 0x24, 0xA5, 0x7B, 0x1A, + 0xA0, 0x00, 0xC5, 0x5B, 0x88, 0x1F, 0x81, 0x11, 0xB2, 0xDC, 0xDE, 0x49, + 0x4A, 0x5F, 0x48, 0x5E, 0x5B, 0xCA, 0x4B, 0xD8, 0x8A, 0x27, 0x63, 0xAE, + 0xD1, 0xCA, 0x2B, 0x2F, 0xA8, 0xF0, 0x54, 0x06, 0x78, 0xCD, 0x1E, 0x0F, + 0x3A, 0xD8, 0x08, 0x92, + /* order */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41, + 0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47, + 0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82, + 0x9C, 0xA9, 0x00, 0x69 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 64 * 6]; +} _EC_brainpoolP512t1 = { + { + NID_X9_62_prime_field, 0, 64, 1 + }, + { + /* no seed */ + /* p */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF3, + /* a */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF0, + /* b */ + 0x7C, 0xBB, 0xBC, 0xF9, 0x44, 0x1C, 0xFA, 0xB7, 0x6E, 0x18, 0x90, 0xE4, + 0x68, 0x84, 0xEA, 0xE3, 0x21, 0xF7, 0x0C, 0x0B, 0xCB, 0x49, 0x81, 0x52, + 0x78, 0x97, 0x50, 0x4B, 0xEC, 0x3E, 0x36, 0xA6, 0x2B, 0xCD, 0xFA, 0x23, + 0x04, 0x97, 0x65, 0x40, 0xF6, 0x45, 0x00, 0x85, 0xF2, 0xDA, 0xE1, 0x45, + 0xC2, 0x25, 0x53, 0xB4, 0x65, 0x76, 0x36, 0x89, 0x18, 0x0E, 0xA2, 0x57, + 0x18, 0x67, 0x42, 0x3E, + /* x */ + 0x64, 0x0E, 0xCE, 0x5C, 0x12, 0x78, 0x87, 0x17, 0xB9, 0xC1, 0xBA, 0x06, + 0xCB, 0xC2, 0xA6, 0xFE, 0xBA, 0x85, 0x84, 0x24, 0x58, 0xC5, 0x6D, 0xDE, + 0x9D, 0xB1, 0x75, 0x8D, 0x39, 0xC0, 0x31, 0x3D, 0x82, 0xBA, 0x51, 0x73, + 0x5C, 0xDB, 0x3E, 0xA4, 0x99, 0xAA, 0x77, 0xA7, 0xD6, 0x94, 0x3A, 0x64, + 0xF7, 0xA3, 0xF2, 0x5F, 0xE2, 0x6F, 0x06, 0xB5, 0x1B, 0xAA, 0x26, 0x96, + 0xFA, 0x90, 0x35, 0xDA, + /* y */ + 0x5B, 0x53, 0x4B, 0xD5, 0x95, 0xF5, 0xAF, 0x0F, 0xA2, 0xC8, 0x92, 0x37, + 0x6C, 0x84, 0xAC, 0xE1, 0xBB, 0x4E, 0x30, 0x19, 0xB7, 0x16, 0x34, 0xC0, + 0x11, 0x31, 0x15, 0x9C, 0xAE, 0x03, 0xCE, 0xE9, 0xD9, 0x93, 0x21, 0x84, + 0xBE, 0xEF, 0x21, 0x6B, 0xD7, 0x1D, 0xF2, 0xDA, 0xDF, 0x86, 0xA6, 0x27, + 0x30, 0x6E, 0xCF, 0xF9, 0x6D, 0xBB, 0x8B, 0xAC, 0xE1, 0x98, 0xB6, 0x1E, + 0x00, 0xF8, 0xB3, 0x32, + /* order */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41, + 0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47, + 0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82, + 0x9C, 0xA9, 0x00, 0x69 + } +}; + typedef struct _ec_list_element_st { int nid; const EC_CURVE_DATA *data; @@ -2343,13 +2896,15 @@ static const ec_list_element curve_list[] = { "X9.62 curve over a 239 bit prime field"}, {NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, 0, "X9.62 curve over a 239 bit prime field"}, -#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 - {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, EC_GFp_nistp256_method, - "X9.62/SECG curve over a 256 bit prime field"}, + {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, +#if defined(ECP_NISTZ256_ASM) + EC_GFp_nistz256_method, +#elif !defined(OPENSSL_NO_EC_NISTP_64_GCC_128) + EC_GFp_nistp256_method, #else - {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, 0, - "X9.62/SECG curve over a 256 bit prime field"}, + 0, #endif + "X9.62/SECG curve over a 256 bit prime field"}, #ifndef OPENSSL_NO_EC2M /* characteristic two field curves */ /* NIST/SECG curves */ @@ -2460,6 +3015,35 @@ static const ec_list_element curve_list[] = { "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n" "\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, #endif + /* brainpool curves */ + {NID_brainpoolP160r1, &_EC_brainpoolP160r1.h, 0, + "RFC 5639 curve over a 160 bit prime field"}, + {NID_brainpoolP160t1, &_EC_brainpoolP160t1.h, 0, + "RFC 5639 curve over a 160 bit prime field"}, + {NID_brainpoolP192r1, &_EC_brainpoolP192r1.h, 0, + "RFC 5639 curve over a 192 bit prime field"}, + {NID_brainpoolP192t1, &_EC_brainpoolP192t1.h, 0, + "RFC 5639 curve over a 192 bit prime field"}, + {NID_brainpoolP224r1, &_EC_brainpoolP224r1.h, 0, + "RFC 5639 curve over a 224 bit prime field"}, + {NID_brainpoolP224t1, &_EC_brainpoolP224t1.h, 0, + "RFC 5639 curve over a 224 bit prime field"}, + {NID_brainpoolP256r1, &_EC_brainpoolP256r1.h, 0, + "RFC 5639 curve over a 256 bit prime field"}, + {NID_brainpoolP256t1, &_EC_brainpoolP256t1.h, 0, + "RFC 5639 curve over a 256 bit prime field"}, + {NID_brainpoolP320r1, &_EC_brainpoolP320r1.h, 0, + "RFC 5639 curve over a 320 bit prime field"}, + {NID_brainpoolP320t1, &_EC_brainpoolP320t1.h, 0, + "RFC 5639 curve over a 320 bit prime field"}, + {NID_brainpoolP384r1, &_EC_brainpoolP384r1.h, 0, + "RFC 5639 curve over a 384 bit prime field"}, + {NID_brainpoolP384t1, &_EC_brainpoolP384t1.h, 0, + "RFC 5639 curve over a 384 bit prime field"}, + {NID_brainpoolP512r1, &_EC_brainpoolP512r1.h, 0, + "RFC 5639 curve over a 512 bit prime field"}, + {NID_brainpoolP512t1, &_EC_brainpoolP512t1.h, 0, + "RFC 5639 curve over a 512 bit prime field"}, }; #define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element)) @@ -2578,6 +3162,10 @@ EC_GROUP *EC_GROUP_new_by_curve_name(int nid) size_t i; EC_GROUP *ret = NULL; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_group_new_by_curve_name(nid); +#endif if (nid <= 0) return NULL; @@ -2613,3 +3201,48 @@ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems) return curve_list_length; } + +/* Functions to translate between common NIST curve names and NIDs */ + +typedef struct { + const char *name; /* NIST Name of curve */ + int nid; /* Curve NID */ +} EC_NIST_NAME; + +static EC_NIST_NAME nist_curves[] = { + {"B-163", NID_sect163r2}, + {"B-233", NID_sect233r1}, + {"B-283", NID_sect283r1}, + {"B-409", NID_sect409r1}, + {"B-571", NID_sect571r1}, + {"K-163", NID_sect163k1}, + {"K-233", NID_sect233k1}, + {"K-283", NID_sect283k1}, + {"K-409", NID_sect409k1}, + {"K-571", NID_sect571k1}, + {"P-192", NID_X9_62_prime192v1}, + {"P-224", NID_secp224r1}, + {"P-256", NID_X9_62_prime256v1}, + {"P-384", NID_secp384r1}, + {"P-521", NID_secp521r1} +}; + +const char *EC_curve_nid2nist(int nid) +{ + size_t i; + for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) { + if (nist_curves[i].nid == nid) + return nist_curves[i].name; + } + return NULL; +} + +int EC_curve_nist2nid(const char *name) +{ + size_t i; + for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) { + if (!strcmp(nist_curves[i].name, name)) + return nist_curves[i].nid; + } + return NID_undef; +} diff --git a/drivers/builtin_openssl2/crypto/ec/ec_cvt.c b/drivers/builtin_openssl2/crypto/ec/ec_cvt.c index 487d727407..5a832ba1cf 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_cvt.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_cvt.c @@ -72,12 +72,20 @@ #include #include "ec_lcl.h" +#ifdef OPENSSL_FIPS +# include +#endif + EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { const EC_METHOD *meth; EC_GROUP *ret; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_group_new_curve_gfp(p, a, b, ctx); +#endif #if defined(OPENSSL_BN_ASM_MONT) /* * This might appear controversial, but the fact is that generic @@ -152,6 +160,10 @@ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const EC_METHOD *meth; EC_GROUP *ret; +# ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_group_new_curve_gf2m(p, a, b, ctx); +# endif meth = EC_GF2m_simple_method(); ret = EC_GROUP_new(meth); diff --git a/drivers/builtin_openssl2/crypto/ec/ec_err.c b/drivers/builtin_openssl2/crypto/ec/ec_err.c index 58eae7c666..6fe5baafd4 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_err.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* crypto/ec/ec_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -76,6 +76,8 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_D2I_ECPKPARAMETERS), "d2i_ECPKParameters"}, {ERR_FUNC(EC_F_D2I_ECPRIVATEKEY), "d2i_ECPrivateKey"}, {ERR_FUNC(EC_F_DO_EC_KEY_PRINT), "DO_EC_KEY_PRINT"}, + {ERR_FUNC(EC_F_ECDH_CMS_DECRYPT), "ECDH_CMS_DECRYPT"}, + {ERR_FUNC(EC_F_ECDH_CMS_SET_SHARED_INFO), "ECDH_CMS_SET_SHARED_INFO"}, {ERR_FUNC(EC_F_ECKEY_PARAM2TYPE), "ECKEY_PARAM2TYPE"}, {ERR_FUNC(EC_F_ECKEY_PARAM_DECODE), "ECKEY_PARAM_DECODE"}, {ERR_FUNC(EC_F_ECKEY_PRIV_DECODE), "ECKEY_PRIV_DECODE"}, @@ -87,6 +89,13 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_ECPARAMETERS_PRINT_FP), "ECParameters_print_fp"}, {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT), "ECPKParameters_print"}, {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT_FP), "ECPKParameters_print_fp"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_GET_AFFINE), "ecp_nistz256_get_affine"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE), + "ecp_nistz256_mult_precompute"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_POINTS_MUL), "ecp_nistz256_points_mul"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_SET_WORDS), "ecp_nistz256_set_words"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_WINDOWED_MUL), "ecp_nistz256_windowed_mul"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_192), "ECP_NIST_MOD_192"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_224), "ECP_NIST_MOD_224"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_256), "ECP_NIST_MOD_256"}, @@ -271,6 +280,7 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_INVALID_COMPRESSED_POINT), "invalid compressed point"}, {ERR_REASON(EC_R_INVALID_COMPRESSION_BIT), "invalid compression bit"}, {ERR_REASON(EC_R_INVALID_CURVE), "invalid curve"}, + {ERR_REASON(EC_R_INVALID_DIGEST), "invalid digest"}, {ERR_REASON(EC_R_INVALID_DIGEST_TYPE), "invalid digest type"}, {ERR_REASON(EC_R_INVALID_ENCODING), "invalid encoding"}, {ERR_REASON(EC_R_INVALID_FIELD), "invalid field"}, @@ -279,6 +289,7 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_INVALID_PENTANOMIAL_BASIS), "invalid pentanomial basis"}, {ERR_REASON(EC_R_INVALID_PRIVATE_KEY), "invalid private key"}, {ERR_REASON(EC_R_INVALID_TRINOMIAL_BASIS), "invalid trinomial basis"}, + {ERR_REASON(EC_R_KDF_PARAMETER_ERROR), "kdf parameter error"}, {ERR_REASON(EC_R_KEYS_NOT_SET), "keys not set"}, {ERR_REASON(EC_R_MISSING_PARAMETERS), "missing parameters"}, {ERR_REASON(EC_R_MISSING_PRIVATE_KEY), "missing private key"}, @@ -290,10 +301,12 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_NO_FIELD_MOD), "no field mod"}, {ERR_REASON(EC_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(EC_R_PASSED_NULL_PARAMETER), "passed null parameter"}, + {ERR_REASON(EC_R_PEER_KEY_ERROR), "peer key error"}, {ERR_REASON(EC_R_PKPARAMETERS2GROUP_FAILURE), "pkparameters2group failure"}, {ERR_REASON(EC_R_POINT_AT_INFINITY), "point at infinity"}, {ERR_REASON(EC_R_POINT_IS_NOT_ON_CURVE), "point is not on curve"}, + {ERR_REASON(EC_R_SHARED_INFO_ERROR), "shared info error"}, {ERR_REASON(EC_R_SLOT_FULL), "slot full"}, {ERR_REASON(EC_R_UNDEFINED_GENERATOR), "undefined generator"}, {ERR_REASON(EC_R_UNDEFINED_ORDER), "undefined order"}, diff --git a/drivers/builtin_openssl2/crypto/ec/ec_key.c b/drivers/builtin_openssl2/crypto/ec/ec_key.c index c784b6fd30..bc94ab5661 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_key.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_key.c @@ -387,6 +387,8 @@ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, tx = BN_CTX_get(ctx); ty = BN_CTX_get(ctx); + if (ty == NULL) + goto err; #ifndef OPENSSL_NO_EC2M tmp_nid = EC_METHOD_get_field_type(EC_GROUP_method_of(key->group)); diff --git a/drivers/builtin_openssl2/crypto/ec/ec_lcl.h b/drivers/builtin_openssl2/crypto/ec/ec_lcl.h index d79ed1e4d9..969fd147ef 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_lcl.h +++ b/drivers/builtin_openssl2/crypto/ec/ec_lcl.h @@ -212,6 +212,13 @@ struct ec_group_st { BIGNUM order, cofactor; int curve_name; /* optional NID for named curve */ int asn1_flag; /* flag to control the asn1 encoding */ + /* + * Kludge: upper bit of ans1_flag is used to denote structure + * version. Is set, then last field is present. This is done + * for interoperation with FIPS code. + */ +#define EC_GROUP_ASN1_FLAG_MASK 0x7fffffff +#define EC_GROUP_VERSION(p) (p->asn1_flag&~EC_GROUP_ASN1_FLAG_MASK) point_conversion_form_t asn1_form; unsigned char *seed; /* optional seed for parameters (appears in * ASN1) */ @@ -252,6 +259,7 @@ struct ec_group_st { /* method-specific */ int (*field_mod_func) (BIGNUM *, const BIGNUM *, const BIGNUM *, BN_CTX *); + BN_MONT_CTX *mont_data; /* data for ECDSA inverse */ } /* EC_GROUP */ ; struct ec_key_st { @@ -541,3 +549,20 @@ void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array, void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign, unsigned char *digit, unsigned char in); #endif +int ec_precompute_mont_data(EC_GROUP *); + +#ifdef ECP_NISTZ256_ASM +/** Returns GFp methods using montgomery multiplication, with x86-64 optimized + * P256. See http://eprint.iacr.org/2013/816. + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistz256_method(void); +#endif + +#ifdef OPENSSL_FIPS +EC_GROUP *FIPS_ec_group_new_curve_gfp(const BIGNUM *p, const BIGNUM *a, + const BIGNUM *b, BN_CTX *ctx); +EC_GROUP *FIPS_ec_group_new_curve_gf2m(const BIGNUM *p, const BIGNUM *a, + const BIGNUM *b, BN_CTX *ctx); +EC_GROUP *FIPS_ec_group_new_by_curve_name(int nid); +#endif diff --git a/drivers/builtin_openssl2/crypto/ec/ec_lib.c b/drivers/builtin_openssl2/crypto/ec/ec_lib.c index e2275207ed..3ffa112cc3 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_lib.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_lib.c @@ -94,13 +94,14 @@ EC_GROUP *EC_GROUP_new(const EC_METHOD *meth) ret->meth = meth; ret->extra_data = NULL; + ret->mont_data = NULL; ret->generator = NULL; BN_init(&ret->order); BN_init(&ret->cofactor); ret->curve_name = 0; - ret->asn1_flag = 0; + ret->asn1_flag = ~EC_GROUP_ASN1_FLAG_MASK; ret->asn1_form = POINT_CONVERSION_UNCOMPRESSED; ret->seed = NULL; @@ -124,6 +125,9 @@ void EC_GROUP_free(EC_GROUP *group) EC_EX_DATA_free_all_data(&group->extra_data); + if (EC_GROUP_VERSION(group) && group->mont_data) + BN_MONT_CTX_free(group->mont_data); + if (group->generator != NULL) EC_POINT_free(group->generator); BN_free(&group->order); @@ -147,6 +151,9 @@ void EC_GROUP_clear_free(EC_GROUP *group) EC_EX_DATA_clear_free_all_data(&group->extra_data); + if (EC_GROUP_VERSION(group) && group->mont_data) + BN_MONT_CTX_free(group->mont_data); + if (group->generator != NULL) EC_POINT_clear_free(group->generator); BN_clear_free(&group->order); @@ -189,6 +196,22 @@ int EC_GROUP_copy(EC_GROUP *dest, const EC_GROUP *src) return 0; } + if (EC_GROUP_VERSION(src) && src->mont_data != NULL) { + if (dest->mont_data == NULL) { + dest->mont_data = BN_MONT_CTX_new(); + if (dest->mont_data == NULL) + return 0; + } + if (!BN_MONT_CTX_copy(dest->mont_data, src->mont_data)) + return 0; + } else { + /* src->generator == NULL */ + if (EC_GROUP_VERSION(dest) && dest->mont_data != NULL) { + BN_MONT_CTX_free(dest->mont_data); + dest->mont_data = NULL; + } + } + if (src->generator != NULL) { if (dest->generator == NULL) { dest->generator = EC_POINT_new(dest); @@ -295,6 +318,13 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, } else BN_zero(&group->cofactor); + /* + * We ignore the return value because some groups have an order with + * factors of two, which makes the Montgomery setup fail. + * |group->mont_data| will be NULL in this case. + */ + ec_precompute_mont_data(group); + return 1; } @@ -303,6 +333,11 @@ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group) return group->generator; } +BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group) +{ + return EC_GROUP_VERSION(group) ? group->mont_data : NULL; +} + int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx) { if (!BN_copy(order, &group->order)) @@ -332,12 +367,13 @@ int EC_GROUP_get_curve_name(const EC_GROUP *group) void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag) { - group->asn1_flag = flag; + group->asn1_flag &= ~EC_GROUP_ASN1_FLAG_MASK; + group->asn1_flag |= flag & EC_GROUP_ASN1_FLAG_MASK; } int EC_GROUP_get_asn1_flag(const EC_GROUP *group) { - return group->asn1_flag; + return group->asn1_flag & EC_GROUP_ASN1_FLAG_MASK; } void EC_GROUP_set_point_conversion_form(EC_GROUP *group, @@ -1057,3 +1093,42 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group) return 0; /* cannot tell whether precomputation has * been performed */ } + +/* + * ec_precompute_mont_data sets |group->mont_data| from |group->order| and + * returns one on success. On error it returns zero. + */ +int ec_precompute_mont_data(EC_GROUP *group) +{ + BN_CTX *ctx = BN_CTX_new(); + int ret = 0; + + if (!EC_GROUP_VERSION(group)) + goto err; + + if (group->mont_data) { + BN_MONT_CTX_free(group->mont_data); + group->mont_data = NULL; + } + + if (ctx == NULL) + goto err; + + group->mont_data = BN_MONT_CTX_new(); + if (!group->mont_data) + goto err; + + if (!BN_MONT_CTX_set(group->mont_data, &group->order, ctx)) { + BN_MONT_CTX_free(group->mont_data); + group->mont_data = NULL; + goto err; + } + + ret = 1; + + err: + + if (ctx) + BN_CTX_free(ctx); + return ret; +} diff --git a/drivers/builtin_openssl2/crypto/ec/ec_pmeth.c b/drivers/builtin_openssl2/crypto/ec/ec_pmeth.c index c189d3ffbf..b76749010c 100644 --- a/drivers/builtin_openssl2/crypto/ec/ec_pmeth.c +++ b/drivers/builtin_openssl2/crypto/ec/ec_pmeth.c @@ -61,6 +61,7 @@ #include #include #include +#include "ec_lcl.h" #include #include #include "evp_locl.h" @@ -72,6 +73,19 @@ typedef struct { EC_GROUP *gen_group; /* message digest */ const EVP_MD *md; + /* Duplicate key if custom cofactor needed */ + EC_KEY *co_key; + /* Cofactor mode */ + signed char cofactor_mode; + /* KDF (if any) to use for ECDH */ + char kdf_type; + /* Message digest to use for key derivation */ + const EVP_MD *kdf_md; + /* User key material */ + unsigned char *kdf_ukm; + size_t kdf_ukmlen; + /* KDF output length */ + size_t kdf_outlen; } EC_PKEY_CTX; static int pkey_ec_init(EVP_PKEY_CTX *ctx) @@ -83,6 +97,14 @@ static int pkey_ec_init(EVP_PKEY_CTX *ctx) dctx->gen_group = NULL; dctx->md = NULL; + dctx->cofactor_mode = -1; + dctx->co_key = NULL; + dctx->kdf_type = EVP_PKEY_ECDH_KDF_NONE; + dctx->kdf_md = NULL; + dctx->kdf_outlen = 0; + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = 0; + ctx->data = dctx; return 1; @@ -101,6 +123,22 @@ static int pkey_ec_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) return 0; } dctx->md = sctx->md; + + if (sctx->co_key) { + dctx->co_key = EC_KEY_dup(sctx->co_key); + if (!dctx->co_key) + return 0; + } + dctx->kdf_type = sctx->kdf_type; + dctx->kdf_md = sctx->kdf_md; + dctx->kdf_outlen = sctx->kdf_outlen; + if (sctx->kdf_ukm) { + dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen); + if (!dctx->kdf_ukm) + return 0; + } else + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = sctx->kdf_ukmlen; return 1; } @@ -110,6 +148,10 @@ static void pkey_ec_cleanup(EVP_PKEY_CTX *ctx) if (dctx) { if (dctx->gen_group) EC_GROUP_free(dctx->gen_group); + if (dctx->co_key) + EC_KEY_free(dctx->co_key); + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); OPENSSL_free(dctx); } } @@ -168,18 +210,21 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, int ret; size_t outlen; const EC_POINT *pubkey = NULL; + EC_KEY *eckey; + EC_PKEY_CTX *dctx = ctx->data; if (!ctx->pkey || !ctx->peerkey) { ECerr(EC_F_PKEY_EC_DERIVE, EC_R_KEYS_NOT_SET); return 0; } + eckey = dctx->co_key ? dctx->co_key : ctx->pkey->pkey.ec; + if (!key) { const EC_GROUP *group; - group = EC_KEY_get0_group(ctx->pkey->pkey.ec); + group = EC_KEY_get0_group(eckey); *keylen = (EC_GROUP_get_degree(group) + 7) / 8; return 1; } - pubkey = EC_KEY_get0_public_key(ctx->peerkey->pkey.ec); /* @@ -189,12 +234,48 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, outlen = *keylen; - ret = ECDH_compute_key(key, outlen, pubkey, ctx->pkey->pkey.ec, 0); - if (ret < 0) - return ret; + ret = ECDH_compute_key(key, outlen, pubkey, eckey, 0); + if (ret <= 0) + return 0; *keylen = ret; return 1; } + +static int pkey_ec_kdf_derive(EVP_PKEY_CTX *ctx, + unsigned char *key, size_t *keylen) +{ + EC_PKEY_CTX *dctx = ctx->data; + unsigned char *ktmp = NULL; + size_t ktmplen; + int rv = 0; + if (dctx->kdf_type == EVP_PKEY_ECDH_KDF_NONE) + return pkey_ec_derive(ctx, key, keylen); + if (!key) { + *keylen = dctx->kdf_outlen; + return 1; + } + if (*keylen != dctx->kdf_outlen) + return 0; + if (!pkey_ec_derive(ctx, NULL, &ktmplen)) + return 0; + ktmp = OPENSSL_malloc(ktmplen); + if (!ktmp) + return 0; + if (!pkey_ec_derive(ctx, ktmp, &ktmplen)) + goto err; + /* Do KDF stuff */ + if (!ECDH_KDF_X9_62(key, *keylen, ktmp, ktmplen, + dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md)) + goto err; + rv = 1; + + err: + if (ktmp) { + OPENSSL_cleanse(ktmp, ktmplen); + OPENSSL_free(ktmp); + } + return rv; +} #endif static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) @@ -213,6 +294,90 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->gen_group = group; return 1; + case EVP_PKEY_CTRL_EC_PARAM_ENC: + if (!dctx->gen_group) { + ECerr(EC_F_PKEY_EC_CTRL, EC_R_NO_PARAMETERS_SET); + return 0; + } + EC_GROUP_set_asn1_flag(dctx->gen_group, p1); + return 1; + +#ifndef OPENSSL_NO_ECDH + case EVP_PKEY_CTRL_EC_ECDH_COFACTOR: + if (p1 == -2) { + if (dctx->cofactor_mode != -1) + return dctx->cofactor_mode; + else { + EC_KEY *ec_key = ctx->pkey->pkey.ec; + return EC_KEY_get_flags(ec_key) & EC_FLAG_COFACTOR_ECDH ? 1 : + 0; + } + } else if (p1 < -1 || p1 > 1) + return -2; + dctx->cofactor_mode = p1; + if (p1 != -1) { + EC_KEY *ec_key = ctx->pkey->pkey.ec; + if (!ec_key->group) + return -2; + /* If cofactor is 1 cofactor mode does nothing */ + if (BN_is_one(&ec_key->group->cofactor)) + return 1; + if (!dctx->co_key) { + dctx->co_key = EC_KEY_dup(ec_key); + if (!dctx->co_key) + return 0; + } + if (p1) + EC_KEY_set_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH); + else + EC_KEY_clear_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH); + } else if (dctx->co_key) { + EC_KEY_free(dctx->co_key); + dctx->co_key = NULL; + } + return 1; +#endif + + case EVP_PKEY_CTRL_EC_KDF_TYPE: + if (p1 == -2) + return dctx->kdf_type; + if (p1 != EVP_PKEY_ECDH_KDF_NONE && p1 != EVP_PKEY_ECDH_KDF_X9_62) + return -2; + dctx->kdf_type = p1; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_MD: + dctx->kdf_md = p2; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_MD: + *(const EVP_MD **)p2 = dctx->kdf_md; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_OUTLEN: + if (p1 <= 0) + return -2; + dctx->kdf_outlen = (size_t)p1; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN: + *(int *)p2 = dctx->kdf_outlen; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_UKM: + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + dctx->kdf_ukm = p2; + if (p2) + dctx->kdf_ukmlen = p1; + else + dctx->kdf_ukmlen = 0; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_UKM: + *(unsigned char **)p2 = dctx->kdf_ukm; + return dctx->kdf_ukmlen; + case EVP_PKEY_CTRL_MD: if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 && EVP_MD_type((const EVP_MD *)p2) != NID_ecdsa_with_SHA1 && @@ -226,6 +391,10 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = dctx->md; + return 1; + case EVP_PKEY_CTRL_PEER_KEY: /* Default behaviour is OK */ case EVP_PKEY_CTRL_DIGESTINIT: @@ -244,7 +413,9 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx, { if (!strcmp(type, "ec_paramgen_curve")) { int nid; - nid = OBJ_sn2nid(value); + nid = EC_curve_nist2nid(value); + if (nid == NID_undef) + nid = OBJ_sn2nid(value); if (nid == NID_undef) nid = OBJ_ln2nid(value); if (nid == NID_undef) { @@ -252,7 +423,28 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx, return 0; } return EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid); + } else if (!strcmp(type, "ec_param_enc")) { + int param_enc; + if (!strcmp(value, "explicit")) + param_enc = 0; + else if (!strcmp(value, "named_curve")) + param_enc = OPENSSL_EC_NAMED_CURVE; + else + return -2; + return EVP_PKEY_CTX_set_ec_param_enc(ctx, param_enc); + } else if (!strcmp(type, "ecdh_kdf_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + ECerr(EC_F_PKEY_EC_CTRL_STR, EC_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_ecdh_kdf_md(ctx, md); + } else if (!strcmp(type, "ecdh_cofactor_mode")) { + int co_mode; + co_mode = atoi(value); + return EVP_PKEY_CTX_set_ecdh_cofactor_mode(ctx, co_mode); } + return -2; } @@ -279,7 +471,8 @@ static int pkey_ec_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { EC_KEY *ec = NULL; - if (ctx->pkey == NULL) { + EC_PKEY_CTX *dctx = ctx->data; + if (ctx->pkey == NULL && dctx->gen_group == NULL) { ECerr(EC_F_PKEY_EC_KEYGEN, EC_R_NO_PARAMETERS_SET); return 0; } @@ -287,9 +480,14 @@ static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) if (!ec) return 0; EVP_PKEY_assign_EC_KEY(pkey, ec); - /* Note: if error return, pkey is freed by parent routine */ - if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) - return 0; + if (ctx->pkey) { + /* Note: if error return, pkey is freed by parent routine */ + if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) + return 0; + } else { + if (!EC_KEY_set_group(ec, dctx->gen_group)) + return 0; + } return EC_KEY_generate_key(pkey->pkey.ec); } @@ -322,7 +520,7 @@ const EVP_PKEY_METHOD ec_pkey_meth = { 0, #ifndef OPENSSL_NO_ECDH - pkey_ec_derive, + pkey_ec_kdf_derive, #else 0, #endif diff --git a/drivers/builtin_openssl2/crypto/ec/eck_prn.c b/drivers/builtin_openssl2/crypto/ec/eck_prn.c index 5ef12ec024..df9b37a750 100644 --- a/drivers/builtin_openssl2/crypto/ec/eck_prn.c +++ b/drivers/builtin_openssl2/crypto/ec/eck_prn.c @@ -171,6 +171,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) if (EC_GROUP_get_asn1_flag(x)) { /* the curve parameter are given by an asn1 OID */ int nid; + const char *nname; if (!BIO_indent(bp, off, 128)) goto err; @@ -183,6 +184,13 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) goto err; if (BIO_printf(bp, "\n") <= 0) goto err; + nname = EC_curve_nid2nist(nid); + if (nname) { + if (!BIO_indent(bp, off, 128)) + goto err; + if (BIO_printf(bp, "NIST CURVE: %s\n", nname) <= 0) + goto err; + } } else { /* explicit parameters */ int is_char_two = 0; diff --git a/drivers/builtin_openssl2/crypto/ec/ecp_nistp224.c b/drivers/builtin_openssl2/crypto/ec/ecp_nistp224.c index ed09f97ade..d81cc9ce6b 100644 --- a/drivers/builtin_openssl2/crypto/ec/ecp_nistp224.c +++ b/drivers/builtin_openssl2/crypto/ec/ecp_nistp224.c @@ -1657,8 +1657,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) || @@ -1736,6 +1735,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup, nistp224_pre_comp_free, nistp224_pre_comp_clear_free)) diff --git a/drivers/builtin_openssl2/crypto/ec/ecp_nistp256.c b/drivers/builtin_openssl2/crypto/ec/ecp_nistp256.c index a5887086c6..78d191aac7 100644 --- a/drivers/builtin_openssl2/crypto/ec/ecp_nistp256.c +++ b/drivers/builtin_openssl2/crypto/ec/ecp_nistp256.c @@ -2249,8 +2249,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(x_tmp, &group->generator->X)) || (!BN_to_felem(y_tmp, &group->generator->Y)) || @@ -2337,6 +2336,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup, nistp256_pre_comp_free, nistp256_pre_comp_clear_free)) diff --git a/drivers/builtin_openssl2/crypto/ec/ecp_nistp521.c b/drivers/builtin_openssl2/crypto/ec/ecp_nistp521.c index cc7634512d..c53a61bbfb 100644 --- a/drivers/builtin_openssl2/crypto/ec/ecp_nistp521.c +++ b/drivers/builtin_openssl2/crypto/ec/ecp_nistp521.c @@ -1282,11 +1282,11 @@ static void point_add(felem x3, felem y3, felem z3, felem_scalar128(tmp2, 2); /* tmp2[i] < 17*2^121 */ felem_diff128(tmp, tmp2); - /*- - * tmp[i] < 2^127 - 2^69 + 17*2^122 - * = 2^126 - 2^122 - 2^6 - 2^2 - 1 - * < 2^127 - */ + /*- + * tmp[i] < 2^127 - 2^69 + 17*2^122 + * = 2^126 - 2^122 - 2^6 - 2^2 - 1 + * < 2^127 + */ felem_reduce(y_out, tmp); copy_conditional(x_out, x2, z1_is_zero); @@ -2056,8 +2056,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) */ if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); - ret = 1; - goto err; + goto done; } if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) || @@ -2115,6 +2114,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) } make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems); + done: if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup, nistp521_pre_comp_free, nistp521_pre_comp_clear_free)) diff --git a/drivers/builtin_openssl2/crypto/ec/ecp_nistz256.c b/drivers/builtin_openssl2/crypto/ec/ecp_nistz256.c new file mode 100644 index 0000000000..ca44d0aaee --- /dev/null +++ b/drivers/builtin_openssl2/crypto/ec/ecp_nistz256.c @@ -0,0 +1,1521 @@ +/****************************************************************************** + * * + * Copyright 2014 Intel Corporation * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + * * + ****************************************************************************** + * * + * Developers and authors: * + * Shay Gueron (1, 2), and Vlad Krasnov (1) * + * (1) Intel Corporation, Israel Development Center * + * (2) University of Haifa * + * Reference: * + * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with * + * 256 Bit Primes" * + * * + ******************************************************************************/ + +#include + +#include +#include +#include +#include "cryptlib.h" + +#include "ec_lcl.h" + +#if BN_BITS2 != 64 +# define TOBN(hi,lo) lo,hi +#else +# define TOBN(hi,lo) ((BN_ULONG)hi<<32|lo) +#endif + +#if defined(__GNUC__) +# define ALIGN32 __attribute((aligned(32))) +#elif defined(_MSC_VER) +# define ALIGN32 __declspec(align(32)) +#else +# define ALIGN32 +#endif + +#define ALIGNPTR(p,N) ((unsigned char *)p+N-(size_t)p%N) +#define P256_LIMBS (256/BN_BITS2) + +typedef unsigned short u16; + +typedef struct { + BN_ULONG X[P256_LIMBS]; + BN_ULONG Y[P256_LIMBS]; + BN_ULONG Z[P256_LIMBS]; +} P256_POINT; + +typedef struct { + BN_ULONG X[P256_LIMBS]; + BN_ULONG Y[P256_LIMBS]; +} P256_POINT_AFFINE; + +typedef P256_POINT_AFFINE PRECOMP256_ROW[64]; + +/* structure for precomputed multiples of the generator */ +typedef struct ec_pre_comp_st { + const EC_GROUP *group; /* Parent EC_GROUP object */ + size_t w; /* Window size */ + /* + * Constant time access to the X and Y coordinates of the pre-computed, + * generator multiplies, in the Montgomery domain. Pre-calculated + * multiplies are stored in affine form. + */ + PRECOMP256_ROW *precomp; + void *precomp_storage; + int references; +} EC_PRE_COMP; + +/* Functions implemented in assembly */ +/* Modular mul by 2: res = 2*a mod P */ +void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular div by 2: res = a/2 mod P */ +void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular mul by 3: res = 3*a mod P */ +void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular add: res = a+b mod P */ +void ecp_nistz256_add(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Modular sub: res = a-b mod P */ +void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Modular neg: res = -a mod P */ +void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); +/* Montgomery mul: res = a*b*2^-256 mod P */ +void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Montgomery sqr: res = a*a*2^-256 mod P */ +void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Convert a number from Montgomery domain, by multiplying with 1 */ +void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]); +/* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/ +void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]); +/* Functions that perform constant time access to the precomputed tables */ +void ecp_nistz256_select_w5(P256_POINT * val, + const P256_POINT * in_t, int index); +void ecp_nistz256_select_w7(P256_POINT_AFFINE * val, + const P256_POINT_AFFINE * in_t, int index); + +/* One converted into the Montgomery domain */ +static const BN_ULONG ONE[P256_LIMBS] = { + TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000), + TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe) +}; + +static void *ecp_nistz256_pre_comp_dup(void *); +static void ecp_nistz256_pre_comp_free(void *); +static void ecp_nistz256_pre_comp_clear_free(void *); +static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group); + +/* Precomputed tables for the default generator */ +#include "ecp_nistz256_table.c" + +/* Recode window to a signed digit, see ecp_nistputil.c for details */ +static unsigned int _booth_recode_w5(unsigned int in) +{ + unsigned int s, d; + + s = ~((in >> 5) - 1); + d = (1 << 6) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + return (d << 1) + (s & 1); +} + +static unsigned int _booth_recode_w7(unsigned int in) +{ + unsigned int s, d; + + s = ~((in >> 7) - 1); + d = (1 << 8) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + return (d << 1) + (s & 1); +} + +static void copy_conditional(BN_ULONG dst[P256_LIMBS], + const BN_ULONG src[P256_LIMBS], BN_ULONG move) +{ + BN_ULONG mask1 = -move; + BN_ULONG mask2 = ~mask1; + + dst[0] = (src[0] & mask1) ^ (dst[0] & mask2); + dst[1] = (src[1] & mask1) ^ (dst[1] & mask2); + dst[2] = (src[2] & mask1) ^ (dst[2] & mask2); + dst[3] = (src[3] & mask1) ^ (dst[3] & mask2); + if (P256_LIMBS == 8) { + dst[4] = (src[4] & mask1) ^ (dst[4] & mask2); + dst[5] = (src[5] & mask1) ^ (dst[5] & mask2); + dst[6] = (src[6] & mask1) ^ (dst[6] & mask2); + dst[7] = (src[7] & mask1) ^ (dst[7] & mask2); + } +} + +static BN_ULONG is_zero(BN_ULONG in) +{ + in |= (0 - in); + in = ~in; + in &= BN_MASK2; + in >>= BN_BITS2 - 1; + return in; +} + +static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]) +{ + BN_ULONG res; + + res = a[0] ^ b[0]; + res |= a[1] ^ b[1]; + res |= a[2] ^ b[2]; + res |= a[3] ^ b[3]; + if (P256_LIMBS == 8) { + res |= a[4] ^ b[4]; + res |= a[5] ^ b[5]; + res |= a[6] ^ b[6]; + res |= a[7] ^ b[7]; + } + + return is_zero(res); +} + +static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS]) +{ + BN_ULONG res; + + res = a[0] ^ ONE[0]; + res |= a[1] ^ ONE[1]; + res |= a[2] ^ ONE[2]; + res |= a[3] ^ ONE[3]; + if (P256_LIMBS == 8) { + res |= a[4] ^ ONE[4]; + res |= a[5] ^ ONE[5]; + res |= a[6] ^ ONE[6]; + } + + return is_zero(res); +} + +static int ecp_nistz256_set_words(BIGNUM *a, BN_ULONG words[P256_LIMBS]) + { + if (bn_wexpand(a, P256_LIMBS) == NULL) { + ECerr(EC_F_ECP_NISTZ256_SET_WORDS, ERR_R_MALLOC_FAILURE); + return 0; + } + memcpy(a->d, words, sizeof(BN_ULONG) * P256_LIMBS); + a->top = P256_LIMBS; + bn_correct_top(a); + return 1; +} + +#ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION +void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a); +void ecp_nistz256_point_add(P256_POINT *r, + const P256_POINT *a, const P256_POINT *b); +void ecp_nistz256_point_add_affine(P256_POINT *r, + const P256_POINT *a, + const P256_POINT_AFFINE *b); +#else +/* Point double: r = 2*a */ +static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a) +{ + BN_ULONG S[P256_LIMBS]; + BN_ULONG M[P256_LIMBS]; + BN_ULONG Zsqr[P256_LIMBS]; + BN_ULONG tmp0[P256_LIMBS]; + + const BN_ULONG *in_x = a->X; + const BN_ULONG *in_y = a->Y; + const BN_ULONG *in_z = a->Z; + + BN_ULONG *res_x = r->X; + BN_ULONG *res_y = r->Y; + BN_ULONG *res_z = r->Z; + + ecp_nistz256_mul_by_2(S, in_y); + + ecp_nistz256_sqr_mont(Zsqr, in_z); + + ecp_nistz256_sqr_mont(S, S); + + ecp_nistz256_mul_mont(res_z, in_z, in_y); + ecp_nistz256_mul_by_2(res_z, res_z); + + ecp_nistz256_add(M, in_x, Zsqr); + ecp_nistz256_sub(Zsqr, in_x, Zsqr); + + ecp_nistz256_sqr_mont(res_y, S); + ecp_nistz256_div_by_2(res_y, res_y); + + ecp_nistz256_mul_mont(M, M, Zsqr); + ecp_nistz256_mul_by_3(M, M); + + ecp_nistz256_mul_mont(S, S, in_x); + ecp_nistz256_mul_by_2(tmp0, S); + + ecp_nistz256_sqr_mont(res_x, M); + + ecp_nistz256_sub(res_x, res_x, tmp0); + ecp_nistz256_sub(S, S, res_x); + + ecp_nistz256_mul_mont(S, S, M); + ecp_nistz256_sub(res_y, S, res_y); +} + +/* Point addition: r = a+b */ +static void ecp_nistz256_point_add(P256_POINT *r, + const P256_POINT *a, const P256_POINT *b) +{ + BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; + BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS]; + BN_ULONG Z1sqr[P256_LIMBS]; + BN_ULONG Z2sqr[P256_LIMBS]; + BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; + BN_ULONG Hsqr[P256_LIMBS]; + BN_ULONG Rsqr[P256_LIMBS]; + BN_ULONG Hcub[P256_LIMBS]; + + BN_ULONG res_x[P256_LIMBS]; + BN_ULONG res_y[P256_LIMBS]; + BN_ULONG res_z[P256_LIMBS]; + + BN_ULONG in1infty, in2infty; + + const BN_ULONG *in1_x = a->X; + const BN_ULONG *in1_y = a->Y; + const BN_ULONG *in1_z = a->Z; + + const BN_ULONG *in2_x = b->X; + const BN_ULONG *in2_y = b->Y; + const BN_ULONG *in2_z = b->Z; + + /* We encode infinity as (0,0), which is not on the curve, + * so it is OK. */ + in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | + in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + if (P256_LIMBS == 8) + in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | + in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + + in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | + in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); + if (P256_LIMBS == 8) + in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | + in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); + + in1infty = is_zero(in1infty); + in2infty = is_zero(in2infty); + + ecp_nistz256_sqr_mont(Z2sqr, in2_z); /* Z2^2 */ + ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ + + ecp_nistz256_mul_mont(S1, Z2sqr, in2_z); /* S1 = Z2^3 */ + ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ + + ecp_nistz256_mul_mont(S1, S1, in1_y); /* S1 = Y1*Z2^3 */ + ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ + ecp_nistz256_sub(R, S2, S1); /* R = S2 - S1 */ + + ecp_nistz256_mul_mont(U1, in1_x, Z2sqr); /* U1 = X1*Z2^2 */ + ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ + ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */ + + /* + * This should not happen during sign/ecdh, so no constant time violation + */ + if (is_equal(U1, U2) && !in1infty && !in2infty) { + if (is_equal(S1, S2)) { + ecp_nistz256_point_double(r, a); + return; + } else { + memset(r, 0, sizeof(*r)); + return; + } + } + + ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ + ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ + ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ + ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */ + ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ + + ecp_nistz256_mul_mont(U2, U1, Hsqr); /* U1*H^2 */ + ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ + + ecp_nistz256_sub(res_x, Rsqr, Hsqr); + ecp_nistz256_sub(res_x, res_x, Hcub); + + ecp_nistz256_sub(res_y, U2, res_x); + + ecp_nistz256_mul_mont(S2, S1, Hcub); + ecp_nistz256_mul_mont(res_y, R, res_y); + ecp_nistz256_sub(res_y, res_y, S2); + + copy_conditional(res_x, in2_x, in1infty); + copy_conditional(res_y, in2_y, in1infty); + copy_conditional(res_z, in2_z, in1infty); + + copy_conditional(res_x, in1_x, in2infty); + copy_conditional(res_y, in1_y, in2infty); + copy_conditional(res_z, in1_z, in2infty); + + memcpy(r->X, res_x, sizeof(res_x)); + memcpy(r->Y, res_y, sizeof(res_y)); + memcpy(r->Z, res_z, sizeof(res_z)); +} + +/* Point addition when b is known to be affine: r = a+b */ +static void ecp_nistz256_point_add_affine(P256_POINT *r, + const P256_POINT *a, + const P256_POINT_AFFINE *b) +{ + BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; + BN_ULONG Z1sqr[P256_LIMBS]; + BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; + BN_ULONG Hsqr[P256_LIMBS]; + BN_ULONG Rsqr[P256_LIMBS]; + BN_ULONG Hcub[P256_LIMBS]; + + BN_ULONG res_x[P256_LIMBS]; + BN_ULONG res_y[P256_LIMBS]; + BN_ULONG res_z[P256_LIMBS]; + + BN_ULONG in1infty, in2infty; + + const BN_ULONG *in1_x = a->X; + const BN_ULONG *in1_y = a->Y; + const BN_ULONG *in1_z = a->Z; + + const BN_ULONG *in2_x = b->X; + const BN_ULONG *in2_y = b->Y; + + /* + * In affine representation we encode infty as (0,0), which is not on the + * curve, so it is OK + */ + in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | + in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + if (P256_LIMBS == 8) + in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | + in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + + in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | + in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); + if (P256_LIMBS == 8) + in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | + in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); + + in1infty = is_zero(in1infty); + in2infty = is_zero(in2infty); + + ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ + + ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ + ecp_nistz256_sub(H, U2, in1_x); /* H = U2 - U1 */ + + ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ + + ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ + + ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ + ecp_nistz256_sub(R, S2, in1_y); /* R = S2 - S1 */ + + ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ + ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ + ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ + + ecp_nistz256_mul_mont(U2, in1_x, Hsqr); /* U1*H^2 */ + ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ + + ecp_nistz256_sub(res_x, Rsqr, Hsqr); + ecp_nistz256_sub(res_x, res_x, Hcub); + ecp_nistz256_sub(H, U2, res_x); + + ecp_nistz256_mul_mont(S2, in1_y, Hcub); + ecp_nistz256_mul_mont(H, H, R); + ecp_nistz256_sub(res_y, H, S2); + + copy_conditional(res_x, in2_x, in1infty); + copy_conditional(res_x, in1_x, in2infty); + + copy_conditional(res_y, in2_y, in1infty); + copy_conditional(res_y, in1_y, in2infty); + + copy_conditional(res_z, ONE, in1infty); + copy_conditional(res_z, in1_z, in2infty); + + memcpy(r->X, res_x, sizeof(res_x)); + memcpy(r->Y, res_y, sizeof(res_y)); + memcpy(r->Z, res_z, sizeof(res_z)); +} +#endif + +/* r = in^-1 mod p */ +static void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]) +{ + /* + * The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff + * ffffffff ffffffff We use FLT and used poly-2 as exponent + */ + BN_ULONG p2[P256_LIMBS]; + BN_ULONG p4[P256_LIMBS]; + BN_ULONG p8[P256_LIMBS]; + BN_ULONG p16[P256_LIMBS]; + BN_ULONG p32[P256_LIMBS]; + BN_ULONG res[P256_LIMBS]; + int i; + + ecp_nistz256_sqr_mont(res, in); + ecp_nistz256_mul_mont(p2, res, in); /* 3*p */ + + ecp_nistz256_sqr_mont(res, p2); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p4, res, p2); /* f*p */ + + ecp_nistz256_sqr_mont(res, p4); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */ + + ecp_nistz256_sqr_mont(res, p8); + for (i = 0; i < 7; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */ + + ecp_nistz256_sqr_mont(res, p16); + for (i = 0; i < 15; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */ + + ecp_nistz256_sqr_mont(res, p32); + for (i = 0; i < 31; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, in); + + for (i = 0; i < 32 * 4; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p32); + + for (i = 0; i < 32; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p32); + + for (i = 0; i < 16; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p16); + + for (i = 0; i < 8; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p8); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p4); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p2); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, in); + + memcpy(r, res, sizeof(res)); +} + +/* + * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and + * returns one if it fits. Otherwise it returns zero. + */ +static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], + const BIGNUM *in) +{ + if (in->top > P256_LIMBS) + return 0; + + memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS); + memcpy(out, in->d, sizeof(BN_ULONG) * in->top); + return 1; +} + +/* r = sum(scalar[i]*point[i]) */ +static int ecp_nistz256_windowed_mul(const EC_GROUP *group, + P256_POINT *r, + const BIGNUM **scalar, + const EC_POINT **point, + int num, BN_CTX *ctx) +{ + + int i, j, ret = 0; + unsigned int index; + unsigned char (*p_str)[33] = NULL; + const unsigned int window_size = 5; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + BN_ULONG tmp[P256_LIMBS]; + ALIGN32 P256_POINT h; + const BIGNUM **scalars = NULL; + P256_POINT (*table)[16] = NULL; + void *table_storage = NULL; + + if ((table_storage = + OPENSSL_malloc(num * 16 * sizeof(P256_POINT) + 64)) == NULL + || (p_str = + OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL + || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_MALLOC_FAILURE); + goto err; + } else { + table = (void *)ALIGNPTR(table_storage, 64); + } + + for (i = 0; i < num; i++) { + P256_POINT *row = table[i]; + + /* This is an unusual input, we don't guarantee constant-timeness. */ + if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) { + BIGNUM *mod; + + if ((mod = BN_CTX_get(ctx)) == NULL) + goto err; + if (!BN_nnmod(mod, scalar[i], &group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_BN_LIB); + goto err; + } + scalars[i] = mod; + } else + scalars[i] = scalar[i]; + + for (j = 0; j < scalars[i]->top * BN_BYTES; j += BN_BYTES) { + BN_ULONG d = scalars[i]->d[j / BN_BYTES]; + + p_str[i][j + 0] = d & 0xff; + p_str[i][j + 1] = (d >> 8) & 0xff; + p_str[i][j + 2] = (d >> 16) & 0xff; + p_str[i][j + 3] = (d >>= 24) & 0xff; + if (BN_BYTES == 8) { + d >>= 8; + p_str[i][j + 4] = d & 0xff; + p_str[i][j + 5] = (d >> 8) & 0xff; + p_str[i][j + 6] = (d >> 16) & 0xff; + p_str[i][j + 7] = (d >> 24) & 0xff; + } + } + for (; j < 33; j++) + p_str[i][j] = 0; + + /* table[0] is implicitly (0,0,0) (the point at infinity), + * therefore it is not stored. All other values are actually + * stored with an offset of -1 in table. + */ + + if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &point[i]->X) + || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &point[i]->Y) + || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &point[i]->Z)) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + + ecp_nistz256_point_double(&row[ 2 - 1], &row[ 1 - 1]); + ecp_nistz256_point_add (&row[ 3 - 1], &row[ 2 - 1], &row[1 - 1]); + ecp_nistz256_point_double(&row[ 4 - 1], &row[ 2 - 1]); + ecp_nistz256_point_double(&row[ 6 - 1], &row[ 3 - 1]); + ecp_nistz256_point_double(&row[ 8 - 1], &row[ 4 - 1]); + ecp_nistz256_point_double(&row[12 - 1], &row[ 6 - 1]); + ecp_nistz256_point_add (&row[ 5 - 1], &row[ 4 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[ 7 - 1], &row[ 6 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[ 9 - 1], &row[ 8 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[13 - 1], &row[12 - 1], &row[1 - 1]); + ecp_nistz256_point_double(&row[14 - 1], &row[ 7 - 1]); + ecp_nistz256_point_double(&row[10 - 1], &row[ 5 - 1]); + ecp_nistz256_point_add (&row[15 - 1], &row[14 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[11 - 1], &row[10 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[16 - 1], &row[15 - 1], &row[1 - 1]); + } + + index = 255; + + wvalue = p_str[0][(index - 1) / 8]; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + + ecp_nistz256_select_w5(r, table[0], _booth_recode_w5(wvalue) >> 1); + + while (index >= 5) { + for (i = (index == 255 ? 1 : 0); i < num; i++) { + unsigned int off = (index - 1) / 8; + + wvalue = p_str[i][off] | p_str[i][off + 1] << 8; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + + wvalue = _booth_recode_w5(wvalue); + + ecp_nistz256_select_w5(&h, table[i], wvalue >> 1); + + ecp_nistz256_neg(tmp, h.Y); + copy_conditional(h.Y, tmp, (wvalue & 1)); + + ecp_nistz256_point_add(r, r, &h); + } + + index -= window_size; + + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + } + + /* Final window */ + for (i = 0; i < num; i++) { + wvalue = p_str[i][0]; + wvalue = (wvalue << 1) & mask; + + wvalue = _booth_recode_w5(wvalue); + + ecp_nistz256_select_w5(&h, table[i], wvalue >> 1); + + ecp_nistz256_neg(tmp, h.Y); + copy_conditional(h.Y, tmp, wvalue & 1); + + ecp_nistz256_point_add(r, r, &h); + } + + ret = 1; + err: + if (table_storage) + OPENSSL_free(table_storage); + if (p_str) + OPENSSL_free(p_str); + if (scalars) + OPENSSL_free(scalars); + return ret; +} + +/* Coordinates of G, for which we have precomputed tables */ +const static BN_ULONG def_xG[P256_LIMBS] = { + TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601), + TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6) +}; + +const static BN_ULONG def_yG[P256_LIMBS] = { + TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c), + TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85) +}; + +/* + * ecp_nistz256_is_affine_G returns one if |generator| is the standard, P-256 + * generator. + */ +static int ecp_nistz256_is_affine_G(const EC_POINT *generator) +{ + return (generator->X.top == P256_LIMBS) && + (generator->Y.top == P256_LIMBS) && + (generator->Z.top == (P256_LIMBS - P256_LIMBS / 8)) && + is_equal(generator->X.d, def_xG) && + is_equal(generator->Y.d, def_yG) && is_one(generator->Z.d); +} + +static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) +{ + /* + * We precompute a table for a Booth encoded exponent (wNAF) based + * computation. Each table holds 64 values for safe access, with an + * implicit value of infinity at index zero. We use window of size 7, and + * therefore require ceil(256/7) = 37 tables. + */ + BIGNUM *order; + EC_POINT *P = NULL, *T = NULL; + const EC_POINT *generator; + EC_PRE_COMP *pre_comp; + BN_CTX *new_ctx = NULL; + int i, j, k, ret = 0; + size_t w; + + PRECOMP256_ROW *preComputedTable = NULL; + unsigned char *precomp_storage = NULL; + + /* if there is an old EC_PRE_COMP object, throw it away */ + EC_EX_DATA_free_data(&group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free); + + generator = EC_GROUP_get0_generator(group); + if (generator == NULL) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNDEFINED_GENERATOR); + return 0; + } + + if (ecp_nistz256_is_affine_G(generator)) { + /* + * No need to calculate tables for the standard generator because we + * have them statically. + */ + return 1; + } + + if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL) + return 0; + + if (ctx == NULL) { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + goto err; + } + + BN_CTX_start(ctx); + order = BN_CTX_get(ctx); + + if (order == NULL) + goto err; + + if (!EC_GROUP_get_order(group, order, ctx)) + goto err; + + if (BN_is_zero(order)) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNKNOWN_ORDER); + goto err; + } + + w = 7; + + if ((precomp_storage = + OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, ERR_R_MALLOC_FAILURE); + goto err; + } else { + preComputedTable = (void *)ALIGNPTR(precomp_storage, 64); + } + + P = EC_POINT_new(group); + T = EC_POINT_new(group); + if (P == NULL || T == NULL) + goto err; + + /* + * The zero entry is implicitly infinity, and we skip it, storing other + * values with -1 offset. + */ + if (!EC_POINT_copy(T, generator)) + goto err; + + for (k = 0; k < 64; k++) { + if (!EC_POINT_copy(P, T)) + goto err; + for (j = 0; j < 37; j++) { + /* + * It would be faster to use EC_POINTs_make_affine and + * make multiple points affine at the same time. + */ + if (!EC_POINT_make_affine(group, P, ctx)) + goto err; + if (!ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].X, + &P->X) || + !ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].Y, + &P->Y)) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, + EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + for (i = 0; i < 7; i++) { + if (!EC_POINT_dbl(group, P, P, ctx)) + goto err; + } + } + if (!EC_POINT_add(group, T, T, generator, ctx)) + goto err; + } + + pre_comp->group = group; + pre_comp->w = w; + pre_comp->precomp = preComputedTable; + pre_comp->precomp_storage = precomp_storage; + + precomp_storage = NULL; + + if (!EC_EX_DATA_set_data(&group->extra_data, pre_comp, + ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free)) { + goto err; + } + + pre_comp = NULL; + + ret = 1; + + err: + if (ctx != NULL) + BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + + if (pre_comp) + ecp_nistz256_pre_comp_free(pre_comp); + if (precomp_storage) + OPENSSL_free(precomp_storage); + if (P) + EC_POINT_free(P); + if (T) + EC_POINT_free(T); + return ret; +} + +/* + * Note that by default ECP_NISTZ256_AVX2 is undefined. While it's great + * code processing 4 points in parallel, corresponding serial operation + * is several times slower, because it uses 29x29=58-bit multiplication + * as opposite to 64x64=128-bit in integer-only scalar case. As result + * it doesn't provide *significant* performance improvement. Note that + * just defining ECP_NISTZ256_AVX2 is not sufficient to make it work, + * you'd need to compile even asm/ecp_nistz256-avx.pl module. + */ +#if defined(ECP_NISTZ256_AVX2) +# if !(defined(__x86_64) || defined(__x86_64__)) || \ + defined(_M_AMD64) || defined(_MX64)) || \ + !(defined(__GNUC__) || defined(_MSC_VER)) /* this is for ALIGN32 */ +# undef ECP_NISTZ256_AVX2 +# else +/* Constant time access, loading four values, from four consecutive tables */ +void ecp_nistz256_avx2_select_w7(P256_POINT_AFFINE * val, + const P256_POINT_AFFINE * in_t, int index); +void ecp_nistz256_avx2_multi_select_w7(void *result, const void *in, int index0, + int index1, int index2, int index3); +void ecp_nistz256_avx2_transpose_convert(void *RESULTx4, const void *in); +void ecp_nistz256_avx2_convert_transpose_back(void *result, const void *Ax4); +void ecp_nistz256_avx2_point_add_affine_x4(void *RESULTx4, const void *Ax4, + const void *Bx4); +void ecp_nistz256_avx2_point_add_affines_x4(void *RESULTx4, const void *Ax4, + const void *Bx4); +void ecp_nistz256_avx2_to_mont(void *RESULTx4, const void *Ax4); +void ecp_nistz256_avx2_from_mont(void *RESULTx4, const void *Ax4); +void ecp_nistz256_avx2_set1(void *RESULTx4); +int ecp_nistz_avx2_eligible(void); + +static void booth_recode_w7(unsigned char *sign, + unsigned char *digit, unsigned char in) +{ + unsigned char s, d; + + s = ~((in >> 7) - 1); + d = (1 << 8) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + *sign = s & 1; + *digit = d; +} + +/* + * ecp_nistz256_avx2_mul_g performs multiplication by G, using only the + * precomputed table. It does 4 affine point additions in parallel, + * significantly speeding up point multiplication for a fixed value. + */ +static void ecp_nistz256_avx2_mul_g(P256_POINT *r, + unsigned char p_str[33], + const P256_POINT_AFFINE(*preComputedTable)[64]) +{ + const unsigned int window_size = 7; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + /* Using 4 windows at a time */ + unsigned char sign0, digit0; + unsigned char sign1, digit1; + unsigned char sign2, digit2; + unsigned char sign3, digit3; + unsigned int index = 0; + BN_ULONG tmp[P256_LIMBS]; + int i; + + ALIGN32 BN_ULONG aX4[4 * 9 * 3] = { 0 }; + ALIGN32 BN_ULONG bX4[4 * 9 * 2] = { 0 }; + ALIGN32 P256_POINT_AFFINE point_arr[P256_LIMBS]; + ALIGN32 P256_POINT res_point_arr[P256_LIMBS]; + + /* Initial four windows */ + wvalue = *((u16 *) & p_str[0]); + wvalue = (wvalue << 1) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[0], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(aX4, point_arr); + ecp_nistz256_avx2_to_mont(aX4, aX4); + ecp_nistz256_avx2_to_mont(&aX4[4 * 9], &aX4[4 * 9]); + ecp_nistz256_avx2_set1(&aX4[4 * 9 * 2]); + + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[4 * 1], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(bX4, point_arr); + ecp_nistz256_avx2_to_mont(bX4, bX4); + ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); + /* Optimized when both inputs are affine */ + ecp_nistz256_avx2_point_add_affines_x4(aX4, aX4, bX4); + + for (i = 2; i < 9; i++) { + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, + preComputedTable[4 * i], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(bX4, point_arr); + ecp_nistz256_avx2_to_mont(bX4, bX4); + ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); + + ecp_nistz256_avx2_point_add_affine_x4(aX4, aX4, bX4); + } + + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 0], &aX4[4 * 9 * 0]); + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 1], &aX4[4 * 9 * 1]); + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 2], &aX4[4 * 9 * 2]); + + ecp_nistz256_avx2_convert_transpose_back(res_point_arr, aX4); + /* Last window is performed serially */ + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + booth_recode_w7(&sign0, &digit0, wvalue); + ecp_nistz256_avx2_select_w7((P256_POINT_AFFINE *) r, + preComputedTable[36], digit0); + ecp_nistz256_neg(tmp, r->Y); + copy_conditional(r->Y, tmp, sign0); + memcpy(r->Z, ONE, sizeof(ONE)); + /* Sum the four windows */ + ecp_nistz256_point_add(r, r, &res_point_arr[0]); + ecp_nistz256_point_add(r, r, &res_point_arr[1]); + ecp_nistz256_point_add(r, r, &res_point_arr[2]); + ecp_nistz256_point_add(r, r, &res_point_arr[3]); +} +# endif +#endif + +static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group, + const P256_POINT_AFFINE *in, + BN_CTX *ctx) +{ + BIGNUM x, y; + BN_ULONG d_x[P256_LIMBS], d_y[P256_LIMBS]; + int ret = 0; + + memcpy(d_x, in->X, sizeof(d_x)); + x.d = d_x; + x.dmax = x.top = P256_LIMBS; + x.neg = 0; + x.flags = BN_FLG_STATIC_DATA; + + memcpy(d_y, in->Y, sizeof(d_y)); + y.d = d_y; + y.dmax = y.top = P256_LIMBS; + y.neg = 0; + y.flags = BN_FLG_STATIC_DATA; + + ret = EC_POINT_set_affine_coordinates_GFp(group, out, &x, &y, ctx); + + return ret; +} + +/* r = scalar*G + sum(scalars[i]*points[i]) */ +static int ecp_nistz256_points_mul(const EC_GROUP *group, + EC_POINT *r, + const BIGNUM *scalar, + size_t num, + const EC_POINT *points[], + const BIGNUM *scalars[], BN_CTX *ctx) +{ + int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0; + size_t j; + unsigned char p_str[33] = { 0 }; + const PRECOMP256_ROW *preComputedTable = NULL; + const EC_PRE_COMP *pre_comp = NULL; + const EC_POINT *generator = NULL; + unsigned int index = 0; + BN_CTX *new_ctx = NULL; + const BIGNUM **new_scalars = NULL; + const EC_POINT **new_points = NULL; + const unsigned int window_size = 7; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + ALIGN32 union { + P256_POINT p; + P256_POINT_AFFINE a; + } t, p; + BIGNUM *tmp_scalar; + + if (group->meth != r->meth) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + + if ((scalar == NULL) && (num == 0)) + return EC_POINT_set_to_infinity(group, r); + + for (j = 0; j < num; j++) { + if (group->meth != points[j]->meth) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + } + + if (ctx == NULL) { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + goto err; + } + + BN_CTX_start(ctx); + + if (scalar) { + generator = EC_GROUP_get0_generator(group); + if (generator == NULL) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_UNDEFINED_GENERATOR); + goto err; + } + + /* look if we can use precomputed multiples of generator */ + pre_comp = + EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free); + + if (pre_comp) { + /* + * If there is a precomputed table for the generator, check that + * it was generated with the same generator. + */ + EC_POINT *pre_comp_generator = EC_POINT_new(group); + if (pre_comp_generator == NULL) + goto err; + + if (!ecp_nistz256_set_from_affine + (pre_comp_generator, group, pre_comp->precomp[0], ctx)) { + EC_POINT_free(pre_comp_generator); + goto err; + } + + if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx)) + preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp; + + EC_POINT_free(pre_comp_generator); + } + + if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) { + /* + * If there is no precomputed data, but the generator + * is the default, a hardcoded table of precomputed + * data is used. This is because applications, such as + * Apache, do not use EC_KEY_precompute_mult. + */ + preComputedTable = (const PRECOMP256_ROW *)ecp_nistz256_precomputed; + } + + if (preComputedTable) { + if ((BN_num_bits(scalar) > 256) + || BN_is_negative(scalar)) { + if ((tmp_scalar = BN_CTX_get(ctx)) == NULL) + goto err; + + if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_BN_LIB); + goto err; + } + scalar = tmp_scalar; + } + + for (i = 0; i < scalar->top * BN_BYTES; i += BN_BYTES) { + BN_ULONG d = scalar->d[i / BN_BYTES]; + + p_str[i + 0] = d & 0xff; + p_str[i + 1] = (d >> 8) & 0xff; + p_str[i + 2] = (d >> 16) & 0xff; + p_str[i + 3] = (d >>= 24) & 0xff; + if (BN_BYTES == 8) { + d >>= 8; + p_str[i + 4] = d & 0xff; + p_str[i + 5] = (d >> 8) & 0xff; + p_str[i + 6] = (d >> 16) & 0xff; + p_str[i + 7] = (d >> 24) & 0xff; + } + } + + for (; i < 33; i++) + p_str[i] = 0; + +#if defined(ECP_NISTZ256_AVX2) + if (ecp_nistz_avx2_eligible()) { + ecp_nistz256_avx2_mul_g(&p.p, p_str, preComputedTable); + } else +#endif + { + /* First window */ + wvalue = (p_str[0] << 1) & mask; + index += window_size; + + wvalue = _booth_recode_w7(wvalue); + + ecp_nistz256_select_w7(&p.a, preComputedTable[0], wvalue >> 1); + + ecp_nistz256_neg(p.p.Z, p.p.Y); + copy_conditional(p.p.Y, p.p.Z, wvalue & 1); + + memcpy(p.p.Z, ONE, sizeof(ONE)); + + for (i = 1; i < 37; i++) { + unsigned int off = (index - 1) / 8; + wvalue = p_str[off] | p_str[off + 1] << 8; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + + wvalue = _booth_recode_w7(wvalue); + + ecp_nistz256_select_w7(&t.a, + preComputedTable[i], wvalue >> 1); + + ecp_nistz256_neg(t.p.Z, t.a.Y); + copy_conditional(t.a.Y, t.p.Z, wvalue & 1); + + ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); + } + } + } else { + p_is_infinity = 1; + no_precomp_for_generator = 1; + } + } else + p_is_infinity = 1; + + if (no_precomp_for_generator) { + /* + * Without a precomputed table for the generator, it has to be + * handled like a normal point. + */ + new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *)); + if (!new_scalars) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); + goto err; + } + + new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *)); + if (!new_points) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); + goto err; + } + + memcpy(new_scalars, scalars, num * sizeof(BIGNUM *)); + new_scalars[num] = scalar; + memcpy(new_points, points, num * sizeof(EC_POINT *)); + new_points[num] = generator; + + scalars = new_scalars; + points = new_points; + num++; + } + + if (num) { + P256_POINT *out = &t.p; + if (p_is_infinity) + out = &p.p; + + if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx)) + goto err; + + if (!p_is_infinity) + ecp_nistz256_point_add(&p.p, &p.p, out); + } + + /* Not constant-time, but we're only operating on the public output. */ + if (!ecp_nistz256_set_words(&r->X, p.p.X) || + !ecp_nistz256_set_words(&r->Y, p.p.Y) || + !ecp_nistz256_set_words(&r->Z, p.p.Z)) { + goto err; + } + r->Z_is_one = is_one(p.p.Z) & 1; + + ret = 1; + +err: + if (ctx) + BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + if (new_points) + OPENSSL_free(new_points); + if (new_scalars) + OPENSSL_free(new_scalars); + return ret; +} + +static int ecp_nistz256_get_affine(const EC_GROUP *group, + const EC_POINT *point, + BIGNUM *x, BIGNUM *y, BN_CTX *ctx) +{ + BN_ULONG z_inv2[P256_LIMBS]; + BN_ULONG z_inv3[P256_LIMBS]; + BN_ULONG x_aff[P256_LIMBS]; + BN_ULONG y_aff[P256_LIMBS]; + BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS]; + BN_ULONG x_ret[P256_LIMBS], y_ret[P256_LIMBS]; + + if (EC_POINT_is_at_infinity(group, point)) { + ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_POINT_AT_INFINITY); + return 0; + } + + if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) || + !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) || + !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) { + ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_COORDINATES_OUT_OF_RANGE); + return 0; + } + + ecp_nistz256_mod_inverse(z_inv3, point_z); + ecp_nistz256_sqr_mont(z_inv2, z_inv3); + ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); + + if (x != NULL) { + ecp_nistz256_from_mont(x_ret, x_aff); + if (!ecp_nistz256_set_words(x, x_ret)) + return 0; + } + + if (y != NULL) { + ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); + ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); + ecp_nistz256_from_mont(y_ret, y_aff); + if (!ecp_nistz256_set_words(y, y_ret)) + return 0; + } + + return 1; +} + +static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group) +{ + EC_PRE_COMP *ret = NULL; + + if (!group) + return NULL; + + ret = (EC_PRE_COMP *)OPENSSL_malloc(sizeof(EC_PRE_COMP)); + + if (!ret) { + ECerr(EC_F_ECP_NISTZ256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); + return ret; + } + + ret->group = group; + ret->w = 6; /* default */ + ret->precomp = NULL; + ret->precomp_storage = NULL; + ret->references = 1; + return ret; +} + +static void *ecp_nistz256_pre_comp_dup(void *src_) +{ + EC_PRE_COMP *src = src_; + + /* no need to actually copy, these objects never change! */ + CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); + + return src_; +} + +static void ecp_nistz256_pre_comp_free(void *pre_) +{ + int i; + EC_PRE_COMP *pre = pre_; + + if (!pre) + return; + + i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); + if (i > 0) + return; + + if (pre->precomp_storage) + OPENSSL_free(pre->precomp_storage); + + OPENSSL_free(pre); +} + +static void ecp_nistz256_pre_comp_clear_free(void *pre_) +{ + int i; + EC_PRE_COMP *pre = pre_; + + if (!pre) + return; + + i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); + if (i > 0) + return; + + if (pre->precomp_storage) { + OPENSSL_cleanse(pre->precomp, + 32 * sizeof(unsigned char) * (1 << pre->w) * 2 * 37); + OPENSSL_free(pre->precomp_storage); + } + OPENSSL_cleanse(pre, sizeof *pre); + OPENSSL_free(pre); +} + +static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group) +{ + /* There is a hard-coded table for the default generator. */ + const EC_POINT *generator = EC_GROUP_get0_generator(group); + if (generator != NULL && ecp_nistz256_is_affine_G(generator)) { + /* There is a hard-coded table for the default generator. */ + return 1; + } + + return EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free) != NULL; +} + +const EC_METHOD *EC_GFp_nistz256_method(void) +{ + static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, + NID_X9_62_prime_field, + ec_GFp_mont_group_init, + ec_GFp_mont_group_finish, + ec_GFp_mont_group_clear_finish, + ec_GFp_mont_group_copy, + ec_GFp_mont_group_set_curve, + ec_GFp_simple_group_get_curve, + ec_GFp_simple_group_get_degree, + ec_GFp_simple_group_check_discriminant, + ec_GFp_simple_point_init, + ec_GFp_simple_point_finish, + ec_GFp_simple_point_clear_finish, + ec_GFp_simple_point_copy, + ec_GFp_simple_point_set_to_infinity, + ec_GFp_simple_set_Jprojective_coordinates_GFp, + ec_GFp_simple_get_Jprojective_coordinates_GFp, + ec_GFp_simple_point_set_affine_coordinates, + ecp_nistz256_get_affine, + 0, 0, 0, + ec_GFp_simple_add, + ec_GFp_simple_dbl, + ec_GFp_simple_invert, + ec_GFp_simple_is_at_infinity, + ec_GFp_simple_is_on_curve, + ec_GFp_simple_cmp, + ec_GFp_simple_make_affine, + ec_GFp_simple_points_make_affine, + ecp_nistz256_points_mul, /* mul */ + ecp_nistz256_mult_precompute, /* precompute_mult */ + ecp_nistz256_window_have_precompute_mult, /* have_precompute_mult */ + ec_GFp_mont_field_mul, + ec_GFp_mont_field_sqr, + 0, /* field_div */ + ec_GFp_mont_field_encode, + ec_GFp_mont_field_decode, + ec_GFp_mont_field_set_to_one + }; + + return &ret; +} diff --git a/drivers/builtin_openssl2/crypto/ec/ecp_nistz256_table.c b/drivers/builtin_openssl2/crypto/ec/ecp_nistz256_table.c new file mode 100644 index 0000000000..2f0797db6b --- /dev/null +++ b/drivers/builtin_openssl2/crypto/ec/ecp_nistz256_table.c @@ -0,0 +1,9533 @@ +/* + * This is the precomputed constant time access table for the code in + * ecp_montp256.c, for the default generator. The table consists of 37 + * subtables, each subtable contains 64 affine points. The affine points are + * encoded as eight uint64's, four for the x coordinate and four for the y. + * Both values are in little-endian order. There are 37 tables because a + * signed, 6-bit wNAF form of the scalar is used and ceil(256/(6 + 1)) = 37. + * Within each table there are 64 values because the 6-bit wNAF value can + * take 64 values, ignoring the sign bit, which is implemented by performing + * a negation of the affine point when required. We would like to align it + * to 2MB in order to increase the chances of using a large page but that + * appears to lead to invalid ELF files being produced. + */ + +#if defined(__GNUC__) +__attribute((aligned(4096))) +#elif defined(_MSC_VER) +__declspec(align(4096)) +#elif defined(__SUNPRO_C) +# pragma align 64(ecp_nistz256_precomputed) +#endif +static const BN_ULONG ecp_nistz256_precomputed[37][64 * + sizeof(P256_POINT_AFFINE) / + sizeof(BN_ULONG)] = { + {TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601), + TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6), + TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c), + TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85), + TOBN(0x850046d4, 0x10ddd64d), TOBN(0xaa6ae3c1, 0xa433827d), + TOBN(0x73220503, 0x8d1490d9), TOBN(0xf6bb32e4, 0x3dcf3a3b), + TOBN(0x2f3648d3, 0x61bee1a5), TOBN(0x152cd7cb, 0xeb236ff8), + TOBN(0x19a8fb0e, 0x92042dbe), TOBN(0x78c57751, 0x0a5b8a3b), + TOBN(0xffac3f90, 0x4eebc127), TOBN(0xb027f84a, 0x087d81fb), + TOBN(0x66ad77dd, 0x87cbbc98), TOBN(0x26936a3f, 0xb6ff747e), + TOBN(0xb04c5c1f, 0xc983a7eb), TOBN(0x583e47ad, 0x0861fe1a), + TOBN(0x78820831, 0x1a2ee98e), TOBN(0xd5f06a29, 0xe587cc07), + TOBN(0x74b0b50d, 0x46918dcc), TOBN(0x4650a6ed, 0xc623c173), + TOBN(0x0cdaacac, 0xe8100af2), TOBN(0x577362f5, 0x41b0176b), + TOBN(0x2d96f24c, 0xe4cbaba6), TOBN(0x17628471, 0xfad6f447), + TOBN(0x6b6c36de, 0xe5ddd22e), TOBN(0x84b14c39, 0x4c5ab863), + TOBN(0xbe1b8aae, 0xc45c61f5), TOBN(0x90ec649a, 0x94b9537d), + TOBN(0x941cb5aa, 0xd076c20c), TOBN(0xc9079605, 0x890523c8), + TOBN(0xeb309b4a, 0xe7ba4f10), TOBN(0x73c568ef, 0xe5eb882b), + TOBN(0x3540a987, 0x7e7a1f68), TOBN(0x73a076bb, 0x2dd1e916), + TOBN(0x40394737, 0x3e77664a), TOBN(0x55ae744f, 0x346cee3e), + TOBN(0xd50a961a, 0x5b17a3ad), TOBN(0x13074b59, 0x54213673), + TOBN(0x93d36220, 0xd377e44b), TOBN(0x299c2b53, 0xadff14b5), + TOBN(0xf424d44c, 0xef639f11), TOBN(0xa4c9916d, 0x4a07f75f), + TOBN(0x0746354e, 0xa0173b4f), TOBN(0x2bd20213, 0xd23c00f7), + TOBN(0xf43eaab5, 0x0c23bb08), TOBN(0x13ba5119, 0xc3123e03), + TOBN(0x2847d030, 0x3f5b9d4d), TOBN(0x6742f2f2, 0x5da67bdd), + TOBN(0xef933bdc, 0x77c94195), TOBN(0xeaedd915, 0x6e240867), + TOBN(0x27f14cd1, 0x9499a78f), TOBN(0x462ab5c5, 0x6f9b3455), + TOBN(0x8f90f02a, 0xf02cfc6b), TOBN(0xb763891e, 0xb265230d), + TOBN(0xf59da3a9, 0x532d4977), TOBN(0x21e3327d, 0xcf9eba15), + TOBN(0x123c7b84, 0xbe60bbf0), TOBN(0x56ec12f2, 0x7706df76), + TOBN(0x75c96e8f, 0x264e20e8), TOBN(0xabe6bfed, 0x59a7a841), + TOBN(0x2cc09c04, 0x44c8eb00), TOBN(0xe05b3080, 0xf0c4e16b), + TOBN(0x1eb7777a, 0xa45f3314), TOBN(0x56af7bed, 0xce5d45e3), + TOBN(0x2b6e019a, 0x88b12f1a), TOBN(0x086659cd, 0xfd835f9b), + TOBN(0x2c18dbd1, 0x9dc21ec8), TOBN(0x98f9868a, 0x0fcf8139), + TOBN(0x737d2cd6, 0x48250b49), TOBN(0xcc61c947, 0x24b3428f), + TOBN(0x0c2b4078, 0x80dd9e76), TOBN(0xc43a8991, 0x383fbe08), + TOBN(0x5f7d2d65, 0x779be5d2), TOBN(0x78719a54, 0xeb3b4ab5), + TOBN(0xea7d260a, 0x6245e404), TOBN(0x9de40795, 0x6e7fdfe0), + TOBN(0x1ff3a415, 0x8dac1ab5), TOBN(0x3e7090f1, 0x649c9073), + TOBN(0x1a768561, 0x2b944e88), TOBN(0x250f939e, 0xe57f61c8), + TOBN(0x0c0daa89, 0x1ead643d), TOBN(0x68930023, 0xe125b88e), + TOBN(0x04b71aa7, 0xd2697768), TOBN(0xabdedef5, 0xca345a33), + TOBN(0x2409d29d, 0xee37385e), TOBN(0x4ee1df77, 0xcb83e156), + TOBN(0x0cac12d9, 0x1cbb5b43), TOBN(0x170ed2f6, 0xca895637), + TOBN(0x28228cfa, 0x8ade6d66), TOBN(0x7ff57c95, 0x53238aca), + TOBN(0xccc42563, 0x4b2ed709), TOBN(0x0e356769, 0x856fd30d), + TOBN(0xbcbcd43f, 0x559e9811), TOBN(0x738477ac, 0x5395b759), + TOBN(0x35752b90, 0xc00ee17f), TOBN(0x68748390, 0x742ed2e3), + TOBN(0x7cd06422, 0xbd1f5bc1), TOBN(0xfbc08769, 0xc9e7b797), + TOBN(0xa242a35b, 0xb0cf664a), TOBN(0x126e48f7, 0x7f9707e3), + TOBN(0x1717bf54, 0xc6832660), TOBN(0xfaae7332, 0xfd12c72e), + TOBN(0x27b52db7, 0x995d586b), TOBN(0xbe29569e, 0x832237c2), + TOBN(0xe8e4193e, 0x2a65e7db), TOBN(0x152706dc, 0x2eaa1bbb), + TOBN(0x72bcd8b7, 0xbc60055b), TOBN(0x03cc23ee, 0x56e27e4b), + TOBN(0xee337424, 0xe4819370), TOBN(0xe2aa0e43, 0x0ad3da09), + TOBN(0x40b8524f, 0x6383c45d), TOBN(0xd7663554, 0x42a41b25), + TOBN(0x64efa6de, 0x778a4797), TOBN(0x2042170a, 0x7079adf4), + TOBN(0x808b0b65, 0x0bc6fb80), TOBN(0x5882e075, 0x3ffe2e6b), + TOBN(0xd5ef2f7c, 0x2c83f549), TOBN(0x54d63c80, 0x9103b723), + TOBN(0xf2f11bd6, 0x52a23f9b), TOBN(0x3670c319, 0x4b0b6587), + TOBN(0x55c4623b, 0xb1580e9e), TOBN(0x64edf7b2, 0x01efe220), + TOBN(0x97091dcb, 0xd53c5c9d), TOBN(0xf17624b6, 0xac0a177b), + TOBN(0xb0f13975, 0x2cfe2dff), TOBN(0xc1a35c0a, 0x6c7a574e), + TOBN(0x227d3146, 0x93e79987), TOBN(0x0575bf30, 0xe89cb80e), + TOBN(0x2f4e247f, 0x0d1883bb), TOBN(0xebd51226, 0x3274c3d0), + TOBN(0x5f3e51c8, 0x56ada97a), TOBN(0x4afc964d, 0x8f8b403e), + TOBN(0xa6f247ab, 0x412e2979), TOBN(0x675abd1b, 0x6f80ebda), + TOBN(0x66a2bd72, 0x5e485a1d), TOBN(0x4b2a5caf, 0x8f4f0b3c), + TOBN(0x2626927f, 0x1b847bba), TOBN(0x6c6fc7d9, 0x0502394d), + TOBN(0xfea912ba, 0xa5659ae8), TOBN(0x68363aba, 0x25e1a16e), + TOBN(0xb8842277, 0x752c41ac), TOBN(0xfe545c28, 0x2897c3fc), + TOBN(0x2d36e9e7, 0xdc4c696b), TOBN(0x5806244a, 0xfba977c5), + TOBN(0x85665e9b, 0xe39508c1), TOBN(0xf720ee25, 0x6d12597b), + TOBN(0x8a979129, 0xd2337a31), TOBN(0x5916868f, 0x0f862bdc), + TOBN(0x048099d9, 0x5dd283ba), TOBN(0xe2d1eeb6, 0xfe5bfb4e), + TOBN(0x82ef1c41, 0x7884005d), TOBN(0xa2d4ec17, 0xffffcbae), + TOBN(0x9161c53f, 0x8aa95e66), TOBN(0x5ee104e1, 0xc5fee0d0), + TOBN(0x562e4cec, 0xc135b208), TOBN(0x74e1b265, 0x4783f47d), + TOBN(0x6d2a506c, 0x5a3f3b30), TOBN(0xecead9f4, 0xc16762fc), + TOBN(0xf29dd4b2, 0xe286e5b9), TOBN(0x1b0fadc0, 0x83bb3c61), + TOBN(0x7a75023e, 0x7fac29a4), TOBN(0xc086d5f1, 0xc9477fa3), + TOBN(0x0fc61135, 0x2f6f3076), TOBN(0xc99ffa23, 0xe3912a9a), + TOBN(0x6a0b0685, 0xd2f8ba3d), TOBN(0xfdc777e8, 0xe93358a4), + TOBN(0x94a787bb, 0x35415f04), TOBN(0x640c2d6a, 0x4d23fea4), + TOBN(0x9de917da, 0x153a35b5), TOBN(0x793e8d07, 0x5d5cd074), + TOBN(0xf4f87653, 0x2de45068), TOBN(0x37c7a7e8, 0x9e2e1f6e), + TOBN(0xd0825fa2, 0xa3584069), TOBN(0xaf2cea7c, 0x1727bf42), + TOBN(0x0360a4fb, 0x9e4785a9), TOBN(0xe5fda49c, 0x27299f4a), + TOBN(0x48068e13, 0x71ac2f71), TOBN(0x83d0687b, 0x9077666f), + TOBN(0x6d3883b2, 0x15d02819), TOBN(0x6d0d7550, 0x40dd9a35), + TOBN(0x61d7cbf9, 0x1d2b469f), TOBN(0xf97b232f, 0x2efc3115), + TOBN(0xa551d750, 0xb24bcbc7), TOBN(0x11ea4949, 0x88a1e356), + TOBN(0x7669f031, 0x93cb7501), TOBN(0x595dc55e, 0xca737b8a), + TOBN(0xa4a319ac, 0xd837879f), TOBN(0x6fc1b49e, 0xed6b67b0), + TOBN(0xe3959933, 0x32f1f3af), TOBN(0x966742eb, 0x65432a2e), + TOBN(0x4b8dc9fe, 0xb4966228), TOBN(0x96cc6312, 0x43f43950), + TOBN(0x12068859, 0xc9b731ee), TOBN(0x7b948dc3, 0x56f79968), + TOBN(0x61e4ad32, 0xed1f8008), TOBN(0xe6c9267a, 0xd8b17538), + TOBN(0x1ac7c5eb, 0x857ff6fb), TOBN(0x994baaa8, 0x55f2fb10), + TOBN(0x84cf14e1, 0x1d248018), TOBN(0x5a39898b, 0x628ac508), + TOBN(0x14fde97b, 0x5fa944f5), TOBN(0xed178030, 0xd12e5ac7), + TOBN(0x042c2af4, 0x97e2feb4), TOBN(0xd36a42d7, 0xaebf7313), + TOBN(0x49d2c9eb, 0x084ffdd7), TOBN(0x9f8aa54b, 0x2ef7c76a), + TOBN(0x9200b7ba, 0x09895e70), TOBN(0x3bd0c66f, 0xddb7fb58), + TOBN(0x2d97d108, 0x78eb4cbb), TOBN(0x2d431068, 0xd84bde31), + TOBN(0x4b523eb7, 0x172ccd1f), TOBN(0x7323cb28, 0x30a6a892), + TOBN(0x97082ec0, 0xcfe153eb), TOBN(0xe97f6b6a, 0xf2aadb97), + TOBN(0x1d3d393e, 0xd1a83da1), TOBN(0xa6a7f9c7, 0x804b2a68), + TOBN(0x4a688b48, 0x2d0cb71e), TOBN(0xa9b4cc5f, 0x40585278), + TOBN(0x5e5db46a, 0xcb66e132), TOBN(0xf1be963a, 0x0d925880), + TOBN(0x944a7027, 0x0317b9e2), TOBN(0xe266f959, 0x48603d48), + TOBN(0x98db6673, 0x5c208899), TOBN(0x90472447, 0xa2fb18a3), + TOBN(0x8a966939, 0x777c619f), TOBN(0x3798142a, 0x2a3be21b), + TOBN(0xb4241cb1, 0x3298b343), TOBN(0xa3a14e49, 0xb44f65a1), + TOBN(0xc5f4d6cd, 0x3ac77acd), TOBN(0xd0288cb5, 0x52b6fc3c), + TOBN(0xd5cc8c2f, 0x1c040abc), TOBN(0xb675511e, 0x06bf9b4a), + TOBN(0xd667da37, 0x9b3aa441), TOBN(0x460d45ce, 0x51601f72), + TOBN(0xe2f73c69, 0x6755ff89), TOBN(0xdd3cf7e7, 0x473017e6), + TOBN(0x8ef5689d, 0x3cf7600d), TOBN(0x948dc4f8, 0xb1fc87b4), + TOBN(0xd9e9fe81, 0x4ea53299), TOBN(0x2d921ca2, 0x98eb6028), + TOBN(0xfaecedfd, 0x0c9803fc), TOBN(0xf38ae891, 0x4d7b4745), + TOBN(0xd8c5fccf, 0xc5e3a3d8), TOBN(0xbefd904c, 0x4079dfbf), + TOBN(0xbc6d6a58, 0xfead0197), TOBN(0x39227077, 0x695532a4), + TOBN(0x09e23e6d, 0xdbef42f5), TOBN(0x7e449b64, 0x480a9908), + TOBN(0x7b969c1a, 0xad9a2e40), TOBN(0x6231d792, 0x9591c2a4), + TOBN(0x87151456, 0x0f664534), TOBN(0x85ceae7c, 0x4b68f103), + TOBN(0xac09c4ae, 0x65578ab9), TOBN(0x33ec6868, 0xf044b10c), + TOBN(0x6ac4832b, 0x3a8ec1f1), TOBN(0x5509d128, 0x5847d5ef), + TOBN(0xf909604f, 0x763f1574), TOBN(0xb16c4303, 0xc32f63c4), + TOBN(0xb6ab2014, 0x7ca23cd3), TOBN(0xcaa7a5c6, 0xa391849d), + TOBN(0x5b0673a3, 0x75678d94), TOBN(0xc982ddd4, 0xdd303e64), + TOBN(0xfd7b000b, 0x5db6f971), TOBN(0xbba2cb1f, 0x6f876f92), + TOBN(0xc77332a3, 0x3c569426), TOBN(0xa159100c, 0x570d74f8), + TOBN(0xfd16847f, 0xdec67ef5), TOBN(0x742ee464, 0x233e76b7), + TOBN(0x0b8e4134, 0xefc2b4c8), TOBN(0xca640b86, 0x42a3e521), + TOBN(0x653a0190, 0x8ceb6aa9), TOBN(0x313c300c, 0x547852d5), + TOBN(0x24e4ab12, 0x6b237af7), TOBN(0x2ba90162, 0x8bb47af8), + TOBN(0x3d5e58d6, 0xa8219bb7), TOBN(0xc691d0bd, 0x1b06c57f), + TOBN(0x0ae4cb10, 0xd257576e), TOBN(0x3569656c, 0xd54a3dc3), + TOBN(0xe5ebaebd, 0x94cda03a), TOBN(0x934e82d3, 0x162bfe13), + TOBN(0x450ac0ba, 0xe251a0c6), TOBN(0x480b9e11, 0xdd6da526), + TOBN(0x00467bc5, 0x8cce08b5), TOBN(0xb636458c, 0x7f178d55), + TOBN(0xc5748bae, 0xa677d806), TOBN(0x2763a387, 0xdfa394eb), + TOBN(0xa12b448a, 0x7d3cebb6), TOBN(0xe7adda3e, 0x6f20d850), + TOBN(0xf63ebce5, 0x1558462c), TOBN(0x58b36143, 0x620088a8), + TOBN(0x8a2cc3ca, 0x4d63c0ee), TOBN(0x51233117, 0x0fe948ce), + TOBN(0x7463fd85, 0x222ef33b), TOBN(0xadf0c7dc, 0x7c603d6c), + TOBN(0x0ec32d3b, 0xfe7765e5), TOBN(0xccaab359, 0xbf380409), + TOBN(0xbdaa84d6, 0x8e59319c), TOBN(0xd9a4c280, 0x9c80c34d), + TOBN(0xa9d89488, 0xa059c142), TOBN(0x6f5ae714, 0xff0b9346), + TOBN(0x068f237d, 0x16fb3664), TOBN(0x5853e4c4, 0x363186ac), + TOBN(0xe2d87d23, 0x63c52f98), TOBN(0x2ec4a766, 0x81828876), + TOBN(0x47b864fa, 0xe14e7b1c), TOBN(0x0c0bc0e5, 0x69192408), + TOBN(0xe4d7681d, 0xb82e9f3e), TOBN(0x83200f0b, 0xdf25e13c), + TOBN(0x8909984c, 0x66f27280), TOBN(0x462d7b00, 0x75f73227), + TOBN(0xd90ba188, 0xf2651798), TOBN(0x74c6e18c, 0x36ab1c34), + TOBN(0xab256ea3, 0x5ef54359), TOBN(0x03466612, 0xd1aa702f), + TOBN(0x624d6049, 0x2ed22e91), TOBN(0x6fdfe0b5, 0x6f072822), + TOBN(0xeeca1115, 0x39ce2271), TOBN(0x98100a4f, 0xdb01614f), + TOBN(0xb6b0daa2, 0xa35c628f), TOBN(0xb6f94d2e, 0xc87e9a47), + TOBN(0xc6773259, 0x1d57d9ce), TOBN(0xf70bfeec, 0x03884a7b), + TOBN(0x5fb35ccf, 0xed2bad01), TOBN(0xa155cbe3, 0x1da6a5c7), + TOBN(0xc2e2594c, 0x30a92f8f), TOBN(0x649c89ce, 0x5bfafe43), + TOBN(0xd158667d, 0xe9ff257a), TOBN(0x9b359611, 0xf32c50ae), + TOBN(0x4b00b20b, 0x906014cf), TOBN(0xf3a8cfe3, 0x89bc7d3d), + TOBN(0x4ff23ffd, 0x248a7d06), TOBN(0x80c5bfb4, 0x878873fa), + TOBN(0xb7d9ad90, 0x05745981), TOBN(0x179c85db, 0x3db01994), + TOBN(0xba41b062, 0x61a6966c), TOBN(0x4d82d052, 0xeadce5a8), + TOBN(0x9e91cd3b, 0xa5e6a318), TOBN(0x47795f4f, 0x95b2dda0), + TOBN(0xecfd7c1f, 0xd55a897c), TOBN(0x009194ab, 0xb29110fb), + TOBN(0x5f0e2046, 0xe381d3b0), TOBN(0x5f3425f6, 0xa98dd291), + TOBN(0xbfa06687, 0x730d50da), TOBN(0x0423446c, 0x4b083b7f), + TOBN(0x397a247d, 0xd69d3417), TOBN(0xeb629f90, 0x387ba42a), + TOBN(0x1ee426cc, 0xd5cd79bf), TOBN(0x0032940b, 0x946c6e18), + TOBN(0x1b1e8ae0, 0x57477f58), TOBN(0xe94f7d34, 0x6d823278), + TOBN(0xc747cb96, 0x782ba21a), TOBN(0xc5254469, 0xf72b33a5), + TOBN(0x772ef6de, 0xc7f80c81), TOBN(0xd73acbfe, 0x2cd9e6b5), + TOBN(0x4075b5b1, 0x49ee90d9), TOBN(0x785c339a, 0xa06e9eba), + TOBN(0xa1030d5b, 0xabf825e0), TOBN(0xcec684c3, 0xa42931dc), + TOBN(0x42ab62c9, 0xc1586e63), TOBN(0x45431d66, 0x5ab43f2b), + TOBN(0x57c8b2c0, 0x55f7835d), TOBN(0x033da338, 0xc1b7f865), + TOBN(0x283c7513, 0xcaa76097), TOBN(0x0a624fa9, 0x36c83906), + TOBN(0x6b20afec, 0x715af2c7), TOBN(0x4b969974, 0xeba78bfd), + TOBN(0x220755cc, 0xd921d60e), TOBN(0x9b944e10, 0x7baeca13), + TOBN(0x04819d51, 0x5ded93d4), TOBN(0x9bbff86e, 0x6dddfd27), + TOBN(0x6b344130, 0x77adc612), TOBN(0xa7496529, 0xbbd803a0), + TOBN(0x1a1baaa7, 0x6d8805bd), TOBN(0xc8403902, 0x470343ad), + TOBN(0x39f59f66, 0x175adff1), TOBN(0x0b26d7fb, 0xb7d8c5b7), + TOBN(0xa875f5ce, 0x529d75e3), TOBN(0x85efc7e9, 0x41325cc2), + TOBN(0x21950b42, 0x1ff6acd3), TOBN(0xffe70484, 0x53dc6909), + TOBN(0xff4cd0b2, 0x28766127), TOBN(0xabdbe608, 0x4fb7db2b), + TOBN(0x837c9228, 0x5e1109e8), TOBN(0x26147d27, 0xf4645b5a), + TOBN(0x4d78f592, 0xf7818ed8), TOBN(0xd394077e, 0xf247fa36), + TOBN(0x0fb9c2d0, 0x488c171a), TOBN(0xa78bfbaa, 0x13685278), + TOBN(0xedfbe268, 0xd5b1fa6a), TOBN(0x0dceb8db, 0x2b7eaba7), + TOBN(0xbf9e8089, 0x9ae2b710), TOBN(0xefde7ae6, 0xa4449c96), + TOBN(0x43b7716b, 0xcc143a46), TOBN(0xd7d34194, 0xc3628c13), + TOBN(0x508cec1c, 0x3b3f64c9), TOBN(0xe20bc0ba, 0x1e5edf3f), + TOBN(0xda1deb85, 0x2f4318d4), TOBN(0xd20ebe0d, 0x5c3fa443), + TOBN(0x370b4ea7, 0x73241ea3), TOBN(0x61f1511c, 0x5e1a5f65), + TOBN(0x99a5e23d, 0x82681c62), TOBN(0xd731e383, 0xa2f54c2d), + TOBN(0x2692f36e, 0x83445904), TOBN(0x2e0ec469, 0xaf45f9c0), + TOBN(0x905a3201, 0xc67528b7), TOBN(0x88f77f34, 0xd0e5e542), + TOBN(0xf67a8d29, 0x5864687c), TOBN(0x23b92eae, 0x22df3562), + TOBN(0x5c27014b, 0x9bbec39e), TOBN(0x7ef2f226, 0x9c0f0f8d), + TOBN(0x97359638, 0x546c4d8d), TOBN(0x5f9c3fc4, 0x92f24679), + TOBN(0x912e8bed, 0xa8c8acd9), TOBN(0xec3a318d, 0x306634b0), + TOBN(0x80167f41, 0xc31cb264), TOBN(0x3db82f6f, 0x522113f2), + TOBN(0xb155bcd2, 0xdcafe197), TOBN(0xfba1da59, 0x43465283), + TOBN(0xa0425b8e, 0xb212cf53), TOBN(0x4f2e512e, 0xf8557c5f), + TOBN(0xc1286ff9, 0x25c4d56c), TOBN(0xbb8a0fea, 0xee26c851), + TOBN(0xc28f70d2, 0xe7d6107e), TOBN(0x7ee0c444, 0xe76265aa), + TOBN(0x3df277a4, 0x1d1936b1), TOBN(0x1a556e3f, 0xea9595eb), + TOBN(0x258bbbf9, 0xe7305683), TOBN(0x31eea5bf, 0x07ef5be6), + TOBN(0x0deb0e4a, 0x46c814c1), TOBN(0x5cee8449, 0xa7b730dd), + TOBN(0xeab495c5, 0xa0182bde), TOBN(0xee759f87, 0x9e27a6b4), + TOBN(0xc2cf6a68, 0x80e518ca), TOBN(0x25e8013f, 0xf14cf3f4), + TOBN(0x8fc44140, 0x7e8d7a14), TOBN(0xbb1ff3ca, 0x9556f36a), + TOBN(0x6a844385, 0x14600044), TOBN(0xba3f0c4a, 0x7451ae63), + TOBN(0xdfcac25b, 0x1f9af32a), TOBN(0x01e0db86, 0xb1f2214b), + TOBN(0x4e9a5bc2, 0xa4b596ac), TOBN(0x83927681, 0x026c2c08), + TOBN(0x3ec832e7, 0x7acaca28), TOBN(0x1bfeea57, 0xc7385b29), + TOBN(0x068212e3, 0xfd1eaf38), TOBN(0xc1329830, 0x6acf8ccc), + TOBN(0xb909f2db, 0x2aac9e59), TOBN(0x5748060d, 0xb661782a), + TOBN(0xc5ab2632, 0xc79b7a01), TOBN(0xda44c6c6, 0x00017626), + TOBN(0xf26c00e8, 0xa7ea82f0), TOBN(0x99cac80d, 0xe4299aaf), + TOBN(0xd66fe3b6, 0x7ed78be1), TOBN(0x305f725f, 0x648d02cd), + TOBN(0x33ed1bc4, 0x623fb21b), TOBN(0xfa70533e, 0x7a6319ad), + TOBN(0x17ab562d, 0xbe5ffb3e), TOBN(0x06374994, 0x56674741), + TOBN(0x69d44ed6, 0x5c46aa8e), TOBN(0x2100d5d3, 0xa8d063d1), + TOBN(0xcb9727ea, 0xa2d17c36), TOBN(0x4c2bab1b, 0x8add53b7), + TOBN(0xa084e90c, 0x15426704), TOBN(0x778afcd3, 0xa837ebea), + TOBN(0x6651f701, 0x7ce477f8), TOBN(0xa0624998, 0x46fb7a8b), + TOBN(0xdc1e6828, 0xed8a6e19), TOBN(0x33fc2336, 0x4189d9c7), + TOBN(0x026f8fe2, 0x671c39bc), TOBN(0xd40c4ccd, 0xbc6f9915), + TOBN(0xafa135bb, 0xf80e75ca), TOBN(0x12c651a0, 0x22adff2c), + TOBN(0xc40a04bd, 0x4f51ad96), TOBN(0x04820109, 0xbbe4e832), + TOBN(0x3667eb1a, 0x7f4c04cc), TOBN(0x59556621, 0xa9404f84), + TOBN(0x71cdf653, 0x7eceb50a), TOBN(0x994a44a6, 0x9b8335fa), + TOBN(0xd7faf819, 0xdbeb9b69), TOBN(0x473c5680, 0xeed4350d), + TOBN(0xb6658466, 0xda44bba2), TOBN(0x0d1bc780, 0x872bdbf3), + TOBN(0xe535f175, 0xa1962f91), TOBN(0x6ed7e061, 0xed58f5a7), + TOBN(0x177aa4c0, 0x2089a233), TOBN(0x0dbcb03a, 0xe539b413), + TOBN(0xe3dc424e, 0xbb32e38e), TOBN(0x6472e5ef, 0x6806701e), + TOBN(0xdd47ff98, 0x814be9ee), TOBN(0x6b60cfff, 0x35ace009), + TOBN(0xb8d3d931, 0x9ff91fe5), TOBN(0x039c4800, 0xf0518eed), + TOBN(0x95c37632, 0x9182cb26), TOBN(0x0763a434, 0x82fc568d), + TOBN(0x707c04d5, 0x383e76ba), TOBN(0xac98b930, 0x824e8197), + TOBN(0x92bf7c8f, 0x91230de0), TOBN(0x90876a01, 0x40959b70), + TOBN(0xdb6d96f3, 0x05968b80), TOBN(0x380a0913, 0x089f73b9), + TOBN(0x7da70b83, 0xc2c61e01), TOBN(0x95fb8394, 0x569b38c7), + TOBN(0x9a3c6512, 0x80edfe2f), TOBN(0x8f726bb9, 0x8faeaf82), + TOBN(0x8010a4a0, 0x78424bf8), TOBN(0x29672044, 0x0e844970)} + , + {TOBN(0x63c5cb81, 0x7a2ad62a), TOBN(0x7ef2b6b9, 0xac62ff54), + TOBN(0x3749bba4, 0xb3ad9db5), TOBN(0xad311f2c, 0x46d5a617), + TOBN(0xb77a8087, 0xc2ff3b6d), TOBN(0xb46feaf3, 0x367834ff), + TOBN(0xf8aa266d, 0x75d6b138), TOBN(0xfa38d320, 0xec008188), + TOBN(0x486d8ffa, 0x696946fc), TOBN(0x50fbc6d8, 0xb9cba56d), + TOBN(0x7e3d423e, 0x90f35a15), TOBN(0x7c3da195, 0xc0dd962c), + TOBN(0xe673fdb0, 0x3cfd5d8b), TOBN(0x0704b7c2, 0x889dfca5), + TOBN(0xf6ce581f, 0xf52305aa), TOBN(0x399d49eb, 0x914d5e53), + TOBN(0x380a496d, 0x6ec293cd), TOBN(0x733dbda7, 0x8e7051f5), + TOBN(0x037e388d, 0xb849140a), TOBN(0xee4b32b0, 0x5946dbf6), + TOBN(0xb1c4fda9, 0xcae368d1), TOBN(0x5001a7b0, 0xfdb0b2f3), + TOBN(0x6df59374, 0x2e3ac46e), TOBN(0x4af675f2, 0x39b3e656), + TOBN(0x44e38110, 0x39949296), TOBN(0x5b63827b, 0x361db1b5), + TOBN(0x3e5323ed, 0x206eaff5), TOBN(0x942370d2, 0xc21f4290), + TOBN(0xf2caaf2e, 0xe0d985a1), TOBN(0x192cc64b, 0x7239846d), + TOBN(0x7c0b8f47, 0xae6312f8), TOBN(0x7dc61f91, 0x96620108), + TOBN(0xb830fb5b, 0xc2da7de9), TOBN(0xd0e643df, 0x0ff8d3be), + TOBN(0x31ee77ba, 0x188a9641), TOBN(0x4e8aa3aa, 0xbcf6d502), + TOBN(0xf9fb6532, 0x9a49110f), TOBN(0xd18317f6, 0x2dd6b220), + TOBN(0x7e3ced41, 0x52c3ea5a), TOBN(0x0d296a14, 0x7d579c4a), + TOBN(0x35d6a53e, 0xed4c3717), TOBN(0x9f8240cf, 0x3d0ed2a3), + TOBN(0x8c0d4d05, 0xe5543aa5), TOBN(0x45d5bbfb, 0xdd33b4b4), + TOBN(0xfa04cc73, 0x137fd28e), TOBN(0x862ac6ef, 0xc73b3ffd), + TOBN(0x403ff9f5, 0x31f51ef2), TOBN(0x34d5e0fc, 0xbc73f5a2), + TOBN(0xf2526820, 0x08913f4f), TOBN(0xea20ed61, 0xeac93d95), + TOBN(0x51ed38b4, 0x6ca6b26c), TOBN(0x8662dcbc, 0xea4327b0), + TOBN(0x6daf295c, 0x725d2aaa), TOBN(0xbad2752f, 0x8e52dcda), + TOBN(0x2210e721, 0x0b17dacc), TOBN(0xa37f7912, 0xd51e8232), + TOBN(0x4f7081e1, 0x44cc3add), TOBN(0xd5ffa1d6, 0x87be82cf), + TOBN(0x89890b6c, 0x0edd6472), TOBN(0xada26e1a, 0x3ed17863), + TOBN(0x276f2715, 0x63483caa), TOBN(0xe6924cd9, 0x2f6077fd), + TOBN(0x05a7fe98, 0x0a466e3c), TOBN(0xf1c794b0, 0xb1902d1f), + TOBN(0xe5213688, 0x82a8042c), TOBN(0xd931cfaf, 0xcd278298), + TOBN(0x069a0ae0, 0xf597a740), TOBN(0x0adbb3f3, 0xeb59107c), + TOBN(0x983e951e, 0x5eaa8eb8), TOBN(0xe663a8b5, 0x11b48e78), + TOBN(0x1631cc0d, 0x8a03f2c5), TOBN(0x7577c11e, 0x11e271e2), + TOBN(0x33b2385c, 0x08369a90), TOBN(0x2990c59b, 0x190eb4f8), + TOBN(0x819a6145, 0xc68eac80), TOBN(0x7a786d62, 0x2ec4a014), + TOBN(0x33faadbe, 0x20ac3a8d), TOBN(0x31a21781, 0x5aba2d30), + TOBN(0x209d2742, 0xdba4f565), TOBN(0xdb2ce9e3, 0x55aa0fbb), + TOBN(0x8cef334b, 0x168984df), TOBN(0xe81dce17, 0x33879638), + TOBN(0xf6e6949c, 0x263720f0), TOBN(0x5c56feaf, 0xf593cbec), + TOBN(0x8bff5601, 0xfde58c84), TOBN(0x74e24117, 0x2eccb314), + TOBN(0xbcf01b61, 0x4c9a8a78), TOBN(0xa233e35e, 0x544c9868), + TOBN(0xb3156bf3, 0x8bd7aff1), TOBN(0x1b5ee4cb, 0x1d81b146), + TOBN(0x7ba1ac41, 0xd628a915), TOBN(0x8f3a8f9c, 0xfd89699e), + TOBN(0x7329b9c9, 0xa0748be7), TOBN(0x1d391c95, 0xa92e621f), + TOBN(0xe51e6b21, 0x4d10a837), TOBN(0xd255f53a, 0x4947b435), + TOBN(0x07669e04, 0xf1788ee3), TOBN(0xc14f27af, 0xa86938a2), + TOBN(0x8b47a334, 0xe93a01c0), TOBN(0xff627438, 0xd9366808), + TOBN(0x7a0985d8, 0xca2a5965), TOBN(0x3d9a5542, 0xd6e9b9b3), + TOBN(0xc23eb80b, 0x4cf972e8), TOBN(0x5c1c33bb, 0x4fdf72fd), + TOBN(0x0c4a58d4, 0x74a86108), TOBN(0xf8048a8f, 0xee4c5d90), + TOBN(0xe3c7c924, 0xe86d4c80), TOBN(0x28c889de, 0x056a1e60), + TOBN(0x57e2662e, 0xb214a040), TOBN(0xe8c48e98, 0x37e10347), + TOBN(0x87742862, 0x80ac748a), TOBN(0xf1c24022, 0x186b06f2), + TOBN(0xac2dd4c3, 0x5f74040a), TOBN(0x409aeb71, 0xfceac957), + TOBN(0x4fbad782, 0x55c4ec23), TOBN(0xb359ed61, 0x8a7b76ec), + TOBN(0x12744926, 0xed6f4a60), TOBN(0xe21e8d7f, 0x4b912de3), + TOBN(0xe2575a59, 0xfc705a59), TOBN(0x72f1d4de, 0xed2dbc0e), + TOBN(0x3d2b24b9, 0xeb7926b8), TOBN(0xbff88cb3, 0xcdbe5509), + TOBN(0xd0f399af, 0xe4dd640b), TOBN(0x3c5fe130, 0x2f76ed45), + TOBN(0x6f3562f4, 0x3764fb3d), TOBN(0x7b5af318, 0x3151b62d), + TOBN(0xd5bd0bc7, 0xd79ce5f3), TOBN(0xfdaf6b20, 0xec66890f), + TOBN(0x735c67ec, 0x6063540c), TOBN(0x50b259c2, 0xe5f9cb8f), + TOBN(0xb8734f9a, 0x3f99c6ab), TOBN(0xf8cc13d5, 0xa3a7bc85), + TOBN(0x80c1b305, 0xc5217659), TOBN(0xfe5364d4, 0x4ec12a54), + TOBN(0xbd87045e, 0x681345fe), TOBN(0x7f8efeb1, 0x582f897f), + TOBN(0xe8cbf1e5, 0xd5923359), TOBN(0xdb0cea9d, 0x539b9fb0), + TOBN(0x0c5b34cf, 0x49859b98), TOBN(0x5e583c56, 0xa4403cc6), + TOBN(0x11fc1a2d, 0xd48185b7), TOBN(0xc93fbc7e, 0x6e521787), + TOBN(0x47e7a058, 0x05105b8b), TOBN(0x7b4d4d58, 0xdb8260c8), + TOBN(0xe33930b0, 0x46eb842a), TOBN(0x8e844a9a, 0x7bdae56d), + TOBN(0x34ef3a9e, 0x13f7fdfc), TOBN(0xb3768f82, 0x636ca176), + TOBN(0x2821f4e0, 0x4e09e61c), TOBN(0x414dc3a1, 0xa0c7cddc), + TOBN(0xd5379437, 0x54945fcd), TOBN(0x151b6eef, 0xb3555ff1), + TOBN(0xb31bd613, 0x6339c083), TOBN(0x39ff8155, 0xdfb64701), + TOBN(0x7c3388d2, 0xe29604ab), TOBN(0x1e19084b, 0xa6b10442), + TOBN(0x17cf54c0, 0xeccd47ef), TOBN(0x89693385, 0x4a5dfb30), + TOBN(0x69d023fb, 0x47daf9f6), TOBN(0x9222840b, 0x7d91d959), + TOBN(0x439108f5, 0x803bac62), TOBN(0x0b7dd91d, 0x379bd45f), + TOBN(0xd651e827, 0xca63c581), TOBN(0x5c5d75f6, 0x509c104f), + TOBN(0x7d5fc738, 0x1f2dc308), TOBN(0x20faa7bf, 0xd98454be), + TOBN(0x95374bee, 0xa517b031), TOBN(0xf036b9b1, 0x642692ac), + TOBN(0xc5106109, 0x39842194), TOBN(0xb7e2353e, 0x49d05295), + TOBN(0xfc8c1d5c, 0xefb42ee0), TOBN(0xe04884eb, 0x08ce811c), + TOBN(0xf1f75d81, 0x7419f40e), TOBN(0x5b0ac162, 0xa995c241), + TOBN(0x120921bb, 0xc4c55646), TOBN(0x713520c2, 0x8d33cf97), + TOBN(0xb4a65a5c, 0xe98c5100), TOBN(0x6cec871d, 0x2ddd0f5a), + TOBN(0x251f0b7f, 0x9ba2e78b), TOBN(0x224a8434, 0xce3a2a5f), + TOBN(0x26827f61, 0x25f5c46f), TOBN(0x6a22bedc, 0x48545ec0), + TOBN(0x25ae5fa0, 0xb1bb5cdc), TOBN(0xd693682f, 0xfcb9b98f), + TOBN(0x32027fe8, 0x91e5d7d3), TOBN(0xf14b7d17, 0x73a07678), + TOBN(0xf88497b3, 0xc0dfdd61), TOBN(0xf7c2eec0, 0x2a8c4f48), + TOBN(0xaa5573f4, 0x3756e621), TOBN(0xc013a240, 0x1825b948), + TOBN(0x1c03b345, 0x63878572), TOBN(0xa0472bea, 0x653a4184), + TOBN(0xf4222e27, 0x0ac69a80), TOBN(0x34096d25, 0xf51e54f6), + TOBN(0x00a648cb, 0x8fffa591), TOBN(0x4e87acdc, 0x69b6527f), + TOBN(0x0575e037, 0xe285ccb4), TOBN(0x188089e4, 0x50ddcf52), + TOBN(0xaa96c9a8, 0x870ff719), TOBN(0x74a56cd8, 0x1fc7e369), + TOBN(0x41d04ee2, 0x1726931a), TOBN(0x0bbbb2c8, 0x3660ecfd), + TOBN(0xa6ef6de5, 0x24818e18), TOBN(0xe421cc51, 0xe7d57887), + TOBN(0xf127d208, 0xbea87be6), TOBN(0x16a475d3, 0xb1cdd682), + TOBN(0x9db1b684, 0x439b63f7), TOBN(0x5359b3db, 0xf0f113b6), + TOBN(0xdfccf1de, 0x8bf06e31), TOBN(0x1fdf8f44, 0xdd383901), + TOBN(0x10775cad, 0x5017e7d2), TOBN(0xdfc3a597, 0x58d11eef), + TOBN(0x6ec9c8a0, 0xb1ecff10), TOBN(0xee6ed6cc, 0x28400549), + TOBN(0xb5ad7bae, 0x1b4f8d73), TOBN(0x61b4f11d, 0xe00aaab9), + TOBN(0x7b32d69b, 0xd4eff2d7), TOBN(0x88ae6771, 0x4288b60f), + TOBN(0x159461b4, 0x37a1e723), TOBN(0x1f3d4789, 0x570aae8c), + TOBN(0x869118c0, 0x7f9871da), TOBN(0x35fbda78, 0xf635e278), + TOBN(0x738f3641, 0xe1541dac), TOBN(0x6794b13a, 0xc0dae45f), + TOBN(0x065064ac, 0x09cc0917), TOBN(0x27c53729, 0xc68540fd), + TOBN(0x0d2d4c8e, 0xef227671), TOBN(0xd23a9f80, 0xa1785a04), + TOBN(0x98c59528, 0x52650359), TOBN(0xfa09ad01, 0x74a1acad), + TOBN(0x082d5a29, 0x0b55bf5c), TOBN(0xa40f1c67, 0x419b8084), + TOBN(0x3a5c752e, 0xdcc18770), TOBN(0x4baf1f2f, 0x8825c3a5), + TOBN(0xebd63f74, 0x21b153ed), TOBN(0xa2383e47, 0xb2f64723), + TOBN(0xe7bf620a, 0x2646d19a), TOBN(0x56cb44ec, 0x03c83ffd), + TOBN(0xaf7267c9, 0x4f6be9f1), TOBN(0x8b2dfd7b, 0xc06bb5e9), + TOBN(0xb87072f2, 0xa672c5c7), TOBN(0xeacb11c8, 0x0d53c5e2), + TOBN(0x22dac29d, 0xff435932), TOBN(0x37bdb99d, 0x4408693c), + TOBN(0xf6e62fb6, 0x2899c20f), TOBN(0x3535d512, 0x447ece24), + TOBN(0xfbdc6b88, 0xff577ce3), TOBN(0x726693bd, 0x190575f2), + TOBN(0x6772b0e5, 0xab4b35a2), TOBN(0x1d8b6001, 0xf5eeaacf), + TOBN(0x728f7ce4, 0x795b9580), TOBN(0x4a20ed2a, 0x41fb81da), + TOBN(0x9f685cd4, 0x4fec01e6), TOBN(0x3ed7ddcc, 0xa7ff50ad), + TOBN(0x460fd264, 0x0c2d97fd), TOBN(0x3a241426, 0xeb82f4f9), + TOBN(0x17d1df2c, 0x6a8ea820), TOBN(0xb2b50d3b, 0xf22cc254), + TOBN(0x03856cba, 0xb7291426), TOBN(0x87fd26ae, 0x04f5ee39), + TOBN(0x9cb696cc, 0x02bee4ba), TOBN(0x53121804, 0x06820fd6), + TOBN(0xa5dfc269, 0x0212e985), TOBN(0x666f7ffa, 0x160f9a09), + TOBN(0xc503cd33, 0xbccd9617), TOBN(0x365dede4, 0xba7730a3), + TOBN(0x798c6355, 0x5ddb0786), TOBN(0xa6c3200e, 0xfc9cd3bc), + TOBN(0x060ffb2c, 0xe5e35efd), TOBN(0x99a4e25b, 0x5555a1c1), + TOBN(0x11d95375, 0xf70b3751), TOBN(0x0a57354a, 0x160e1bf6), + TOBN(0xecb3ae4b, 0xf8e4b065), TOBN(0x07a834c4, 0x2e53022b), + TOBN(0x1cd300b3, 0x8692ed96), TOBN(0x16a6f792, 0x61ee14ec), + TOBN(0x8f1063c6, 0x6a8649ed), TOBN(0xfbcdfcfe, 0x869f3e14), + TOBN(0x2cfb97c1, 0x00a7b3ec), TOBN(0xcea49b3c, 0x7130c2f1), + TOBN(0x462d044f, 0xe9d96488), TOBN(0x4b53d52e, 0x8182a0c1), + TOBN(0x84b6ddd3, 0x0391e9e9), TOBN(0x80ab7b48, 0xb1741a09), + TOBN(0xec0e15d4, 0x27d3317f), TOBN(0x8dfc1ddb, 0x1a64671e), + TOBN(0x93cc5d5f, 0xd49c5b92), TOBN(0xc995d53d, 0x3674a331), + TOBN(0x302e41ec, 0x090090ae), TOBN(0x2278a0cc, 0xedb06830), + TOBN(0x1d025932, 0xfbc99690), TOBN(0x0c32fbd2, 0xb80d68da), + TOBN(0xd79146da, 0xf341a6c1), TOBN(0xae0ba139, 0x1bef68a0), + TOBN(0xc6b8a563, 0x8d774b3a), TOBN(0x1cf307bd, 0x880ba4d7), + TOBN(0xc033bdc7, 0x19803511), TOBN(0xa9f97b3b, 0x8888c3be), + TOBN(0x3d68aebc, 0x85c6d05e), TOBN(0xc3b88a9d, 0x193919eb), + TOBN(0x2d300748, 0xc48b0ee3), TOBN(0x7506bc7c, 0x07a746c1), + TOBN(0xfc48437c, 0x6e6d57f3), TOBN(0x5bd71587, 0xcfeaa91a), + TOBN(0xa4ed0408, 0xc1bc5225), TOBN(0xd0b946db, 0x2719226d), + TOBN(0x109ecd62, 0x758d2d43), TOBN(0x75c8485a, 0x2751759b), + TOBN(0xb0b75f49, 0x9ce4177a), TOBN(0x4fa61a1e, 0x79c10c3d), + TOBN(0xc062d300, 0xa167fcd7), TOBN(0x4df3874c, 0x750f0fa8), + TOBN(0x29ae2cf9, 0x83dfedc9), TOBN(0xf8437134, 0x8d87631a), + TOBN(0xaf571711, 0x7429c8d2), TOBN(0x18d15867, 0x146d9272), + TOBN(0x83053ecf, 0x69769bb7), TOBN(0xc55eb856, 0xc479ab82), + TOBN(0x5ef7791c, 0x21b0f4b2), TOBN(0xaa5956ba, 0x3d491525), + TOBN(0x407a96c2, 0x9fe20eba), TOBN(0xf27168bb, 0xe52a5ad3), + TOBN(0x43b60ab3, 0xbf1d9d89), TOBN(0xe45c51ef, 0x710e727a), + TOBN(0xdfca5276, 0x099b4221), TOBN(0x8dc6407c, 0x2557a159), + TOBN(0x0ead8335, 0x91035895), TOBN(0x0a9db957, 0x9c55dc32), + TOBN(0xe40736d3, 0xdf61bc76), TOBN(0x13a619c0, 0x3f778cdb), + TOBN(0x6dd921a4, 0xc56ea28f), TOBN(0x76a52433, 0x2fa647b4), + TOBN(0x23591891, 0xac5bdc5d), TOBN(0xff4a1a72, 0xbac7dc01), + TOBN(0x9905e261, 0x62df8453), TOBN(0x3ac045df, 0xe63b265f), + TOBN(0x8a3f341b, 0xad53dba7), TOBN(0x8ec269cc, 0x837b625a), + TOBN(0xd71a2782, 0x3ae31189), TOBN(0x8fb4f9a3, 0x55e96120), + TOBN(0x804af823, 0xff9875cf), TOBN(0x23224f57, 0x5d442a9b), + TOBN(0x1c4d3b9e, 0xecc62679), TOBN(0x91da22fb, 0xa0e7ddb1), + TOBN(0xa370324d, 0x6c04a661), TOBN(0x9710d3b6, 0x5e376d17), + TOBN(0xed8c98f0, 0x3044e357), TOBN(0xc364ebbe, 0x6422701c), + TOBN(0x347f5d51, 0x7733d61c), TOBN(0xd55644b9, 0xcea826c3), + TOBN(0x80c6e0ad, 0x55a25548), TOBN(0x0aa7641d, 0x844220a7), + TOBN(0x1438ec81, 0x31810660), TOBN(0x9dfa6507, 0xde4b4043), + TOBN(0x10b515d8, 0xcc3e0273), TOBN(0x1b6066dd, 0x28d8cfb2), + TOBN(0xd3b04591, 0x9c9efebd), TOBN(0x425d4bdf, 0xa21c1ff4), + TOBN(0x5fe5af19, 0xd57607d3), TOBN(0xbbf773f7, 0x54481084), + TOBN(0x8435bd69, 0x94b03ed1), TOBN(0xd9ad1de3, 0x634cc546), + TOBN(0x2cf423fc, 0x00e420ca), TOBN(0xeed26d80, 0xa03096dd), + TOBN(0xd7f60be7, 0xa4db09d2), TOBN(0xf47f569d, 0x960622f7), + TOBN(0xe5925fd7, 0x7296c729), TOBN(0xeff2db26, 0x26ca2715), + TOBN(0xa6fcd014, 0xb913e759), TOBN(0x53da4786, 0x8ff4de93), + TOBN(0x14616d79, 0xc32068e1), TOBN(0xb187d664, 0xccdf352e), + TOBN(0xf7afb650, 0x1dc90b59), TOBN(0x8170e943, 0x7daa1b26), + TOBN(0xc8e3bdd8, 0x700c0a84), TOBN(0x6e8d345f, 0x6482bdfa), + TOBN(0x84cfbfa1, 0xc5c5ea50), TOBN(0xd3baf14c, 0x67960681), + TOBN(0x26398403, 0x0dd50942), TOBN(0xe4b7839c, 0x4716a663), + TOBN(0xd5f1f794, 0xe7de6dc0), TOBN(0x5cd0f4d4, 0x622aa7ce), + TOBN(0x5295f3f1, 0x59acfeec), TOBN(0x8d933552, 0x953e0607), + TOBN(0xc7db8ec5, 0x776c5722), TOBN(0xdc467e62, 0x2b5f290c), + TOBN(0xd4297e70, 0x4ff425a9), TOBN(0x4be924c1, 0x0cf7bb72), + TOBN(0x0d5dc5ae, 0xa1892131), TOBN(0x8bf8a8e3, 0xa705c992), + TOBN(0x73a0b064, 0x7a305ac5), TOBN(0x00c9ca4e, 0x9a8c77a8), + TOBN(0x5dfee80f, 0x83774bdd), TOBN(0x63131602, 0x85734485), + TOBN(0xa1b524ae, 0x914a69a9), TOBN(0xebc2ffaf, 0xd4e300d7), + TOBN(0x52c93db7, 0x7cfa46a5), TOBN(0x71e6161f, 0x21653b50), + TOBN(0x3574fc57, 0xa4bc580a), TOBN(0xc09015dd, 0xe1bc1253), + TOBN(0x4b7b47b2, 0xd174d7aa), TOBN(0x4072d8e8, 0xf3a15d04), + TOBN(0xeeb7d47f, 0xd6fa07ed), TOBN(0x6f2b9ff9, 0xedbdafb1), + TOBN(0x18c51615, 0x3760fe8a), TOBN(0x7a96e6bf, 0xf06c6c13), + TOBN(0x4d7a0410, 0x0ea2d071), TOBN(0xa1914e9b, 0x0be2a5ce), + TOBN(0x5726e357, 0xd8a3c5cf), TOBN(0x1197ecc3, 0x2abb2b13), + TOBN(0x6c0d7f7f, 0x31ae88dd), TOBN(0x15b20d1a, 0xfdbb3efe), + TOBN(0xcd06aa26, 0x70584039), TOBN(0x2277c969, 0xa7dc9747), + TOBN(0xbca69587, 0x7855d815), TOBN(0x899ea238, 0x5188b32a), + TOBN(0x37d9228b, 0x760c1c9d), TOBN(0xc7efbb11, 0x9b5c18da), + TOBN(0x7f0d1bc8, 0x19f6dbc5), TOBN(0x4875384b, 0x07e6905b), + TOBN(0xc7c50baa, 0x3ba8cd86), TOBN(0xb0ce40fb, 0xc2905de0), + TOBN(0x70840673, 0x7a231952), TOBN(0xa912a262, 0xcf43de26), + TOBN(0x9c38ddcc, 0xeb5b76c1), TOBN(0x746f5285, 0x26fc0ab4), + TOBN(0x52a63a50, 0xd62c269f), TOBN(0x60049c55, 0x99458621), + TOBN(0xe7f48f82, 0x3c2f7c9e), TOBN(0x6bd99043, 0x917d5cf3), + TOBN(0xeb1317a8, 0x8701f469), TOBN(0xbd3fe2ed, 0x9a449fe0), + TOBN(0x421e79ca, 0x12ef3d36), TOBN(0x9ee3c36c, 0x3e7ea5de), + TOBN(0xe48198b5, 0xcdff36f7), TOBN(0xaff4f967, 0xc6b82228), + TOBN(0x15e19dd0, 0xc47adb7e), TOBN(0x45699b23, 0x032e7dfa), + TOBN(0x40680c8b, 0x1fae026a), TOBN(0x5a347a48, 0x550dbf4d), + TOBN(0xe652533b, 0x3cef0d7d), TOBN(0xd94f7b18, 0x2bbb4381), + TOBN(0x838752be, 0x0e80f500), TOBN(0x8e6e2488, 0x9e9c9bfb), + TOBN(0xc9751697, 0x16caca6a), TOBN(0x866c49d8, 0x38531ad9), + TOBN(0xc917e239, 0x7151ade1), TOBN(0x2d016ec1, 0x6037c407), + TOBN(0xa407ccc9, 0x00eac3f9), TOBN(0x835f6280, 0xe2ed4748), + TOBN(0xcc54c347, 0x1cc98e0d), TOBN(0x0e969937, 0xdcb572eb), + TOBN(0x1b16c8e8, 0x8f30c9cb), TOBN(0xa606ae75, 0x373c4661), + TOBN(0x47aa689b, 0x35502cab), TOBN(0xf89014ae, 0x4d9bb64f), + TOBN(0x202f6a9c, 0x31c71f7b), TOBN(0x01f95aa3, 0x296ffe5c), + TOBN(0x5fc06014, 0x53cec3a3), TOBN(0xeb991237, 0x5f498a45), + TOBN(0xae9a935e, 0x5d91ba87), TOBN(0xc6ac6281, 0x0b564a19), + TOBN(0x8a8fe81c, 0x3bd44e69), TOBN(0x7c8b467f, 0x9dd11d45), + TOBN(0xf772251f, 0xea5b8e69), TOBN(0xaeecb3bd, 0xc5b75fbc), + TOBN(0x1aca3331, 0x887ff0e5), TOBN(0xbe5d49ff, 0x19f0a131), + TOBN(0x582c13aa, 0xe5c8646f), TOBN(0xdbaa12e8, 0x20e19980), + TOBN(0x8f40f31a, 0xf7abbd94), TOBN(0x1f13f5a8, 0x1dfc7663), + TOBN(0x5d81f1ee, 0xaceb4fc0), TOBN(0x36256002, 0x5e6f0f42), + TOBN(0x4b67d6d7, 0x751370c8), TOBN(0x2608b698, 0x03e80589), + TOBN(0xcfc0d2fc, 0x05268301), TOBN(0xa6943d39, 0x40309212), + TOBN(0x192a90c2, 0x1fd0e1c2), TOBN(0xb209f113, 0x37f1dc76), + TOBN(0xefcc5e06, 0x97bf1298), TOBN(0xcbdb6730, 0x219d639e), + TOBN(0xd009c116, 0xb81e8c6f), TOBN(0xa3ffdde3, 0x1a7ce2e5), + TOBN(0xc53fbaaa, 0xa914d3ba), TOBN(0x836d500f, 0x88df85ee), + TOBN(0xd98dc71b, 0x66ee0751), TOBN(0x5a3d7005, 0x714516fd), + TOBN(0x21d3634d, 0x39eedbba), TOBN(0x35cd2e68, 0x0455a46d), + TOBN(0xc8cafe65, 0xf9d7eb0c), TOBN(0xbda3ce9e, 0x00cefb3e), + TOBN(0xddc17a60, 0x2c9cf7a4), TOBN(0x01572ee4, 0x7bcb8773), + TOBN(0xa92b2b01, 0x8c7548df), TOBN(0x732fd309, 0xa84600e3), + TOBN(0xe22109c7, 0x16543a40), TOBN(0x9acafd36, 0xfede3c6c), + TOBN(0xfb206852, 0x6824e614), TOBN(0x2a4544a9, 0xda25dca0), + TOBN(0x25985262, 0x91d60b06), TOBN(0x281b7be9, 0x28753545), + TOBN(0xec667b1a, 0x90f13b27), TOBN(0x33a83aff, 0x940e2eb4), + TOBN(0x80009862, 0xd5d721d5), TOBN(0x0c3357a3, 0x5bd3a182), + TOBN(0x27f3a83b, 0x7aa2cda4), TOBN(0xb58ae74e, 0xf6f83085), + TOBN(0x2a911a81, 0x2e6dad6b), TOBN(0xde286051, 0xf43d6c5b), + TOBN(0x4bdccc41, 0xf996c4d8), TOBN(0xe7312ec0, 0x0ae1e24e)} + , + {TOBN(0xf8d112e7, 0x6e6485b3), TOBN(0x4d3e24db, 0x771c52f8), + TOBN(0x48e3ee41, 0x684a2f6d), TOBN(0x7161957d, 0x21d95551), + TOBN(0x19631283, 0xcdb12a6c), TOBN(0xbf3fa882, 0x2e50e164), + TOBN(0xf6254b63, 0x3166cc73), TOBN(0x3aefa7ae, 0xaee8cc38), + TOBN(0x79b0fe62, 0x3b36f9fd), TOBN(0x26543b23, 0xfde19fc0), + TOBN(0x136e64a0, 0x958482ef), TOBN(0x23f63771, 0x9b095825), + TOBN(0x14cfd596, 0xb6a1142e), TOBN(0x5ea6aac6, 0x335aac0b), + TOBN(0x86a0e8bd, 0xf3081dd5), TOBN(0x5fb89d79, 0x003dc12a), + TOBN(0xf615c33a, 0xf72e34d4), TOBN(0x0bd9ea40, 0x110eec35), + TOBN(0x1c12bc5b, 0xc1dea34e), TOBN(0x686584c9, 0x49ae4699), + TOBN(0x13ad95d3, 0x8c97b942), TOBN(0x4609561a, 0x4e5c7562), + TOBN(0x9e94a4ae, 0xf2737f89), TOBN(0xf57594c6, 0x371c78b6), + TOBN(0x0f0165fc, 0xe3779ee3), TOBN(0xe00e7f9d, 0xbd495d9e), + TOBN(0x1fa4efa2, 0x20284e7a), TOBN(0x4564bade, 0x47ac6219), + TOBN(0x90e6312a, 0xc4708e8e), TOBN(0x4f5725fb, 0xa71e9adf), + TOBN(0xe95f55ae, 0x3d684b9f), TOBN(0x47f7ccb1, 0x1e94b415), + TOBN(0x7322851b, 0x8d946581), TOBN(0xf0d13133, 0xbdf4a012), + TOBN(0xa3510f69, 0x6584dae0), TOBN(0x03a7c171, 0x3c9f6c6d), + TOBN(0x5be97f38, 0xe475381a), TOBN(0xca1ba422, 0x85823334), + TOBN(0xf83cc5c7, 0x0be17dda), TOBN(0x158b1494, 0x0b918c0f), + TOBN(0xda3a77e5, 0x522e6b69), TOBN(0x69c908c3, 0xbbcd6c18), + TOBN(0x1f1b9e48, 0xd924fd56), TOBN(0x37c64e36, 0xaa4bb3f7), + TOBN(0x5a4fdbdf, 0xee478d7d), TOBN(0xba75c8bc, 0x0193f7a0), + TOBN(0x84bc1e84, 0x56cd16df), TOBN(0x1fb08f08, 0x46fad151), + TOBN(0x8a7cabf9, 0x842e9f30), TOBN(0xa331d4bf, 0x5eab83af), + TOBN(0xd272cfba, 0x017f2a6a), TOBN(0x27560abc, 0x83aba0e3), + TOBN(0x94b83387, 0x0e3a6b75), TOBN(0x25c6aea2, 0x6b9f50f5), + TOBN(0x803d691d, 0xb5fdf6d0), TOBN(0x03b77509, 0xe6333514), + TOBN(0x36178903, 0x61a341c1), TOBN(0x3604dc60, 0x0cfd6142), + TOBN(0x022295eb, 0x8533316c), TOBN(0x3dbde4ac, 0x44af2922), + TOBN(0x898afc5d, 0x1c7eef69), TOBN(0x58896805, 0xd14f4fa1), + TOBN(0x05002160, 0x203c21ca), TOBN(0x6f0d1f30, 0x40ef730b), + TOBN(0x8e8c44d4, 0x196224f8), TOBN(0x75a4ab95, 0x374d079d), + TOBN(0x79085ecc, 0x7d48f123), TOBN(0x56f04d31, 0x1bf65ad8), + TOBN(0xe220bf1c, 0xbda602b2), TOBN(0x73ee1742, 0xf9612c69), + TOBN(0x76008fc8, 0x084fd06b), TOBN(0x4000ef9f, 0xf11380d1), + TOBN(0x48201b4b, 0x12cfe297), TOBN(0x3eee129c, 0x292f74e5), + TOBN(0xe1fe114e, 0xc9e874e8), TOBN(0x899b055c, 0x92c5fc41), + TOBN(0x4e477a64, 0x3a39c8cf), TOBN(0x82f09efe, 0x78963cc9), + TOBN(0x6fd3fd8f, 0xd333f863), TOBN(0x85132b2a, 0xdc949c63), + TOBN(0x7e06a3ab, 0x516eb17b), TOBN(0x73bec06f, 0xd2c7372b), + TOBN(0xe4f74f55, 0xba896da6), TOBN(0xbb4afef8, 0x8e9eb40f), + TOBN(0x2d75bec8, 0xe61d66b0), TOBN(0x02bda4b4, 0xef29300b), + TOBN(0x8bbaa8de, 0x026baa5a), TOBN(0xff54befd, 0xa07f4440), + TOBN(0xbd9b8b1d, 0xbe7a2af3), TOBN(0xec51caa9, 0x4fb74a72), + TOBN(0xb9937a4b, 0x63879697), TOBN(0x7c9a9d20, 0xec2687d5), + TOBN(0x1773e44f, 0x6ef5f014), TOBN(0x8abcf412, 0xe90c6900), + TOBN(0x387bd022, 0x8142161e), TOBN(0x50393755, 0xfcb6ff2a), + TOBN(0x9813fd56, 0xed6def63), TOBN(0x53cf6482, 0x7d53106c), + TOBN(0x991a35bd, 0x431f7ac1), TOBN(0xf1e274dd, 0x63e65faf), + TOBN(0xf63ffa3c, 0x44cc7880), TOBN(0x411a426b, 0x7c256981), + TOBN(0xb698b9fd, 0x93a420e0), TOBN(0x89fdddc0, 0xae53f8fe), + TOBN(0x766e0722, 0x32398baa), TOBN(0x205fee42, 0x5cfca031), + TOBN(0xa49f5341, 0x7a029cf2), TOBN(0xa88c68b8, 0x4023890d), + TOBN(0xbc275041, 0x7337aaa8), TOBN(0x9ed364ad, 0x0eb384f4), + TOBN(0xe0816f85, 0x29aba92f), TOBN(0x2e9e1941, 0x04e38a88), + TOBN(0x57eef44a, 0x3dafd2d5), TOBN(0x35d1fae5, 0x97ed98d8), + TOBN(0x50628c09, 0x2307f9b1), TOBN(0x09d84aae, 0xd6cba5c6), + TOBN(0x67071bc7, 0x88aaa691), TOBN(0x2dea57a9, 0xafe6cb03), + TOBN(0xdfe11bb4, 0x3d78ac01), TOBN(0x7286418c, 0x7fd7aa51), + TOBN(0xfabf7709, 0x77f7195a), TOBN(0x8ec86167, 0xadeb838f), + TOBN(0xea1285a8, 0xbb4f012d), TOBN(0xd6883503, 0x9a3eab3f), + TOBN(0xee5d24f8, 0x309004c2), TOBN(0xa96e4b76, 0x13ffe95e), + TOBN(0x0cdffe12, 0xbd223ea4), TOBN(0x8f5c2ee5, 0xb6739a53), + TOBN(0x5cb4aaa5, 0xdd968198), TOBN(0xfa131c52, 0x72413a6c), + TOBN(0x53d46a90, 0x9536d903), TOBN(0xb270f0d3, 0x48606d8e), + TOBN(0x518c7564, 0xa053a3bc), TOBN(0x088254b7, 0x1a86caef), + TOBN(0xb3ba8cb4, 0x0ab5efd0), TOBN(0x5c59900e, 0x4605945d), + TOBN(0xecace1dd, 0xa1887395), TOBN(0x40960f36, 0x932a65de), + TOBN(0x9611ff5c, 0x3aa95529), TOBN(0xc58215b0, 0x7c1e5a36), + TOBN(0xd48c9b58, 0xf0e1a524), TOBN(0xb406856b, 0xf590dfb8), + TOBN(0xc7605e04, 0x9cd95662), TOBN(0x0dd036ee, 0xa33ecf82), + TOBN(0xa50171ac, 0xc33156b3), TOBN(0xf09d24ea, 0x4a80172e), + TOBN(0x4e1f72c6, 0x76dc8eef), TOBN(0xe60caadc, 0x5e3d44ee), + TOBN(0x006ef8a6, 0x979b1d8f), TOBN(0x60908a1c, 0x97788d26), + TOBN(0x6e08f95b, 0x266feec0), TOBN(0x618427c2, 0x22e8c94e), + TOBN(0x3d613339, 0x59145a65), TOBN(0xcd9bc368, 0xfa406337), + TOBN(0x82d11be3, 0x2d8a52a0), TOBN(0xf6877b27, 0x97a1c590), + TOBN(0x837a819b, 0xf5cbdb25), TOBN(0x2a4fd1d8, 0xde090249), + TOBN(0x622a7de7, 0x74990e5f), TOBN(0x840fa5a0, 0x7945511b), + TOBN(0x30b974be, 0x6558842d), TOBN(0x70df8c64, 0x17f3d0a6), + TOBN(0x7c803520, 0x7542e46d), TOBN(0x7251fe7f, 0xe4ecc823), + TOBN(0xe59134cb, 0x5e9aac9a), TOBN(0x11bb0934, 0xf0045d71), + TOBN(0x53e5d9b5, 0xdbcb1d4e), TOBN(0x8d97a905, 0x92defc91), + TOBN(0xfe289327, 0x7946d3f9), TOBN(0xe132bd24, 0x07472273), + TOBN(0xeeeb510c, 0x1eb6ae86), TOBN(0x777708c5, 0xf0595067), + TOBN(0x18e2c8cd, 0x1297029e), TOBN(0x2c61095c, 0xbbf9305e), + TOBN(0xe466c258, 0x6b85d6d9), TOBN(0x8ac06c36, 0xda1ea530), + TOBN(0xa365dc39, 0xa1304668), TOBN(0xe4a9c885, 0x07f89606), + TOBN(0x65a4898f, 0xacc7228d), TOBN(0x3e2347ff, 0x84ca8303), + TOBN(0xa5f6fb77, 0xea7d23a3), TOBN(0x2fac257d, 0x672a71cd), + TOBN(0x6908bef8, 0x7e6a44d3), TOBN(0x8ff87566, 0x891d3d7a), + TOBN(0xe58e90b3, 0x6b0cf82e), TOBN(0x6438d246, 0x2615b5e7), + TOBN(0x07b1f8fc, 0x669c145a), TOBN(0xb0d8b2da, 0x36f1e1cb), + TOBN(0x54d5dadb, 0xd9184c4d), TOBN(0x3dbb18d5, 0xf93d9976), + TOBN(0x0a3e0f56, 0xd1147d47), TOBN(0x2afa8c8d, 0xa0a48609), + TOBN(0x275353e8, 0xbc36742c), TOBN(0x898f427e, 0xeea0ed90), + TOBN(0x26f4947e, 0x3e477b00), TOBN(0x8ad8848a, 0x308741e3), + TOBN(0x6c703c38, 0xd74a2a46), TOBN(0x5e3e05a9, 0x9ba17ba2), + TOBN(0xc1fa6f66, 0x4ab9a9e4), TOBN(0x474a2d9a, 0x3841d6ec), + TOBN(0x871239ad, 0x653ae326), TOBN(0x14bcf72a, 0xa74cbb43), + TOBN(0x8737650e, 0x20d4c083), TOBN(0x3df86536, 0x110ed4af), + TOBN(0xd2d86fe7, 0xb53ca555), TOBN(0x688cb00d, 0xabd5d538), + TOBN(0xcf81bda3, 0x1ad38468), TOBN(0x7ccfe3cc, 0xf01167b6), + TOBN(0xcf4f47e0, 0x6c4c1fe6), TOBN(0x557e1f1a, 0x298bbb79), + TOBN(0xf93b974f, 0x30d45a14), TOBN(0x174a1d2d, 0x0baf97c4), + TOBN(0x7a003b30, 0xc51fbf53), TOBN(0xd8940991, 0xee68b225), + TOBN(0x5b0aa7b7, 0x1c0f4173), TOBN(0x975797c9, 0xa20a7153), + TOBN(0x26e08c07, 0xe3533d77), TOBN(0xd7222e6a, 0x2e341c99), + TOBN(0x9d60ec3d, 0x8d2dc4ed), TOBN(0xbdfe0d8f, 0x7c476cf8), + TOBN(0x1fe59ab6, 0x1d056605), TOBN(0xa9ea9df6, 0x86a8551f), + TOBN(0x8489941e, 0x47fb8d8c), TOBN(0xfeb874eb, 0x4a7f1b10), + TOBN(0xfe5fea86, 0x7ee0d98f), TOBN(0x201ad34b, 0xdbf61864), + TOBN(0x45d8fe47, 0x37c031d4), TOBN(0xd5f49fae, 0x795f0822), + TOBN(0xdb0fb291, 0xc7f4a40c), TOBN(0x2e69d9c1, 0x730ddd92), + TOBN(0x754e1054, 0x49d76987), TOBN(0x8a24911d, 0x7662db87), + TOBN(0x61fc1810, 0x60a71676), TOBN(0xe852d1a8, 0xf66a8ad1), + TOBN(0x172bbd65, 0x6417231e), TOBN(0x0d6de7bd, 0x3babb11f), + TOBN(0x6fde6f88, 0xc8e347f8), TOBN(0x1c587547, 0x9bd99cc3), + TOBN(0x78e54ed0, 0x34076950), TOBN(0x97f0f334, 0x796e83ba), + TOBN(0xe4dbe1ce, 0x4924867a), TOBN(0xbd5f51b0, 0x60b84917), + TOBN(0x37530040, 0x3cb09a79), TOBN(0xdb3fe0f8, 0xff1743d8), + TOBN(0xed7894d8, 0x556fa9db), TOBN(0xfa262169, 0x23412fbf), + TOBN(0x563be0db, 0xba7b9291), TOBN(0x6ca8b8c0, 0x0c9fb234), + TOBN(0xed406aa9, 0xbd763802), TOBN(0xc21486a0, 0x65303da1), + TOBN(0x61ae291e, 0xc7e62ec4), TOBN(0x622a0492, 0xdf99333e), + TOBN(0x7fd80c9d, 0xbb7a8ee0), TOBN(0xdc2ed3bc, 0x6c01aedb), + TOBN(0x35c35a12, 0x08be74ec), TOBN(0xd540cb1a, 0x469f671f), + TOBN(0xd16ced4e, 0xcf84f6c7), TOBN(0x8561fb9c, 0x2d090f43), + TOBN(0x7e693d79, 0x6f239db4), TOBN(0xa736f928, 0x77bd0d94), + TOBN(0x07b4d929, 0x2c1950ee), TOBN(0xda177543, 0x56dc11b3), + TOBN(0xa5dfbbaa, 0x7a6a878e), TOBN(0x1c70cb29, 0x4decb08a), + TOBN(0xfba28c8b, 0x6f0f7c50), TOBN(0xa8eba2b8, 0x854dcc6d), + TOBN(0x5ff8e89a, 0x36b78642), TOBN(0x070c1c8e, 0xf6873adf), + TOBN(0xbbd3c371, 0x6484d2e4), TOBN(0xfb78318f, 0x0d414129), + TOBN(0x2621a39c, 0x6ad93b0b), TOBN(0x979d74c2, 0xa9e917f7), + TOBN(0xfc195647, 0x61fb0428), TOBN(0x4d78954a, 0xbee624d4), + TOBN(0xb94896e0, 0xb8ae86fd), TOBN(0x6667ac0c, 0xc91c8b13), + TOBN(0x9f180512, 0x43bcf832), TOBN(0xfbadf8b7, 0xa0010137), + TOBN(0xc69b4089, 0xb3ba8aa7), TOBN(0xfac4bacd, 0xe687ce85), + TOBN(0x9164088d, 0x977eab40), TOBN(0x51f4c5b6, 0x2760b390), + TOBN(0xd238238f, 0x340dd553), TOBN(0x358566c3, 0xdb1d31c9), + TOBN(0x3a5ad69e, 0x5068f5ff), TOBN(0xf31435fc, 0xdaff6b06), + TOBN(0xae549a5b, 0xd6debff0), TOBN(0x59e5f0b7, 0x75e01331), + TOBN(0x5d492fb8, 0x98559acf), TOBN(0x96018c2e, 0x4db79b50), + TOBN(0x55f4a48f, 0x609f66aa), TOBN(0x1943b3af, 0x4900a14f), + TOBN(0xc22496df, 0x15a40d39), TOBN(0xb2a44684, 0x4c20f7c5), + TOBN(0x76a35afa, 0x3b98404c), TOBN(0xbec75725, 0xff5d1b77), + TOBN(0xb67aa163, 0xbea06444), TOBN(0x27e95bb2, 0xf724b6f2), + TOBN(0x3c20e3e9, 0xd238c8ab), TOBN(0x1213754e, 0xddd6ae17), + TOBN(0x8c431020, 0x716e0f74), TOBN(0x6679c82e, 0xffc095c2), + TOBN(0x2eb3adf4, 0xd0ac2932), TOBN(0x2cc970d3, 0x01bb7a76), + TOBN(0x70c71f2f, 0x740f0e66), TOBN(0x545c616b, 0x2b6b23cc), + TOBN(0x4528cfcb, 0xb40a8bd7), TOBN(0xff839633, 0x2ab27722), + TOBN(0x049127d9, 0x025ac99a), TOBN(0xd314d4a0, 0x2b63e33b), + TOBN(0xc8c310e7, 0x28d84519), TOBN(0x0fcb8983, 0xb3bc84ba), + TOBN(0x2cc52261, 0x38634818), TOBN(0x501814f4, 0xb44c2e0b), + TOBN(0xf7e181aa, 0x54dfdba3), TOBN(0xcfd58ff0, 0xe759718c), + TOBN(0xf90cdb14, 0xd3b507a8), TOBN(0x57bd478e, 0xc50bdad8), + TOBN(0x29c197e2, 0x50e5f9aa), TOBN(0x4db6eef8, 0xe40bc855), + TOBN(0x2cc8f21a, 0xd1fc0654), TOBN(0xc71cc963, 0x81269d73), + TOBN(0xecfbb204, 0x077f49f9), TOBN(0xdde92571, 0xca56b793), + TOBN(0x9abed6a3, 0xf97ad8f7), TOBN(0xe6c19d3f, 0x924de3bd), + TOBN(0x8dce92f4, 0xa140a800), TOBN(0x85f44d1e, 0x1337af07), + TOBN(0x5953c08b, 0x09d64c52), TOBN(0xa1b5e49f, 0xf5df9749), + TOBN(0x336a8fb8, 0x52735f7d), TOBN(0xb332b6db, 0x9add676b), + TOBN(0x558b88a0, 0xb4511aa4), TOBN(0x09788752, 0xdbd5cc55), + TOBN(0x16b43b9c, 0xd8cd52bd), TOBN(0x7f0bc5a0, 0xc2a2696b), + TOBN(0x146e12d4, 0xc11f61ef), TOBN(0x9ce10754, 0x3a83e79e), + TOBN(0x08ec73d9, 0x6cbfca15), TOBN(0x09ff29ad, 0x5b49653f), + TOBN(0xe31b72bd, 0xe7da946e), TOBN(0xebf9eb3b, 0xee80a4f2), + TOBN(0xd1aabd08, 0x17598ce4), TOBN(0x18b5fef4, 0x53f37e80), + TOBN(0xd5d5cdd3, 0x5958cd79), TOBN(0x3580a1b5, 0x1d373114), + TOBN(0xa36e4c91, 0xfa935726), TOBN(0xa38c534d, 0xef20d760), + TOBN(0x7088e40a, 0x2ff5845b), TOBN(0xe5bb40bd, 0xbd78177f), + TOBN(0x4f06a7a8, 0x857f9920), TOBN(0xe3cc3e50, 0xe968f05d), + TOBN(0x1d68b7fe, 0xe5682d26), TOBN(0x5206f76f, 0xaec7f87c), + TOBN(0x41110530, 0x041951ab), TOBN(0x58ec52c1, 0xd4b5a71a), + TOBN(0xf3488f99, 0x0f75cf9a), TOBN(0xf411951f, 0xba82d0d5), + TOBN(0x27ee75be, 0x618895ab), TOBN(0xeae060d4, 0x6d8aab14), + TOBN(0x9ae1df73, 0x7fb54dc2), TOBN(0x1f3e391b, 0x25963649), + TOBN(0x242ec32a, 0xfe055081), TOBN(0x5bd450ef, 0x8491c9bd), + TOBN(0x367efc67, 0x981eb389), TOBN(0xed7e1928, 0x3a0550d5), + TOBN(0x362e776b, 0xab3ce75c), TOBN(0xe890e308, 0x1f24c523), + TOBN(0xb961b682, 0xfeccef76), TOBN(0x8b8e11f5, 0x8bba6d92), + TOBN(0x8f2ccc4c, 0x2b2375c4), TOBN(0x0d7f7a52, 0xe2f86cfa), + TOBN(0xfd94d30a, 0x9efe5633), TOBN(0x2d8d246b, 0x5451f934), + TOBN(0x2234c6e3, 0x244e6a00), TOBN(0xde2b5b0d, 0xddec8c50), + TOBN(0x2ce53c5a, 0xbf776f5b), TOBN(0x6f724071, 0x60357b05), + TOBN(0xb2593717, 0x71bf3f7a), TOBN(0x87d2501c, 0x440c4a9f), + TOBN(0x440552e1, 0x87b05340), TOBN(0xb7bf7cc8, 0x21624c32), + TOBN(0x4155a6ce, 0x22facddb), TOBN(0x5a4228cb, 0x889837ef), + TOBN(0xef87d6d6, 0xfd4fd671), TOBN(0xa233687e, 0xc2daa10e), + TOBN(0x75622244, 0x03c0eb96), TOBN(0x7632d184, 0x8bf19be6), + TOBN(0x05d0f8e9, 0x40735ff4), TOBN(0x3a3e6e13, 0xc00931f1), + TOBN(0x31ccde6a, 0xdafe3f18), TOBN(0xf381366a, 0xcfe51207), + TOBN(0x24c222a9, 0x60167d92), TOBN(0x62f9d6f8, 0x7529f18c), + TOBN(0x412397c0, 0x0353b114), TOBN(0x334d89dc, 0xef808043), + TOBN(0xd9ec63ba, 0x2a4383ce), TOBN(0xcec8e937, 0x5cf92ba0), + TOBN(0xfb8b4288, 0xc8be74c0), TOBN(0x67d6912f, 0x105d4391), + TOBN(0x7b996c46, 0x1b913149), TOBN(0x36aae2ef, 0x3a4e02da), + TOBN(0xb68aa003, 0x972de594), TOBN(0x284ec70d, 0x4ec6d545), + TOBN(0xf3d2b2d0, 0x61391d54), TOBN(0x69c5d5d6, 0xfe114e92), + TOBN(0xbe0f00b5, 0xb4482dff), TOBN(0xe1596fa5, 0xf5bf33c5), + TOBN(0x10595b56, 0x96a71cba), TOBN(0x944938b2, 0xfdcadeb7), + TOBN(0xa282da4c, 0xfccd8471), TOBN(0x98ec05f3, 0x0d37bfe1), + TOBN(0xe171ce1b, 0x0698304a), TOBN(0x2d691444, 0x21bdf79b), + TOBN(0xd0cd3b74, 0x1b21dec1), TOBN(0x712ecd8b, 0x16a15f71), + TOBN(0x8d4c00a7, 0x00fd56e1), TOBN(0x02ec9692, 0xf9527c18), + TOBN(0x21c44937, 0x4a3e42e1), TOBN(0x9176fbab, 0x1392ae0a), + TOBN(0x8726f1ba, 0x44b7b618), TOBN(0xb4d7aae9, 0xf1de491c), + TOBN(0xf91df7b9, 0x07b582c0), TOBN(0x7e116c30, 0xef60aa3a), + TOBN(0x99270f81, 0x466265d7), TOBN(0xb15b6fe2, 0x4df7adf0), + TOBN(0xfe33b2d3, 0xf9738f7f), TOBN(0x48553ab9, 0xd6d70f95), + TOBN(0x2cc72ac8, 0xc21e94db), TOBN(0x795ac38d, 0xbdc0bbee), + TOBN(0x0a1be449, 0x2e40478f), TOBN(0x81bd3394, 0x052bde55), + TOBN(0x63c8dbe9, 0x56b3c4f2), TOBN(0x017a99cf, 0x904177cc), + TOBN(0x947bbddb, 0x4d010fc1), TOBN(0xacf9b00b, 0xbb2c9b21), + TOBN(0x2970bc8d, 0x47173611), TOBN(0x1a4cbe08, 0xac7d756f), + TOBN(0x06d9f4aa, 0x67d541a2), TOBN(0xa3e8b689, 0x59c2cf44), + TOBN(0xaad066da, 0x4d88f1dd), TOBN(0xc604f165, 0x7ad35dea), + TOBN(0x7edc0720, 0x4478ca67), TOBN(0xa10dfae0, 0xba02ce06), + TOBN(0xeceb1c76, 0xaf36f4e4), TOBN(0x994b2292, 0xaf3f8f48), + TOBN(0xbf9ed77b, 0x77c8a68c), TOBN(0x74f544ea, 0x51744c9d), + TOBN(0x82d05bb9, 0x8113a757), TOBN(0x4ef2d2b4, 0x8a9885e4), + TOBN(0x1e332be5, 0x1aa7865f), TOBN(0x22b76b18, 0x290d1a52), + TOBN(0x308a2310, 0x44351683), TOBN(0x9d861896, 0xa3f22840), + TOBN(0x5959ddcd, 0x841ed947), TOBN(0x0def0c94, 0x154b73bf), + TOBN(0xf0105417, 0x4c7c15e0), TOBN(0x539bfb02, 0x3a277c32), + TOBN(0xe699268e, 0xf9dccf5f), TOBN(0x9f5796a5, 0x0247a3bd), + TOBN(0x8b839de8, 0x4f157269), TOBN(0xc825c1e5, 0x7a30196b), + TOBN(0x6ef0aabc, 0xdc8a5a91), TOBN(0xf4a8ce6c, 0x498b7fe6), + TOBN(0x1cce35a7, 0x70cbac78), TOBN(0x83488e9b, 0xf6b23958), + TOBN(0x0341a070, 0xd76cb011), TOBN(0xda6c9d06, 0xae1b2658), + TOBN(0xb701fb30, 0xdd648c52), TOBN(0x994ca02c, 0x52fb9fd1), + TOBN(0x06933117, 0x6f563086), TOBN(0x3d2b8100, 0x17856bab), + TOBN(0xe89f48c8, 0x5963a46e), TOBN(0x658ab875, 0xa99e61c7), + TOBN(0x6e296f87, 0x4b8517b4), TOBN(0x36c4fcdc, 0xfc1bc656), + TOBN(0xde5227a1, 0xa3906def), TOBN(0x9fe95f57, 0x62418945), + TOBN(0x20c91e81, 0xfdd96cde), TOBN(0x5adbe47e, 0xda4480de), + TOBN(0xa009370f, 0x396de2b6), TOBN(0x98583d4b, 0xf0ecc7bd), + TOBN(0xf44f6b57, 0xe51d0672), TOBN(0x03d6b078, 0x556b1984), + TOBN(0x27dbdd93, 0xb0b64912), TOBN(0x9b3a3434, 0x15687b09), + TOBN(0x0dba6461, 0x51ec20a9), TOBN(0xec93db7f, 0xff28187c), + TOBN(0x00ff8c24, 0x66e48bdd), TOBN(0x2514f2f9, 0x11ccd78e), + TOBN(0xeba11f4f, 0xe1250603), TOBN(0x8a22cd41, 0x243fa156), + TOBN(0xa4e58df4, 0xb283e4c6), TOBN(0x78c29859, 0x8b39783f), + TOBN(0x5235aee2, 0xa5259809), TOBN(0xc16284b5, 0x0e0227dd), + TOBN(0xa5f57916, 0x1338830d), TOBN(0x6d4b8a6b, 0xd2123fca), + TOBN(0x236ea68a, 0xf9c546f8), TOBN(0xc1d36873, 0xfa608d36), + TOBN(0xcd76e495, 0x8d436d13), TOBN(0xd4d9c221, 0x8fb080af), + TOBN(0x665c1728, 0xe8ad3fb5), TOBN(0xcf1ebe4d, 0xb3d572e0), + TOBN(0xa7a8746a, 0x584c5e20), TOBN(0x267e4ea1, 0xb9dc7035), + TOBN(0x593a15cf, 0xb9548c9b), TOBN(0x5e6e2135, 0x4bd012f3), + TOBN(0xdf31cc6a, 0x8c8f936e), TOBN(0x8af84d04, 0xb5c241dc), + TOBN(0x63990a6f, 0x345efb86), TOBN(0x6fef4e61, 0xb9b962cb)} + , + {TOBN(0xf6368f09, 0x25722608), TOBN(0x131260db, 0x131cf5c6), + TOBN(0x40eb353b, 0xfab4f7ac), TOBN(0x85c78880, 0x37eee829), + TOBN(0x4c1581ff, 0xc3bdf24e), TOBN(0x5bff75cb, 0xf5c3c5a8), + TOBN(0x35e8c83f, 0xa14e6f40), TOBN(0xb81d1c0f, 0x0295e0ca), + TOBN(0xfcde7cc8, 0xf43a730f), TOBN(0xe89b6f3c, 0x33ab590e), + TOBN(0xc823f529, 0xad03240b), TOBN(0x82b79afe, 0x98bea5db), + TOBN(0x568f2856, 0x962fe5de), TOBN(0x0c590adb, 0x60c591f3), + TOBN(0x1fc74a14, 0x4a28a858), TOBN(0x3b662498, 0xb3203f4c), + TOBN(0x91e3cf0d, 0x6c39765a), TOBN(0xa2db3acd, 0xac3cca0b), + TOBN(0x288f2f08, 0xcb953b50), TOBN(0x2414582c, 0xcf43cf1a), + TOBN(0x8dec8bbc, 0x60eee9a8), TOBN(0x54c79f02, 0x729aa042), + TOBN(0xd81cd5ec, 0x6532f5d5), TOBN(0xa672303a, 0xcf82e15f), + TOBN(0x376aafa8, 0x719c0563), TOBN(0xcd8ad2dc, 0xbc5fc79f), + TOBN(0x303fdb9f, 0xcb750cd3), TOBN(0x14ff052f, 0x4418b08e), + TOBN(0xf75084cf, 0x3e2d6520), TOBN(0x7ebdf0f8, 0x144ed509), + TOBN(0xf43bf0f2, 0xd3f25b98), TOBN(0x86ad71cf, 0xa354d837), + TOBN(0xb827fe92, 0x26f43572), TOBN(0xdfd3ab5b, 0x5d824758), + TOBN(0x315dd23a, 0x539094c1), TOBN(0x85c0e37a, 0x66623d68), + TOBN(0x575c7972, 0x7be19ae0), TOBN(0x616a3396, 0xdf0d36b5), + TOBN(0xa1ebb3c8, 0x26b1ff7e), TOBN(0x635b9485, 0x140ad453), + TOBN(0x92bf3cda, 0xda430c0b), TOBN(0x4702850e, 0x3a96dac6), + TOBN(0xc91cf0a5, 0x15ac326a), TOBN(0x95de4f49, 0xab8c25e4), + TOBN(0xb01bad09, 0xe265c17c), TOBN(0x24e45464, 0x087b3881), + TOBN(0xd43e583c, 0xe1fac5ca), TOBN(0xe17cb318, 0x6ead97a6), + TOBN(0x6cc39243, 0x74dcec46), TOBN(0x33cfc02d, 0x54c2b73f), + TOBN(0x82917844, 0xf26cd99c), TOBN(0x8819dd95, 0xd1773f89), + TOBN(0x09572aa6, 0x0871f427), TOBN(0x8e0cf365, 0xf6f01c34), + TOBN(0x7fa52988, 0xbff1f5af), TOBN(0x4eb357ea, 0xe75e8e50), + TOBN(0xd9d0c8c4, 0x868af75d), TOBN(0xd7325cff, 0x45c8c7ea), + TOBN(0xab471996, 0xcc81ecb0), TOBN(0xff5d55f3, 0x611824ed), + TOBN(0xbe314541, 0x1977a0ee), TOBN(0x5085c4c5, 0x722038c6), + TOBN(0x2d5335bf, 0xf94bb495), TOBN(0x894ad8a6, 0xc8e2a082), + TOBN(0x5c3e2341, 0xada35438), TOBN(0xf4a9fc89, 0x049b8c4e), + TOBN(0xbeeb355a, 0x9f17cf34), TOBN(0x3f311e0e, 0x6c91fe10), + TOBN(0xc2d20038, 0x92ab9891), TOBN(0x257bdcc1, 0x3e8ce9a9), + TOBN(0x1b2d9789, 0x88c53bee), TOBN(0x927ce89a, 0xcdba143a), + TOBN(0xb0a32cca, 0x523db280), TOBN(0x5c889f8a, 0x50d43783), + TOBN(0x503e04b3, 0x4897d16f), TOBN(0x8cdb6e78, 0x08f5f2e8), + TOBN(0x6ab91cf0, 0x179c8e74), TOBN(0xd8874e52, 0x48211d60), + TOBN(0xf948d4d5, 0xea851200), TOBN(0x4076d41e, 0xe6f9840a), + TOBN(0xc20e263c, 0x47b517ea), TOBN(0x79a448fd, 0x30685e5e), + TOBN(0xe55f6f78, 0xf90631a0), TOBN(0x88a790b1, 0xa79e6346), + TOBN(0x62160c7d, 0x80969fe8), TOBN(0x54f92fd4, 0x41491bb9), + TOBN(0xa6645c23, 0x5c957526), TOBN(0xf44cc5ae, 0xbea3ce7b), + TOBN(0xf7628327, 0x8b1e68b7), TOBN(0xc731ad7a, 0x303f29d3), + TOBN(0xfe5a9ca9, 0x57d03ecb), TOBN(0x96c0d50c, 0x41bc97a7), + TOBN(0xc4669fe7, 0x9b4f7f24), TOBN(0xfdd781d8, 0x3d9967ef), + TOBN(0x7892c7c3, 0x5d2c208d), TOBN(0x8bf64f7c, 0xae545cb3), + TOBN(0xc01f862c, 0x467be912), TOBN(0xf4c85ee9, 0xc73d30cc), + TOBN(0x1fa6f4be, 0x6ab83ec7), TOBN(0xa07a3c1c, 0x4e3e3cf9), + TOBN(0x87f8ef45, 0x0c00beb3), TOBN(0x30e2c2b3, 0x000d4c3e), + TOBN(0x1aa00b94, 0xfe08bf5b), TOBN(0x32c133aa, 0x9224ef52), + TOBN(0x38df16bb, 0x32e5685d), TOBN(0x68a9e069, 0x58e6f544), + TOBN(0x495aaff7, 0xcdc5ebc6), TOBN(0xf894a645, 0x378b135f), + TOBN(0xf316350a, 0x09e27ecf), TOBN(0xeced201e, 0x58f7179d), + TOBN(0x2eec273c, 0xe97861ba), TOBN(0x47ec2cae, 0xd693be2e), + TOBN(0xfa4c97c4, 0xf68367ce), TOBN(0xe4f47d0b, 0xbe5a5755), + TOBN(0x17de815d, 0xb298a979), TOBN(0xd7eca659, 0xc177dc7d), + TOBN(0x20fdbb71, 0x49ded0a3), TOBN(0x4cb2aad4, 0xfb34d3c5), + TOBN(0x2cf31d28, 0x60858a33), TOBN(0x3b6873ef, 0xa24aa40f), + TOBN(0x540234b2, 0x2c11bb37), TOBN(0x2d0366dd, 0xed4c74a3), + TOBN(0xf9a968da, 0xeec5f25d), TOBN(0x36601068, 0x67b63142), + TOBN(0x07cd6d2c, 0x68d7b6d4), TOBN(0xa8f74f09, 0x0c842942), + TOBN(0xe2751404, 0x7768b1ee), TOBN(0x4b5f7e89, 0xfe62aee4), + TOBN(0xc6a77177, 0x89070d26), TOBN(0xa1f28e4e, 0xdd1c8bc7), + TOBN(0xea5f4f06, 0x469e1f17), TOBN(0x78fc242a, 0xfbdb78e0), + TOBN(0xc9c7c592, 0x8b0588f1), TOBN(0xb6b7a0fd, 0x1535921e), + TOBN(0xcc5bdb91, 0xbde5ae35), TOBN(0xb42c485e, 0x12ff1864), + TOBN(0xa1113e13, 0xdbab98aa), TOBN(0xde9d469b, 0xa17b1024), + TOBN(0x23f48b37, 0xc0462d3a), TOBN(0x3752e537, 0x7c5c078d), + TOBN(0xe3a86add, 0x15544eb9), TOBN(0xf013aea7, 0x80fba279), + TOBN(0x8b5bb76c, 0xf22001b5), TOBN(0xe617ba14, 0xf02891ab), + TOBN(0xd39182a6, 0x936219d3), TOBN(0x5ce1f194, 0xae51cb19), + TOBN(0xc78f8598, 0xbf07a74c), TOBN(0x6d7158f2, 0x22cbf1bc), + TOBN(0x3b846b21, 0xe300ce18), TOBN(0x35fba630, 0x2d11275d), + TOBN(0x5fe25c36, 0xa0239b9b), TOBN(0xd8beb35d, 0xdf05d940), + TOBN(0x4db02bb0, 0x1f7e320d), TOBN(0x0641c364, 0x6da320ea), + TOBN(0x6d95fa5d, 0x821389a3), TOBN(0x92699748, 0x8fcd8e3d), + TOBN(0x316fef17, 0xceb6c143), TOBN(0x67fcb841, 0xd933762b), + TOBN(0xbb837e35, 0x118b17f8), TOBN(0x4b92552f, 0x9fd24821), + TOBN(0xae6bc70e, 0x46aca793), TOBN(0x1cf0b0e4, 0xe579311b), + TOBN(0x8dc631be, 0x5802f716), TOBN(0x099bdc6f, 0xbddbee4d), + TOBN(0xcc352bb2, 0x0caf8b05), TOBN(0xf74d505a, 0x72d63df2), + TOBN(0xb9876d4b, 0x91c4f408), TOBN(0x1ce18473, 0x9e229b2d), + TOBN(0x49507597, 0x83abdb4a), TOBN(0x850fbcb6, 0xdee84b18), + TOBN(0x6325236e, 0x609e67dc), TOBN(0x04d831d9, 0x9336c6d8), + TOBN(0x8deaae3b, 0xfa12d45d), TOBN(0xe425f8ce, 0x4746e246), + TOBN(0x8004c175, 0x24f5f31e), TOBN(0xaca16d8f, 0xad62c3b7), + TOBN(0x0dc15a6a, 0x9152f934), TOBN(0xf1235e5d, 0xed0e12c1), + TOBN(0xc33c06ec, 0xda477dac), TOBN(0x76be8732, 0xb2ea0006), + TOBN(0xcf3f7831, 0x0c0cd313), TOBN(0x3c524553, 0xa614260d), + TOBN(0x31a756f8, 0xcab22d15), TOBN(0x03ee10d1, 0x77827a20), + TOBN(0xd1e059b2, 0x1994ef20), TOBN(0x2a653b69, 0x638ae318), + TOBN(0x70d5eb58, 0x2f699010), TOBN(0x279739f7, 0x09f5f84a), + TOBN(0x5da4663c, 0x8b799336), TOBN(0xfdfdf14d, 0x203c37eb), + TOBN(0x32d8a9dc, 0xa1dbfb2d), TOBN(0xab40cff0, 0x77d48f9b), + TOBN(0xc018b383, 0xd20b42d5), TOBN(0xf9a810ef, 0x9f78845f), + TOBN(0x40af3753, 0xbdba9df0), TOBN(0xb90bdcfc, 0x131dfdf9), + TOBN(0x18720591, 0xf01ab782), TOBN(0xc823f211, 0x6af12a88), + TOBN(0xa51b80f3, 0x0dc14401), TOBN(0xde248f77, 0xfb2dfbe3), + TOBN(0xef5a44e5, 0x0cafe751), TOBN(0x73997c9c, 0xd4dcd221), + TOBN(0x32fd86d1, 0xde854024), TOBN(0xd5b53adc, 0xa09b84bb), + TOBN(0x008d7a11, 0xdcedd8d1), TOBN(0x406bd1c8, 0x74b32c84), + TOBN(0x5d4472ff, 0x05dde8b1), TOBN(0x2e25f2cd, 0xfce2b32f), + TOBN(0xbec0dd5e, 0x29dfc254), TOBN(0x4455fcf6, 0x2b98b267), + TOBN(0x0b4d43a5, 0xc72df2ad), TOBN(0xea70e6be, 0x48a75397), + TOBN(0x2aad6169, 0x5820f3bf), TOBN(0xf410d2dd, 0x9e37f68f), + TOBN(0x70fb7dba, 0x7be5ac83), TOBN(0x636bb645, 0x36ec3eec), + TOBN(0x27104ea3, 0x9754e21c), TOBN(0xbc87a3e6, 0x8d63c373), + TOBN(0x483351d7, 0x4109db9a), TOBN(0x0fa724e3, 0x60134da7), + TOBN(0x9ff44c29, 0xb0720b16), TOBN(0x2dd0cf13, 0x06aceead), + TOBN(0x5942758c, 0xe26929a6), TOBN(0x96c5db92, 0xb766a92b), + TOBN(0xcec7d4c0, 0x5f18395e), TOBN(0xd3f22744, 0x1f80d032), + TOBN(0x7a68b37a, 0xcb86075b), TOBN(0x074764dd, 0xafef92db), + TOBN(0xded1e950, 0x7bc7f389), TOBN(0xc580c850, 0xb9756460), + TOBN(0xaeeec2a4, 0x7da48157), TOBN(0x3f0b4e7f, 0x82c587b3), + TOBN(0x231c6de8, 0xa9f19c53), TOBN(0x5717bd73, 0x6974e34e), + TOBN(0xd9e1d216, 0xf1508fa9), TOBN(0x9f112361, 0xdadaa124), + TOBN(0x80145e31, 0x823b7348), TOBN(0x4dd8f0d5, 0xac634069), + TOBN(0xe3d82fc7, 0x2297c258), TOBN(0x276fcfee, 0x9cee7431), + TOBN(0x8eb61b5e, 0x2bc0aea9), TOBN(0x4f668fd5, 0xde329431), + TOBN(0x03a32ab1, 0x38e4b87e), TOBN(0xe1374517, 0x73d0ef0b), + TOBN(0x1a46f7e6, 0x853ac983), TOBN(0xc3bdf42e, 0x68e78a57), + TOBN(0xacf20785, 0x2ea96dd1), TOBN(0xa10649b9, 0xf1638460), + TOBN(0xf2369f0b, 0x879fbbed), TOBN(0x0ff0ae86, 0xda9d1869), + TOBN(0x5251d759, 0x56766f45), TOBN(0x4984d8c0, 0x2be8d0fc), + TOBN(0x7ecc95a6, 0xd21008f0), TOBN(0x29bd54a0, 0x3a1a1c49), + TOBN(0xab9828c5, 0xd26c50f3), TOBN(0x32c0087c, 0x51d0d251), + TOBN(0x9bac3ce6, 0x0c1cdb26), TOBN(0xcd94d947, 0x557ca205), + TOBN(0x1b1bd598, 0x9db1fdcd), TOBN(0x0eda0108, 0xa3d8b149), + TOBN(0x95066610, 0x56152fcc), TOBN(0xc2f037e6, 0xe7192b33), + TOBN(0xdeffb41a, 0xc92e05a4), TOBN(0x1105f6c2, 0xc2f6c62e), + TOBN(0x68e73500, 0x8733913c), TOBN(0xcce86163, 0x3f3adc40), + TOBN(0xf407a942, 0x38a278e9), TOBN(0xd13c1b9d, 0x2ab21292), + TOBN(0x93ed7ec7, 0x1c74cf5c), TOBN(0x8887dc48, 0xf1a4c1b4), + TOBN(0x3830ff30, 0x4b3a11f1), TOBN(0x358c5a3c, 0x58937cb6), + TOBN(0x027dc404, 0x89022829), TOBN(0x40e93977, 0x3b798f79), + TOBN(0x90ad3337, 0x38be6ead), TOBN(0x9c23f6bc, 0xf34c0a5d), + TOBN(0xd1711a35, 0xfbffd8bb), TOBN(0x60fcfb49, 0x1949d3dd), + TOBN(0x09c8ef4b, 0x7825d93a), TOBN(0x24233cff, 0xa0a8c968), + TOBN(0x67ade46c, 0xe6d982af), TOBN(0xebb6bf3e, 0xe7544d7c), + TOBN(0xd6b9ba76, 0x3d8bd087), TOBN(0x46fe382d, 0x4dc61280), + TOBN(0xbd39a7e8, 0xb5bdbd75), TOBN(0xab381331, 0xb8f228fe), + TOBN(0x0709a77c, 0xce1c4300), TOBN(0x6a247e56, 0xf337ceac), + TOBN(0x8f34f21b, 0x636288be), TOBN(0x9dfdca74, 0xc8a7c305), + TOBN(0x6decfd1b, 0xea919e04), TOBN(0xcdf2688d, 0x8e1991f8), + TOBN(0xe607df44, 0xd0f8a67e), TOBN(0xd985df4b, 0x0b58d010), + TOBN(0x57f834c5, 0x0c24f8f4), TOBN(0xe976ef56, 0xa0bf01ae), + TOBN(0x536395ac, 0xa1c32373), TOBN(0x351027aa, 0x734c0a13), + TOBN(0xd2f1b5d6, 0x5e6bd5bc), TOBN(0x2b539e24, 0x223debed), + TOBN(0xd4994cec, 0x0eaa1d71), TOBN(0x2a83381d, 0x661dcf65), + TOBN(0x5f1aed2f, 0x7b54c740), TOBN(0x0bea3fa5, 0xd6dda5ee), + TOBN(0x9d4fb684, 0x36cc6134), TOBN(0x8eb9bbf3, 0xc0a443dd), + TOBN(0xfc500e2e, 0x383b7d2a), TOBN(0x7aad621c, 0x5b775257), + TOBN(0x69284d74, 0x0a8f7cc0), TOBN(0xe820c2ce, 0x07562d65), + TOBN(0xbf9531b9, 0x499758ee), TOBN(0x73e95ca5, 0x6ee0cc2d), + TOBN(0xf61790ab, 0xfbaf50a5), TOBN(0xdf55e76b, 0x684e0750), + TOBN(0xec516da7, 0xf176b005), TOBN(0x575553bb, 0x7a2dddc7), + TOBN(0x37c87ca3, 0x553afa73), TOBN(0x315f3ffc, 0x4d55c251), + TOBN(0xe846442a, 0xaf3e5d35), TOBN(0x61b91149, 0x6495ff28), + TOBN(0x23cc95d3, 0xfa326dc3), TOBN(0x1df4da1f, 0x18fc2cea), + TOBN(0x24bf9adc, 0xd0a37d59), TOBN(0xb6710053, 0x320d6e1e), + TOBN(0x96f9667e, 0x618344d1), TOBN(0xcc7ce042, 0xa06445af), + TOBN(0xa02d8514, 0xd68dbc3a), TOBN(0x4ea109e4, 0x280b5a5b), + TOBN(0x5741a7ac, 0xb40961bf), TOBN(0x4ada5937, 0x6aa56bfa), + TOBN(0x7feb9145, 0x02b765d1), TOBN(0x561e97be, 0xe6ad1582), + TOBN(0xbbc4a5b6, 0xda3982f5), TOBN(0x0c2659ed, 0xb546f468), + TOBN(0xb8e7e6aa, 0x59612d20), TOBN(0xd83dfe20, 0xac19e8e0), + TOBN(0x8530c45f, 0xb835398c), TOBN(0x6106a8bf, 0xb38a41c2), + TOBN(0x21e8f9a6, 0x35f5dcdb), TOBN(0x39707137, 0xcae498ed), + TOBN(0x70c23834, 0xd8249f00), TOBN(0x9f14b58f, 0xab2537a0), + TOBN(0xd043c365, 0x5f61c0c2), TOBN(0xdc5926d6, 0x09a194a7), + TOBN(0xddec0339, 0x8e77738a), TOBN(0xd07a63ef, 0xfba46426), + TOBN(0x2e58e79c, 0xee7f6e86), TOBN(0xe59b0459, 0xff32d241), + TOBN(0xc5ec84e5, 0x20fa0338), TOBN(0x97939ac8, 0xeaff5ace), + TOBN(0x0310a4e3, 0xb4a38313), TOBN(0x9115fba2, 0x8f9d9885), + TOBN(0x8dd710c2, 0x5fadf8c3), TOBN(0x66be38a2, 0xce19c0e2), + TOBN(0xd42a279c, 0x4cfe5022), TOBN(0x597bb530, 0x0e24e1b8), + TOBN(0x3cde86b7, 0xc153ca7f), TOBN(0xa8d30fb3, 0x707d63bd), + TOBN(0xac905f92, 0xbd60d21e), TOBN(0x98e7ffb6, 0x7b9a54ab), + TOBN(0xd7147df8, 0xe9726a30), TOBN(0xb5e216ff, 0xafce3533), + TOBN(0xb550b799, 0x2ff1ec40), TOBN(0x6b613b87, 0xa1e953fd), + TOBN(0x87b88dba, 0x792d5610), TOBN(0x2ee1270a, 0xa190fbe1), + TOBN(0x02f4e2dc, 0x2ef581da), TOBN(0x016530e4, 0xeff82a95), + TOBN(0xcbb93dfd, 0x8fd6ee89), TOBN(0x16d3d986, 0x46848fff), + TOBN(0x600eff24, 0x1da47adf), TOBN(0x1b9754a0, 0x0ad47a71), + TOBN(0x8f9266df, 0x70c33b98), TOBN(0xaadc87ae, 0xdf34186e), + TOBN(0x0d2ce8e1, 0x4ad24132), TOBN(0x8a47cbfc, 0x19946eba), + TOBN(0x47feeb66, 0x62b5f3af), TOBN(0xcefab561, 0x0abb3734), + TOBN(0x449de60e, 0x19f35cb1), TOBN(0x39f8db14, 0x157f0eb9), + TOBN(0xffaecc5b, 0x3c61bfd6), TOBN(0xa5a4d41d, 0x41216703), + TOBN(0x7f8fabed, 0x224e1cc2), TOBN(0x0d5a8186, 0x871ad953), + TOBN(0xf10774f7, 0xd22da9a9), TOBN(0x45b8a678, 0xcc8a9b0d), + TOBN(0xd9c2e722, 0xbdc32cff), TOBN(0xbf71b5f5, 0x337202a5), + TOBN(0x95c57f2f, 0x69fc4db9), TOBN(0xb6dad34c, 0x765d01e1), + TOBN(0x7e0bd13f, 0xcb904635), TOBN(0x61751253, 0x763a588c), + TOBN(0xd85c2997, 0x81af2c2d), TOBN(0xc0f7d9c4, 0x81b9d7da), + TOBN(0x838a34ae, 0x08533e8d), TOBN(0x15c4cb08, 0x311d8311), + TOBN(0x97f83285, 0x8e121e14), TOBN(0xeea7dc1e, 0x85000a5f), + TOBN(0x0c6059b6, 0x5d256274), TOBN(0xec9beace, 0xb95075c0), + TOBN(0x173daad7, 0x1df97828), TOBN(0xbf851cb5, 0xa8937877), + TOBN(0xb083c594, 0x01646f3c), TOBN(0x3bad30cf, 0x50c6d352), + TOBN(0xfeb2b202, 0x496bbcea), TOBN(0x3cf9fd4f, 0x18a1e8ba), + TOBN(0xd26de7ff, 0x1c066029), TOBN(0x39c81e9e, 0x4e9ed4f8), + TOBN(0xd8be0cb9, 0x7b390d35), TOBN(0x01df2bbd, 0x964aab27), + TOBN(0x3e8c1a65, 0xc3ef64f8), TOBN(0x567291d1, 0x716ed1dd), + TOBN(0x95499c6c, 0x5f5406d3), TOBN(0x71fdda39, 0x5ba8e23f), + TOBN(0xcfeb320e, 0xd5096ece), TOBN(0xbe7ba92b, 0xca66dd16), + TOBN(0x4608d36b, 0xc6fb5a7d), TOBN(0xe3eea15a, 0x6d2dd0e0), + TOBN(0x75b0a3eb, 0x8f97a36a), TOBN(0xf59814cc, 0x1c83de1e), + TOBN(0x56c9c5b0, 0x1c33c23f), TOBN(0xa96c1da4, 0x6faa4136), + TOBN(0x46bf2074, 0xde316551), TOBN(0x3b866e7b, 0x1f756c8f), + TOBN(0x727727d8, 0x1495ed6b), TOBN(0xb2394243, 0xb682dce7), + TOBN(0x8ab8454e, 0x758610f3), TOBN(0xc243ce84, 0x857d72a4), + TOBN(0x7b320d71, 0xdbbf370f), TOBN(0xff9afa37, 0x78e0f7ca), + TOBN(0x0119d1e0, 0xea7b523f), TOBN(0xb997f8cb, 0x058c7d42), + TOBN(0x285bcd2a, 0x37bbb184), TOBN(0x51dcec49, 0xa45d1fa6), + TOBN(0x6ade3b64, 0xe29634cb), TOBN(0x080c94a7, 0x26b86ef1), + TOBN(0xba583db1, 0x2283fbe3), TOBN(0x902bddc8, 0x5a9315ed), + TOBN(0x07c1ccb3, 0x86964bec), TOBN(0x78f4eacf, 0xb6258301), + TOBN(0x4bdf3a49, 0x56f90823), TOBN(0xba0f5080, 0x741d777b), + TOBN(0x091d71c3, 0xf38bf760), TOBN(0x9633d50f, 0x9b625b02), + TOBN(0x03ecb743, 0xb8c9de61), TOBN(0xb4751254, 0x5de74720), + TOBN(0x9f9defc9, 0x74ce1cb2), TOBN(0x774a4f6a, 0x00bd32ef), + TOBN(0xaca385f7, 0x73848f22), TOBN(0x53dad716, 0xf3f8558e), + TOBN(0xab7b34b0, 0x93c471f9), TOBN(0xf530e069, 0x19644bc7), + TOBN(0x3d9fb1ff, 0xdd59d31a), TOBN(0x4382e0df, 0x08daa795), + TOBN(0x165c6f4b, 0xd5cc88d7), TOBN(0xeaa392d5, 0x4a18c900), + TOBN(0x94203c67, 0x648024ee), TOBN(0x188763f2, 0x8c2fabcd), + TOBN(0xa80f87ac, 0xbbaec835), TOBN(0x632c96e0, 0xf29d8d54), + TOBN(0x29b0a60e, 0x4c00a95e), TOBN(0x2ef17f40, 0xe011e9fa), + TOBN(0xf6c0e1d1, 0x15b77223), TOBN(0xaaec2c62, 0x14b04e32), + TOBN(0xd35688d8, 0x3d84e58c), TOBN(0x2af5094c, 0x958571db), + TOBN(0x4fff7e19, 0x760682a6), TOBN(0x4cb27077, 0xe39a407c), + TOBN(0x0f59c547, 0x4ff0e321), TOBN(0x169f34a6, 0x1b34c8ff), + TOBN(0x2bff1096, 0x52bc1ba7), TOBN(0xa25423b7, 0x83583544), + TOBN(0x5d55d5d5, 0x0ac8b782), TOBN(0xff6622ec, 0x2db3c892), + TOBN(0x48fce741, 0x6b8bb642), TOBN(0x31d6998c, 0x69d7e3dc), + TOBN(0xdbaf8004, 0xcadcaed0), TOBN(0x801b0142, 0xd81d053c), + TOBN(0x94b189fc, 0x59630ec6), TOBN(0x120e9934, 0xaf762c8e), + TOBN(0x53a29aa4, 0xfdc6a404), TOBN(0x19d8e01e, 0xa1909948), + TOBN(0x3cfcabf1, 0xd7e89681), TOBN(0x3321a50d, 0x4e132d37), + TOBN(0xd0496863, 0xe9a86111), TOBN(0x8c0cde61, 0x06a3bc65), + TOBN(0xaf866c49, 0xfc9f8eef), TOBN(0x2066350e, 0xff7f5141), + TOBN(0x4f8a4689, 0xe56ddfbd), TOBN(0xea1b0c07, 0xfe32983a), + TOBN(0x2b317462, 0x873cb8cb), TOBN(0x658deddc, 0x2d93229f), + TOBN(0x65efaf4d, 0x0f64ef58), TOBN(0xfe43287d, 0x730cc7a8), + TOBN(0xaebc0c72, 0x3d047d70), TOBN(0x92efa539, 0xd92d26c9), + TOBN(0x06e78457, 0x94b56526), TOBN(0x415cb80f, 0x0961002d), + TOBN(0x89e5c565, 0x76dcb10f), TOBN(0x8bbb6982, 0xff9259fe), + TOBN(0x4fe8795b, 0x9abc2668), TOBN(0xb5d4f534, 0x1e678fb1), + TOBN(0x6601f3be, 0x7b7da2b9), TOBN(0x98da59e2, 0xa13d6805), + TOBN(0x190d8ea6, 0x01799a52), TOBN(0xa20cec41, 0xb86d2952), + TOBN(0x3062ffb2, 0x7fff2a7c), TOBN(0x741b32e5, 0x79f19d37), + TOBN(0xf80d8181, 0x4eb57d47), TOBN(0x7a2d0ed4, 0x16aef06b), + TOBN(0x09735fb0, 0x1cecb588), TOBN(0x1641caaa, 0xc6061f5b)} + , + {TOBN(0x7f99824f, 0x20151427), TOBN(0x206828b6, 0x92430206), + TOBN(0xaa9097d7, 0xe1112357), TOBN(0xacf9a2f2, 0x09e414ec), + TOBN(0xdbdac9da, 0x27915356), TOBN(0x7e0734b7, 0x001efee3), + TOBN(0x54fab5bb, 0xd2b288e2), TOBN(0x4c630fc4, 0xf62dd09c), + TOBN(0x8537107a, 0x1ac2703b), TOBN(0xb49258d8, 0x6bc857b5), + TOBN(0x57df14de, 0xbcdaccd1), TOBN(0x24ab68d7, 0xc4ae8529), + TOBN(0x7ed8b5d4, 0x734e59d0), TOBN(0x5f8740c8, 0xc495cc80), + TOBN(0x84aedd5a, 0x291db9b3), TOBN(0x80b360f8, 0x4fb995be), + TOBN(0xae915f5d, 0x5fa067d1), TOBN(0x4134b57f, 0x9668960c), + TOBN(0xbd3656d6, 0xa48edaac), TOBN(0xdac1e3e4, 0xfc1d7436), + TOBN(0x674ff869, 0xd81fbb26), TOBN(0x449ed3ec, 0xb26c33d4), + TOBN(0x85138705, 0xd94203e8), TOBN(0xccde538b, 0xbeeb6f4a), + TOBN(0x55d5c68d, 0xa61a76fa), TOBN(0x598b441d, 0xca1554dc), + TOBN(0xd39923b9, 0x773b279c), TOBN(0x33331d3c, 0x36bf9efc), + TOBN(0x2d4c848e, 0x298de399), TOBN(0xcfdb8e77, 0xa1a27f56), + TOBN(0x94c855ea, 0x57b8ab70), TOBN(0xdcdb9dae, 0x6f7879ba), + TOBN(0x7bdff8c2, 0x019f2a59), TOBN(0xb3ce5bb3, 0xcb4fbc74), + TOBN(0xea907f68, 0x8a9173dd), TOBN(0x6cd3d0d3, 0x95a75439), + TOBN(0x92ecc4d6, 0xefed021c), TOBN(0x09a9f9b0, 0x6a77339a), + TOBN(0x87ca6b15, 0x7188c64a), TOBN(0x10c29968, 0x44899158), + TOBN(0x5859a229, 0xed6e82ef), TOBN(0x16f338e3, 0x65ebaf4e), + TOBN(0x0cd31387, 0x5ead67ae), TOBN(0x1c73d228, 0x54ef0bb4), + TOBN(0x4cb55131, 0x74a5c8c7), TOBN(0x01cd2970, 0x7f69ad6a), + TOBN(0xa04d00dd, 0xe966f87e), TOBN(0xd96fe447, 0x0b7b0321), + TOBN(0x342ac06e, 0x88fbd381), TOBN(0x02cd4a84, 0x5c35a493), + TOBN(0xe8fa89de, 0x54f1bbcd), TOBN(0x341d6367, 0x2575ed4c), + TOBN(0xebe357fb, 0xd238202b), TOBN(0x600b4d1a, 0xa984ead9), + TOBN(0xc35c9f44, 0x52436ea0), TOBN(0x96fe0a39, 0xa370751b), + TOBN(0x4c4f0736, 0x7f636a38), TOBN(0x9f943fb7, 0x0e76d5cb), + TOBN(0xb03510ba, 0xa8b68b8b), TOBN(0xc246780a, 0x9ed07a1f), + TOBN(0x3c051415, 0x6d549fc2), TOBN(0xc2953f31, 0x607781ca), + TOBN(0x955e2c69, 0xd8d95413), TOBN(0xb300fadc, 0x7bd282e3), + TOBN(0x81fe7b50, 0x87e9189f), TOBN(0xdb17375c, 0xf42dda27), + TOBN(0x22f7d896, 0xcf0a5904), TOBN(0xa0e57c5a, 0xebe348e6), + TOBN(0xa61011d3, 0xf40e3c80), TOBN(0xb1189321, 0x8db705c5), + TOBN(0x4ed9309e, 0x50fedec3), TOBN(0xdcf14a10, 0x4d6d5c1d), + TOBN(0x056c265b, 0x55691342), TOBN(0xe8e08504, 0x91049dc7), + TOBN(0x131329f5, 0xc9bae20a), TOBN(0x96c8b3e8, 0xd9dccdb4), + TOBN(0x8c5ff838, 0xfb4ee6b4), TOBN(0xfc5a9aeb, 0x41e8ccf0), + TOBN(0x7417b764, 0xfae050c6), TOBN(0x0953c3d7, 0x00452080), + TOBN(0x21372682, 0x38dfe7e8), TOBN(0xea417e15, 0x2bb79d4b), + TOBN(0x59641f1c, 0x76e7cf2d), TOBN(0x271e3059, 0xea0bcfcc), + TOBN(0x624c7dfd, 0x7253ecbd), TOBN(0x2f552e25, 0x4fca6186), + TOBN(0xcbf84ecd, 0x4d866e9c), TOBN(0x73967709, 0xf68d4610), + TOBN(0xa14b1163, 0xc27901b4), TOBN(0xfd9236e0, 0x899b8bf3), + TOBN(0x42b091ec, 0xcbc6da0a), TOBN(0xbb1dac6f, 0x5ad1d297), + TOBN(0x80e61d53, 0xa91cf76e), TOBN(0x4110a412, 0xd31f1ee7), + TOBN(0x2d87c3ba, 0x13efcf77), TOBN(0x1f374bb4, 0xdf450d76), + TOBN(0x5e78e2f2, 0x0d188dab), TOBN(0xe3968ed0, 0xf4b885ef), + TOBN(0x46c0568e, 0x7314570f), TOBN(0x31616338, 0x01170521), + TOBN(0x18e1e7e2, 0x4f0c8afe), TOBN(0x4caa75ff, 0xdeea78da), + TOBN(0x82db67f2, 0x7c5d8a51), TOBN(0x36a44d86, 0x6f505370), + TOBN(0xd72c5bda, 0x0333974f), TOBN(0x5db516ae, 0x27a70146), + TOBN(0x34705281, 0x210ef921), TOBN(0xbff17a8f, 0x0c9c38e5), + TOBN(0x78f4814e, 0x12476da1), TOBN(0xc1e16613, 0x33c16980), + TOBN(0x9e5b386f, 0x424d4bca), TOBN(0x4c274e87, 0xc85740de), + TOBN(0xb6a9b88d, 0x6c2f5226), TOBN(0x14d1b944, 0x550d7ca8), + TOBN(0x580c85fc, 0x1fc41709), TOBN(0xc1da368b, 0x54c6d519), + TOBN(0x2b0785ce, 0xd5113cf7), TOBN(0x0670f633, 0x5a34708f), + TOBN(0x46e23767, 0x15cc3f88), TOBN(0x1b480cfa, 0x50c72c8f), + TOBN(0x20288602, 0x4147519a), TOBN(0xd0981eac, 0x26b372f0), + TOBN(0xa9d4a7ca, 0xa785ebc8), TOBN(0xd953c50d, 0xdbdf58e9), + TOBN(0x9d6361cc, 0xfd590f8f), TOBN(0x72e9626b, 0x44e6c917), + TOBN(0x7fd96110, 0x22eb64cf), TOBN(0x863ebb7e, 0x9eb288f3), + TOBN(0x6e6ab761, 0x6aca8ee7), TOBN(0x97d10b39, 0xd7b40358), + TOBN(0x1687d377, 0x1e5feb0d), TOBN(0xc83e50e4, 0x8265a27a), + TOBN(0x8f75a9fe, 0xc954b313), TOBN(0xcc2e8f47, 0x310d1f61), + TOBN(0xf5ba81c5, 0x6557d0e0), TOBN(0x25f9680c, 0x3eaf6207), + TOBN(0xf95c6609, 0x4354080b), TOBN(0x5225bfa5, 0x7bf2fe1c), + TOBN(0xc5c004e2, 0x5c7d98fa), TOBN(0x3561bf1c, 0x019aaf60), + TOBN(0x5e6f9f17, 0xba151474), TOBN(0xdec2f934, 0xb04f6eca), + TOBN(0x64e368a1, 0x269acb1e), TOBN(0x1332d9e4, 0x0cdda493), + TOBN(0x60d6cf69, 0xdf23de05), TOBN(0x66d17da2, 0x009339a0), + TOBN(0x9fcac985, 0x0a693923), TOBN(0xbcf057fc, 0xed7c6a6d), + TOBN(0xc3c5c8c5, 0xf0b5662c), TOBN(0x25318dd8, 0xdcba4f24), + TOBN(0x60e8cb75, 0x082b69ff), TOBN(0x7c23b3ee, 0x1e728c01), + TOBN(0x15e10a0a, 0x097e4403), TOBN(0xcb3d0a86, 0x19854665), + TOBN(0x88d8e211, 0xd67d4826), TOBN(0xb39af66e, 0x0b9d2839), + TOBN(0xa5f94588, 0xbd475ca8), TOBN(0xe06b7966, 0xc077b80b), + TOBN(0xfedb1485, 0xda27c26c), TOBN(0xd290d33a, 0xfe0fd5e0), + TOBN(0xa40bcc47, 0xf34fb0fa), TOBN(0xb4760cc8, 0x1fb1ab09), + TOBN(0x8fca0993, 0xa273bfe3), TOBN(0x13e4fe07, 0xf70b213c), + TOBN(0x3bcdb992, 0xfdb05163), TOBN(0x8c484b11, 0x0c2b19b6), + TOBN(0x1acb815f, 0xaaf2e3e2), TOBN(0xc6905935, 0xb89ff1b4), + TOBN(0xb2ad6f9d, 0x586e74e1), TOBN(0x488883ad, 0x67b80484), + TOBN(0x758aa2c7, 0x369c3ddb), TOBN(0x8ab74e69, 0x9f9afd31), + TOBN(0x10fc2d28, 0x5e21beb1), TOBN(0x3484518a, 0x318c42f9), + TOBN(0x377427dc, 0x53cf40c3), TOBN(0x9de0781a, 0x391bc1d9), + TOBN(0x8faee858, 0x693807e1), TOBN(0xa3865327, 0x4e81ccc7), + TOBN(0x02c30ff2, 0x6f835b84), TOBN(0xb604437b, 0x0d3d38d4), + TOBN(0xb3fc8a98, 0x5ca1823d), TOBN(0xb82f7ec9, 0x03be0324), + TOBN(0xee36d761, 0xcf684a33), TOBN(0x5a01df0e, 0x9f29bf7d), + TOBN(0x686202f3, 0x1306583d), TOBN(0x05b10da0, 0x437c622e), + TOBN(0xbf9aaa0f, 0x076a7bc8), TOBN(0x25e94efb, 0x8f8f4e43), + TOBN(0x8a35c9b7, 0xfa3dc26d), TOBN(0xe0e5fb93, 0x96ff03c5), + TOBN(0xa77e3843, 0xebc394ce), TOBN(0xcede6595, 0x8361de60), + TOBN(0xd27c22f6, 0xa1993545), TOBN(0xab01cc36, 0x24d671ba), + TOBN(0x63fa2877, 0xa169c28e), TOBN(0x925ef904, 0x2eb08376), + TOBN(0x3b2fa3cf, 0x53aa0b32), TOBN(0xb27beb5b, 0x71c49d7a), + TOBN(0xb60e1834, 0xd105e27f), TOBN(0xd6089788, 0x4f68570d), + TOBN(0x23094ce0, 0xd6fbc2ac), TOBN(0x738037a1, 0x815ff551), + TOBN(0xda73b1bb, 0x6bef119c), TOBN(0xdcf6c430, 0xeef506ba), + TOBN(0x00e4fe7b, 0xe3ef104a), TOBN(0xebdd9a2c, 0x0a065628), + TOBN(0x853a81c3, 0x8792043e), TOBN(0x22ad6ece, 0xb3b59108), + TOBN(0x9fb813c0, 0x39cd297d), TOBN(0x8ec7e16e, 0x05bda5d9), + TOBN(0x2834797c, 0x0d104b96), TOBN(0xcc11a2e7, 0x7c511510), + TOBN(0x96ca5a53, 0x96ee6380), TOBN(0x054c8655, 0xcea38742), + TOBN(0xb5946852, 0xd54dfa7d), TOBN(0x97c422e7, 0x1f4ab207), + TOBN(0xbf907509, 0x0c22b540), TOBN(0x2cde42aa, 0xb7c267d4), + TOBN(0xba18f9ed, 0x5ab0d693), TOBN(0x3ba62aa6, 0x6e4660d9), + TOBN(0xb24bf97b, 0xab9ea96a), TOBN(0x5d039642, 0xe3b60e32), + TOBN(0x4e6a4506, 0x7c4d9bd5), TOBN(0x666c5b9e, 0x7ed4a6a4), + TOBN(0xfa3fdcd9, 0x8edbd7cc), TOBN(0x4660bb87, 0xc6ccd753), + TOBN(0x9ae90820, 0x21e6b64f), TOBN(0x8a56a713, 0xb36bfb3f), + TOBN(0xabfce096, 0x5726d47f), TOBN(0x9eed01b2, 0x0b1a9a7f), + TOBN(0x30e9cad4, 0x4eb74a37), TOBN(0x7b2524cc, 0x53e9666d), + TOBN(0x6a29683b, 0x8f4b002f), TOBN(0xc2200d7a, 0x41f4fc20), + TOBN(0xcf3af47a, 0x3a338acc), TOBN(0x6539a4fb, 0xe7128975), + TOBN(0xcec31c14, 0xc33c7fcf), TOBN(0x7eb6799b, 0xc7be322b), + TOBN(0x119ef4e9, 0x6646f623), TOBN(0x7b7a26a5, 0x54d7299b), + TOBN(0xcb37f08d, 0x403f46f2), TOBN(0x94b8fc43, 0x1a0ec0c7), + TOBN(0xbb8514e3, 0xc332142f), TOBN(0xf3ed2c33, 0xe80d2a7a), + TOBN(0x8d2080af, 0xb639126c), TOBN(0xf7b6be60, 0xe3553ade), + TOBN(0x3950aa9f, 0x1c7e2b09), TOBN(0x847ff958, 0x6410f02b), + TOBN(0x877b7cf5, 0x678a31b0), TOBN(0xd50301ae, 0x3998b620), + TOBN(0x734257c5, 0xc00fb396), TOBN(0xf9fb18a0, 0x04e672a6), + TOBN(0xff8bd8eb, 0xe8758851), TOBN(0x1e64e4c6, 0x5d99ba44), + TOBN(0x4b8eaedf, 0x7dfd93b7), TOBN(0xba2f2a98, 0x04e76b8c), + TOBN(0x7d790cba, 0xe8053433), TOBN(0xc8e725a0, 0x3d2c9585), + TOBN(0x58c5c476, 0xcdd8f5ed), TOBN(0xd106b952, 0xefa9fe1d), + TOBN(0x3c5c775b, 0x0eff13a9), TOBN(0x242442ba, 0xe057b930), + TOBN(0xe9f458d4, 0xc9b70cbd), TOBN(0x69b71448, 0xa3cdb89a), + TOBN(0x41ee46f6, 0x0e2ed742), TOBN(0x573f1045, 0x40067493), + TOBN(0xb1e154ff, 0x9d54c304), TOBN(0x2ad0436a, 0x8d3a7502), + TOBN(0xee4aaa2d, 0x431a8121), TOBN(0xcd38b3ab, 0x886f11ed), + TOBN(0x57d49ea6, 0x034a0eb7), TOBN(0xd2b773bd, 0xf7e85e58), + TOBN(0x4a559ac4, 0x9b5c1f14), TOBN(0xc444be1a, 0x3e54df2b), + TOBN(0x13aad704, 0xeda41891), TOBN(0xcd927bec, 0x5eb5c788), + TOBN(0xeb3c8516, 0xe48c8a34), TOBN(0x1b7ac812, 0x4b546669), + TOBN(0x1815f896, 0x594df8ec), TOBN(0x87c6a79c, 0x79227865), + TOBN(0xae02a2f0, 0x9b56ddbd), TOBN(0x1339b5ac, 0x8a2f1cf3), + TOBN(0xf2b569c7, 0x839dff0d), TOBN(0xb0b9e864, 0xfee9a43d), + TOBN(0x4ff8ca41, 0x77bb064e), TOBN(0x145a2812, 0xfd249f63), + TOBN(0x3ab7beac, 0xf86f689a), TOBN(0x9bafec27, 0x01d35f5e), + TOBN(0x28054c65, 0x4265aa91), TOBN(0xa4b18304, 0x035efe42), + TOBN(0x6887b0e6, 0x9639dec7), TOBN(0xf4b8f6ad, 0x3d52aea5), + TOBN(0xfb9293cc, 0x971a8a13), TOBN(0x3f159e5d, 0x4c934d07), + TOBN(0x2c50e9b1, 0x09acbc29), TOBN(0x08eb65e6, 0x7154d129), + TOBN(0x4feff589, 0x30b75c3e), TOBN(0x0bb82fe2, 0x94491c93), + TOBN(0xd8ac377a, 0x89af62bb), TOBN(0xd7b51490, 0x9685e49f), + TOBN(0xabca9a7b, 0x04497f19), TOBN(0x1b35ed0a, 0x1a7ad13f), + TOBN(0x6b601e21, 0x3ec86ed6), TOBN(0xda91fcb9, 0xce0c76f1), + TOBN(0x9e28507b, 0xd7ab27e1), TOBN(0x7c19a555, 0x63945b7b), + TOBN(0x6b43f0a1, 0xaafc9827), TOBN(0x443b4fbd, 0x3aa55b91), + TOBN(0x962b2e65, 0x6962c88f), TOBN(0x139da8d4, 0xce0db0ca), + TOBN(0xb93f05dd, 0x1b8d6c4f), TOBN(0x779cdff7, 0x180b9824), + TOBN(0xbba23fdd, 0xae57c7b7), TOBN(0x345342f2, 0x1b932522), + TOBN(0xfd9c80fe, 0x556d4aa3), TOBN(0xa03907ba, 0x6525bb61), + TOBN(0x38b010e1, 0xff218933), TOBN(0xc066b654, 0xaa52117b), + TOBN(0x8e141920, 0x94f2e6ea), TOBN(0x66a27dca, 0x0d32f2b2), + TOBN(0x69c7f993, 0x048b3717), TOBN(0xbf5a989a, 0xb178ae1c), + TOBN(0x49fa9058, 0x564f1d6b), TOBN(0x27ec6e15, 0xd31fde4e), + TOBN(0x4cce0373, 0x7276e7fc), TOBN(0x64086d79, 0x89d6bf02), + TOBN(0x5a72f046, 0x4ccdd979), TOBN(0x909c3566, 0x47775631), + TOBN(0x1c07bc6b, 0x75dd7125), TOBN(0xb4c6bc97, 0x87a0428d), + TOBN(0x507ece52, 0xfdeb6b9d), TOBN(0xfca56512, 0xb2c95432), + TOBN(0x15d97181, 0xd0e8bd06), TOBN(0x384dd317, 0xc6bb46ea), + TOBN(0x5441ea20, 0x3952b624), TOBN(0xbcf70dee, 0x4e7dc2fb), + TOBN(0x372b016e, 0x6628e8c3), TOBN(0x07a0d667, 0xb60a7522), + TOBN(0xcf05751b, 0x0a344ee2), TOBN(0x0ec09a48, 0x118bdeec), + TOBN(0x6e4b3d4e, 0xd83dce46), TOBN(0x43a6316d, 0x99d2fc6e), + TOBN(0xa99d8989, 0x56cf044c), TOBN(0x7c7f4454, 0xae3e5fb7), + TOBN(0xb2e6b121, 0xfbabbe92), TOBN(0x281850fb, 0xe1330076), + TOBN(0x093581ec, 0x97890015), TOBN(0x69b1dded, 0x75ff77f5), + TOBN(0x7cf0b18f, 0xab105105), TOBN(0x953ced31, 0xa89ccfef), + TOBN(0x3151f85f, 0xeb914009), TOBN(0x3c9f1b87, 0x88ed48ad), + TOBN(0xc9aba1a1, 0x4a7eadcb), TOBN(0x928e7501, 0x522e71cf), + TOBN(0xeaede727, 0x3a2e4f83), TOBN(0x467e10d1, 0x1ce3bbd3), + TOBN(0xf3442ac3, 0xb955dcf0), TOBN(0xba96307d, 0xd3d5e527), + TOBN(0xf763a10e, 0xfd77f474), TOBN(0x5d744bd0, 0x6a6e1ff0), + TOBN(0xd287282a, 0xa777899e), TOBN(0xe20eda8f, 0xd03f3cde), + TOBN(0x6a7e75bb, 0x50b07d31), TOBN(0x0b7e2a94, 0x6f379de4), + TOBN(0x31cb64ad, 0x19f593cf), TOBN(0x7b1a9e4f, 0x1e76ef1d), + TOBN(0xe18c9c9d, 0xb62d609c), TOBN(0x439bad6d, 0xe779a650), + TOBN(0x219d9066, 0xe032f144), TOBN(0x1db632b8, 0xe8b2ec6a), + TOBN(0xff0d0fd4, 0xfda12f78), TOBN(0x56fb4c2d, 0x2a25d265), + TOBN(0x5f4e2ee1, 0x255a03f1), TOBN(0x61cd6af2, 0xe96af176), + TOBN(0xe0317ba8, 0xd068bc97), TOBN(0x927d6bab, 0x264b988e), + TOBN(0xa18f07e0, 0xe90fb21e), TOBN(0x00fd2b80, 0xbba7fca1), + TOBN(0x20387f27, 0x95cd67b5), TOBN(0x5b89a4e7, 0xd39707f7), + TOBN(0x8f83ad3f, 0x894407ce), TOBN(0xa0025b94, 0x6c226132), + TOBN(0xc79563c7, 0xf906c13b), TOBN(0x5f548f31, 0x4e7bb025), + TOBN(0x2b4c6b8f, 0xeac6d113), TOBN(0xa67e3f9c, 0x0e813c76), + TOBN(0x3982717c, 0x3fe1f4b9), TOBN(0x58865819, 0x26d8050e), + TOBN(0x99f3640c, 0xf7f06f20), TOBN(0xdc610216, 0x2a66ebc2), + TOBN(0x52f2c175, 0x767a1e08), TOBN(0x05660e1a, 0x5999871b), + TOBN(0x6b0f1762, 0x6d3c4693), TOBN(0xf0e7d627, 0x37ed7bea), + TOBN(0xc51758c7, 0xb75b226d), TOBN(0x40a88628, 0x1f91613b), + TOBN(0x889dbaa7, 0xbbb38ce0), TOBN(0xe0404b65, 0xbddcad81), + TOBN(0xfebccd3a, 0x8bc9671f), TOBN(0xfbf9a357, 0xee1f5375), + TOBN(0x5dc169b0, 0x28f33398), TOBN(0xb07ec11d, 0x72e90f65), + TOBN(0xae7f3b4a, 0xfaab1eb1), TOBN(0xd970195e, 0x5f17538a), + TOBN(0x52b05cbe, 0x0181e640), TOBN(0xf5debd62, 0x2643313d), + TOBN(0x76148154, 0x5df31f82), TOBN(0x23e03b33, 0x3a9e13c5), + TOBN(0xff758949, 0x4fde0c1f), TOBN(0xbf8a1abe, 0xe5b6ec20), + TOBN(0x702278fb, 0x87e1db6c), TOBN(0xc447ad7a, 0x35ed658f), + TOBN(0x48d4aa38, 0x03d0ccf2), TOBN(0x80acb338, 0x819a7c03), + TOBN(0x9bc7c89e, 0x6e17cecc), TOBN(0x46736b8b, 0x03be1d82), + TOBN(0xd65d7b60, 0xc0432f96), TOBN(0xddebe7a3, 0xdeb5442f), + TOBN(0x79a25307, 0x7dff69a2), TOBN(0x37a56d94, 0x02cf3122), + TOBN(0x8bab8aed, 0xf2350d0a), TOBN(0x13c3f276, 0x037b0d9a), + TOBN(0xc664957c, 0x44c65cae), TOBN(0x88b44089, 0xc2e71a88), + TOBN(0xdb88e5a3, 0x5cb02664), TOBN(0x5d4c0bf1, 0x8686c72e), + TOBN(0xea3d9b62, 0xa682d53e), TOBN(0x9b605ef4, 0x0b2ad431), + TOBN(0x71bac202, 0xc69645d0), TOBN(0xa115f03a, 0x6a1b66e7), + TOBN(0xfe2c563a, 0x158f4dc4), TOBN(0xf715b3a0, 0x4d12a78c), + TOBN(0x8f7f0a48, 0xd413213a), TOBN(0x2035806d, 0xc04becdb), + TOBN(0xecd34a99, 0x5d8587f5), TOBN(0x4d8c3079, 0x9f6d3a71), + TOBN(0x1b2a2a67, 0x8d95a8f6), TOBN(0xc58c9d7d, 0xf2110d0d), + TOBN(0xdeee81d5, 0xcf8fba3f), TOBN(0xa42be3c0, 0x0c7cdf68), + TOBN(0x2126f742, 0xd43b5eaa), TOBN(0x054a0766, 0xdfa59b85), + TOBN(0x9d0d5e36, 0x126bfd45), TOBN(0xa1f8fbd7, 0x384f8a8f), + TOBN(0x317680f5, 0xd563fccc), TOBN(0x48ca5055, 0xf280a928), + TOBN(0xe00b81b2, 0x27b578cf), TOBN(0x10aad918, 0x2994a514), + TOBN(0xd9e07b62, 0xb7bdc953), TOBN(0x9f0f6ff2, 0x5bc086dd), + TOBN(0x09d1ccff, 0x655eee77), TOBN(0x45475f79, 0x5bef7df1), + TOBN(0x3faa28fa, 0x86f702cc), TOBN(0x92e60905, 0x0f021f07), + TOBN(0xe9e62968, 0x7f8fa8c6), TOBN(0xbd71419a, 0xf036ea2c), + TOBN(0x171ee1cc, 0x6028da9a), TOBN(0x5352fe1a, 0xc251f573), + TOBN(0xf8ff236e, 0x3fa997f4), TOBN(0xd831b6c9, 0xa5749d5f), + TOBN(0x7c872e1d, 0xe350e2c2), TOBN(0xc56240d9, 0x1e0ce403), + TOBN(0xf9deb077, 0x6974f5cb), TOBN(0x7d50ba87, 0x961c3728), + TOBN(0xd6f89426, 0x5a3a2518), TOBN(0xcf817799, 0xc6303d43), + TOBN(0x510a0471, 0x619e5696), TOBN(0xab049ff6, 0x3a5e307b), + TOBN(0xe4cdf9b0, 0xfeb13ec7), TOBN(0xd5e97117, 0x9d8ff90c), + TOBN(0xf6f64d06, 0x9afa96af), TOBN(0x00d0bf5e, 0x9d2012a2), + TOBN(0xe63f301f, 0x358bcdc0), TOBN(0x07689e99, 0x0a9d47f8), + TOBN(0x1f689e2f, 0x4f43d43a), TOBN(0x4d542a16, 0x90920904), + TOBN(0xaea293d5, 0x9ca0a707), TOBN(0xd061fe45, 0x8ac68065), + TOBN(0x1033bf1b, 0x0090008c), TOBN(0x29749558, 0xc08a6db6), + TOBN(0x74b5fc59, 0xc1d5d034), TOBN(0xf712e9f6, 0x67e215e0), + TOBN(0xfd520cbd, 0x860200e6), TOBN(0x0229acb4, 0x3ea22588), + TOBN(0x9cd1e14c, 0xfff0c82e), TOBN(0x87684b62, 0x59c69e73), + TOBN(0xda85e61c, 0x96ccb989), TOBN(0x2d5dbb02, 0xa3d06493), + TOBN(0xf22ad33a, 0xe86b173c), TOBN(0xe8e41ea5, 0xa79ff0e3), + TOBN(0x01d2d725, 0xdd0d0c10), TOBN(0x31f39088, 0x032d28f9), + TOBN(0x7b3f71e1, 0x7829839e), TOBN(0x0cf691b4, 0x4502ae58), + TOBN(0xef658dbd, 0xbefc6115), TOBN(0xa5cd6ee5, 0xb3ab5314), + TOBN(0x206c8d7b, 0x5f1d2347), TOBN(0x794645ba, 0x4cc2253a), + TOBN(0xd517d8ff, 0x58389e08), TOBN(0x4fa20dee, 0x9f847288), + TOBN(0xeba072d8, 0xd797770a), TOBN(0x7360c91d, 0xbf429e26), + TOBN(0x7200a3b3, 0x80af8279), TOBN(0x6a1c9150, 0x82dadce3), + TOBN(0x0ee6d3a7, 0xc35d8794), TOBN(0x042e6558, 0x0356bae5), + TOBN(0x9f59698d, 0x643322fd), TOBN(0x9379ae15, 0x50a61967), + TOBN(0x64b9ae62, 0xfcc9981e), TOBN(0xaed3d631, 0x6d2934c6), + TOBN(0x2454b302, 0x5e4e65eb), TOBN(0xab09f647, 0xf9950428)} + , + {TOBN(0xb2083a12, 0x22248acc), TOBN(0x1f6ec0ef, 0x3264e366), + TOBN(0x5659b704, 0x5afdee28), TOBN(0x7a823a40, 0xe6430bb5), + TOBN(0x24592a04, 0xe1900a79), TOBN(0xcde09d4a, 0xc9ee6576), + TOBN(0x52b6463f, 0x4b5ea54a), TOBN(0x1efe9ed3, 0xd3ca65a7), + TOBN(0xe27a6dbe, 0x305406dd), TOBN(0x8eb7dc7f, 0xdd5d1957), + TOBN(0xf54a6876, 0x387d4d8f), TOBN(0x9c479409, 0xc7762de4), + TOBN(0xbe4d5b5d, 0x99b30778), TOBN(0x25380c56, 0x6e793682), + TOBN(0x602d37f3, 0xdac740e3), TOBN(0x140deabe, 0x1566e4ae), + TOBN(0x4481d067, 0xafd32acf), TOBN(0xd8f0fcca, 0xe1f71ccf), + TOBN(0xd208dd0c, 0xb596f2da), TOBN(0xd049d730, 0x9aad93f9), + TOBN(0xc79f263d, 0x42ab580e), TOBN(0x09411bb1, 0x23f707b4), + TOBN(0x8cfde1ff, 0x835e0eda), TOBN(0x72707490, 0x90f03402), + TOBN(0xeaee6126, 0xc49a861e), TOBN(0x024f3b65, 0xe14f0d06), + TOBN(0x51a3f1e8, 0xc69bfc17), TOBN(0xc3c3a8e9, 0xa7686381), + TOBN(0x3400752c, 0xb103d4c8), TOBN(0x02bc4613, 0x9218b36b), + TOBN(0xc67f75eb, 0x7651504a), TOBN(0xd6848b56, 0xd02aebfa), + TOBN(0xbd9802e6, 0xc30fa92b), TOBN(0x5a70d96d, 0x9a552784), + TOBN(0x9085c4ea, 0x3f83169b), TOBN(0xfa9423bb, 0x06908228), + TOBN(0x2ffebe12, 0xfe97a5b9), TOBN(0x85da6049, 0x71b99118), + TOBN(0x9cbc2f7f, 0x63178846), TOBN(0xfd96bc70, 0x9153218e), + TOBN(0x958381db, 0x1782269b), TOBN(0xae34bf79, 0x2597e550), + TOBN(0xbb5c6064, 0x5f385153), TOBN(0x6f0e96af, 0xe3088048), + TOBN(0xbf6a0215, 0x77884456), TOBN(0xb3b5688c, 0x69310ea7), + TOBN(0x17c94295, 0x04fad2de), TOBN(0xe020f0e5, 0x17896d4d), + TOBN(0x730ba0ab, 0x0976505f), TOBN(0x567f6813, 0x095e2ec5), + TOBN(0x47062010, 0x6331ab71), TOBN(0x72cfa977, 0x41d22b9f), + TOBN(0x33e55ead, 0x8a2373da), TOBN(0xa8d0d5f4, 0x7ba45a68), + TOBN(0xba1d8f9c, 0x03029d15), TOBN(0x8f34f1cc, 0xfc55b9f3), + TOBN(0xcca4428d, 0xbbe5a1a9), TOBN(0x8187fd5f, 0x3126bd67), + TOBN(0x0036973a, 0x48105826), TOBN(0xa39b6663, 0xb8bd61a0), + TOBN(0x6d42deef, 0x2d65a808), TOBN(0x4969044f, 0x94636b19), + TOBN(0xf611ee47, 0xdd5d564c), TOBN(0x7b2f3a49, 0xd2873077), + TOBN(0x94157d45, 0x300eb294), TOBN(0x2b2a656e, 0x169c1494), + TOBN(0xc000dd76, 0xd3a47aa9), TOBN(0xa2864e4f, 0xa6243ea4), + TOBN(0x82716c47, 0xdb89842e), TOBN(0x12dfd7d7, 0x61479fb7), + TOBN(0x3b9a2c56, 0xe0b2f6dc), TOBN(0x46be862a, 0xd7f85d67), + TOBN(0x03b0d8dd, 0x0f82b214), TOBN(0x460c34f9, 0xf103cbc6), + TOBN(0xf32e5c03, 0x18d79e19), TOBN(0x8b8888ba, 0xa84117f8), + TOBN(0x8f3c37dc, 0xc0722677), TOBN(0x10d21be9, 0x1c1c0f27), + TOBN(0xd47c8468, 0xe0f7a0c6), TOBN(0x9bf02213, 0xadecc0e0), + TOBN(0x0baa7d12, 0x42b48b99), TOBN(0x1bcb665d, 0x48424096), + TOBN(0x8b847cd6, 0xebfb5cfb), TOBN(0x87c2ae56, 0x9ad4d10d), + TOBN(0xf1cbb122, 0x0de36726), TOBN(0xe7043c68, 0x3fdfbd21), + TOBN(0x4bd0826a, 0x4e79d460), TOBN(0x11f5e598, 0x4bd1a2cb), + TOBN(0x97554160, 0xb7fe7b6e), TOBN(0x7d16189a, 0x400a3fb2), + TOBN(0xd73e9bea, 0xe328ca1e), TOBN(0x0dd04b97, 0xe793d8cc), + TOBN(0xa9c83c9b, 0x506db8cc), TOBN(0x5cd47aae, 0xcf38814c), + TOBN(0x26fc430d, 0xb64b45e6), TOBN(0x079b5499, 0xd818ea84), + TOBN(0xebb01102, 0xc1c24a3b), TOBN(0xca24e568, 0x1c161c1a), + TOBN(0x103eea69, 0x36f00a4a), TOBN(0x9ad76ee8, 0x76176c7b), + TOBN(0x97451fc2, 0x538e0ff7), TOBN(0x94f89809, 0x6604b3b0), + TOBN(0x6311436e, 0x3249cfd7), TOBN(0x27b4a7bd, 0x41224f69), + TOBN(0x03b5d21a, 0xe0ac2941), TOBN(0x279b0254, 0xc2d31937), + TOBN(0x3307c052, 0xcac992d0), TOBN(0x6aa7cb92, 0xefa8b1f3), + TOBN(0x5a182580, 0x0d37c7a5), TOBN(0x13380c37, 0x342d5422), + TOBN(0x92ac2d66, 0xd5d2ef92), TOBN(0x035a70c9, 0x030c63c6), + TOBN(0xc16025dd, 0x4ce4f152), TOBN(0x1f419a71, 0xf9df7c06), + TOBN(0x6d5b2214, 0x91e4bb14), TOBN(0xfc43c6cc, 0x839fb4ce), + TOBN(0x49f06591, 0x925d6b2d), TOBN(0x4b37d9d3, 0x62186598), + TOBN(0x8c54a971, 0xd01b1629), TOBN(0xe1a9c29f, 0x51d50e05), + TOBN(0x5109b785, 0x71ba1861), TOBN(0x48b22d5c, 0xd0c8f93d), + TOBN(0xe8fa84a7, 0x8633bb93), TOBN(0x53fba6ba, 0x5aebbd08), + TOBN(0x7ff27df3, 0xe5eea7d8), TOBN(0x521c8796, 0x68ca7158), + TOBN(0xb9d5133b, 0xce6f1a05), TOBN(0x2d50cd53, 0xfd0ebee4), + TOBN(0xc82115d6, 0xc5a3ef16), TOBN(0x993eff9d, 0xba079221), + TOBN(0xe4da2c5e, 0x4b5da81c), TOBN(0x9a89dbdb, 0x8033fd85), + TOBN(0x60819ebf, 0x2b892891), TOBN(0x53902b21, 0x5d14a4d5), + TOBN(0x6ac35051, 0xd7fda421), TOBN(0xcc6ab885, 0x61c83284), + TOBN(0x14eba133, 0xf74cff17), TOBN(0x240aaa03, 0xecb813f2), + TOBN(0xcfbb6540, 0x6f665bee), TOBN(0x084b1fe4, 0xa425ad73), + TOBN(0x009d5d16, 0xd081f6a6), TOBN(0x35304fe8, 0xeef82c90), + TOBN(0xf20346d5, 0xaa9eaa22), TOBN(0x0ada9f07, 0xac1c91e3), + TOBN(0xa6e21678, 0x968a6144), TOBN(0x54c1f77c, 0x07b31a1e), + TOBN(0xd6bb787e, 0x5781fbe1), TOBN(0x61bd2ee0, 0xe31f1c4a), + TOBN(0xf25aa1e9, 0x781105fc), TOBN(0x9cf2971f, 0x7b2f8e80), + TOBN(0x26d15412, 0xcdff919b), TOBN(0x01db4ebe, 0x34bc896e), + TOBN(0x7d9b3e23, 0xb40df1cf), TOBN(0x59337373, 0x94e971b4), + TOBN(0xbf57bd14, 0x669cf921), TOBN(0x865daedf, 0x0c1a1064), + TOBN(0x3eb70bd3, 0x83279125), TOBN(0xbc3d5b9f, 0x34ecdaab), + TOBN(0x91e3ed7e, 0x5f755caf), TOBN(0x49699f54, 0xd41e6f02), + TOBN(0x185770e1, 0xd4a7a15b), TOBN(0x08f3587a, 0xeaac87e7), + TOBN(0x352018db, 0x473133ea), TOBN(0x674ce719, 0x04fd30fc), + TOBN(0x7b8d9835, 0x088b3e0e), TOBN(0x7a0356a9, 0x5d0d47a1), + TOBN(0x9d9e7659, 0x6474a3c4), TOBN(0x61ea48a7, 0xff66966c), + TOBN(0x30417758, 0x0f3e4834), TOBN(0xfdbb21c2, 0x17a9afcb), + TOBN(0x756fa17f, 0x2f9a67b3), TOBN(0x2a6b2421, 0xa245c1a8), + TOBN(0x64be2794, 0x4af02291), TOBN(0xade465c6, 0x2a5804fe), + TOBN(0x8dffbd39, 0xa6f08fd7), TOBN(0xc4efa84c, 0xaa14403b), + TOBN(0xa1b91b2a, 0x442b0f5c), TOBN(0xb748e317, 0xcf997736), + TOBN(0x8d1b62bf, 0xcee90e16), TOBN(0x907ae271, 0x0b2078c0), + TOBN(0xdf31534b, 0x0c9bcddd), TOBN(0x043fb054, 0x39adce83), + TOBN(0x99031043, 0xd826846a), TOBN(0x61a9c0d6, 0xb144f393), + TOBN(0xdab48046, 0x47718427), TOBN(0xdf17ff9b, 0x6e830f8b), + TOBN(0x408d7ee8, 0xe49a1347), TOBN(0x6ac71e23, 0x91c1d4ae), + TOBN(0xc8cbb9fd, 0x1defd73c), TOBN(0x19840657, 0xbbbbfec5), + TOBN(0x39db1cb5, 0x9e7ef8ea), TOBN(0x78aa8296, 0x64105f30), + TOBN(0xa3d9b7f0, 0xa3738c29), TOBN(0x0a2f235a, 0xbc3250a3), + TOBN(0x55e506f6, 0x445e4caf), TOBN(0x0974f73d, 0x33475f7a), + TOBN(0xd37dbba3, 0x5ba2f5a8), TOBN(0x542c6e63, 0x6af40066), + TOBN(0x26d99b53, 0xc5d73e2c), TOBN(0x06060d7d, 0x6c3ca33e), + TOBN(0xcdbef1c2, 0x065fef4a), TOBN(0x77e60f7d, 0xfd5b92e3), + TOBN(0xd7c549f0, 0x26708350), TOBN(0x201b3ad0, 0x34f121bf), + TOBN(0x5fcac2a1, 0x0334fc14), TOBN(0x8a9a9e09, 0x344552f6), + TOBN(0x7dd8a1d3, 0x97653082), TOBN(0x5fc0738f, 0x79d4f289), + TOBN(0x787d244d, 0x17d2d8c3), TOBN(0xeffc6345, 0x70830684), + TOBN(0x5ddb96dd, 0xe4f73ae5), TOBN(0x8efb14b1, 0x172549a5), + TOBN(0x6eb73eee, 0x2245ae7a), TOBN(0xbca4061e, 0xea11f13e), + TOBN(0xb577421d, 0x30b01f5d), TOBN(0xaa688b24, 0x782e152c), + TOBN(0x67608e71, 0xbd3502ba), TOBN(0x4ef41f24, 0xb4de75a0), + TOBN(0xb08dde5e, 0xfd6125e5), TOBN(0xde484825, 0xa409543f), + TOBN(0x1f198d98, 0x65cc2295), TOBN(0x428a3771, 0x6e0edfa2), + TOBN(0x4f9697a2, 0xadf35fc7), TOBN(0x01a43c79, 0xf7cac3c7), + TOBN(0xb05d7059, 0x0fd3659a), TOBN(0x8927f30c, 0xbb7f2d9a), + TOBN(0x4023d1ac, 0x8cf984d3), TOBN(0x32125ed3, 0x02897a45), + TOBN(0xfb572dad, 0x3d414205), TOBN(0x73000ef2, 0xe3fa82a9), + TOBN(0x4c0868e9, 0xf10a5581), TOBN(0x5b61fc67, 0x6b0b3ca5), + TOBN(0xc1258d5b, 0x7cae440c), TOBN(0x21c08b41, 0x402b7531), + TOBN(0xf61a8955, 0xde932321), TOBN(0x3568faf8, 0x2d1408af), + TOBN(0x71b15e99, 0x9ecf965b), TOBN(0xf14ed248, 0xe917276f), + TOBN(0xc6f4caa1, 0x820cf9e2), TOBN(0x681b20b2, 0x18d83c7e), + TOBN(0x6cde738d, 0xc6c01120), TOBN(0x71db0813, 0xae70e0db), + TOBN(0x95fc0644, 0x74afe18c), TOBN(0x34619053, 0x129e2be7), + TOBN(0x80615cea, 0xdb2a3b15), TOBN(0x0a49a19e, 0xdb4c7073), + TOBN(0x0e1b84c8, 0x8fd2d367), TOBN(0xd74bf462, 0x033fb8aa), + TOBN(0x889f6d65, 0x533ef217), TOBN(0x7158c7e4, 0xc3ca2e87), + TOBN(0xfb670dfb, 0xdc2b4167), TOBN(0x75910a01, 0x844c257f), + TOBN(0xf336bf07, 0xcf88577d), TOBN(0x22245250, 0xe45e2ace), + TOBN(0x2ed92e8d, 0x7ca23d85), TOBN(0x29f8be4c, 0x2b812f58), + TOBN(0xdd9ebaa7, 0x076fe12b), TOBN(0x3f2400cb, 0xae1537f9), + TOBN(0x1aa93528, 0x17bdfb46), TOBN(0xc0f98430, 0x67883b41), + TOBN(0x5590ede1, 0x0170911d), TOBN(0x7562f5bb, 0x34d4b17f), + TOBN(0xe1fa1df2, 0x1826b8d2), TOBN(0xb40b796a, 0x6bd80d59), + TOBN(0xd65bf197, 0x3467ba92), TOBN(0x8c9b46db, 0xf70954b0), + TOBN(0x97c8a0f3, 0x0e78f15d), TOBN(0xa8f3a69a, 0x85a4c961), + TOBN(0x4242660f, 0x61e4ce9b), TOBN(0xbf06aab3, 0x6ea6790c), + TOBN(0xc6706f8e, 0xec986416), TOBN(0x9e56dec1, 0x9a9fc225), + TOBN(0x527c46f4, 0x9a9898d9), TOBN(0xd799e77b, 0x5633cdef), + TOBN(0x24eacc16, 0x7d9e4297), TOBN(0xabb61cea, 0x6b1cb734), + TOBN(0xbee2e8a7, 0xf778443c), TOBN(0x3bb42bf1, 0x29de2fe6), + TOBN(0xcbed86a1, 0x3003bb6f), TOBN(0xd3918e6c, 0xd781cdf6), + TOBN(0x4bee3271, 0x9a5103f1), TOBN(0x5243efc6, 0xf50eac06), + TOBN(0xb8e122cb, 0x6adcc119), TOBN(0x1b7faa84, 0xc0b80a08), + TOBN(0x32c3d1bd, 0x6dfcd08c), TOBN(0x129dec4e, 0x0be427de), + TOBN(0x98ab679c, 0x1d263c83), TOBN(0xafc83cb7, 0xcef64eff), + TOBN(0x85eb6088, 0x2fa6be76), TOBN(0x892585fb, 0x1328cbfe), + TOBN(0xc154d3ed, 0xcf618dda), TOBN(0xc44f601b, 0x3abaf26e), + TOBN(0x7bf57d0b, 0x2be1fdfd), TOBN(0xa833bd2d, 0x21137fee), + TOBN(0x9353af36, 0x2db591a8), TOBN(0xc76f26dc, 0x5562a056), + TOBN(0x1d87e47d, 0x3fdf5a51), TOBN(0x7afb5f93, 0x55c9cab0), + TOBN(0x91bbf58f, 0x89e0586e), TOBN(0x7c72c018, 0x0d843709), + TOBN(0xa9a5aafb, 0x99b5c3dc), TOBN(0xa48a0f1d, 0x3844aeb0), + TOBN(0x7178b7dd, 0xb667e482), TOBN(0x453985e9, 0x6e23a59a), + TOBN(0x4a54c860, 0x01b25dd8), TOBN(0x0dd37f48, 0xfb897c8a), + TOBN(0x5f8aa610, 0x0ea90cd9), TOBN(0xc8892c68, 0x16d5830d), + TOBN(0xeb4befc0, 0xef514ca5), TOBN(0x478eb679, 0xe72c9ee6), + TOBN(0x9bca20da, 0xdbc40d5f), TOBN(0xf015de21, 0xdde4f64a), + TOBN(0xaa6a4de0, 0xeaf4b8a5), TOBN(0x68cfd9ca, 0x4bc60e32), + TOBN(0x668a4b01, 0x7fd15e70), TOBN(0xd9f0694a, 0xf27dc09d), + TOBN(0xf6c3cad5, 0xba708bcd), TOBN(0x5cd2ba69, 0x5bb95c2a), + TOBN(0xaa28c1d3, 0x33c0a58f), TOBN(0x23e274e3, 0xabc77870), + TOBN(0x44c3692d, 0xdfd20a4a), TOBN(0x091c5fd3, 0x81a66653), + TOBN(0x6c0bb691, 0x09a0757d), TOBN(0x9072e8b9, 0x667343ea), + TOBN(0x31d40eb0, 0x80848bec), TOBN(0x95bd480a, 0x79fd36cc), + TOBN(0x01a77c61, 0x65ed43f5), TOBN(0xafccd127, 0x2e0d40bf), + TOBN(0xeccfc82d, 0x1cc1884b), TOBN(0xc85ac201, 0x5d4753b4), + TOBN(0xc7a6caac, 0x658e099f), TOBN(0xcf46369e, 0x04b27390), + TOBN(0xe2e7d049, 0x506467ea), TOBN(0x481b63a2, 0x37cdeccc), + TOBN(0x4029abd8, 0xed80143a), TOBN(0x28bfe3c7, 0xbcb00b88), + TOBN(0x3bec1009, 0x0643d84a), TOBN(0x885f3668, 0xabd11041), + TOBN(0xdb02432c, 0xf83a34d6), TOBN(0x32f7b360, 0x719ceebe), + TOBN(0xf06c7837, 0xdad1fe7a), TOBN(0x60a157a9, 0x5441a0b0), + TOBN(0x704970e9, 0xe2d47550), TOBN(0xcd2bd553, 0x271b9020), + TOBN(0xff57f82f, 0x33e24a0b), TOBN(0x9cbee23f, 0xf2565079), + TOBN(0x16353427, 0xeb5f5825), TOBN(0x276feec4, 0xe948d662), + TOBN(0xd1b62bc6, 0xda10032b), TOBN(0x718351dd, 0xf0e72a53), + TOBN(0x93452076, 0x2420e7ba), TOBN(0x96368fff, 0x3a00118d), + TOBN(0x00ce2d26, 0x150a49e4), TOBN(0x0c28b636, 0x3f04706b), + TOBN(0xbad65a46, 0x58b196d0), TOBN(0x6c8455fc, 0xec9f8b7c), + TOBN(0xe90c895f, 0x2d71867e), TOBN(0x5c0be31b, 0xedf9f38c), + TOBN(0x2a37a15e, 0xd8f6ec04), TOBN(0x239639e7, 0x8cd85251), + TOBN(0xd8975315, 0x9c7c4c6b), TOBN(0x603aa3c0, 0xd7409af7), + TOBN(0xb8d53d0c, 0x007132fb), TOBN(0x68d12af7, 0xa6849238), + TOBN(0xbe0607e7, 0xbf5d9279), TOBN(0x9aa50055, 0xaada74ce), + TOBN(0xe81079cb, 0xba7e8ccb), TOBN(0x610c71d1, 0xa5f4ff5e), + TOBN(0x9e2ee1a7, 0x5aa07093), TOBN(0xca84004b, 0xa75da47c), + TOBN(0x074d3951, 0x3de75401), TOBN(0xf938f756, 0xbb311592), + TOBN(0x96197618, 0x00a43421), TOBN(0x39a25362, 0x07bc78c8), + TOBN(0x278f710a, 0x0a171276), TOBN(0xb28446ea, 0x8d1a8f08), + TOBN(0x184781bf, 0xe3b6a661), TOBN(0x7751cb1d, 0xe6d279f7), + TOBN(0xf8ff95d6, 0xc59eb662), TOBN(0x186d90b7, 0x58d3dea7), + TOBN(0x0e4bb6c1, 0xdfb4f754), TOBN(0x5c5cf56b, 0x2b2801dc), + TOBN(0xc561e452, 0x1f54564d), TOBN(0xb4fb8c60, 0xf0dd7f13), + TOBN(0xf8849630, 0x33ff98c7), TOBN(0x9619fffa, 0xcf17769c), + TOBN(0xf8090bf6, 0x1bfdd80a), TOBN(0x14d9a149, 0x422cfe63), + TOBN(0xb354c360, 0x6f6df9ea), TOBN(0xdbcf770d, 0x218f17ea), + TOBN(0x207db7c8, 0x79eb3480), TOBN(0x213dbda8, 0x559b6a26), + TOBN(0xac4c200b, 0x29fc81b3), TOBN(0xebc3e09f, 0x171d87c1), + TOBN(0x91799530, 0x1481aa9e), TOBN(0x051b92e1, 0x92e114fa), + TOBN(0xdf8f92e9, 0xecb5537f), TOBN(0x44b1b2cc, 0x290c7483), + TOBN(0xa711455a, 0x2adeb016), TOBN(0x964b6856, 0x81a10c2c), + TOBN(0x4f159d99, 0xcec03623), TOBN(0x05532225, 0xef3271ea), + TOBN(0xb231bea3, 0xc5ee4849), TOBN(0x57a54f50, 0x7094f103), + TOBN(0x3e2d421d, 0x9598b352), TOBN(0xe865a49c, 0x67412ab4), + TOBN(0xd2998a25, 0x1cc3a912), TOBN(0x5d092808, 0x0c74d65d), + TOBN(0x73f45908, 0x4088567a), TOBN(0xeb6b280e, 0x1f214a61), + TOBN(0x8c9adc34, 0xcaf0c13d), TOBN(0x39d12938, 0xf561fb80), + TOBN(0xb2dc3a5e, 0xbc6edfb4), TOBN(0x7485b1b1, 0xfe4d210e), + TOBN(0x062e0400, 0xe186ae72), TOBN(0x91e32d5c, 0x6eeb3b88), + TOBN(0x6df574d7, 0x4be59224), TOBN(0xebc88ccc, 0x716d55f3), + TOBN(0x26c2e6d0, 0xcad6ed33), TOBN(0xc6e21e7d, 0x0d3e8b10), + TOBN(0x2cc5840e, 0x5bcc36bb), TOBN(0x9292445e, 0x7da74f69), + TOBN(0x8be8d321, 0x4e5193a8), TOBN(0x3ec23629, 0x8df06413), + TOBN(0xc7e9ae85, 0xb134defa), TOBN(0x6073b1d0, 0x1bb2d475), + TOBN(0xb9ad615e, 0x2863c00d), TOBN(0x9e29493d, 0x525f4ac4), + TOBN(0xc32b1dea, 0x4e9acf4f), TOBN(0x3e1f01c8, 0xa50db88d), + TOBN(0xb05d70ea, 0x04da916c), TOBN(0x714b0d0a, 0xd865803e), + TOBN(0x4bd493fc, 0x9920cb5e), TOBN(0x5b44b1f7, 0x92c7a3ac), + TOBN(0xa2a77293, 0xbcec9235), TOBN(0x5ee06e87, 0xcd378553), + TOBN(0xceff8173, 0xda621607), TOBN(0x2bb03e4c, 0x99f5d290), + TOBN(0x2945106a, 0xa6f734ac), TOBN(0xb5056604, 0xd25c4732), + TOBN(0x5945920c, 0xe079afee), TOBN(0x686e17a0, 0x6789831f), + TOBN(0x5966bee8, 0xb74a5ae5), TOBN(0x38a673a2, 0x1e258d46), + TOBN(0xbd1cc1f2, 0x83141c95), TOBN(0x3b2ecf4f, 0x0e96e486), + TOBN(0xcd3aa896, 0x74e5fc78), TOBN(0x415ec10c, 0x2482fa7a), + TOBN(0x15234419, 0x80503380), TOBN(0x513d917a, 0xd314b392), + TOBN(0xb0b52f4e, 0x63caecae), TOBN(0x07bf22ad, 0x2dc7780b), + TOBN(0xe761e8a1, 0xe4306839), TOBN(0x1b3be962, 0x5dd7feaa), + TOBN(0x4fe728de, 0x74c778f1), TOBN(0xf1fa0bda, 0x5e0070f6), + TOBN(0x85205a31, 0x6ec3f510), TOBN(0x2c7e4a14, 0xd2980475), + TOBN(0xde3c19c0, 0x6f30ebfd), TOBN(0xdb1c1f38, 0xd4b7e644), + TOBN(0xfe291a75, 0x5dce364a), TOBN(0xb7b22a3c, 0x058f5be3), + TOBN(0x2cd2c302, 0x37fea38c), TOBN(0x2930967a, 0x2e17be17), + TOBN(0x87f009de, 0x0c061c65), TOBN(0xcb014aac, 0xedc6ed44), + TOBN(0x49bd1cb4, 0x3bafb1eb), TOBN(0x81bd8b5c, 0x282d3688), + TOBN(0x1cdab87e, 0xf01a17af), TOBN(0x21f37ac4, 0xe710063b), + TOBN(0x5a6c5676, 0x42fc8193), TOBN(0xf4753e70, 0x56a6015c), + TOBN(0x020f795e, 0xa15b0a44), TOBN(0x8f37c8d7, 0x8958a958), + TOBN(0x63b7e89b, 0xa4b675b5), TOBN(0xb4fb0c0c, 0x0fc31aea), + TOBN(0xed95e639, 0xa7ff1f2e), TOBN(0x9880f5a3, 0x619614fb), + TOBN(0xdeb6ff02, 0x947151ab), TOBN(0x5bc5118c, 0xa868dcdb), + TOBN(0xd8da2055, 0x4c20cea5), TOBN(0xcac2776e, 0x14c4d69a), + TOBN(0xcccb22c1, 0x622d599b), TOBN(0xa4ddb653, 0x68a9bb50), + TOBN(0x2c4ff151, 0x1b4941b4), TOBN(0xe1ff19b4, 0x6efba588), + TOBN(0x35034363, 0xc48345e0), TOBN(0x45542e3d, 0x1e29dfc4), + TOBN(0xf197cb91, 0x349f7aed), TOBN(0x3b2b5a00, 0x8fca8420), + TOBN(0x7c175ee8, 0x23aaf6d8), TOBN(0x54dcf421, 0x35af32b6), + TOBN(0x0ba14307, 0x27d6561e), TOBN(0x879d5ee4, 0xd175b1e2), + TOBN(0xc7c43673, 0x99807db5), TOBN(0x77a54455, 0x9cd55bcd), + TOBN(0xe6c2ff13, 0x0105c072), TOBN(0x18f7a99f, 0x8dda7da4), + TOBN(0x4c301820, 0x0e2d35c1), TOBN(0x06a53ca0, 0xd9cc6c82), + TOBN(0xaa21cc1e, 0xf1aa1d9e), TOBN(0x32414334, 0x4a75b1e8), + TOBN(0x2a6d1328, 0x0ebe9fdc), TOBN(0x16bd173f, 0x98a4755a), + TOBN(0xfbb9b245, 0x2133ffd9), TOBN(0x39a8b2f1, 0x830f1a20), + TOBN(0x484bc97d, 0xd5a1f52a), TOBN(0xd6aebf56, 0xa40eddf8), + TOBN(0x32257acb, 0x76ccdac6), TOBN(0xaf4d36ec, 0x1586ff27), + TOBN(0x8eaa8863, 0xf8de7dd1), TOBN(0x0045d5cf, 0x88647c16)} + , + {TOBN(0xa6f3d574, 0xc005979d), TOBN(0xc2072b42, 0x6a40e350), + TOBN(0xfca5c156, 0x8de2ecf9), TOBN(0xa8c8bf5b, 0xa515344e), + TOBN(0x97aee555, 0x114df14a), TOBN(0xd4374a4d, 0xfdc5ec6b), + TOBN(0x754cc28f, 0x2ca85418), TOBN(0x71cb9e27, 0xd3c41f78), + TOBN(0x89105079, 0x03605c39), TOBN(0xf0843d9e, 0xa142c96c), + TOBN(0xf3744934, 0x16923684), TOBN(0x732caa2f, 0xfa0a2893), + TOBN(0xb2e8c270, 0x61160170), TOBN(0xc32788cc, 0x437fbaa3), + TOBN(0x39cd818e, 0xa6eda3ac), TOBN(0xe2e94239, 0x9e2b2e07), + TOBN(0x6967d39b, 0x0260e52a), TOBN(0xd42585cc, 0x90653325), + TOBN(0x0d9bd605, 0x21ca7954), TOBN(0x4fa20877, 0x81ed57b3), + TOBN(0x60c1eff8, 0xe34a0bbe), TOBN(0x56b0040c, 0x84f6ef64), + TOBN(0x28be2b24, 0xb1af8483), TOBN(0xb2278163, 0xf5531614), + TOBN(0x8df27545, 0x5922ac1c), TOBN(0xa7b3ef5c, 0xa52b3f63), + TOBN(0x8e77b214, 0x71de57c4), TOBN(0x31682c10, 0x834c008b), + TOBN(0xc76824f0, 0x4bd55d31), TOBN(0xb6d1c086, 0x17b61c71), + TOBN(0x31db0903, 0xc2a5089d), TOBN(0x9c092172, 0x184e5d3f), + TOBN(0xdd7ced5b, 0xc00cc638), TOBN(0x1a2015eb, 0x61278fc2), + TOBN(0x2e8e5288, 0x6a37f8d6), TOBN(0xc457786f, 0xe79933ad), + TOBN(0xb3fe4cce, 0x2c51211a), TOBN(0xad9b10b2, 0x24c20498), + TOBN(0x90d87a4f, 0xd28db5e5), TOBN(0x698cd105, 0x3aca2fc3), + TOBN(0x4f112d07, 0xe91b536d), TOBN(0xceb982f2, 0x9eba09d6), + TOBN(0x3c157b2c, 0x197c396f), TOBN(0xe23c2d41, 0x7b66eb24), + TOBN(0x480c57d9, 0x3f330d37), TOBN(0xb3a4c8a1, 0x79108deb), + TOBN(0x702388de, 0xcb199ce5), TOBN(0x0b019211, 0xb944a8d4), + TOBN(0x24f2a692, 0x840bb336), TOBN(0x7c353bdc, 0xa669fa7b), + TOBN(0xda20d6fc, 0xdec9c300), TOBN(0x625fbe2f, 0xa13a4f17), + TOBN(0xa2b1b61a, 0xdbc17328), TOBN(0x008965bf, 0xa9515621), + TOBN(0x49690939, 0xc620ff46), TOBN(0x182dd27d, 0x8717e91c), + TOBN(0x5ace5035, 0xea6c3997), TOBN(0x54259aaa, 0xc2610bef), + TOBN(0xef18bb3f, 0x3c80dd39), TOBN(0x6910b95b, 0x5fc3fa39), + TOBN(0xfce2f510, 0x43e09aee), TOBN(0xced56c9f, 0xa7675665), + TOBN(0x10e265ac, 0xd872db61), TOBN(0x6982812e, 0xae9fce69), + TOBN(0x29be11c6, 0xce800998), TOBN(0x72bb1752, 0xb90360d9), + TOBN(0x2c193197, 0x5a4ad590), TOBN(0x2ba2f548, 0x9fc1dbc0), + TOBN(0x7fe4eebb, 0xe490ebe0), TOBN(0x12a0a4cd, 0x7fae11c0), + TOBN(0x7197cf81, 0xe903ba37), TOBN(0xcf7d4aa8, 0xde1c6dd8), + TOBN(0x92af6bf4, 0x3fd5684c), TOBN(0x2b26eecf, 0x80360aa1), + TOBN(0xbd960f30, 0x00546a82), TOBN(0x407b3c43, 0xf59ad8fe), + TOBN(0x86cae5fe, 0x249c82ba), TOBN(0x9e0faec7, 0x2463744c), + TOBN(0x87f551e8, 0x94916272), TOBN(0x033f9344, 0x6ceb0615), + TOBN(0x1e5eb0d1, 0x8be82e84), TOBN(0x89967f0e, 0x7a582fef), + TOBN(0xbcf687d5, 0xa6e921fa), TOBN(0xdfee4cf3, 0xd37a09ba), + TOBN(0x94f06965, 0xb493c465), TOBN(0x638b9a1c, 0x7635c030), + TOBN(0x76667864, 0x66f05e9f), TOBN(0xccaf6808, 0xc04da725), + TOBN(0xca2eb690, 0x768fccfc), TOBN(0xf402d37d, 0xb835b362), + TOBN(0x0efac0d0, 0xe2fdfcce), TOBN(0xefc9cdef, 0xb638d990), + TOBN(0x2af12b72, 0xd1669a8b), TOBN(0x33c536bc, 0x5774ccbd), + TOBN(0x30b21909, 0xfb34870e), TOBN(0xc38fa2f7, 0x7df25aca), + TOBN(0x74c5f02b, 0xbf81f3f5), TOBN(0x0525a5ae, 0xaf7e4581), + TOBN(0x88d2aaba, 0x433c54ae), TOBN(0xed9775db, 0x806a56c5), + TOBN(0xd320738a, 0xc0edb37d), TOBN(0x25fdb6ee, 0x66cc1f51), + TOBN(0xac661d17, 0x10600d76), TOBN(0x931ec1f3, 0xbdd1ed76), + TOBN(0x65c11d62, 0x19ee43f1), TOBN(0x5cd57c3e, 0x60829d97), + TOBN(0xd26c91a3, 0x984be6e8), TOBN(0xf08d9309, 0x8b0c53bd), + TOBN(0x94bc9e5b, 0xc016e4ea), TOBN(0xd3916839, 0x11d43d2b), + TOBN(0x886c5ad7, 0x73701155), TOBN(0xe0377626, 0x20b00715), + TOBN(0x7f01c9ec, 0xaa80ba59), TOBN(0x3083411a, 0x68538e51), + TOBN(0x970370f1, 0xe88128af), TOBN(0x625cc3db, 0x91dec14b), + TOBN(0xfef9666c, 0x01ac3107), TOBN(0xb2a8d577, 0xd5057ac3), + TOBN(0xb0f26299, 0x92be5df7), TOBN(0xf579c8e5, 0x00353924), + TOBN(0xb8fa3d93, 0x1341ed7a), TOBN(0x4223272c, 0xa7b59d49), + TOBN(0x3dcb1947, 0x83b8c4a4), TOBN(0x4e413c01, 0xed1302e4), + TOBN(0x6d999127, 0xe17e44ce), TOBN(0xee86bf75, 0x33b3adfb), + TOBN(0xf6902fe6, 0x25aa96ca), TOBN(0xb73540e4, 0xe5aae47d), + TOBN(0x32801d7b, 0x1b4a158c), TOBN(0xe571c99e, 0x27e2a369), + TOBN(0x40cb76c0, 0x10d9f197), TOBN(0xc308c289, 0x3167c0ae), + TOBN(0xa6ef9dd3, 0xeb7958f2), TOBN(0xa7226dfc, 0x300879b1), + TOBN(0x6cd0b362, 0x7edf0636), TOBN(0x4efbce6c, 0x7bc37eed), + TOBN(0x75f92a05, 0x8d699021), TOBN(0x586d4c79, 0x772566e3), + TOBN(0x378ca5f1, 0x761ad23a), TOBN(0x650d86fc, 0x1465a8ac), + TOBN(0x7a4ed457, 0x842ba251), TOBN(0x6b65e3e6, 0x42234933), + TOBN(0xaf1543b7, 0x31aad657), TOBN(0xa4cefe98, 0xcbfec369), + TOBN(0xb587da90, 0x9f47befb), TOBN(0x6562e9fb, 0x41312d13), + TOBN(0xa691ea59, 0xeff1cefe), TOBN(0xcc30477a, 0x05fc4cf6), + TOBN(0xa1632461, 0x0b0ffd3d), TOBN(0xa1f16f3b, 0x5b355956), + TOBN(0x5b148d53, 0x4224ec24), TOBN(0xdc834e7b, 0xf977012a), + TOBN(0x7bfc5e75, 0xb2c69dbc), TOBN(0x3aa77a29, 0x03c3da6c), + TOBN(0xde0df03c, 0xca910271), TOBN(0xcbd5ca4a, 0x7806dc55), + TOBN(0xe1ca5807, 0x6db476cb), TOBN(0xfde15d62, 0x5f37a31e), + TOBN(0xf49af520, 0xf41af416), TOBN(0x96c5c5b1, 0x7d342db5), + TOBN(0x155c43b7, 0xeb4ceb9b), TOBN(0x2e993010, 0x4e77371a), + TOBN(0x1d2987da, 0x675d43af), TOBN(0xef2bc1c0, 0x8599fd72), + TOBN(0x96894b7b, 0x9342f6b2), TOBN(0x201eadf2, 0x7c8e71f0), + TOBN(0xf3479d9f, 0x4a1f3efc), TOBN(0xe0f8a742, 0x702a9704), + TOBN(0xeafd44b6, 0xb3eba40c), TOBN(0xf9739f29, 0xc1c1e0d0), + TOBN(0x0091471a, 0x619d505e), TOBN(0xc15f9c96, 0x9d7c263e), + TOBN(0x5be47285, 0x83afbe33), TOBN(0xa3b6d6af, 0x04f1e092), + TOBN(0xe76526b9, 0x751a9d11), TOBN(0x2ec5b26d, 0x9a4ae4d2), + TOBN(0xeb66f4d9, 0x02f6fb8d), TOBN(0x4063c561, 0x96912164), + TOBN(0xeb7050c1, 0x80ef3000), TOBN(0x288d1c33, 0xeaa5b3f0), + TOBN(0xe87c68d6, 0x07806fd8), TOBN(0xb2f7f9d5, 0x4bbbf50f), + TOBN(0x25972f3a, 0xac8d6627), TOBN(0xf8547774, 0x10e8c13b), + TOBN(0xcc50ef6c, 0x872b4a60), TOBN(0xab2a34a4, 0x4613521b), + TOBN(0x39c5c190, 0x983e15d1), TOBN(0x61dde5df, 0x59905512), + TOBN(0xe417f621, 0x9f2275f3), TOBN(0x0750c8b6, 0x451d894b), + TOBN(0x75b04ab9, 0x78b0bdaa), TOBN(0x3bfd9fd4, 0x458589bd), + TOBN(0xf1013e30, 0xee9120b6), TOBN(0x2b51af93, 0x23a4743e), + TOBN(0xea96ffae, 0x48d14d9e), TOBN(0x71dc0dbe, 0x698a1d32), + TOBN(0x914962d2, 0x0180cca4), TOBN(0x1ae60677, 0xc3568963), + TOBN(0x8cf227b1, 0x437bc444), TOBN(0xc650c83b, 0xc9962c7a), + TOBN(0x23c2c7dd, 0xfe7ccfc4), TOBN(0xf925c89d, 0x1b929d48), + TOBN(0x4460f74b, 0x06783c33), TOBN(0xac2c8d49, 0xa590475a), + TOBN(0xfb40b407, 0xb807bba0), TOBN(0x9d1e362d, 0x69ff8f3a), + TOBN(0xa33e9681, 0xcbef64a4), TOBN(0x67ece5fa, 0x332fb4b2), + TOBN(0x6900a99b, 0x739f10e3), TOBN(0xc3341ca9, 0xff525925), + TOBN(0xee18a626, 0xa9e2d041), TOBN(0xa5a83685, 0x29580ddd), + TOBN(0xf3470c81, 0x9d7de3cd), TOBN(0xedf02586, 0x2062cf9c), + TOBN(0xf43522fa, 0xc010edb0), TOBN(0x30314135, 0x13a4b1ae), + TOBN(0xc792e02a, 0xdb22b94b), TOBN(0x993d8ae9, 0xa1eaa45b), + TOBN(0x8aad6cd3, 0xcd1e1c63), TOBN(0x89529ca7, 0xc5ce688a), + TOBN(0x2ccee3aa, 0xe572a253), TOBN(0xe02b6438, 0x02a21efb), + TOBN(0xa7091b6e, 0xc9430358), TOBN(0x06d1b1fa, 0x9d7db504), + TOBN(0x58846d32, 0xc4744733), TOBN(0x40517c71, 0x379f9e34), + TOBN(0x2f65655f, 0x130ef6ca), TOBN(0x526e4488, 0xf1f3503f), + TOBN(0x8467bd17, 0x7ee4a976), TOBN(0x1d9dc913, 0x921363d1), + TOBN(0xd8d24c33, 0xb069e041), TOBN(0x5eb5da0a, 0x2cdf7f51), + TOBN(0x1c0f3cb1, 0x197b994f), TOBN(0x3c95a6c5, 0x2843eae9), + TOBN(0x7766ffc9, 0xa6097ea5), TOBN(0x7bea4093, 0xd723b867), + TOBN(0xb48e1f73, 0x4db378f9), TOBN(0x70025b00, 0xe37b77ac), + TOBN(0x943dc8e7, 0xaf24ad46), TOBN(0xb98a15ac, 0x16d00a85), + TOBN(0x3adc38ba, 0x2743b004), TOBN(0xb1c7f4f7, 0x334415ee), + TOBN(0xea43df8f, 0x1e62d05a), TOBN(0x32618905, 0x9d76a3b6), + TOBN(0x2fbd0bb5, 0xa23a0f46), TOBN(0x5bc971db, 0x6a01918c), + TOBN(0x7801d94a, 0xb4743f94), TOBN(0xb94df65e, 0x676ae22b), + TOBN(0xaafcbfab, 0xaf95894c), TOBN(0x7b9bdc07, 0x276b2241), + TOBN(0xeaf98362, 0x5bdda48b), TOBN(0x5977faf2, 0xa3fcb4df), + TOBN(0xbed042ef, 0x052c4b5b), TOBN(0x9fe87f71, 0x067591f0), + TOBN(0xc89c73ca, 0x22f24ec7), TOBN(0x7d37fa9e, 0xe64a9f1b), + TOBN(0x2710841a, 0x15562627), TOBN(0x2c01a613, 0xc243b034), + TOBN(0x1d135c56, 0x2bc68609), TOBN(0xc2ca1715, 0x8b03f1f6), + TOBN(0xc9966c2d, 0x3eb81d82), TOBN(0xc02abf4a, 0x8f6df13e), + TOBN(0x77b34bd7, 0x8f72b43b), TOBN(0xaff6218f, 0x360c82b0), + TOBN(0x0aa5726c, 0x8d55b9d2), TOBN(0xdc0adbe9, 0x99e9bffb), + TOBN(0x9097549c, 0xefb9e72a), TOBN(0x16755712, 0x9dfb3111), + TOBN(0xdd8bf984, 0xf26847f9), TOBN(0xbcb8e387, 0xdfb30cb7), + TOBN(0xc1fd32a7, 0x5171ef9c), TOBN(0x977f3fc7, 0x389b363f), + TOBN(0x116eaf2b, 0xf4babda0), TOBN(0xfeab68bd, 0xf7113c8e), + TOBN(0xd1e3f064, 0xb7def526), TOBN(0x1ac30885, 0xe0b3fa02), + TOBN(0x1c5a6e7b, 0x40142d9d), TOBN(0x839b5603, 0x30921c0b), + TOBN(0x48f301fa, 0x36a116a3), TOBN(0x380e1107, 0xcfd9ee6d), + TOBN(0x7945ead8, 0x58854be1), TOBN(0x4111c12e, 0xcbd4d49d), + TOBN(0xece3b1ec, 0x3a29c2ef), TOBN(0x6356d404, 0x8d3616f5), + TOBN(0x9f0d6a8f, 0x594d320e), TOBN(0x0989316d, 0xf651ccd2), + TOBN(0x6c32117a, 0x0f8fdde4), TOBN(0x9abe5cc5, 0xa26a9bbc), + TOBN(0xcff560fb, 0x9723f671), TOBN(0x21b2a12d, 0x7f3d593c), + TOBN(0xe4cb18da, 0x24ba0696), TOBN(0x186e2220, 0xc3543384), + TOBN(0x722f64e0, 0x88312c29), TOBN(0x94282a99, 0x17dc7752), + TOBN(0x62467bbf, 0x5a85ee89), TOBN(0xf435c650, 0xf10076a0), + TOBN(0xc9ff1539, 0x43b3a50b), TOBN(0x7132130c, 0x1a53efbc), + TOBN(0x31bfe063, 0xf7b0c5b7), TOBN(0xb0179a7d, 0x4ea994cc), + TOBN(0x12d064b3, 0xc85f455b), TOBN(0x47259328, 0x8f6e0062), + TOBN(0xf64e590b, 0xb875d6d9), TOBN(0x22dd6225, 0xad92bcc7), + TOBN(0xb658038e, 0xb9c3bd6d), TOBN(0x00cdb0d6, 0xfbba27c8), + TOBN(0x0c681337, 0x1062c45d), TOBN(0xd8515b8c, 0x2d33407d), + TOBN(0xcb8f699e, 0x8cbb5ecf), TOBN(0x8c4347f8, 0xc608d7d8), + TOBN(0x2c11850a, 0xbb3e00db), TOBN(0x20a8dafd, 0xecb49d19), + TOBN(0xbd781480, 0x45ee2f40), TOBN(0x75e354af, 0x416b60cf), + TOBN(0xde0b58a1, 0x8d49a8c4), TOBN(0xe40e94e2, 0xfa359536), + TOBN(0xbd4fa59f, 0x62accd76), TOBN(0x05cf466a, 0x8c762837), + TOBN(0xb5abda99, 0x448c277b), TOBN(0x5a9e01bf, 0x48b13740), + TOBN(0x9d457798, 0x326aad8d), TOBN(0xbdef4954, 0xc396f7e7), + TOBN(0x6fb274a2, 0xc253e292), TOBN(0x2800bf0a, 0x1cfe53e7), + TOBN(0x22426d31, 0x44438fd4), TOBN(0xef233923, 0x5e259f9a), + TOBN(0x4188503c, 0x03f66264), TOBN(0x9e5e7f13, 0x7f9fdfab), + TOBN(0x565eb76c, 0x5fcc1aba), TOBN(0xea632548, 0x59b5bff8), + TOBN(0x5587c087, 0xaab6d3fa), TOBN(0x92b639ea, 0x6ce39c1b), + TOBN(0x0706e782, 0x953b135c), TOBN(0x7308912e, 0x425268ef), + TOBN(0x599e92c7, 0x090e7469), TOBN(0x83b90f52, 0x9bc35e75), + TOBN(0x4750b3d0, 0x244975b3), TOBN(0xf3a44358, 0x11965d72), + TOBN(0x179c6774, 0x9c8dc751), TOBN(0xff18cdfe, 0xd23d9ff0), + TOBN(0xc4013833, 0x2028e247), TOBN(0x96e280e2, 0xf3bfbc79), + TOBN(0xf60417bd, 0xd0880a84), TOBN(0x263c9f3d, 0x2a568151), + TOBN(0x36be15b3, 0x2d2ce811), TOBN(0x846dc0c2, 0xf8291d21), + TOBN(0x5cfa0ecb, 0x789fcfdb), TOBN(0x45a0beed, 0xd7535b9a), + TOBN(0xec8e9f07, 0x96d69af1), TOBN(0x31a7c5b8, 0x599ab6dc), + TOBN(0xd36d45ef, 0xf9e2e09f), TOBN(0x3cf49ef1, 0xdcee954b), + TOBN(0x6be34cf3, 0x086cff9b), TOBN(0x88dbd491, 0x39a3360f), + TOBN(0x1e96b8cc, 0x0dbfbd1d), TOBN(0xc1e5f7bf, 0xcb7e2552), + TOBN(0x0547b214, 0x28819d98), TOBN(0xc770dd9c, 0x7aea9dcb), + TOBN(0xaef0d4c7, 0x041d68c8), TOBN(0xcc2b9818, 0x13cb9ba8), + TOBN(0x7fc7bc76, 0xfe86c607), TOBN(0x6b7b9337, 0x502a9a95), + TOBN(0x1948dc27, 0xd14dab63), TOBN(0x249dd198, 0xdae047be), + TOBN(0xe8356584, 0xa981a202), TOBN(0x3531dd18, 0x3a893387), + TOBN(0x1be11f90, 0xc85c7209), TOBN(0x93d2fe1e, 0xe2a52b5a), + TOBN(0x8225bfe2, 0xec6d6b97), TOBN(0x9cf6d6f4, 0xbd0aa5de), + TOBN(0x911459cb, 0x54779f5f), TOBN(0x5649cddb, 0x86aeb1f3), + TOBN(0x32133579, 0x3f26ce5a), TOBN(0xc289a102, 0x550f431e), + TOBN(0x559dcfda, 0x73b84c6f), TOBN(0x84973819, 0xee3ac4d7), + TOBN(0xb51e55e6, 0xf2606a82), TOBN(0xe25f7061, 0x90f2fb57), + TOBN(0xacef6c2a, 0xb1a4e37c), TOBN(0x864e359d, 0x5dcf2706), + TOBN(0x479e6b18, 0x7ce57316), TOBN(0x2cab2500, 0x3a96b23d), + TOBN(0xed489862, 0x8ef16df7), TOBN(0x2056538c, 0xef3758b5), + TOBN(0xa7df865e, 0xf15d3101), TOBN(0x80c5533a, 0x61b553d7), + TOBN(0x366e1997, 0x4ed14294), TOBN(0x6620741f, 0xb3c0bcd6), + TOBN(0x21d1d9c4, 0xedc45418), TOBN(0x005b859e, 0xc1cc4a9d), + TOBN(0xdf01f630, 0xa1c462f0), TOBN(0x15d06cf3, 0xf26820c7), + TOBN(0x9f7f24ee, 0x3484be47), TOBN(0x2ff33e96, 0x4a0c902f), + TOBN(0x00bdf457, 0x5a0bc453), TOBN(0x2378dfaf, 0x1aa238db), + TOBN(0x272420ec, 0x856720f2), TOBN(0x2ad9d95b, 0x96797291), + TOBN(0xd1242cc6, 0x768a1558), TOBN(0x2e287f8b, 0x5cc86aa8), + TOBN(0x796873d0, 0x990cecaa), TOBN(0xade55f81, 0x675d4080), + TOBN(0x2645eea3, 0x21f0cd84), TOBN(0x7a1efa0f, 0xb4e17d02), + TOBN(0xf6858420, 0x037cc061), TOBN(0x682e05f0, 0xd5d43e12), + TOBN(0x59c36994, 0x27218710), TOBN(0x85cbba4d, 0x3f7cd2fc), + TOBN(0x726f9729, 0x7a3cd22a), TOBN(0x9f8cd5dc, 0x4a628397), + TOBN(0x17b93ab9, 0xc23165ed), TOBN(0xff5f5dbf, 0x122823d4), + TOBN(0xc1e4e4b5, 0x654a446d), TOBN(0xd1a9496f, 0x677257ba), + TOBN(0x6387ba94, 0xde766a56), TOBN(0x23608bc8, 0x521ec74a), + TOBN(0x16a522d7, 0x6688c4d4), TOBN(0x9d6b4282, 0x07373abd), + TOBN(0xa62f07ac, 0xb42efaa3), TOBN(0xf73e00f7, 0xe3b90180), + TOBN(0x36175fec, 0x49421c3e), TOBN(0xc4e44f9b, 0x3dcf2678), + TOBN(0x76df436b, 0x7220f09f), TOBN(0x172755fb, 0x3aa8b6cf), + TOBN(0xbab89d57, 0x446139cc), TOBN(0x0a0a6e02, 0x5fe0208f), + TOBN(0xcdbb63e2, 0x11e5d399), TOBN(0x33ecaa12, 0xa8977f0b), + TOBN(0x59598b21, 0xf7c42664), TOBN(0xb3e91b32, 0xab65d08a), + TOBN(0x035822ee, 0xf4502526), TOBN(0x1dcf0176, 0x720a82a9), + TOBN(0x50f8598f, 0x3d589e02), TOBN(0xdf0478ff, 0xb1d63d2c), + TOBN(0x8b8068bd, 0x1571cd07), TOBN(0x30c3aa4f, 0xd79670cd), + TOBN(0x25e8fd4b, 0x941ade7f), TOBN(0x3d1debdc, 0x32790011), + TOBN(0x65b6dcbd, 0x3a3f9ff0), TOBN(0x282736a4, 0x793de69c), + TOBN(0xef69a0c3, 0xd41d3bd3), TOBN(0xb533b8c9, 0x07a26bde), + TOBN(0xe2801d97, 0xdb2edf9f), TOBN(0xdc4a8269, 0xe1877af0), + TOBN(0x6c1c5851, 0x3d590dbe), TOBN(0x84632f6b, 0xee4e9357), + TOBN(0xd36d36b7, 0x79b33374), TOBN(0xb46833e3, 0x9bbca2e6), + TOBN(0x37893913, 0xf7fc0586), TOBN(0x385315f7, 0x66bf4719), + TOBN(0x72c56293, 0xb31855dc), TOBN(0xd1416d4e, 0x849061fe), + TOBN(0xbeb3ab78, 0x51047213), TOBN(0x447f6e61, 0xf040c996), + TOBN(0xd06d310d, 0x638b1d0c), TOBN(0xe28a413f, 0xbad1522e), + TOBN(0x685a76cb, 0x82003f86), TOBN(0x610d07f7, 0x0bcdbca3), + TOBN(0x6ff66021, 0x9ca4c455), TOBN(0x7df39b87, 0xcea10eec), + TOBN(0xb9255f96, 0xe22db218), TOBN(0x8cc6d9eb, 0x08a34c44), + TOBN(0xcd4ffb86, 0x859f9276), TOBN(0x8fa15eb2, 0x50d07335), + TOBN(0xdf553845, 0xcf2c24b5), TOBN(0x89f66a9f, 0x52f9c3ba), + TOBN(0x8f22b5b9, 0xe4a7ceb3), TOBN(0xaffef809, 0x0e134686), + TOBN(0x3e53e1c6, 0x8eb8fac2), TOBN(0x93c1e4eb, 0x28aec98e), + TOBN(0xb6b91ec5, 0x32a43bcb), TOBN(0x2dbfa947, 0xb2d74a51), + TOBN(0xe065d190, 0xca84bad7), TOBN(0xfb13919f, 0xad58e65c), + TOBN(0x3c41718b, 0xf1cb6e31), TOBN(0x688969f0, 0x06d05c3f), + TOBN(0xd4f94ce7, 0x21264d45), TOBN(0xfdfb65e9, 0x7367532b), + TOBN(0x5b1be8b1, 0x0945a39d), TOBN(0x229f789c, 0x2b8baf3b), + TOBN(0xd8f41f3e, 0x6f49f15d), TOBN(0x678ce828, 0x907f0792), + TOBN(0xc69ace82, 0xfca6e867), TOBN(0x106451ae, 0xd01dcc89), + TOBN(0x1bb4f7f0, 0x19fc32d2), TOBN(0x64633dfc, 0xb00c52d2), + TOBN(0x8f13549a, 0xad9ea445), TOBN(0x99a3bf50, 0xfb323705), + TOBN(0x0c9625a2, 0x534d4dbc), TOBN(0x45b8f1d1, 0xc2a2fea3), + TOBN(0x76ec21a1, 0xa530fc1a), TOBN(0x4bac9c2a, 0x9e5bd734), + TOBN(0x5996d76a, 0x7b4e3587), TOBN(0x0045cdee, 0x1182d9e3), + TOBN(0x1aee24b9, 0x1207f13d), TOBN(0x66452e97, 0x97345a41), + TOBN(0x16e5b054, 0x9f950cd0), TOBN(0x9cc72fb1, 0xd7fdd075), + TOBN(0x6edd61e7, 0x66249663), TOBN(0xde4caa4d, 0xf043cccb), + TOBN(0x11b1f57a, 0x55c7ac17), TOBN(0x779cbd44, 0x1a85e24d), + TOBN(0x78030f86, 0xe46081e7), TOBN(0xfd4a6032, 0x8e20f643), + TOBN(0xcc7a6488, 0x0a750c0f), TOBN(0x39bacfe3, 0x4e548e83), + TOBN(0x3d418c76, 0x0c110f05), TOBN(0x3e4daa4c, 0xb1f11588), + TOBN(0x2733e7b5, 0x5ffc69ff), TOBN(0x46f147bc, 0x92053127), + TOBN(0x885b2434, 0xd722df94), TOBN(0x6a444f65, 0xe6fc6b7c)} + , + {TOBN(0x7a1a465a, 0xc3f16ea8), TOBN(0x115a461d, 0xb2f1d11c), + TOBN(0x4767dd95, 0x6c68a172), TOBN(0x3392f2eb, 0xd13a4698), + TOBN(0xc7a99ccd, 0xe526cdc7), TOBN(0x8e537fdc, 0x22292b81), + TOBN(0x76d8cf69, 0xa6d39198), TOBN(0xffc5ff43, 0x2446852d), + TOBN(0x97b14f7e, 0xa90567e6), TOBN(0x513257b7, 0xb6ae5cb7), + TOBN(0x85454a3c, 0x9f10903d), TOBN(0xd8d2c9ad, 0x69bc3724), + TOBN(0x38da9324, 0x6b29cb44), TOBN(0xb540a21d, 0x77c8cbac), + TOBN(0x9bbfe435, 0x01918e42), TOBN(0xfffa707a, 0x56c3614e), + TOBN(0x0ce4e3f1, 0xd4e353b7), TOBN(0x062d8a14, 0xef46b0a0), + TOBN(0x6408d5ab, 0x574b73fd), TOBN(0xbc41d1c9, 0xd3273ffd), + TOBN(0x3538e1e7, 0x6be77800), TOBN(0x71fe8b37, 0xc5655031), + TOBN(0x1cd91621, 0x6b9b331a), TOBN(0xad825d0b, 0xbb388f73), + TOBN(0x56c2e05b, 0x1cb76219), TOBN(0x0ec0bf91, 0x71567e7e), + TOBN(0xe7076f86, 0x61c4c910), TOBN(0xd67b085b, 0xbabc04d9), + TOBN(0x9fb90459, 0x5e93a96a), TOBN(0x7526c1ea, 0xfbdc249a), + TOBN(0x0d44d367, 0xecdd0bb7), TOBN(0x95399917, 0x9dc0d695), + TOBN(0x61360ee9, 0x9e240d18), TOBN(0x057cdcac, 0xb4b94466), + TOBN(0xe7667cd1, 0x2fe5325c), TOBN(0x1fa297b5, 0x21974e3b), + TOBN(0xfa4081e7, 0xdb083d76), TOBN(0x31993be6, 0xf206bd15), + TOBN(0x8949269b, 0x14c19f8c), TOBN(0x21468d72, 0xa9d92357), + TOBN(0x2ccbc583, 0xa4c506ec), TOBN(0x957ed188, 0xd1acfe97), + TOBN(0x8baed833, 0x12f1aea2), TOBN(0xef2a6cb4, 0x8325362d), + TOBN(0x130dde42, 0x8e195c43), TOBN(0xc842025a, 0x0e6050c6), + TOBN(0x2da972a7, 0x08686a5d), TOBN(0xb52999a1, 0xe508b4a8), + TOBN(0xd9f090b9, 0x10a5a8bd), TOBN(0xca91d249, 0x096864da), + TOBN(0x8e6a93be, 0x3f67dbc1), TOBN(0xacae6fba, 0xf5f4764c), + TOBN(0x1563c6e0, 0xd21411a0), TOBN(0x28fa787f, 0xda0a4ad8), + TOBN(0xd524491c, 0x908c8030), TOBN(0x1257ba0e, 0x4c795f07), + TOBN(0x83f49167, 0xceca9754), TOBN(0x426d2cf6, 0x4b7939a0), + TOBN(0x2555e355, 0x723fd0bf), TOBN(0xa96e6d06, 0xc4f144e2), + TOBN(0x4768a8dd, 0x87880e61), TOBN(0x15543815, 0xe508e4d5), + TOBN(0x09d7e772, 0xb1b65e15), TOBN(0x63439dd6, 0xac302fa0), + TOBN(0xb93f802f, 0xc14e35c2), TOBN(0x71735b7c, 0x4341333c), + TOBN(0x03a25104, 0x16d4f362), TOBN(0x3f4d069b, 0xbf433c8e), + TOBN(0x0d83ae01, 0xf78f5a7c), TOBN(0x50a8ffbe, 0x7c4eed07), + TOBN(0xc74f8906, 0x76e10f83), TOBN(0x7d080966, 0x9ddaf8e1), + TOBN(0xb11df8e1, 0x698e04cc), TOBN(0x877be203, 0x169005c8), + TOBN(0x32749e8c, 0x4f3c6179), TOBN(0x2dbc9d0a, 0x7853fc05), + TOBN(0x187d4f93, 0x9454d937), TOBN(0xe682ce9d, 0xb4800e1b), + TOBN(0xa9129ad8, 0x165e68e8), TOBN(0x0fe29735, 0xbe7f785b), + TOBN(0x5303f40c, 0x5b9e02b7), TOBN(0xa37c9692, 0x35ee04e8), + TOBN(0x5f46cc20, 0x34d6632b), TOBN(0x55ef72b2, 0x96ac545b), + TOBN(0xabec5c1f, 0x7b91b062), TOBN(0x0a79e1c7, 0xbb33e821), + TOBN(0xbb04b428, 0x3a9f4117), TOBN(0x0de1f28f, 0xfd2a475a), + TOBN(0x31019ccf, 0x3a4434b4), TOBN(0xa3458111, 0x1a7954dc), + TOBN(0xa9dac80d, 0xe34972a7), TOBN(0xb043d054, 0x74f6b8dd), + TOBN(0x021c319e, 0x11137b1a), TOBN(0x00a754ce, 0xed5cc03f), + TOBN(0x0aa2c794, 0xcbea5ad4), TOBN(0x093e67f4, 0x70c015b6), + TOBN(0x72cdfee9, 0xc97e3f6b), TOBN(0xc10bcab4, 0xb6da7461), + TOBN(0x3b02d2fc, 0xb59806b9), TOBN(0x85185e89, 0xa1de6f47), + TOBN(0x39e6931f, 0x0eb6c4d4), TOBN(0x4d4440bd, 0xd4fa5b04), + TOBN(0x5418786e, 0x34be7eb8), TOBN(0x6380e521, 0x9d7259bc), + TOBN(0x20ac0351, 0xd598d710), TOBN(0x272c4166, 0xcb3a4da4), + TOBN(0xdb82fe1a, 0xca71de1f), TOBN(0x746e79f2, 0xd8f54b0f), + TOBN(0x6e7fc736, 0x4b573e9b), TOBN(0x75d03f46, 0xfd4b5040), + TOBN(0x5c1cc36d, 0x0b98d87b), TOBN(0x513ba3f1, 0x1f472da1), + TOBN(0x79d0af26, 0xabb177dd), TOBN(0xf82ab568, 0x7891d564), + TOBN(0x2b6768a9, 0x72232173), TOBN(0xefbb3bb0, 0x8c1f6619), + TOBN(0xb29c11db, 0xa6d18358), TOBN(0x519e2797, 0xb0916d3a), + TOBN(0xd4dc18f0, 0x9188e290), TOBN(0x648e86e3, 0x98b0ca7f), + TOBN(0x859d3145, 0x983c38b5), TOBN(0xb14f176c, 0x637abc8b), + TOBN(0x2793fb9d, 0xcaff7be6), TOBN(0xebe5a55f, 0x35a66a5a), + TOBN(0x7cec1dcd, 0x9f87dc59), TOBN(0x7c595cd3, 0xfbdbf560), + TOBN(0x5b543b22, 0x26eb3257), TOBN(0x69080646, 0xc4c935fd), + TOBN(0x7f2e4403, 0x81e9ede3), TOBN(0x243c3894, 0xcaf6df0a), + TOBN(0x7c605bb1, 0x1c073b11), TOBN(0xcd06a541, 0xba6a4a62), + TOBN(0x29168949, 0x49d4e2e5), TOBN(0x33649d07, 0x4af66880), + TOBN(0xbfc0c885, 0xe9a85035), TOBN(0xb4e52113, 0xfc410f4b), + TOBN(0xdca3b706, 0x78a6513b), TOBN(0x92ea4a2a, 0x9edb1943), + TOBN(0x02642216, 0xdb6e2dd8), TOBN(0x9b45d0b4, 0x9fd57894), + TOBN(0x114e70db, 0xc69d11ae), TOBN(0x1477dd19, 0x4c57595f), + TOBN(0xbc2208b4, 0xec77c272), TOBN(0x95c5b4d7, 0xdb68f59c), + TOBN(0xb8c4fc63, 0x42e532b7), TOBN(0x386ba422, 0x9ae35290), + TOBN(0xfb5dda42, 0xd201ecbc), TOBN(0x2353dc8b, 0xa0e38fd6), + TOBN(0x9a0b85ea, 0x68f7e978), TOBN(0x96ec5682, 0x2ad6d11f), + TOBN(0x5e279d6c, 0xe5f6886d), TOBN(0xd3fe03cd, 0x3cb1914d), + TOBN(0xfe541fa4, 0x7ea67c77), TOBN(0x952bd2af, 0xe3ea810c), + TOBN(0x791fef56, 0x8d01d374), TOBN(0xa3a1c621, 0x0f11336e), + TOBN(0x5ad0d5a9, 0xc7ec6d79), TOBN(0xff7038af, 0x3225c342), + TOBN(0x003c6689, 0xbc69601b), TOBN(0x25059bc7, 0x45e8747d), + TOBN(0xfa4965b2, 0xf2086fbf), TOBN(0xf6840ea6, 0x86916078), + TOBN(0xd7ac7620, 0x70081d6c), TOBN(0xe600da31, 0xb5328645), + TOBN(0x01916f63, 0x529b8a80), TOBN(0xe80e4858, 0x2d7d6f3e), + TOBN(0x29eb0fe8, 0xd664ca7c), TOBN(0xf017637b, 0xe7b43b0c), + TOBN(0x9a75c806, 0x76cb2566), TOBN(0x8f76acb1, 0xb24892d9), + TOBN(0x7ae7b9cc, 0x1f08fe45), TOBN(0x19ef7329, 0x6a4907d8), + TOBN(0x2db4ab71, 0x5f228bf0), TOBN(0xf3cdea39, 0x817032d7), + TOBN(0x0b1f482e, 0xdcabe3c0), TOBN(0x3baf76b4, 0xbb86325c), + TOBN(0xd49065e0, 0x10089465), TOBN(0x3bab5d29, 0x8e77c596), + TOBN(0x7636c3a6, 0x193dbd95), TOBN(0xdef5d294, 0xb246e499), + TOBN(0xb22c58b9, 0x286b2475), TOBN(0xa0b93939, 0xcd80862b), + TOBN(0x3002c83a, 0xf0992388), TOBN(0x6de01f9b, 0xeacbe14c), + TOBN(0x6aac688e, 0xadd70482), TOBN(0x708de92a, 0x7b4a4e8a), + TOBN(0x75b6dd73, 0x758a6eef), TOBN(0xea4bf352, 0x725b3c43), + TOBN(0x10041f2c, 0x87912868), TOBN(0xb1b1be95, 0xef09297a), + TOBN(0x19ae23c5, 0xa9f3860a), TOBN(0xc4f0f839, 0x515dcf4b), + TOBN(0x3c7ecca3, 0x97f6306a), TOBN(0x744c44ae, 0x68a3a4b0), + TOBN(0x69cd13a0, 0xb3a1d8a2), TOBN(0x7cad0a1e, 0x5256b578), + TOBN(0xea653fcd, 0x33791d9e), TOBN(0x9cc2a05d, 0x74b2e05f), + TOBN(0x73b391dc, 0xfd7affa2), TOBN(0xddb7091e, 0xb6b05442), + TOBN(0xc71e27bf, 0x8538a5c6), TOBN(0x195c63dd, 0x89abff17), + TOBN(0xfd315285, 0x1b71e3da), TOBN(0x9cbdfda7, 0xfa680fa0), + TOBN(0x9db876ca, 0x849d7eab), TOBN(0xebe2764b, 0x3c273271), + TOBN(0x663357e3, 0xf208dcea), TOBN(0x8c5bd833, 0x565b1b70), + TOBN(0xccc3b4f5, 0x9837fc0d), TOBN(0x9b641ba8, 0xa79cf00f), + TOBN(0x7428243d, 0xdfdf3990), TOBN(0x83a594c4, 0x020786b1), + TOBN(0xb712451a, 0x526c4502), TOBN(0x9d39438e, 0x6adb3f93), + TOBN(0xfdb261e3, 0xe9ff0ccd), TOBN(0x80344e3c, 0xe07af4c3), + TOBN(0x75900d7c, 0x2fa4f126), TOBN(0x08a3b865, 0x5c99a232), + TOBN(0x2478b6bf, 0xdb25e0c3), TOBN(0x482cc2c2, 0x71db2edf), + TOBN(0x37df7e64, 0x5f321bb8), TOBN(0x8a93821b, 0x9a8005b4), + TOBN(0x3fa2f10c, 0xcc8c1958), TOBN(0x0d332218, 0x2c269d0a), + TOBN(0x20ab8119, 0xe246b0e6), TOBN(0xb39781e4, 0xd349fd17), + TOBN(0xd293231e, 0xb31aa100), TOBN(0x4b779c97, 0xbb032168), + TOBN(0x4b3f19e1, 0xc8470500), TOBN(0x45b7efe9, 0x0c4c869d), + TOBN(0xdb84f38a, 0xa1a6bbcc), TOBN(0x3b59cb15, 0xb2fddbc1), + TOBN(0xba5514df, 0x3fd165e8), TOBN(0x499fd6a9, 0x061f8811), + TOBN(0x72cd1fe0, 0xbfef9f00), TOBN(0x120a4bb9, 0x79ad7e8a), + TOBN(0xf2ffd095, 0x5f4a5ac5), TOBN(0xcfd174f1, 0x95a7a2f0), + TOBN(0xd42301ba, 0x9d17baf1), TOBN(0xd2fa487a, 0x77f22089), + TOBN(0x9cb09efe, 0xb1dc77e1), TOBN(0xe9566939, 0x21c99682), + TOBN(0x8c546901, 0x6c6067bb), TOBN(0xfd378574, 0x61c24456), + TOBN(0x2b6a6cbe, 0x81796b33), TOBN(0x62d550f6, 0x58e87f8b), + TOBN(0x1b763e1c, 0x7f1b01b4), TOBN(0x4b93cfea, 0x1b1b5e12), + TOBN(0xb9345238, 0x1d531696), TOBN(0x57201c00, 0x88cdde69), + TOBN(0xdde92251, 0x9a86afc7), TOBN(0xe3043895, 0xbd35cea8), + TOBN(0x7608c1e1, 0x8555970d), TOBN(0x8267dfa9, 0x2535935e), + TOBN(0xd4c60a57, 0x322ea38b), TOBN(0xe0bf7977, 0x804ef8b5), + TOBN(0x1a0dab28, 0xc06fece4), TOBN(0xd405991e, 0x94e7b49d), + TOBN(0xc542b6d2, 0x706dab28), TOBN(0xcb228da3, 0xa91618fb), + TOBN(0x224e4164, 0x107d1cea), TOBN(0xeb9fdab3, 0xd0f5d8f1), + TOBN(0xc02ba386, 0x0d6e41cd), TOBN(0x676a72c5, 0x9b1f7146), + TOBN(0xffd6dd98, 0x4d6cb00b), TOBN(0xcef9c5ca, 0xde2e8d7c), + TOBN(0xa1bbf5d7, 0x641c7936), TOBN(0x1b95b230, 0xee8f772e), + TOBN(0xf765a92e, 0xe8ac25b1), TOBN(0xceb04cfc, 0x3a18b7c6), + TOBN(0x27944cef, 0x0acc8966), TOBN(0xcbb3c957, 0x434c1004), + TOBN(0x9c9971a1, 0xa43ff93c), TOBN(0x5bc2db17, 0xa1e358a9), + TOBN(0x45b4862e, 0xa8d9bc82), TOBN(0x70ebfbfb, 0x2201e052), + TOBN(0xafdf64c7, 0x92871591), TOBN(0xea5bcae6, 0xb42d0219), + TOBN(0xde536c55, 0x2ad8f03c), TOBN(0xcd6c3f4d, 0xa76aa33c), + TOBN(0xbeb5f623, 0x0bca6de3), TOBN(0xdd20dd99, 0xb1e706fd), + TOBN(0x90b3ff9d, 0xac9059d4), TOBN(0x2d7b2902, 0x7ccccc4e), + TOBN(0x8a090a59, 0xce98840f), TOBN(0xa5d947e0, 0x8410680a), + TOBN(0x49ae346a, 0x923379a5), TOBN(0x7dbc84f9, 0xb28a3156), + TOBN(0xfd40d916, 0x54a1aff2), TOBN(0xabf318ba, 0x3a78fb9b), + TOBN(0x50152ed8, 0x3029f95e), TOBN(0x9fc1dd77, 0xc58ad7fa), + TOBN(0x5fa57915, 0x13595c17), TOBN(0xb9504668, 0x8f62b3a9), + TOBN(0x907b5b24, 0xff3055b0), TOBN(0x2e995e35, 0x9a84f125), + TOBN(0x87dacf69, 0x7e9bbcfb), TOBN(0x95d0c1d6, 0xe86d96e3), + TOBN(0x65726e3c, 0x2d95a75c), TOBN(0x2c3c9001, 0xacd27f21), + TOBN(0x1deab561, 0x6c973f57), TOBN(0x108b7e2c, 0xa5221643), + TOBN(0x5fee9859, 0xc4ef79d4), TOBN(0xbd62b88a, 0x40d4b8c6), + TOBN(0xb4dd29c4, 0x197c75d6), TOBN(0x266a6df2, 0xb7076feb), + TOBN(0x9512d0ea, 0x4bf2df11), TOBN(0x1320c24f, 0x6b0cc9ec), + TOBN(0x6bb1e0e1, 0x01a59596), TOBN(0x8317c5bb, 0xeff9aaac), + TOBN(0x65bb405e, 0x385aa6c9), TOBN(0x613439c1, 0x8f07988f), + TOBN(0xd730049f, 0x16a66e91), TOBN(0xe97f2820, 0xfa1b0e0d), + TOBN(0x4131e003, 0x304c28ea), TOBN(0x820ab732, 0x526bac62), + TOBN(0xb2ac9ef9, 0x28714423), TOBN(0x54ecfffa, 0xadb10cb2), + TOBN(0x8781476e, 0xf886a4cc), TOBN(0x4b2c87b5, 0xdb2f8d49), + TOBN(0xe857cd20, 0x0a44295d), TOBN(0x707d7d21, 0x58c6b044), + TOBN(0xae8521f9, 0xf596757c), TOBN(0x87448f03, 0x67b2b714), + TOBN(0x13a9bc45, 0x5ebcd58d), TOBN(0x79bcced9, 0x9122d3c1), + TOBN(0x3c644247, 0x9e076642), TOBN(0x0cf22778, 0x2df4767d), + TOBN(0x5e61aee4, 0x71d444b6), TOBN(0x211236bf, 0xc5084a1d), + TOBN(0x7e15bc9a, 0x4fd3eaf6), TOBN(0x68df2c34, 0xab622bf5), + TOBN(0x9e674f0f, 0x59bf4f36), TOBN(0xf883669b, 0xd7f34d73), + TOBN(0xc48ac1b8, 0x31497b1d), TOBN(0x323b925d, 0x5106703b), + TOBN(0x22156f42, 0x74082008), TOBN(0xeffc521a, 0xc8482bcb), + TOBN(0x5c6831bf, 0x12173479), TOBN(0xcaa2528f, 0xc4739490), + TOBN(0x84d2102a, 0x8f1b3c4d), TOBN(0xcf64dfc1, 0x2d9bec0d), + TOBN(0x433febad, 0x78a546ef), TOBN(0x1f621ec3, 0x7b73cef1), + TOBN(0x6aecd627, 0x37338615), TOBN(0x162082ab, 0x01d8edf6), + TOBN(0x833a8119, 0x19e86b66), TOBN(0x6023a251, 0xd299b5db), + TOBN(0xf5bb0c3a, 0xbbf04b89), TOBN(0x6735eb69, 0xae749a44), + TOBN(0xd0e058c5, 0x4713de3b), TOBN(0xfdf2593e, 0x2c3d4ccd), + TOBN(0x1b8f414e, 0xfdd23667), TOBN(0xdd52aaca, 0xfa2015ee), + TOBN(0x3e31b517, 0xbd9625ff), TOBN(0x5ec9322d, 0x8db5918c), + TOBN(0xbc73ac85, 0xa96f5294), TOBN(0x82aa5bf3, 0x61a0666a), + TOBN(0x49755810, 0xbf08ac42), TOBN(0xd21cdfd5, 0x891cedfc), + TOBN(0x918cb57b, 0x67f8be10), TOBN(0x365d1a7c, 0x56ffa726), + TOBN(0x2435c504, 0x6532de93), TOBN(0xc0fc5e10, 0x2674cd02), + TOBN(0x6e51fcf8, 0x9cbbb142), TOBN(0x1d436e5a, 0xafc50692), + TOBN(0x766bffff, 0x3fbcae22), TOBN(0x3148c2fd, 0xfd55d3b8), + TOBN(0x52c7fdc9, 0x233222fa), TOBN(0x89ff1092, 0xe419fb6b), + TOBN(0x3cd6db99, 0x25254977), TOBN(0x2e85a161, 0x1cf12ca7), + TOBN(0xadd2547c, 0xdc810bc9), TOBN(0xea3f458f, 0x9d257c22), + TOBN(0x642c1fbe, 0x27d6b19b), TOBN(0xed07e6b5, 0x140481a6), + TOBN(0x6ada1d42, 0x86d2e0f8), TOBN(0xe5920122, 0x0e8a9fd5), + TOBN(0x02c936af, 0x708c1b49), TOBN(0x60f30fee, 0x2b4bfaff), + TOBN(0x6637ad06, 0x858e6a61), TOBN(0xce4c7767, 0x3fd374d0), + TOBN(0x39d54b2d, 0x7188defb), TOBN(0xa8c9d250, 0xf56a6b66), + TOBN(0x58fc0f5e, 0xb24fe1dc), TOBN(0x9eaf9dee, 0x6b73f24c), + TOBN(0xa90d588b, 0x33650705), TOBN(0xde5b62c5, 0xaf2ec729), + TOBN(0x5c72cfae, 0xd3c2b36e), TOBN(0x868c19d5, 0x034435da), + TOBN(0x88605f93, 0xe17ee145), TOBN(0xaa60c4ee, 0x77a5d5b1), + TOBN(0xbcf5bfd2, 0x3b60c472), TOBN(0xaf4ef13c, 0xeb1d3049), + TOBN(0x373f44fc, 0xe13895c9), TOBN(0xf29b382f, 0x0cbc9822), + TOBN(0x1bfcb853, 0x73efaef6), TOBN(0xcf56ac9c, 0xa8c96f40), + TOBN(0xd7adf109, 0x7a191e24), TOBN(0x98035f44, 0xbf8a8dc2), + TOBN(0xf40a71b9, 0x1e750c84), TOBN(0xc57f7b0c, 0x5dc6c469), + TOBN(0x49a0e79c, 0x6fbc19c1), TOBN(0x6b0f5889, 0xa48ebdb8), + TOBN(0x5d3fd084, 0xa07c4e9f), TOBN(0xc3830111, 0xab27de14), + TOBN(0x0e4929fe, 0x33e08dcc), TOBN(0xf4a5ad24, 0x40bb73a3), + TOBN(0xde86c2bf, 0x490f97ca), TOBN(0x288f09c6, 0x67a1ce18), + TOBN(0x364bb886, 0x1844478d), TOBN(0x7840fa42, 0xceedb040), + TOBN(0x1269fdd2, 0x5a631b37), TOBN(0x94761f1e, 0xa47c8b7d), + TOBN(0xfc0c2e17, 0x481c6266), TOBN(0x85e16ea2, 0x3daa5fa7), + TOBN(0xccd86033, 0x92491048), TOBN(0x0c2f6963, 0xf4d402d7), + TOBN(0x6336f7df, 0xdf6a865c), TOBN(0x0a2a463c, 0xb5c02a87), + TOBN(0xb0e29be7, 0xbf2f12ee), TOBN(0xf0a22002, 0x66bad988), + TOBN(0x27f87e03, 0x9123c1d7), TOBN(0x21669c55, 0x328a8c98), + TOBN(0x186b9803, 0x92f14529), TOBN(0xd3d056cc, 0x63954df3), + TOBN(0x2f03fd58, 0x175a46f6), TOBN(0x63e34ebe, 0x11558558), + TOBN(0xe13fedee, 0x5b80cfa5), TOBN(0xe872a120, 0xd401dbd1), + TOBN(0x52657616, 0xe8a9d667), TOBN(0xbc8da4b6, 0xe08d6693), + TOBN(0x370fb9bb, 0x1b703e75), TOBN(0x6773b186, 0xd4338363), + TOBN(0x18dad378, 0xecef7bff), TOBN(0xaac787ed, 0x995677da), + TOBN(0x4801ea8b, 0x0437164b), TOBN(0xf430ad20, 0x73fe795e), + TOBN(0xb164154d, 0x8ee5eb73), TOBN(0x0884ecd8, 0x108f7c0e), + TOBN(0x0e6ec096, 0x5f520698), TOBN(0x640631fe, 0x44f7b8d9), + TOBN(0x92fd34fc, 0xa35a68b9), TOBN(0x9c5a4b66, 0x4d40cf4e), + TOBN(0x949454bf, 0x80b6783d), TOBN(0x80e701fe, 0x3a320a10), + TOBN(0x8d1a564a, 0x1a0a39b2), TOBN(0x1436d53d, 0x320587db), + TOBN(0xf5096e6d, 0x6556c362), TOBN(0xbc23a3c0, 0xe2455d7e), + TOBN(0x3a7aee54, 0x807230f9), TOBN(0x9ba1cfa6, 0x22ae82fd), + TOBN(0x833a057a, 0x99c5d706), TOBN(0x8be85f4b, 0x842315c9), + TOBN(0xd083179a, 0x66a72f12), TOBN(0x2fc77d5d, 0xcdcc73cd), + TOBN(0x22b88a80, 0x5616ee30), TOBN(0xfb09548f, 0xe7ab1083), + TOBN(0x8ad6ab0d, 0x511270cd), TOBN(0x61f6c57a, 0x6924d9ab), + TOBN(0xa0f7bf72, 0x90aecb08), TOBN(0x849f87c9, 0x0df784a4), + TOBN(0x27c79c15, 0xcfaf1d03), TOBN(0xbbf9f675, 0xc463face), + TOBN(0x91502c65, 0x765ba543), TOBN(0x18ce3cac, 0x42ea60dd), + TOBN(0xe5cee6ac, 0x6e43ecb3), TOBN(0x63e4e910, 0x68f2aeeb), + TOBN(0x26234fa3, 0xc85932ee), TOBN(0x96883e8b, 0x4c90c44d), + TOBN(0x29b9e738, 0xa18a50f6), TOBN(0xbfc62b2a, 0x3f0420df), + TOBN(0xd22a7d90, 0x6d3e1fa9), TOBN(0x17115618, 0xfe05b8a3), + TOBN(0x2a0c9926, 0xbb2b9c01), TOBN(0xc739fcc6, 0xe07e76a2), + TOBN(0x540e9157, 0x165e439a), TOBN(0x06353a62, 0x6a9063d8), + TOBN(0x84d95594, 0x61e927a3), TOBN(0x013b9b26, 0xe2e0be7f), + TOBN(0x4feaec3b, 0x973497f1), TOBN(0x15c0f94e, 0x093ebc2d), + TOBN(0x6af5f227, 0x33af0583), TOBN(0x0c2af206, 0xc61f3340), + TOBN(0xd25dbdf1, 0x4457397c), TOBN(0x2e8ed017, 0xcabcbae0), + TOBN(0xe3010938, 0xc2815306), TOBN(0xbaa99337, 0xe8c6cd68), + TOBN(0x08513182, 0x3b0ec7de), TOBN(0x1e1b822b, 0x58df05df), + TOBN(0x5c14842f, 0xa5c3b683), TOBN(0x98fe977e, 0x3eba34ce), + TOBN(0xfd2316c2, 0x0d5e8873), TOBN(0xe48d839a, 0xbd0d427d), + TOBN(0x495b2218, 0x623fc961), TOBN(0x24ee56e7, 0xb46fba5e), + TOBN(0x9184a55b, 0x91e4de58), TOBN(0xa7488ca5, 0xdfdea288), + TOBN(0xa723862e, 0xa8dcc943), TOBN(0x92d762b2, 0x849dc0fc), + TOBN(0x3c444a12, 0x091ff4a9), TOBN(0x581113fa, 0x0cada274), + TOBN(0xb9de0a45, 0x30d8eae2), TOBN(0x5e0fcd85, 0xdf6b41ea), + TOBN(0x6233ea68, 0xc094dbb5), TOBN(0xb77d062e, 0xd968d410), + TOBN(0x3e719bbc, 0x58b3002d), TOBN(0x68e7dd3d, 0x3dc49d58), + TOBN(0x8d825740, 0x013a5e58), TOBN(0x21311747, 0x3c9e3c1b), + TOBN(0x0cb0a2a7, 0x7c99b6ab), TOBN(0x5c48a3b3, 0xc2f888f2)} + , + {TOBN(0xc7913e91, 0x991724f3), TOBN(0x5eda799c, 0x39cbd686), + TOBN(0xddb595c7, 0x63d4fc1e), TOBN(0x6b63b80b, 0xac4fed54), + TOBN(0x6ea0fc69, 0x7e5fb516), TOBN(0x737708ba, 0xd0f1c964), + TOBN(0x9628745f, 0x11a92ca5), TOBN(0x61f37958, 0x9a86967a), + TOBN(0x9af39b2c, 0xaa665072), TOBN(0x78322fa4, 0xefd324ef), + TOBN(0x3d153394, 0xc327bd31), TOBN(0x81d5f271, 0x3129dab0), + TOBN(0xc72e0c42, 0xf48027f5), TOBN(0xaa40cdbc, 0x8536e717), + TOBN(0xf45a657a, 0x2d369d0f), TOBN(0xb03bbfc4, 0xea7f74e6), + TOBN(0x46a8c418, 0x0d738ded), TOBN(0x6f1a5bb0, 0xe0de5729), + TOBN(0xf10230b9, 0x8ba81675), TOBN(0x32c6f30c, 0x112b33d4), + TOBN(0x7559129d, 0xd8fffb62), TOBN(0x6a281b47, 0xb459bf05), + TOBN(0x77c1bd3a, 0xfa3b6776), TOBN(0x0709b380, 0x7829973a), + TOBN(0x8c26b232, 0xa3326505), TOBN(0x38d69272, 0xee1d41bf), + TOBN(0x0459453e, 0xffe32afa), TOBN(0xce8143ad, 0x7cb3ea87), + TOBN(0x932ec1fa, 0x7e6ab666), TOBN(0x6cd2d230, 0x22286264), + TOBN(0x459a46fe, 0x6736f8ed), TOBN(0x50bf0d00, 0x9eca85bb), + TOBN(0x0b825852, 0x877a21ec), TOBN(0x300414a7, 0x0f537a94), + TOBN(0x3f1cba40, 0x21a9a6a2), TOBN(0x50824eee, 0x76943c00), + TOBN(0xa0dbfcec, 0xf83cba5d), TOBN(0xf9538148, 0x93b4f3c0), + TOBN(0x61744162, 0x48f24dd7), TOBN(0x5322d64d, 0xe4fb09dd), + TOBN(0x57447384, 0x3d9325f3), TOBN(0xa9bef2d0, 0xf371cb84), + TOBN(0x77d2188b, 0xa61e36c5), TOBN(0xbbd6a7d7, 0xc602df72), + TOBN(0xba3aa902, 0x8f61bc0b), TOBN(0xf49085ed, 0x6ed0b6a1), + TOBN(0x8bc625d6, 0xae6e8298), TOBN(0x832b0b1d, 0xa2e9c01d), + TOBN(0xa337c447, 0xf1f0ced1), TOBN(0x800cc793, 0x9492dd2b), + TOBN(0x4b93151d, 0xbea08efa), TOBN(0x820cf3f8, 0xde0a741e), + TOBN(0xff1982dc, 0x1c0f7d13), TOBN(0xef921960, 0x84dde6ca), + TOBN(0x1ad7d972, 0x45f96ee3), TOBN(0x319c8dbe, 0x29dea0c7), + TOBN(0xd3ea3871, 0x7b82b99b), TOBN(0x75922d4d, 0x470eb624), + TOBN(0x8f66ec54, 0x3b95d466), TOBN(0x66e673cc, 0xbee1e346), + TOBN(0x6afe67c4, 0xb5f2b89a), TOBN(0x3de9c1e6, 0x290e5cd3), + TOBN(0x8c278bb6, 0x310a2ada), TOBN(0x420fa384, 0x0bdb323b), + TOBN(0x0ae1d63b, 0x0eb919b0), TOBN(0xd74ee51d, 0xa74b9620), + TOBN(0x395458d0, 0xa674290c), TOBN(0x324c930f, 0x4620a510), + TOBN(0x2d1f4d19, 0xfbac27d4), TOBN(0x4086e8ca, 0x9bedeeac), + TOBN(0x0cdd211b, 0x9b679ab8), TOBN(0x5970167d, 0x7090fec4), + TOBN(0x3420f2c9, 0xfaf1fc63), TOBN(0x616d333a, 0x328c8bb4), + TOBN(0x7d65364c, 0x57f1fe4a), TOBN(0x9343e877, 0x55e5c73a), + TOBN(0x5795176b, 0xe970e78c), TOBN(0xa36ccebf, 0x60533627), + TOBN(0xfc7c7380, 0x09cdfc1b), TOBN(0xb39a2afe, 0xb3fec326), + TOBN(0xb7ff1ba1, 0x6224408a), TOBN(0xcc856e92, 0x247cfc5e), + TOBN(0x01f102e7, 0xc18bc493), TOBN(0x4613ab74, 0x2091c727), + TOBN(0xaa25e89c, 0xc420bf2b), TOBN(0x00a53176, 0x90337ec2), + TOBN(0xd2be9f43, 0x7d025fc7), TOBN(0x3316fb85, 0x6e6fe3dc), + TOBN(0x27520af5, 0x9ac50814), TOBN(0xfdf95e78, 0x9a8e4223), + TOBN(0xb7e7df2a, 0x56bec5a0), TOBN(0xf7022f7d, 0xdf159e5d), + TOBN(0x93eeeab1, 0xcac1fe8f), TOBN(0x8040188c, 0x37451168), + TOBN(0x7ee8aa8a, 0xd967dce6), TOBN(0xfa0e79e7, 0x3abc9299), + TOBN(0x67332cfc, 0x2064cfd1), TOBN(0x339c31de, 0xb0651934), + TOBN(0x719b28d5, 0x2a3bcbea), TOBN(0xee74c82b, 0x9d6ae5c6), + TOBN(0x0927d05e, 0xbaf28ee6), TOBN(0x82cecf2c, 0x9d719028), + TOBN(0x0b0d353e, 0xddb30289), TOBN(0xfe4bb977, 0xfddb2e29), + TOBN(0xbb5bb990, 0x640bfd9e), TOBN(0xd226e277, 0x82f62108), + TOBN(0x4bf00985, 0x02ffdd56), TOBN(0x7756758a, 0x2ca1b1b5), + TOBN(0xc32b62a3, 0x5285fe91), TOBN(0xedbc546a, 0x8c9cd140), + TOBN(0x1e47a013, 0xaf5cb008), TOBN(0xbca7e720, 0x073ce8f2), + TOBN(0xe10b2ab8, 0x17a91cae), TOBN(0xb89aab65, 0x08e27f63), + TOBN(0x7b3074a7, 0xdba3ddf9), TOBN(0x1c20ce09, 0x330c2972), + TOBN(0x6b9917b4, 0x5fcf7e33), TOBN(0xe6793743, 0x945ceb42), + TOBN(0x18fc2215, 0x5c633d19), TOBN(0xad1adb3c, 0xc7485474), + TOBN(0x646f9679, 0x6424c49b), TOBN(0xf888dfe8, 0x67c241c9), + TOBN(0xe12d4b93, 0x24f68b49), TOBN(0x9a6b62d8, 0xa571df20), + TOBN(0x81b4b26d, 0x179483cb), TOBN(0x666f9632, 0x9511fae2), + TOBN(0xd281b3e4, 0xd53aa51f), TOBN(0x7f96a765, 0x7f3dbd16), + TOBN(0xa7f8b5bf, 0x074a30ce), TOBN(0xd7f52107, 0x005a32e6), + TOBN(0x6f9e0907, 0x50237ed4), TOBN(0x2f21da47, 0x8096fa2b), + TOBN(0xf3e19cb4, 0xeec863a0), TOBN(0xd18f77fd, 0x9527620a), + TOBN(0x9505c81c, 0x407c1cf8), TOBN(0x9998db4e, 0x1b6ec284), + TOBN(0x7e3389e5, 0xc247d44d), TOBN(0x12507141, 0x3f4f3d80), + TOBN(0xd4ba0110, 0x4a78a6c7), TOBN(0x312874a0, 0x767720be), + TOBN(0xded059a6, 0x75944370), TOBN(0xd6123d90, 0x3b2c0bdd), + TOBN(0xa56b717b, 0x51c108e3), TOBN(0x9bb7940e, 0x070623e9), + TOBN(0x794e2d59, 0x84ac066c), TOBN(0xf5954a92, 0xe68c69a0), + TOBN(0x28c52458, 0x4fd99dcc), TOBN(0x60e639fc, 0xb1012517), + TOBN(0xc2e60125, 0x7de79248), TOBN(0xe9ef6404, 0xf12fc6d7), + TOBN(0x4c4f2808, 0x2a3b5d32), TOBN(0x865ad32e, 0xc768eb8a), + TOBN(0xac02331b, 0x13fb70b6), TOBN(0x037b44c1, 0x95599b27), + TOBN(0x1a860fc4, 0x60bd082c), TOBN(0xa2e25745, 0xc980cd01), + TOBN(0xee3387a8, 0x1da0263e), TOBN(0x931bfb95, 0x2d10f3d6), + TOBN(0x5b687270, 0xa1f24a32), TOBN(0xf140e65d, 0xca494b86), + TOBN(0x4f4ddf91, 0xb2f1ac7a), TOBN(0xf99eaabb, 0x760fee27), + TOBN(0x57f4008a, 0x49c228e5), TOBN(0x090be440, 0x1cf713bb), + TOBN(0xac91fbe4, 0x5004f022), TOBN(0xd838c2c2, 0x569e1af6), + TOBN(0xd6c7d20b, 0x0f1daaa5), TOBN(0xaa063ac1, 0x1bbb02c0), + TOBN(0x0938a422, 0x59558a78), TOBN(0x5343c669, 0x8435da2f), + TOBN(0x96f67b18, 0x034410dc), TOBN(0x7cc1e424, 0x84510804), + TOBN(0x86a1543f, 0x16dfbb7d), TOBN(0x921fa942, 0x5b5bd592), + TOBN(0x9dcccb6e, 0xb33dd03c), TOBN(0x8581ddd9, 0xb843f51e), + TOBN(0x54935fcb, 0x81d73c9e), TOBN(0x6d07e979, 0x0a5e97ab), + TOBN(0x4dc7b30a, 0xcf3a6bab), TOBN(0x147ab1f3, 0x170bee11), + TOBN(0x0aaf8e3d, 0x9fafdee4), TOBN(0xfab3dbcb, 0x538a8b95), + TOBN(0x405df4b3, 0x6ef13871), TOBN(0xf1f4e9cb, 0x088d5a49), + TOBN(0x9bcd24d3, 0x66b33f1d), TOBN(0x3b97b820, 0x5ce445c0), + TOBN(0xe2926549, 0xba93ff61), TOBN(0xd9c341ce, 0x4dafe616), + TOBN(0xfb30a76e, 0x16efb6f3), TOBN(0xdf24b8ca, 0x605b953c), + TOBN(0x8bd52afe, 0xc2fffb9f), TOBN(0xbbac5ff7, 0xe19d0b96), + TOBN(0x43c01b87, 0x459afccd), TOBN(0x6bd45143, 0xb7432652), + TOBN(0x84734530, 0x55b5d78e), TOBN(0x81088fdb, 0x1554ba7d), + TOBN(0xada0a52c, 0x1e269375), TOBN(0xf9f037c4, 0x2dc5ec10), + TOBN(0xc0660607, 0x94bfbc11), TOBN(0xc0a630bb, 0xc9c40d2f), + TOBN(0x5efc797e, 0xab64c31e), TOBN(0xffdb1dab, 0x74507144), + TOBN(0xf6124287, 0x1ca6790c), TOBN(0xe9609d81, 0xe69bf1bf), + TOBN(0xdb898595, 0x00d24fc9), TOBN(0x9c750333, 0xe51fb417), + TOBN(0x51830a91, 0xfef7bbde), TOBN(0x0ce67dc8, 0x945f585c), + TOBN(0x9a730ed4, 0x4763eb50), TOBN(0x24a0e221, 0xc1ab0d66), + TOBN(0x643b6393, 0x648748f3), TOBN(0x1982daa1, 0x6d3c6291), + TOBN(0x6f00a9f7, 0x8bbc5549), TOBN(0x7a1783e1, 0x7f36384e), + TOBN(0xe8346323, 0xde977f50), TOBN(0x91ab688d, 0xb245502a), + TOBN(0x331ab6b5, 0x6d0bdd66), TOBN(0x0a6ef32e, 0x64b71229), + TOBN(0x1028150e, 0xfe7c352f), TOBN(0x27e04350, 0xce7b39d3), + TOBN(0x2a3c8acd, 0xc1070c82), TOBN(0xfb2034d3, 0x80c9feef), + TOBN(0x2d729621, 0x709f3729), TOBN(0x8df290bf, 0x62cb4549), + TOBN(0x02f99f33, 0xfc2e4326), TOBN(0x3b30076d, 0x5eddf032), + TOBN(0xbb21f8cf, 0x0c652fb5), TOBN(0x314fb49e, 0xed91cf7b), + TOBN(0xa013eca5, 0x2f700750), TOBN(0x2b9e3c23, 0x712a4575), + TOBN(0xe5355557, 0xaf30fbb0), TOBN(0x1ada3516, 0x7c77e771), + TOBN(0x45f6ecb2, 0x7b135670), TOBN(0xe85d19df, 0x7cfc202e), + TOBN(0x0f1b50c7, 0x58d1be9f), TOBN(0x5ebf2c0a, 0xead2e344), + TOBN(0x1531fe4e, 0xabc199c9), TOBN(0xc7032592, 0x56bab0ae), + TOBN(0x16ab2e48, 0x6c1fec54), TOBN(0x0f87fda8, 0x04280188), + TOBN(0xdc9f46fc, 0x609e4a74), TOBN(0x2a44a143, 0xba667f91), + TOBN(0xbc3d8b95, 0xb4d83436), TOBN(0xa01e4bd0, 0xc7bd2958), + TOBN(0x7b182932, 0x73483c90), TOBN(0xa79c6aa1, 0xa7c7b598), + TOBN(0xbf3983c6, 0xeaaac07e), TOBN(0x8f18181e, 0x96e0d4e6), + TOBN(0x8553d37c, 0x051af62b), TOBN(0xe9a998eb, 0x0bf94496), + TOBN(0xe0844f9f, 0xb0d59aa1), TOBN(0x983fd558, 0xe6afb813), + TOBN(0x9670c0ca, 0x65d69804), TOBN(0x732b22de, 0x6ea5ff2d), + TOBN(0xd7640ba9, 0x5fd8623b), TOBN(0x9f619163, 0xa6351782), + TOBN(0x0bfc27ee, 0xacee5043), TOBN(0xae419e73, 0x2eb10f02), + TOBN(0x19c028d1, 0x8943fb05), TOBN(0x71f01cf7, 0xff13aa2a), + TOBN(0x7790737e, 0x8887a132), TOBN(0x67513309, 0x66318410), + TOBN(0x9819e8a3, 0x7ddb795e), TOBN(0xfecb8ef5, 0xdad100b2), + TOBN(0x59f74a22, 0x3021926a), TOBN(0xb7c28a49, 0x6f9b4c1c), + TOBN(0xed1a733f, 0x912ad0ab), TOBN(0x42a910af, 0x01a5659c), + TOBN(0x3842c6e0, 0x7bd68cab), TOBN(0x2b57fa38, 0x76d70ac8), + TOBN(0x8a6707a8, 0x3c53aaeb), TOBN(0x62c1c510, 0x65b4db18), + TOBN(0x8de2c1fb, 0xb2d09dc7), TOBN(0xc3dfed12, 0x266bd23b), + TOBN(0x927d039b, 0xd5b27db6), TOBN(0x2fb2f0f1, 0x103243da), + TOBN(0xf855a07b, 0x80be7399), TOBN(0xed9327ce, 0x1f9f27a8), + TOBN(0xa0bd99c7, 0x729bdef7), TOBN(0x2b67125e, 0x28250d88), + TOBN(0x784b26e8, 0x8670ced7), TOBN(0xe3dfe41f, 0xc31bd3b4), + TOBN(0x9e353a06, 0xbcc85cbc), TOBN(0x302e2909, 0x60178a9d), + TOBN(0x860abf11, 0xa6eac16e), TOBN(0x76447000, 0xaa2b3aac), + TOBN(0x46ff9d19, 0x850afdab), TOBN(0x35bdd6a5, 0xfdb2d4c1), + TOBN(0xe82594b0, 0x7e5c9ce9), TOBN(0x0f379e53, 0x20af346e), + TOBN(0x608b31e3, 0xbc65ad4a), TOBN(0x710c6b12, 0x267c4826), + TOBN(0x51c966f9, 0x71954cf1), TOBN(0xb1cec793, 0x0d0aa215), + TOBN(0x1f155989, 0x86bd23a8), TOBN(0xae2ff99c, 0xf9452e86), + TOBN(0xd8dd953c, 0x340ceaa2), TOBN(0x26355275, 0x2e2e9333), + TOBN(0x15d4e5f9, 0x8586f06d), TOBN(0xd6bf94a8, 0xf7cab546), + TOBN(0x33c59a0a, 0xb76a9af0), TOBN(0x52740ab3, 0xba095af7), + TOBN(0xc444de8a, 0x24389ca0), TOBN(0xcc6f9863, 0x706da0cb), + TOBN(0xb5a741a7, 0x6b2515cf), TOBN(0x71c41601, 0x9585c749), + TOBN(0x78350d4f, 0xe683de97), TOBN(0x31d61524, 0x63d0b5f5), + TOBN(0x7a0cc5e1, 0xfbce090b), TOBN(0xaac927ed, 0xfbcb2a5b), + TOBN(0xe920de49, 0x20d84c35), TOBN(0x8c06a0b6, 0x22b4de26), + TOBN(0xd34dd58b, 0xafe7ddf3), TOBN(0x55851fed, 0xc1e6e55b), + TOBN(0xd1395616, 0x960696e7), TOBN(0x940304b2, 0x5f22705f), + TOBN(0x6f43f861, 0xb0a2a860), TOBN(0xcf121282, 0x0e7cc981), + TOBN(0x12186212, 0x0ab64a96), TOBN(0x09215b9a, 0xb789383c), + TOBN(0x311eb305, 0x37387c09), TOBN(0xc5832fce, 0xf03ee760), + TOBN(0x30358f58, 0x32f7ea19), TOBN(0xe01d3c34, 0x91d53551), + TOBN(0x1ca5ee41, 0xda48ea80), TOBN(0x34e71e8e, 0xcf4fa4c1), + TOBN(0x312abd25, 0x7af1e1c7), TOBN(0xe3afcdeb, 0x2153f4a5), + TOBN(0x9d5c84d7, 0x00235e9a), TOBN(0x0308d3f4, 0x8c4c836f), + TOBN(0xc0a66b04, 0x89332de5), TOBN(0x610dd399, 0x89e566ef), + TOBN(0xf8eea460, 0xd1ac1635), TOBN(0x84cbb3fb, 0x20a2c0df), + TOBN(0x40afb488, 0xe74a48c5), TOBN(0x29738198, 0xd326b150), + TOBN(0x2a17747f, 0xa6d74081), TOBN(0x60ea4c05, 0x55a26214), + TOBN(0x53514bb4, 0x1f88c5fe), TOBN(0xedd64567, 0x7e83426c), + TOBN(0xd5d6cbec, 0x96460b25), TOBN(0xa12fd0ce, 0x68dc115e), + TOBN(0xc5bc3ed2, 0x697840ea), TOBN(0x969876a8, 0xa6331e31), + TOBN(0x60c36217, 0x472ff580), TOBN(0xf4229705, 0x4ad41393), + TOBN(0x4bd99ef0, 0xa03b8b92), TOBN(0x501c7317, 0xc144f4f6), + TOBN(0x159009b3, 0x18464945), TOBN(0x6d5e594c, 0x74c5c6be), + TOBN(0x2d587011, 0x321a3660), TOBN(0xd1e184b1, 0x3898d022), + TOBN(0x5ba04752, 0x4c6a7e04), TOBN(0x47fa1e2b, 0x45550b65), + TOBN(0x9419daf0, 0x48c0a9a5), TOBN(0x66362953, 0x7c243236), + TOBN(0xcd0744b1, 0x5cb12a88), TOBN(0x561b6f9a, 0x2b646188), + TOBN(0x599415a5, 0x66c2c0c0), TOBN(0xbe3f0859, 0x0f83f09a), + TOBN(0x9141c5be, 0xb92041b8), TOBN(0x01ae38c7, 0x26477d0d), + TOBN(0xca8b71f3, 0xd12c7a94), TOBN(0xfab5b31f, 0x765c70db), + TOBN(0x76ae7492, 0x487443e9), TOBN(0x8595a310, 0x990d1349), + TOBN(0xf8dbeda8, 0x7d460a37), TOBN(0x7f7ad082, 0x1e45a38f), + TOBN(0xed1d4db6, 0x1059705a), TOBN(0xa3dd492a, 0xe6b9c697), + TOBN(0x4b92ee3a, 0x6eb38bd5), TOBN(0xbab2609d, 0x67cc0bb7), + TOBN(0x7fc4fe89, 0x6e70ee82), TOBN(0xeff2c56e, 0x13e6b7e3), + TOBN(0x9b18959e, 0x34d26fca), TOBN(0x2517ab66, 0x889d6b45), + TOBN(0xf167b4e0, 0xbdefdd4f), TOBN(0x69958465, 0xf366e401), + TOBN(0x5aa368ab, 0xa73bbec0), TOBN(0x12148709, 0x7b240c21), + TOBN(0x378c3233, 0x18969006), TOBN(0xcb4d73ce, 0xe1fe53d1), + TOBN(0x5f50a80e, 0x130c4361), TOBN(0xd67f5951, 0x7ef5212b), + TOBN(0xf145e21e, 0x9e70c72e), TOBN(0xb2e52e29, 0x5566d2fb), + TOBN(0x44eaba4a, 0x032397f5), TOBN(0x5e56937b, 0x7e31a7de), + TOBN(0x68dcf517, 0x456c61e1), TOBN(0xbc2e954a, 0xa8b0a388), + TOBN(0xe3552fa7, 0x60a8b755), TOBN(0x03442dae, 0x73ad0cde), + TOBN(0x37ffe747, 0xceb26210), TOBN(0x983545e8, 0x787baef9), + TOBN(0x8b8c8535, 0x86a3de31), TOBN(0xc621dbcb, 0xfacd46db), + TOBN(0x82e442e9, 0x59266fbb), TOBN(0xa3514c37, 0x339d471c), + TOBN(0x3a11b771, 0x62cdad96), TOBN(0xf0cb3b3c, 0xecf9bdf0), + TOBN(0x3fcbdbce, 0x478e2135), TOBN(0x7547b5cf, 0xbda35342), + TOBN(0xa97e81f1, 0x8a677af6), TOBN(0xc8c2bf83, 0x28817987), + TOBN(0xdf07eaaf, 0x45580985), TOBN(0xc68d1f05, 0xc93b45cb), + TOBN(0x106aa2fe, 0xc77b4cac), TOBN(0x4c1d8afc, 0x04a7ae86), + TOBN(0xdb41c3fd, 0x9eb45ab2), TOBN(0x5b234b5b, 0xd4b22e74), + TOBN(0xda253dec, 0xf215958a), TOBN(0x67e0606e, 0xa04edfa0), + TOBN(0xabbbf070, 0xef751b11), TOBN(0xf352f175, 0xf6f06dce), + TOBN(0xdfc4b6af, 0x6839f6b4), TOBN(0x53ddf9a8, 0x9959848e), + TOBN(0xda49c379, 0xc21520b0), TOBN(0x90864ff0, 0xdbd5d1b6), + TOBN(0x2f055d23, 0x5f49c7f7), TOBN(0xe51e4e6a, 0xa796b2d8), + TOBN(0xc361a67f, 0x5c9dc340), TOBN(0x5ad53c37, 0xbca7c620), + TOBN(0xda1d6588, 0x32c756d0), TOBN(0xad60d911, 0x8bb67e13), + TOBN(0xd6c47bdf, 0x0eeec8c6), TOBN(0x4a27fec1, 0x078a1821), + TOBN(0x081f7415, 0xc3099524), TOBN(0x8effdf0b, 0x82cd8060), + TOBN(0xdb70ec1c, 0x65842df8), TOBN(0x8821b358, 0xd319a901), + TOBN(0x72ee56ee, 0xde42b529), TOBN(0x5bb39592, 0x236e4286), + TOBN(0xd1183316, 0xfd6f7140), TOBN(0xf9fadb5b, 0xbd8e81f7), + TOBN(0x701d5e0c, 0x5a02d962), TOBN(0xfdee4dbf, 0x1b601324), + TOBN(0xbed17407, 0x35d7620e), TOBN(0x04e3c2c3, 0xf48c0012), + TOBN(0x9ee29da7, 0x3455449a), TOBN(0x562cdef4, 0x91a836c4), + TOBN(0x8f682a5f, 0x47701097), TOBN(0x617125d8, 0xff88d0c2), + TOBN(0x948fda24, 0x57bb86dd), TOBN(0x348abb8f, 0x289f7286), + TOBN(0xeb10eab5, 0x99d94bbd), TOBN(0xd51ba28e, 0x4684d160), + TOBN(0xabe0e51c, 0x30c8f41a), TOBN(0x66588b45, 0x13254f4a), + TOBN(0x147ebf01, 0xfad097a5), TOBN(0x49883ea8, 0x610e815d), + TOBN(0xe44d60ba, 0x8a11de56), TOBN(0xa970de6e, 0x827a7a6d), + TOBN(0x2be41424, 0x5e17fc19), TOBN(0xd833c657, 0x01214057), + TOBN(0x1375813b, 0x363e723f), TOBN(0x6820bb88, 0xe6a52e9b), + TOBN(0x7e7f6970, 0xd875d56a), TOBN(0xd6a0a9ac, 0x51fbf6bf), + TOBN(0x54ba8790, 0xa3083c12), TOBN(0xebaeb23d, 0x6ae7eb64), + TOBN(0xa8685c3a, 0xb99a907a), TOBN(0xf1e74550, 0x026bf40b), + TOBN(0x7b73a027, 0xc802cd9e), TOBN(0x9a8a927c, 0x4fef4635), + TOBN(0xe1b6f60c, 0x08191224), TOBN(0xc4126ebb, 0xde4ec091), + TOBN(0xe1dff4dc, 0x4ae38d84), TOBN(0xde3f57db, 0x4f2ef985), + TOBN(0x34964337, 0xd446a1dd), TOBN(0x7bf217a0, 0x859e77f6), + TOBN(0x8ff10527, 0x8e1d13f5), TOBN(0xa304ef03, 0x74eeae27), + TOBN(0xfc6f5e47, 0xd19dfa5a), TOBN(0xdb007de3, 0x7fad982b), + TOBN(0x28205ad1, 0x613715f5), TOBN(0x251e6729, 0x7889529e), + TOBN(0x72705184, 0x1ae98e78), TOBN(0xf818537d, 0x271cac32), + TOBN(0xc8a15b7e, 0xb7f410f5), TOBN(0xc474356f, 0x81f62393), + TOBN(0x92dbdc5a, 0xc242316b), TOBN(0xabe060ac, 0xdbf4aff5), + TOBN(0x6e8c38fe, 0x909a8ec6), TOBN(0x43e514e5, 0x6116cb94), + TOBN(0x2078fa38, 0x07d784f9), TOBN(0x1161a880, 0xf4b5b357), + TOBN(0x5283ce79, 0x13adea3d), TOBN(0x0756c3e6, 0xcc6a910b), + TOBN(0x60bcfe01, 0xaaa79697), TOBN(0x04a73b29, 0x56391db1), + TOBN(0xdd8dad47, 0x189b45a0), TOBN(0xbfac0dd0, 0x48d5b8d9), + TOBN(0x34ab3af5, 0x7d3d2ec2), TOBN(0x6fa2fc2d, 0x207bd3af), + TOBN(0x9ff40092, 0x66550ded), TOBN(0x719b3e87, 0x1fd5b913), + TOBN(0xa573a496, 0x6d17fbc7), TOBN(0x0cd1a70a, 0x73d2b24e), + TOBN(0x34e2c5ca, 0xb2676937), TOBN(0xe7050b06, 0xbf669f21), + TOBN(0xfbe948b6, 0x1ede9046), TOBN(0xa0530051, 0x97662659), + TOBN(0x58cbd4ed, 0xf10124c5), TOBN(0xde2646e4, 0xdd6c06c8), + TOBN(0x332f8108, 0x8cad38c0), TOBN(0x471b7e90, 0x6bd68ae2), + TOBN(0x56ac3fb2, 0x0d8e27a3), TOBN(0xb54660db, 0x136b4b0d), + TOBN(0x123a1e11, 0xa6fd8de4), TOBN(0x44dbffea, 0xa37799ef), + TOBN(0x4540b977, 0xce6ac17c), TOBN(0x495173a8, 0xaf60acef)} + , + {TOBN(0x9ebb284d, 0x391c2a82), TOBN(0xbcdd4863, 0x158308e8), + TOBN(0x006f16ec, 0x83f1edca), TOBN(0xa13e2c37, 0x695dc6c8), + TOBN(0x2ab756f0, 0x4a057a87), TOBN(0xa8765500, 0xa6b48f98), + TOBN(0x4252face, 0x68651c44), TOBN(0xa52b540b, 0xe1765e02), + TOBN(0x4f922fc5, 0x16a0d2bb), TOBN(0x0d5cc16c, 0x1a623499), + TOBN(0x9241cf3a, 0x57c62c8b), TOBN(0x2f5e6961, 0xfd1b667f), + TOBN(0x5c15c70b, 0xf5a01797), TOBN(0x3d20b44d, 0x60956192), + TOBN(0x04911b37, 0x071fdb52), TOBN(0xf648f916, 0x8d6f0f7b), + TOBN(0x6dc1acaf, 0xe60b7cf7), TOBN(0x25860a50, 0x84a9d869), + TOBN(0x56fc6f09, 0xe7ba8ac4), TOBN(0x828c5bd0, 0x6148d29e), + TOBN(0xac6b435e, 0xdc55ae5f), TOBN(0xa527f56c, 0xc0117411), + TOBN(0x94d5045e, 0xfd24342c), TOBN(0x2c4c0a35, 0x70b67c0d), + TOBN(0x027cc8b8, 0xfac61d9a), TOBN(0x7d25e062, 0xe3c6fe8a), + TOBN(0xe08805bf, 0xe5bff503), TOBN(0x13271e6c, 0x6ff632f7), + TOBN(0x55dca6c0, 0x232f76a5), TOBN(0x8957c32d, 0x701ef426), + TOBN(0xee728bcb, 0xa10a5178), TOBN(0x5ea60411, 0xb62c5173), + TOBN(0xfc4e964e, 0xd0b8892b), TOBN(0x9ea17683, 0x9301bb74), + TOBN(0x6265c5ae, 0xfcc48626), TOBN(0xe60cf82e, 0xbb3e9102), + TOBN(0x57adf797, 0xd4df5531), TOBN(0x235b59a1, 0x8deeefe2), + TOBN(0x60adcf58, 0x3f306eb1), TOBN(0x105c2753, 0x3d09492d), + TOBN(0x4090914b, 0xb5def996), TOBN(0x1cb69c83, 0x233dd1e7), + TOBN(0xc1e9c1d3, 0x9b3d5e76), TOBN(0x1f3338ed, 0xfccf6012), + TOBN(0xb1e95d0d, 0x2f5378a8), TOBN(0xacf4c2c7, 0x2f00cd21), + TOBN(0x6e984240, 0xeb5fe290), TOBN(0xd66c038d, 0x248088ae), + TOBN(0x804d264a, 0xf94d70cf), TOBN(0xbdb802ef, 0x7314bf7e), + TOBN(0x8fb54de2, 0x4333ed02), TOBN(0x740461e0, 0x285635d9), + TOBN(0x4113b2c8, 0x365e9383), TOBN(0xea762c83, 0x3fdef652), + TOBN(0x4eec6e2e, 0x47b956c1), TOBN(0xa3d814be, 0x65620fa4), + TOBN(0x9ad5462b, 0xb4d8bc50), TOBN(0x181c0b16, 0xa9195770), + TOBN(0xebd4fe1c, 0x78412a68), TOBN(0xae0341bc, 0xc0dff48c), + TOBN(0xb6bc45cf, 0x7003e866), TOBN(0xf11a6dea, 0x8a24a41b), + TOBN(0x5407151a, 0xd04c24c2), TOBN(0x62c9d27d, 0xda5b7b68), + TOBN(0x2e964235, 0x88cceff6), TOBN(0x8594c54f, 0x8b07ed69), + TOBN(0x1578e73c, 0xc84d0d0d), TOBN(0x7b4e1055, 0xff532868), + TOBN(0xa348c0d5, 0xb5ec995a), TOBN(0xbf4b9d55, 0x14289a54), + TOBN(0x9ba155a6, 0x58fbd777), TOBN(0x186ed7a8, 0x1a84491d), + TOBN(0xd4992b30, 0x614c0900), TOBN(0xda98d121, 0xbd00c24b), + TOBN(0x7f534dc8, 0x7ec4bfa1), TOBN(0x4a5ff674, 0x37dc34bc), + TOBN(0x68c196b8, 0x1d7ea1d7), TOBN(0x38cf2893, 0x80a6d208), + TOBN(0xfd56cd09, 0xe3cbbd6e), TOBN(0xec72e27e, 0x4205a5b6), + TOBN(0x15ea68f5, 0xa44f77f7), TOBN(0x7aa5f9fd, 0xb43c52bc), + TOBN(0x86ff676f, 0x94f0e609), TOBN(0xa4cde963, 0x2e2d432b), + TOBN(0x8cafa0c0, 0xeee470af), TOBN(0x84137d0e, 0x8a3f5ec8), + TOBN(0xebb40411, 0xfaa31231), TOBN(0xa239c13f, 0x6f7f7ccf), + TOBN(0x32865719, 0xa8afd30b), TOBN(0x86798328, 0x8a826dce), + TOBN(0xdf04e891, 0xc4a8fbe0), TOBN(0xbb6b6e1b, 0xebf56ad3), + TOBN(0x0a695b11, 0x471f1ff0), TOBN(0xd76c3389, 0xbe15baf0), + TOBN(0x018edb95, 0xbe96c43e), TOBN(0xf2beaaf4, 0x90794158), + TOBN(0x152db09e, 0xc3076a27), TOBN(0x5e82908e, 0xe416545d), + TOBN(0xa2c41272, 0x356d6f2e), TOBN(0xdc9c9642, 0x31fd74e1), + TOBN(0x66ceb88d, 0x519bf615), TOBN(0xe29ecd76, 0x05a2274e), + TOBN(0x3a0473c4, 0xbf5e2fa0), TOBN(0x6b6eb671, 0x64284e67), + TOBN(0xe8b97932, 0xb88756dd), TOBN(0xed4e8652, 0xf17e3e61), + TOBN(0xc2dd1499, 0x3ee1c4a4), TOBN(0xc0aaee17, 0x597f8c0e), + TOBN(0x15c4edb9, 0x6c168af3), TOBN(0x6563c7bf, 0xb39ae875), + TOBN(0xadfadb6f, 0x20adb436), TOBN(0xad55e8c9, 0x9a042ac0), + TOBN(0x975a1ed8, 0xb76da1f5), TOBN(0x10dfa466, 0xa58acb94), + TOBN(0x8dd7f7e3, 0xac060282), TOBN(0x6813e66a, 0x572a051e), + TOBN(0xb4ccae1e, 0x350cb901), TOBN(0xb653d656, 0x50cb7822), + TOBN(0x42484710, 0xdfab3b87), TOBN(0xcd7ee537, 0x9b670fd0), + TOBN(0x0a50b12e, 0x523b8bf6), TOBN(0x8009eb5b, 0x8f910c1b), + TOBN(0xf535af82, 0x4a167588), TOBN(0x0f835f9c, 0xfb2a2abd), + TOBN(0xf59b2931, 0x2afceb62), TOBN(0xc797df2a, 0x169d383f), + TOBN(0xeb3f5fb0, 0x66ac02b0), TOBN(0x029d4c6f, 0xdaa2d0ca), + TOBN(0xd4059bc1, 0xafab4bc5), TOBN(0x833f5c6f, 0x56783247), + TOBN(0xb5346630, 0x8d2d3605), TOBN(0x83387891, 0xd34d8433), + TOBN(0xd973b30f, 0xadd9419a), TOBN(0xbcca1099, 0xafe3fce8), + TOBN(0x08178315, 0x0809aac6), TOBN(0x01b7f21a, 0x540f0f11), + TOBN(0x65c29219, 0x909523c8), TOBN(0xa62f648f, 0xa3a1c741), + TOBN(0x88598d4f, 0x60c9e55a), TOBN(0xbce9141b, 0x0e4f347a), + TOBN(0x9af97d84, 0x35f9b988), TOBN(0x0210da62, 0x320475b6), + TOBN(0x3c076e22, 0x9191476c), TOBN(0x7520dbd9, 0x44fc7834), + TOBN(0x6a6b2cfe, 0xc1ab1bbd), TOBN(0xef8a65be, 0xdc650938), + TOBN(0x72855540, 0x805d7bc4), TOBN(0xda389396, 0xed11fdfd), + TOBN(0xa9d5bd36, 0x74660876), TOBN(0x11d67c54, 0xb45dff35), + TOBN(0x6af7d148, 0xa4f5da94), TOBN(0xbb8d4c3f, 0xc0bbeb31), + TOBN(0x87a7ebd1, 0xe0a1b12a), TOBN(0x1e4ef88d, 0x770ba95f), + TOBN(0x8c33345c, 0xdc2ae9cb), TOBN(0xcecf1276, 0x01cc8403), + TOBN(0x687c012e, 0x1b39b80f), TOBN(0xfd90d0ad, 0x35c33ba4), + TOBN(0xa3ef5a67, 0x5c9661c2), TOBN(0x368fc88e, 0xe017429e), + TOBN(0xd30c6761, 0x196a2fa2), TOBN(0x931b9817, 0xbd5b312e), + TOBN(0xba01000c, 0x72f54a31), TOBN(0xa203d2c8, 0x66eaa541), + TOBN(0xf2abdee0, 0x98939db3), TOBN(0xe37d6c2c, 0x3e606c02), + TOBN(0xf2921574, 0x521ff643), TOBN(0x2781b3c4, 0xd7e2fca3), + TOBN(0x664300b0, 0x7850ec06), TOBN(0xac5a38b9, 0x7d3a10cf), + TOBN(0x9233188d, 0xe34ab39d), TOBN(0xe77057e4, 0x5072cbb9), + TOBN(0xbcf0c042, 0xb59e78df), TOBN(0x4cfc91e8, 0x1d97de52), + TOBN(0x4661a26c, 0x3ee0ca4a), TOBN(0x5620a4c1, 0xfb8507bc), + TOBN(0x4b44d4aa, 0x049f842c), TOBN(0xceabc5d5, 0x1540e82b), + TOBN(0x306710fd, 0x15c6f156), TOBN(0xbe5ae52b, 0x63db1d72), + TOBN(0x06f1e7e6, 0x334957f1), TOBN(0x57e388f0, 0x31144a70), + TOBN(0xfb69bb2f, 0xdf96447b), TOBN(0x0f78ebd3, 0x73e38a12), + TOBN(0xb8222605, 0x2b7ce542), TOBN(0xe6d4ce99, 0x7472bde1), + TOBN(0x53e16ebe, 0x09d2f4da), TOBN(0x180ff42e, 0x53b92b2e), + TOBN(0xc59bcc02, 0x2c34a1c6), TOBN(0x3803d6f9, 0x422c46c2), + TOBN(0x18aff74f, 0x5c14a8a2), TOBN(0x55aebf80, 0x10a08b28), + TOBN(0x66097d58, 0x7135593f), TOBN(0x32e6eff7, 0x2be570cd), + TOBN(0x584e6a10, 0x2a8c860d), TOBN(0xcd185890, 0xa2eb4163), + TOBN(0x7ceae99d, 0x6d97e134), TOBN(0xd42c6b70, 0xdd8447ce), + TOBN(0x59ddbb4a, 0xb8c50273), TOBN(0x03c612df, 0x3cf34e1e), + TOBN(0x84b9ca15, 0x04b6c5a0), TOBN(0x35216f39, 0x18f0e3a3), + TOBN(0x3ec2d2bc, 0xbd986c00), TOBN(0x8bf546d9, 0xd19228fe), + TOBN(0xd1c655a4, 0x4cd623c3), TOBN(0x366ce718, 0x502b8e5a), + TOBN(0x2cfc84b4, 0xeea0bfe7), TOBN(0xe01d5cee, 0xcf443e8e), + TOBN(0x8ec045d9, 0x036520f8), TOBN(0xdfb3c3d1, 0x92d40e98), + TOBN(0x0bac4cce, 0xcc559a04), TOBN(0x35eccae5, 0x240ea6b1), + TOBN(0x180b32db, 0xf8a5a0ac), TOBN(0x547972a5, 0xeb699700), + TOBN(0xa3765801, 0xca26bca0), TOBN(0x57e09d0e, 0xa647f25a), + TOBN(0xb956970e, 0x2fdd23cc), TOBN(0xb80288bc, 0x5682e971), + TOBN(0xe6e6d91e, 0x9ae86ebc), TOBN(0x0564c83f, 0x8c9f1939), + TOBN(0x551932a2, 0x39560368), TOBN(0xe893752b, 0x049c28e2), + TOBN(0x0b03cee5, 0xa6a158c3), TOBN(0xe12d656b, 0x04964263), + TOBN(0x4b47554e, 0x63e3bc1d), TOBN(0xc719b6a2, 0x45044ff7), + TOBN(0x4f24d30a, 0xe48daa07), TOBN(0xa3f37556, 0xc8c1edc3), + TOBN(0x9a47bf76, 0x0700d360), TOBN(0xbb1a1824, 0x822ae4e2), + TOBN(0x22e275a3, 0x89f1fb4c), TOBN(0x72b1aa23, 0x9968c5f5), + TOBN(0xa75feaca, 0xbe063f64), TOBN(0x9b392f43, 0xbce47a09), + TOBN(0xd4241509, 0x1ad07aca), TOBN(0x4b0c591b, 0x8d26cd0f), + TOBN(0x2d42ddfd, 0x92f1169a), TOBN(0x63aeb1ac, 0x4cbf2392), + TOBN(0x1de9e877, 0x0691a2af), TOBN(0xebe79af7, 0xd98021da), + TOBN(0xcfdf2a4e, 0x40e50acf), TOBN(0xf0a98ad7, 0xaf01d665), + TOBN(0xefb640bf, 0x1831be1f), TOBN(0x6fe8bd2f, 0x80e9ada0), + TOBN(0x94c103a1, 0x6cafbc91), TOBN(0x170f8759, 0x8308e08c), + TOBN(0x5de2d2ab, 0x9780ff4f), TOBN(0x666466bc, 0x45b201f2), + TOBN(0x58af2010, 0xf5b343bc), TOBN(0x0f2e400a, 0xf2f142fe), + TOBN(0x3483bfde, 0xa85f4bdf), TOBN(0xf0b1d093, 0x03bfeaa9), + TOBN(0x2ea01b95, 0xc7081603), TOBN(0xe943e4c9, 0x3dba1097), + TOBN(0x47be92ad, 0xb438f3a6), TOBN(0x00bb7742, 0xe5bf6636), + TOBN(0x136b7083, 0x824297b4), TOBN(0x9d0e5580, 0x5584455f), + TOBN(0xab48cedc, 0xf1c7d69e), TOBN(0x53a9e481, 0x2a256e76), + TOBN(0x0402b0e0, 0x65eb2413), TOBN(0xdadbbb84, 0x8fc407a7), + TOBN(0xa65cd5a4, 0x8d7f5492), TOBN(0x21d44293, 0x74bae294), + TOBN(0x66917ce6, 0x3b5f1cc4), TOBN(0x37ae52ea, 0xce872e62), + TOBN(0xbb087b72, 0x2905f244), TOBN(0x12077086, 0x1e6af74f), + TOBN(0x4b644e49, 0x1058edea), TOBN(0x827510e3, 0xb638ca1d), + TOBN(0x8cf2b704, 0x6038591c), TOBN(0xffc8b47a, 0xfe635063), + TOBN(0x3ae220e6, 0x1b4d5e63), TOBN(0xbd864742, 0x9d961b4b), + TOBN(0x610c107e, 0x9bd16bed), TOBN(0x4270352a, 0x1127147b), + TOBN(0x7d17ffe6, 0x64cfc50e), TOBN(0x50dee01a, 0x1e36cb42), + TOBN(0x068a7622, 0x35dc5f9a), TOBN(0x9a08d536, 0xdf53f62c), + TOBN(0x4ed71457, 0x6be5f7de), TOBN(0xd93006f8, 0xc2263c9e), + TOBN(0xe073694c, 0xcacacb36), TOBN(0x2ff7a5b4, 0x3ae118ab), + TOBN(0x3cce53f1, 0xcd871236), TOBN(0xf156a39d, 0xc2aa6d52), + TOBN(0x9cc5f271, 0xb198d76d), TOBN(0xbc615b6f, 0x81383d39), + TOBN(0xa54538e8, 0xde3eee6b), TOBN(0x58c77538, 0xab910d91), + TOBN(0x31e5bdbc, 0x58d278bd), TOBN(0x3cde4adf, 0xb963acae), + TOBN(0xb1881fd2, 0x5302169c), TOBN(0x8ca60fa0, 0xa989ed8b), + TOBN(0xa1999458, 0xff96a0ee), TOBN(0xc1141f03, 0xac6c283d), + TOBN(0x7677408d, 0x6dfafed3), TOBN(0x33a01653, 0x39661588), + TOBN(0x3c9c15ec, 0x0b726fa0), TOBN(0x090cfd93, 0x6c9b56da), + TOBN(0xe34f4bae, 0xa3c40af5), TOBN(0x3469eadb, 0xd21129f1), + TOBN(0xcc51674a, 0x1e207ce8), TOBN(0x1e293b24, 0xc83b1ef9), + TOBN(0x17173d13, 0x1e6c0bb4), TOBN(0x19004695, 0x90776d35), + TOBN(0xe7980e34, 0x6de6f922), TOBN(0x873554cb, 0xf4dd9a22), + TOBN(0x0316c627, 0xcbf18a51), TOBN(0x4d93651b, 0x3032c081), + TOBN(0x207f2771, 0x3946834d), TOBN(0x2c08d7b4, 0x30cdbf80), + TOBN(0x137a4fb4, 0x86df2a61), TOBN(0xa1ed9c07, 0xecf7b4a2), + TOBN(0xb2e460e2, 0x7bd042ff), TOBN(0xb7f5e2fa, 0x5f62f5ec), + TOBN(0x7aa6ec6b, 0xcc2423b7), TOBN(0x75ce0a7f, 0xba63eea7), + TOBN(0x67a45fb1, 0xf250a6e1), TOBN(0x93bc919c, 0xe53cdc9f), + TOBN(0x9271f56f, 0x871942df), TOBN(0x2372ff6f, 0x7859ad66), + TOBN(0x5f4c2b96, 0x33cb1a78), TOBN(0xe3e29101, 0x5838aa83), + TOBN(0xa7ed1611, 0xe4e8110c), TOBN(0x2a2d70d5, 0x330198ce), + TOBN(0xbdf132e8, 0x6720efe0), TOBN(0xe61a8962, 0x66a471bf), + TOBN(0x796d3a85, 0x825808bd), TOBN(0x51dc3cb7, 0x3fd6e902), + TOBN(0x643c768a, 0x916219d1), TOBN(0x36cd7685, 0xa2ad7d32), + TOBN(0xe3db9d05, 0xb22922a4), TOBN(0x6494c87e, 0xdba29660), + TOBN(0xf0ac91df, 0xbcd2ebc7), TOBN(0x4deb57a0, 0x45107f8d), + TOBN(0x42271f59, 0xc3d12a73), TOBN(0x5f71687c, 0xa5c2c51d), + TOBN(0xcb1f50c6, 0x05797bcb), TOBN(0x29ed0ed9, 0xd6d34eb0), + TOBN(0xe5fe5b47, 0x4683c2eb), TOBN(0x4956eeb5, 0x97447c46), + TOBN(0x5b163a43, 0x71207167), TOBN(0x93fa2fed, 0x0248c5ef), + TOBN(0x67930af2, 0x31f63950), TOBN(0xa77797c1, 0x14caa2c9), + TOBN(0x526e80ee, 0x27ac7e62), TOBN(0xe1e6e626, 0x58b28aec), + TOBN(0x636178b0, 0xb3c9fef0), TOBN(0xaf7752e0, 0x6d5f90be), + TOBN(0x94ecaf18, 0xeece51cf), TOBN(0x2864d0ed, 0xca806e1f), + TOBN(0x6de2e383, 0x97c69134), TOBN(0x5a42c316, 0xeb291293), + TOBN(0xc7779219, 0x6a60bae0), TOBN(0xa24de346, 0x6b7599d1), + TOBN(0x49d374aa, 0xb75d4941), TOBN(0x98900586, 0x2d501ff0), + TOBN(0x9f16d40e, 0xeb7974cf), TOBN(0x1033860b, 0xcdd8c115), + TOBN(0xb6c69ac8, 0x2094cec3), TOBN(0x9976fb88, 0x403b770c), + TOBN(0x1dea026c, 0x4859590d), TOBN(0xb6acbb46, 0x8562d1fd), + TOBN(0x7cd6c461, 0x44569d85), TOBN(0xc3190a36, 0x97f0891d), + TOBN(0xc6f53195, 0x48d5a17d), TOBN(0x7d919966, 0xd749abc8), + TOBN(0x65104837, 0xdd1c8a20), TOBN(0x7e5410c8, 0x2f683419), + TOBN(0x958c3ca8, 0xbe94022e), TOBN(0x605c3197, 0x6145dac2), + TOBN(0x3fc07501, 0x01683d54), TOBN(0x1d7127c5, 0x595b1234), + TOBN(0x10b8f87c, 0x9481277f), TOBN(0x677db2a8, 0xe65a1adb), + TOBN(0xec2fccaa, 0xddce3345), TOBN(0x2a6811b7, 0x012a4350), + TOBN(0x96760ff1, 0xac598bdc), TOBN(0x054d652a, 0xd1bf4128), + TOBN(0x0a1151d4, 0x92a21005), TOBN(0xad7f3971, 0x33110fdf), + TOBN(0x8c95928c, 0x1960100f), TOBN(0x6c91c825, 0x7bf03362), + TOBN(0xc8c8b2a2, 0xce309f06), TOBN(0xfdb27b59, 0xca27204b), + TOBN(0xd223eaa5, 0x0848e32e), TOBN(0xb93e4b2e, 0xe7bfaf1e), + TOBN(0xc5308ae6, 0x44aa3ded), TOBN(0x317a666a, 0xc015d573), + TOBN(0xc888ce23, 0x1a979707), TOBN(0xf141c1e6, 0x0d5c4958), + TOBN(0xb53b7de5, 0x61906373), TOBN(0x858dbade, 0xeb999595), + TOBN(0x8cbb47b2, 0xa59e5c36), TOBN(0x660318b3, 0xdcf4e842), + TOBN(0xbd161ccd, 0x12ba4b7a), TOBN(0xf399daab, 0xf8c8282a), + TOBN(0x1587633a, 0xeeb2130d), TOBN(0xa465311a, 0xda38dd7d), + TOBN(0x5f75eec8, 0x64d3779b), TOBN(0x3c5d0476, 0xad64c171), + TOBN(0x87410371, 0x2a914428), TOBN(0x8096a891, 0x90e2fc29), + TOBN(0xd3d2ae9d, 0x23b3ebc2), TOBN(0x90bdd6db, 0xa580cfd6), + TOBN(0x52dbb7f3, 0xc5b01f6c), TOBN(0xe68eded4, 0xe102a2dc), + TOBN(0x17785b77, 0x99eb6df0), TOBN(0x26c3cc51, 0x7386b779), + TOBN(0x345ed988, 0x6417a48e), TOBN(0xe990b4e4, 0x07d6ef31), + TOBN(0x0f456b7e, 0x2586abba), TOBN(0x239ca6a5, 0x59c96e9a), + TOBN(0xe327459c, 0xe2eb4206), TOBN(0x3a4c3313, 0xa002b90a), + TOBN(0x2a114806, 0xf6a3f6fb), TOBN(0xad5cad2f, 0x85c251dd), + TOBN(0x92c1f613, 0xf5a784d3), TOBN(0xec7bfacf, 0x349766d5), + TOBN(0x04b3cd33, 0x3e23cb3b), TOBN(0x3979fe84, 0xc5a64b2d), + TOBN(0x192e2720, 0x7e589106), TOBN(0xa60c43d1, 0xa15b527f), + TOBN(0x2dae9082, 0xbe7cf3a6), TOBN(0xcc86ba92, 0xbc967274), + TOBN(0xf28a2ce8, 0xaea0a8a9), TOBN(0x404ca6d9, 0x6ee988b3), + TOBN(0xfd7e9c5d, 0x005921b8), TOBN(0xf56297f1, 0x44e79bf9), + TOBN(0xa163b460, 0x0d75ddc2), TOBN(0x30b23616, 0xa1f2be87), + TOBN(0x4b070d21, 0xbfe50e2b), TOBN(0x7ef8cfd0, 0xe1bfede1), + TOBN(0xadba0011, 0x2aac4ae0), TOBN(0x2a3e7d01, 0xb9ebd033), + TOBN(0x995277ec, 0xe38d9d1c), TOBN(0xb500249e, 0x9c5d2de3), + TOBN(0x8912b820, 0xf13ca8c9), TOBN(0xc8798114, 0x877793af), + TOBN(0x19e6125d, 0xec3f1dec), TOBN(0x07b1f040, 0x911178da), + TOBN(0xd93ededa, 0x904a6738), TOBN(0x55187a5a, 0x0bebedcd), + TOBN(0xf7d04722, 0xeb329d41), TOBN(0xf449099e, 0xf170b391), + TOBN(0xfd317a69, 0xca99f828), TOBN(0x50c3db2b, 0x34a4976d), + TOBN(0xe9ba7784, 0x3757b392), TOBN(0x326caefd, 0xaa3ca05a), + TOBN(0x78e5293b, 0xf1e593d4), TOBN(0x7842a937, 0x0d98fd13), + TOBN(0xe694bf96, 0x5f96b10d), TOBN(0x373a9df6, 0x06a8cd05), + TOBN(0x997d1e51, 0xe8f0c7fc), TOBN(0x1d019790, 0x63fd972e), + TOBN(0x0064d858, 0x5499fb32), TOBN(0x7b67bad9, 0x77a8aeb7), + TOBN(0x1d3eb977, 0x2d08eec5), TOBN(0x5fc047a6, 0xcbabae1d), + TOBN(0x0577d159, 0xe54a64bb), TOBN(0x8862201b, 0xc43497e4), + TOBN(0xad6b4e28, 0x2ce0608d), TOBN(0x8b687b7d, 0x0b167aac), + TOBN(0x6ed4d367, 0x8b2ecfa9), TOBN(0x24dfe62d, 0xa90c3c38), + TOBN(0xa1862e10, 0x3fe5c42b), TOBN(0x1ca73dca, 0xd5732a9f), + TOBN(0x35f038b7, 0x76bb87ad), TOBN(0x674976ab, 0xf242b81f), + TOBN(0x4f2bde7e, 0xb0fd90cd), TOBN(0x6efc172e, 0xa7fdf092), + TOBN(0x3806b69b, 0x92222f1f), TOBN(0x5a2459ca, 0x6cf7ae70), + TOBN(0x6789f69c, 0xa85217ee), TOBN(0x5f232b5e, 0xe3dc85ac), + TOBN(0x660e3ec5, 0x48e9e516), TOBN(0x124b4e47, 0x3197eb31), + TOBN(0x10a0cb13, 0xaafcca23), TOBN(0x7bd63ba4, 0x8213224f), + TOBN(0xaffad7cc, 0x290a7f4f), TOBN(0x6b409c9e, 0x0286b461), + TOBN(0x58ab809f, 0xffa407af), TOBN(0xc3122eed, 0xc68ac073), + TOBN(0x17bf9e50, 0x4ef24d7e), TOBN(0x5d929794, 0x3e2a5811), + TOBN(0x519bc867, 0x02902e01), TOBN(0x76bba5da, 0x39c8a851), + TOBN(0xe9f9669c, 0xda94951e), TOBN(0x4b6af58d, 0x66b8d418), + TOBN(0xfa321074, 0x17d426a4), TOBN(0xc78e66a9, 0x9dde6027), + TOBN(0x0516c083, 0x4a53b964), TOBN(0xfc659d38, 0xff602330), + TOBN(0x0ab55e5c, 0x58c5c897), TOBN(0x985099b2, 0x838bc5df), + TOBN(0x061d9efc, 0xc52fc238), TOBN(0x712b2728, 0x6ac1da3f), + TOBN(0xfb658149, 0x9283fe08), TOBN(0x4954ac94, 0xb8aaa2f7), + TOBN(0x85c0ada4, 0x7fb2e74f), TOBN(0xee8ba98e, 0xb89926b0), + TOBN(0xe4f9d37d, 0x23d1af5b), TOBN(0x14ccdbf9, 0xba9b015e), + TOBN(0xb674481b, 0x7bfe7178), TOBN(0x4e1debae, 0x65405868), + TOBN(0x061b2821, 0xc48c867d), TOBN(0x69c15b35, 0x513b30ea), + TOBN(0x3b4a1666, 0x36871088), TOBN(0xe5e29f5d, 0x1220b1ff), + TOBN(0x4b82bb35, 0x233d9f4d), TOBN(0x4e076333, 0x18cdc675)} + , + {TOBN(0x0d53f5c7, 0xa3e6fced), TOBN(0xe8cbbdd5, 0xf45fbdeb), + TOBN(0xf85c01df, 0x13339a70), TOBN(0x0ff71880, 0x142ceb81), + TOBN(0x4c4e8774, 0xbd70437a), TOBN(0x5fb32891, 0xba0bda6a), + TOBN(0x1cdbebd2, 0xf18bd26e), TOBN(0x2f9526f1, 0x03a9d522), + TOBN(0x40ce3051, 0x92c4d684), TOBN(0x8b04d725, 0x7612efcd), + TOBN(0xb9dcda36, 0x6f9cae20), TOBN(0x0edc4d24, 0xf058856c), + TOBN(0x64f2e6bf, 0x85427900), TOBN(0x3de81295, 0xdc09dfea), + TOBN(0xd41b4487, 0x379bf26c), TOBN(0x50b62c6d, 0x6df135a9), + TOBN(0xd4f8e3b4, 0xc72dfe67), TOBN(0xc416b0f6, 0x90e19fdf), + TOBN(0x18b9098d, 0x4c13bd35), TOBN(0xac11118a, 0x15b8cb9e), + TOBN(0xf598a318, 0xf0062841), TOBN(0xbfe0602f, 0x89f356f4), + TOBN(0x7ae3637e, 0x30177a0c), TOBN(0x34097747, 0x61136537), + TOBN(0x0db2fb5e, 0xd005832a), TOBN(0x5f5efd3b, 0x91042e4f), + TOBN(0x8c4ffdc6, 0xed70f8ca), TOBN(0xe4645d0b, 0xb52da9cc), + TOBN(0x9596f58b, 0xc9001d1f), TOBN(0x52c8f0bc, 0x4e117205), + TOBN(0xfd4aa0d2, 0xe398a084), TOBN(0x815bfe3a, 0x104f49de), + TOBN(0x97e5443f, 0x23885e5f), TOBN(0xf72f8f99, 0xe8433aab), + TOBN(0xbd00b154, 0xe4d4e604), TOBN(0xd0b35e6a, 0xe5e173ff), + TOBN(0x57b2a048, 0x9164722d), TOBN(0x3e3c665b, 0x88761ec8), + TOBN(0x6bdd1397, 0x3da83832), TOBN(0x3c8b1a1e, 0x73dafe3b), + TOBN(0x4497ace6, 0x54317cac), TOBN(0xbe600ab9, 0x521771b3), + TOBN(0xb42e409e, 0xb0dfe8b8), TOBN(0x386a67d7, 0x3942310f), + TOBN(0x25548d8d, 0x4431cc28), TOBN(0xa7cff142, 0x985dc524), + TOBN(0x4d60f5a1, 0x93c4be32), TOBN(0x83ebd5c8, 0xd071c6e1), + TOBN(0xba3a80a7, 0xb1fd2b0b), TOBN(0x9b3ad396, 0x5bec33e8), + TOBN(0xb3868d61, 0x79743fb3), TOBN(0xcfd169fc, 0xfdb462fa), + TOBN(0xd3b499d7, 0x9ce0a6af), TOBN(0x55dc1cf1, 0xe42d3ff8), + TOBN(0x04fb9e6c, 0xc6c3e1b2), TOBN(0x47e6961d, 0x6f69a474), + TOBN(0x54eb3acc, 0xe548b37b), TOBN(0xb38e7542, 0x84d40549), + TOBN(0x8c3daa51, 0x7b341b4f), TOBN(0x2f6928ec, 0x690bf7fa), + TOBN(0x0496b323, 0x86ce6c41), TOBN(0x01be1c55, 0x10adadcd), + TOBN(0xc04e67e7, 0x4bb5faf9), TOBN(0x3cbaf678, 0xe15c9985), + TOBN(0x8cd12145, 0x50ca4247), TOBN(0xba1aa47a, 0xe7dd30aa), + TOBN(0x2f81ddf1, 0xe58fee24), TOBN(0x03452936, 0xeec9b0e8), + TOBN(0x8bdc3b81, 0x243aea96), TOBN(0x9a2919af, 0x15c3d0e5), + TOBN(0x9ea640ec, 0x10948361), TOBN(0x5ac86d5b, 0x6e0bcccf), + TOBN(0xf892d918, 0xc36cf440), TOBN(0xaed3e837, 0xc939719c), + TOBN(0xb07b08d2, 0xc0218b64), TOBN(0x6f1bcbba, 0xce9790dd), + TOBN(0x4a84d6ed, 0x60919b8e), TOBN(0xd8900791, 0x8ac1f9eb), + TOBN(0xf84941aa, 0x0dd5daef), TOBN(0xb22fe40a, 0x67fd62c5), + TOBN(0x97e15ba2, 0x157f2db3), TOBN(0xbda2fc8f, 0x8e28ca9c), + TOBN(0x5d050da4, 0x37b9f454), TOBN(0x3d57eb57, 0x2379d72e), + TOBN(0xe9b5eba2, 0xfb5ee997), TOBN(0x01648ca2, 0xe11538ca), + TOBN(0x32bb76f6, 0xf6327974), TOBN(0x338f14b8, 0xff3f4bb7), + TOBN(0x524d226a, 0xd7ab9a2d), TOBN(0x9c00090d, 0x7dfae958), + TOBN(0x0ba5f539, 0x8751d8c2), TOBN(0x8afcbcdd, 0x3ab8262d), + TOBN(0x57392729, 0xe99d043b), TOBN(0xef51263b, 0xaebc943a), + TOBN(0x9feace93, 0x20862935), TOBN(0x639efc03, 0xb06c817b), + TOBN(0x1fe054b3, 0x66b4be7a), TOBN(0x3f25a9de, 0x84a37a1e), + TOBN(0xf39ef1ad, 0x78d75cd9), TOBN(0xd7b58f49, 0x5062c1b5), + TOBN(0x6f74f9a9, 0xff563436), TOBN(0xf718ff29, 0xe8af51e7), + TOBN(0x5234d313, 0x15e97fec), TOBN(0xb6a8e2b1, 0x292f1c0a), + TOBN(0xa7f53aa8, 0x327720c1), TOBN(0x956ca322, 0xba092cc8), + TOBN(0x8f03d64a, 0x28746c4d), TOBN(0x51fe1782, 0x66d0d392), + TOBN(0xd19b34db, 0x3c832c80), TOBN(0x60dccc5c, 0x6da2e3b4), + TOBN(0x245dd62e, 0x0a104ccc), TOBN(0xa7ab1de1, 0x620b21fd), + TOBN(0xb293ae0b, 0x3893d123), TOBN(0xf7b75783, 0xb15ee71c), + TOBN(0x5aa3c614, 0x42a9468b), TOBN(0xd686123c, 0xdb15d744), + TOBN(0x8c616891, 0xa7ab4116), TOBN(0x6fcd72c8, 0xa4e6a459), + TOBN(0xac219110, 0x77e5fad7), TOBN(0xfb6a20e7, 0x704fa46b), + TOBN(0xe839be7d, 0x341d81dc), TOBN(0xcddb6889, 0x32148379), + TOBN(0xda6211a1, 0xf7026ead), TOBN(0xf3b2575f, 0xf4d1cc5e), + TOBN(0x40cfc8f6, 0xa7a73ae6), TOBN(0x83879a5e, 0x61d5b483), + TOBN(0xc5acb1ed, 0x41a50ebc), TOBN(0x59a60cc8, 0x3c07d8fa), + TOBN(0x1b73bdce, 0xb1876262), TOBN(0x2b0d79f0, 0x12af4ee9), + TOBN(0x8bcf3b0b, 0xd46e1d07), TOBN(0x17d6af9d, 0xe45d152f), + TOBN(0x73520461, 0x6d736451), TOBN(0x43cbbd97, 0x56b0bf5a), + TOBN(0xb0833a5b, 0xd5999b9d), TOBN(0x702614f0, 0xeb72e398), + TOBN(0x0aadf01a, 0x59c3e9f8), TOBN(0x40200e77, 0xce6b3d16), + TOBN(0xda22bdd3, 0xdeddafad), TOBN(0x76dedaf4, 0x310d72e1), + TOBN(0x49ef807c, 0x4bc2e88f), TOBN(0x6ba81291, 0x146dd5a5), + TOBN(0xa1a4077a, 0x7d8d59e9), TOBN(0x87b6a2e7, 0x802db349), + TOBN(0xd5679997, 0x1b4e598e), TOBN(0xf499ef1f, 0x06fe4b1d), + TOBN(0x3978d3ae, 0xfcb267c5), TOBN(0xb582b557, 0x235786d0), + TOBN(0x32b3b2ca, 0x1715cb07), TOBN(0x4c3de6a2, 0x8480241d), + TOBN(0x63b5ffed, 0xcb571ecd), TOBN(0xeaf53900, 0xed2fe9a9), + TOBN(0xdec98d4a, 0xc3b81990), TOBN(0x1cb83722, 0x9e0cc8fe), + TOBN(0xfe0b0491, 0xd2b427b9), TOBN(0x0f2386ac, 0xe983a66c), + TOBN(0x930c4d1e, 0xb3291213), TOBN(0xa2f82b2e, 0x59a62ae4), + TOBN(0x77233853, 0xf93e89e3), TOBN(0x7f8063ac, 0x11777c7f), + TOBN(0xff0eb567, 0x59ad2877), TOBN(0x6f454642, 0x9865c754), + TOBN(0xe6fe701a, 0x236e9a84), TOBN(0xc586ef16, 0x06e40fc3), + TOBN(0x3f62b6e0, 0x24bafad9), TOBN(0xc8b42bd2, 0x64da906a), + TOBN(0xc98e1eb4, 0xda3276a0), TOBN(0x30d0e5fc, 0x06cbf852), + TOBN(0x1b6b2ae1, 0xe8b4dfd4), TOBN(0xd754d5c7, 0x8301cbac), + TOBN(0x66097629, 0x112a39ac), TOBN(0xf86b5999, 0x93ba4ab9), + TOBN(0x26c9dea7, 0x99f9d581), TOBN(0x0473b1a8, 0xc2fafeaa), + TOBN(0x1469af55, 0x3b2505a5), TOBN(0x227d16d7, 0xd6a43323), + TOBN(0x3316f73c, 0xad3d97f9), TOBN(0x52bf3bb5, 0x1f137455), + TOBN(0x953eafeb, 0x09954e7c), TOBN(0xa721dfed, 0xdd732411), + TOBN(0xb4929821, 0x141d4579), TOBN(0x3411321c, 0xaa3bd435), + TOBN(0xafb355aa, 0x17fa6015), TOBN(0xb4e7ef4a, 0x18e42f0e), + TOBN(0x604ac97c, 0x59371000), TOBN(0xe1c48c70, 0x7f759c18), + TOBN(0x3f62ecc5, 0xa5db6b65), TOBN(0x0a78b173, 0x38a21495), + TOBN(0x6be1819d, 0xbcc8ad94), TOBN(0x70dc04f6, 0xd89c3400), + TOBN(0x462557b4, 0xa6b4840a), TOBN(0x544c6ade, 0x60bd21c0), + TOBN(0x6a00f24e, 0x907a544b), TOBN(0xa7520dcb, 0x313da210), + TOBN(0xfe939b75, 0x11e4994b), TOBN(0x918b6ba6, 0xbc275d70), + TOBN(0xd3e5e0fc, 0x644be892), TOBN(0x707a9816, 0xfdaf6c42), + TOBN(0x60145567, 0xf15c13fe), TOBN(0x4818ebaa, 0xe130a54a), + TOBN(0x28aad3ad, 0x58d2f767), TOBN(0xdc5267fd, 0xd7e7c773), + TOBN(0x4919cc88, 0xc3afcc98), TOBN(0xaa2e6ab0, 0x2db8cd4b), + TOBN(0xd46fec04, 0xd0c63eaa), TOBN(0xa1cb92c5, 0x19ffa832), + TOBN(0x678dd178, 0xe43a631f), TOBN(0xfb5ae1cd, 0x3dc788b3), + TOBN(0x68b4fb90, 0x6e77de04), TOBN(0x7992bcf0, 0xf06dbb97), + TOBN(0x896e6a13, 0xc417c01d), TOBN(0x8d96332c, 0xb956be01), + TOBN(0x902fc93a, 0x413aa2b9), TOBN(0x99a4d915, 0xfc98c8a5), + TOBN(0x52c29407, 0x565f1137), TOBN(0x4072690f, 0x21e4f281), + TOBN(0x36e607cf, 0x02ff6072), TOBN(0xa47d2ca9, 0x8ad98cdc), + TOBN(0xbf471d1e, 0xf5f56609), TOBN(0xbcf86623, 0xf264ada0), + TOBN(0xb70c0687, 0xaa9e5cb6), TOBN(0xc98124f2, 0x17401c6c), + TOBN(0x8189635f, 0xd4a61435), TOBN(0xd28fb8af, 0xa9d98ea6), + TOBN(0xb9a67c2a, 0x40c251f8), TOBN(0x88cd5d87, 0xa2da44be), + TOBN(0x437deb96, 0xe09b5423), TOBN(0x150467db, 0x64287dc1), + TOBN(0xe161debb, 0xcdabb839), TOBN(0xa79e9742, 0xf1839a3e), + TOBN(0xbb8dd3c2, 0x652d202b), TOBN(0x7b3e67f7, 0xe9f97d96), + TOBN(0x5aa5d78f, 0xb1cb6ac9), TOBN(0xffa13e8e, 0xca1d0d45), + TOBN(0x369295dd, 0x2ba5bf95), TOBN(0xd68bd1f8, 0x39aff05e), + TOBN(0xaf0d86f9, 0x26d783f2), TOBN(0x543a59b3, 0xfc3aafc1), + TOBN(0x3fcf81d2, 0x7b7da97c), TOBN(0xc990a056, 0xd25dee46), + TOBN(0x3e6775b8, 0x519cce2c), TOBN(0xfc9af71f, 0xae13d863), + TOBN(0x774a4a6f, 0x47c1605c), TOBN(0x46ba4245, 0x2fd205e8), + TOBN(0xa06feea4, 0xd3fd524d), TOBN(0x1e724641, 0x6de1acc2), + TOBN(0xf53816f1, 0x334e2b42), TOBN(0x49e5918e, 0x922f0024), + TOBN(0x439530b6, 0x65c7322d), TOBN(0xcf12cc01, 0xb3c1b3fb), + TOBN(0xc70b0186, 0x0172f685), TOBN(0xb915ee22, 0x1b58391d), + TOBN(0x9afdf03b, 0xa317db24), TOBN(0x87dec659, 0x17b8ffc4), + TOBN(0x7f46597b, 0xe4d3d050), TOBN(0x80a1c1ed, 0x006500e7), + TOBN(0x84902a96, 0x78bf030e), TOBN(0xfb5e9c9a, 0x50560148), + TOBN(0x6dae0a92, 0x63362426), TOBN(0xdcaeecf4, 0xa9e30c40), + TOBN(0xc0d887bb, 0x518d0c6b), TOBN(0x99181152, 0xcb985b9d), + TOBN(0xad186898, 0xef7bc381), TOBN(0x18168ffb, 0x9ee46201), + TOBN(0x9a04cdaa, 0x2502753c), TOBN(0xbb279e26, 0x51407c41), + TOBN(0xeacb03aa, 0xf23564e5), TOBN(0x18336582, 0x71e61016), + TOBN(0x8684b8c4, 0xeb809877), TOBN(0xb336e18d, 0xea0e672e), + TOBN(0xefb601f0, 0x34ee5867), TOBN(0x2733edbe, 0x1341cfd1), + TOBN(0xb15e809a, 0x26025c3c), TOBN(0xe6e981a6, 0x9350df88), + TOBN(0x92376237, 0x8502fd8e), TOBN(0x4791f216, 0x0c12be9b), + TOBN(0xb7256789, 0x25f02425), TOBN(0xec863194, 0x7a974443), + TOBN(0x7c0ce882, 0xfb41cc52), TOBN(0xc266ff7e, 0xf25c07f2), + TOBN(0x3d4da8c3, 0x017025f3), TOBN(0xefcf628c, 0xfb9579b4), + TOBN(0x5c4d0016, 0x1f3716ec), TOBN(0x9c27ebc4, 0x6801116e), + TOBN(0x5eba0ea1, 0x1da1767e), TOBN(0xfe151452, 0x47004c57), + TOBN(0x3ace6df6, 0x8c2373b7), TOBN(0x75c3dffe, 0x5dbc37ac), + TOBN(0x3dc32a73, 0xddc925fc), TOBN(0xb679c841, 0x2f65ee0b), + TOBN(0x715a3295, 0x451cbfeb), TOBN(0xd9889768, 0xf76e9a29), + TOBN(0xec20ce7f, 0xb28ad247), TOBN(0xe99146c4, 0x00894d79), + TOBN(0x71457d7c, 0x9f5e3ea7), TOBN(0x097b2662, 0x38030031), + TOBN(0xdb7f6ae6, 0xcf9f82a8), TOBN(0x319decb9, 0x438f473a), + TOBN(0xa63ab386, 0x283856c3), TOBN(0x13e3172f, 0xb06a361b), + TOBN(0x2959f8dc, 0x7d5a006c), TOBN(0x2dbc27c6, 0x75fba752), + TOBN(0xc1227ab2, 0x87c22c9e), TOBN(0x06f61f75, 0x71a268b2), + TOBN(0x1b6bb971, 0x04779ce2), TOBN(0xaca83812, 0x0aadcb1d), + TOBN(0x297ae0bc, 0xaeaab2d5), TOBN(0xa5c14ee7, 0x5bfb9f13), + TOBN(0xaa00c583, 0xf17a62c7), TOBN(0x39eb962c, 0x173759f6), + TOBN(0x1eeba1d4, 0x86c9a88f), TOBN(0x0ab6c37a, 0xdf016c5e), + TOBN(0xa2a147db, 0xa28a0749), TOBN(0x246c20d6, 0xee519165), + TOBN(0x5068d1b1, 0xd3810715), TOBN(0xb1e7018c, 0x748160b9), + TOBN(0x03f5b1fa, 0xf380ff62), TOBN(0xef7fb1dd, 0xf3cb2c1e), + TOBN(0xeab539a8, 0xfc91a7da), TOBN(0x83ddb707, 0xf3f9b561), + TOBN(0xc550e211, 0xfe7df7a4), TOBN(0xa7cd07f2, 0x063f6f40), + TOBN(0xb0de3635, 0x2976879c), TOBN(0xb5f83f85, 0xe55741da), + TOBN(0x4ea9d25e, 0xf3d8ac3d), TOBN(0x6fe2066f, 0x62819f02), + TOBN(0x4ab2b9c2, 0xcef4a564), TOBN(0x1e155d96, 0x5ffa2de3), + TOBN(0x0eb0a19b, 0xc3a72d00), TOBN(0x4037665b, 0x8513c31b), + TOBN(0x2fb2b6bf, 0x04c64637), TOBN(0x45c34d6e, 0x08cdc639), + TOBN(0x56f1e10f, 0xf01fd796), TOBN(0x4dfb8101, 0xfe3667b8), + TOBN(0xe0eda253, 0x9021d0c0), TOBN(0x7a94e9ff, 0x8a06c6ab), + TOBN(0x2d3bb0d9, 0xbb9aa882), TOBN(0xea20e4e5, 0xec05fd10), + TOBN(0xed7eeb5f, 0x1a1ca64e), TOBN(0x2fa6b43c, 0xc6327cbd), + TOBN(0xb577e3cf, 0x3aa91121), TOBN(0x8c6bd5ea, 0x3a34079b), + TOBN(0xd7e5ba39, 0x60e02fc0), TOBN(0xf16dd2c3, 0x90141bf8), + TOBN(0xb57276d9, 0x80101b98), TOBN(0x760883fd, 0xb82f0f66), + TOBN(0x89d7de75, 0x4bc3eff3), TOBN(0x03b60643, 0x5dc2ab40), + TOBN(0xcd6e53df, 0xe05beeac), TOBN(0xf2f1e862, 0xbc3325cd), + TOBN(0xdd0f7921, 0x774f03c3), TOBN(0x97ca7221, 0x4552cc1b), + TOBN(0x5a0d6afe, 0x1cd19f72), TOBN(0xa20915dc, 0xf183fbeb), + TOBN(0x9fda4b40, 0x832c403c), TOBN(0x32738edd, 0xbe425442), + TOBN(0x469a1df6, 0xb5eccf1a), TOBN(0x4b5aff42, 0x28bbe1f0), + TOBN(0x31359d7f, 0x570dfc93), TOBN(0xa18be235, 0xf0088628), + TOBN(0xa5b30fba, 0xb00ed3a9), TOBN(0x34c61374, 0x73cdf8be), + TOBN(0x2c5c5f46, 0xabc56797), TOBN(0x5cecf93d, 0xb82a8ae2), + TOBN(0x7d3dbe41, 0xa968fbf0), TOBN(0xd23d4583, 0x1a5c7f3d), + TOBN(0xf28f69a0, 0xc087a9c7), TOBN(0xc2d75471, 0x474471ca), + TOBN(0x36ec9f4a, 0x4eb732ec), TOBN(0x6c943bbd, 0xb1ca6bed), + TOBN(0xd64535e1, 0xf2457892), TOBN(0x8b84a8ea, 0xf7e2ac06), + TOBN(0xe0936cd3, 0x2499dd5f), TOBN(0x12053d7e, 0x0ed04e57), + TOBN(0x4bdd0076, 0xe4305d9d), TOBN(0x34a527b9, 0x1f67f0a2), + TOBN(0xe79a4af0, 0x9cec46ea), TOBN(0xb15347a1, 0x658b9bc7), + TOBN(0x6bd2796f, 0x35af2f75), TOBN(0xac957990, 0x4051c435), + TOBN(0x2669dda3, 0xc33a655d), TOBN(0x5d503c2e, 0x88514aa3), + TOBN(0xdfa11337, 0x3753dd41), TOBN(0x3f054673, 0x0b754f78), + TOBN(0xbf185677, 0x496125bd), TOBN(0xfb0023c8, 0x3775006c), + TOBN(0xfa0f072f, 0x3a037899), TOBN(0x4222b6eb, 0x0e4aea57), + TOBN(0x3dde5e76, 0x7866d25a), TOBN(0xb6eb04f8, 0x4837aa6f), + TOBN(0x5315591a, 0x2cf1cdb8), TOBN(0x6dfb4f41, 0x2d4e683c), + TOBN(0x7e923ea4, 0x48ee1f3a), TOBN(0x9604d9f7, 0x05a2afd5), + TOBN(0xbe1d4a33, 0x40ea4948), TOBN(0x5b45f1f4, 0xb44cbd2f), + TOBN(0x5faf8376, 0x4acc757e), TOBN(0xa7cf9ab8, 0x63d68ff7), + TOBN(0x8ad62f69, 0xdf0e404b), TOBN(0xd65f33c2, 0x12bdafdf), + TOBN(0xc365de15, 0xa377b14e), TOBN(0x6bf5463b, 0x8e39f60c), + TOBN(0x62030d2d, 0x2ce68148), TOBN(0xd95867ef, 0xe6f843a8), + TOBN(0xd39a0244, 0xef5ab017), TOBN(0x0bd2d8c1, 0x4ab55d12), + TOBN(0xc9503db3, 0x41639169), TOBN(0x2d4e25b0, 0xf7660c8a), + TOBN(0x760cb3b5, 0xe224c5d7), TOBN(0xfa3baf8c, 0x68616919), + TOBN(0x9fbca113, 0x8d142552), TOBN(0x1ab18bf1, 0x7669ebf5), + TOBN(0x55e6f53e, 0x9bdf25dd), TOBN(0x04cc0bf3, 0xcb6cd154), + TOBN(0x595bef49, 0x95e89080), TOBN(0xfe9459a8, 0x104a9ac1), + TOBN(0xad2d89ca, 0xcce9bb32), TOBN(0xddea65e1, 0xf7de8285), + TOBN(0x62ed8c35, 0xb351bd4b), TOBN(0x4150ff36, 0x0c0e19a7), + TOBN(0x86e3c801, 0x345f4e47), TOBN(0x3bf21f71, 0x203a266c), + TOBN(0x7ae110d4, 0x855b1f13), TOBN(0x5d6aaf6a, 0x07262517), + TOBN(0x1e0f12e1, 0x813d28f1), TOBN(0x6000e11d, 0x7ad7a523), + TOBN(0xc7d8deef, 0xc744a17b), TOBN(0x1e990b48, 0x14c05a00), + TOBN(0x68fddaee, 0x93e976d5), TOBN(0x696241d1, 0x46610d63), + TOBN(0xb204e7c3, 0x893dda88), TOBN(0x8bccfa65, 0x6a3a6946), + TOBN(0xb59425b4, 0xc5cd1411), TOBN(0x701b4042, 0xff3658b1), + TOBN(0xe3e56bca, 0x4784cf93), TOBN(0x27de5f15, 0x8fe68d60), + TOBN(0x4ab9cfce, 0xf8d53f19), TOBN(0xddb10311, 0xa40a730d), + TOBN(0x6fa73cd1, 0x4eee0a8a), TOBN(0xfd548748, 0x5249719d), + TOBN(0x49d66316, 0xa8123ef0), TOBN(0x73c32db4, 0xe7f95438), + TOBN(0x2e2ed209, 0x0d9e7854), TOBN(0xf98a9329, 0x9d9f0507), + TOBN(0xc5d33cf6, 0x0c6aa20a), TOBN(0x9a32ba14, 0x75279bb2), + TOBN(0x7e3202cb, 0x774a7307), TOBN(0x64ed4bc4, 0xe8c42dbd), + TOBN(0xc20f1a06, 0xd4caed0d), TOBN(0xb8021407, 0x171d22b3), + TOBN(0xd426ca04, 0xd13268d7), TOBN(0x92377007, 0x25f4d126), + TOBN(0x4204cbc3, 0x71f21a85), TOBN(0x18461b7a, 0xf82369ba), + TOBN(0xc0c07d31, 0x3fc858f9), TOBN(0x5deb5a50, 0xe2bab569), + TOBN(0xd5959d46, 0xd5eea89e), TOBN(0xfdff8424, 0x08437f4b), + TOBN(0xf21071e4, 0x3cfe254f), TOBN(0x72417696, 0x95468321), + TOBN(0x5d8288b9, 0x102cae3e), TOBN(0x2d143e3d, 0xf1965dff), + TOBN(0x00c9a376, 0xa078d847), TOBN(0x6fc0da31, 0x26028731), + TOBN(0xa2baeadf, 0xe45083a2), TOBN(0x66bc7218, 0x5e5b4bcd), + TOBN(0x2c826442, 0xd04b8e7f), TOBN(0xc19f5451, 0x6c4b586b), + TOBN(0x60182c49, 0x5b7eeed5), TOBN(0xd9954ecd, 0x7aa9dfa1), + TOBN(0xa403a8ec, 0xc73884ad), TOBN(0x7fb17de2, 0x9bb39041), + TOBN(0x694b64c5, 0xabb020e8), TOBN(0x3d18c184, 0x19c4eec7), + TOBN(0x9c4673ef, 0x1c4793e5), TOBN(0xc7b8aeb5, 0x056092e6), + TOBN(0x3aa1ca43, 0xf0f8c16b), TOBN(0x224ed5ec, 0xd679b2f6), + TOBN(0x0d56eeaf, 0x55a205c9), TOBN(0xbfe115ba, 0x4b8e028b), + TOBN(0x97e60849, 0x3927f4fe), TOBN(0xf91fbf94, 0x759aa7c5), + TOBN(0x985af769, 0x6be90a51), TOBN(0xc1277b78, 0x78ccb823), + TOBN(0x395b656e, 0xe7a75952), TOBN(0x00df7de0, 0x928da5f5), + TOBN(0x09c23175, 0x4ca4454f), TOBN(0x4ec971f4, 0x7aa2d3c1), + TOBN(0x45c3c507, 0xe75d9ccc), TOBN(0x63b7be8a, 0x3dc90306), + TOBN(0x37e09c66, 0x5db44bdc), TOBN(0x50d60da1, 0x6841c6a2), + TOBN(0x6f9b65ee, 0x08df1b12), TOBN(0x38734879, 0x7ff089df), + TOBN(0x9c331a66, 0x3fe8013d), TOBN(0x017f5de9, 0x5f42fcc8), + TOBN(0x43077866, 0xe8e57567), TOBN(0xc9f781ce, 0xf9fcdb18), + TOBN(0x38131dda, 0x9b12e174), TOBN(0x25d84aa3, 0x8a03752a), + TOBN(0x45e09e09, 0x4d0c0ce2), TOBN(0x1564008b, 0x92bebba5), + TOBN(0xf7e8ad31, 0xa87284c7), TOBN(0xb7c4b46c, 0x97e7bbaa), + TOBN(0x3e22a7b3, 0x97acf4ec), TOBN(0x0426c400, 0x5ea8b640), + TOBN(0x5e3295a6, 0x4e969285), TOBN(0x22aabc59, 0xa6a45670), + TOBN(0xb929714c, 0x5f5942bc), TOBN(0x9a6168bd, 0xfa3182ed), + TOBN(0x2216a665, 0x104152ba), TOBN(0x46908d03, 0xb6926368)} + , + {TOBN(0xa9f5d874, 0x5a1251fb), TOBN(0x967747a8, 0xc72725c7), + TOBN(0x195c33e5, 0x31ffe89e), TOBN(0x609d210f, 0xe964935e), + TOBN(0xcafd6ca8, 0x2fe12227), TOBN(0xaf9b5b96, 0x0426469d), + TOBN(0x2e9ee04c, 0x5693183c), TOBN(0x1084a333, 0xc8146fef), + TOBN(0x96649933, 0xaed1d1f7), TOBN(0x566eaff3, 0x50563090), + TOBN(0x345057f0, 0xad2e39cf), TOBN(0x148ff65b, 0x1f832124), + TOBN(0x042e89d4, 0xcf94cf0d), TOBN(0x319bec84, 0x520c58b3), + TOBN(0x2a267626, 0x5361aa0d), TOBN(0xc86fa302, 0x8fbc87ad), + TOBN(0xfc83d2ab, 0x5c8b06d5), TOBN(0xb1a785a2, 0xfe4eac46), + TOBN(0xb99315bc, 0x846f7779), TOBN(0xcf31d816, 0xef9ea505), + TOBN(0x2391fe6a, 0x15d7dc85), TOBN(0x2f132b04, 0xb4016b33), + TOBN(0x29547fe3, 0x181cb4c7), TOBN(0xdb66d8a6, 0x650155a1), + TOBN(0x6b66d7e1, 0xadc1696f), TOBN(0x98ebe593, 0x0acd72d0), + TOBN(0x65f24550, 0xcc1b7435), TOBN(0xce231393, 0xb4b9a5ec), + TOBN(0x234a22d4, 0xdb067df9), TOBN(0x98dda095, 0xcaff9b00), + TOBN(0x1bbc75a0, 0x6100c9c1), TOBN(0x1560a9c8, 0x939cf695), + TOBN(0xcf006d3e, 0x99e0925f), TOBN(0x2dd74a96, 0x6322375a), + TOBN(0xc58b446a, 0xb56af5ba), TOBN(0x50292683, 0xe0b9b4f1), + TOBN(0xe2c34cb4, 0x1aeaffa3), TOBN(0x8b17203f, 0x9b9587c1), + TOBN(0x6d559207, 0xead1350c), TOBN(0x2b66a215, 0xfb7f9604), + TOBN(0x0850325e, 0xfe51bf74), TOBN(0x9c4f579e, 0x5e460094), + TOBN(0x5c87b92a, 0x76da2f25), TOBN(0x889de4e0, 0x6febef33), + TOBN(0x6900ec06, 0x646083ce), TOBN(0xbe2a0335, 0xbfe12773), + TOBN(0xadd1da35, 0xc5344110), TOBN(0x757568b7, 0xb802cd20), + TOBN(0x75559779, 0x00f7e6c8), TOBN(0x38e8b94f, 0x0facd2f0), + TOBN(0xfea1f3af, 0x03fde375), TOBN(0x5e11a1d8, 0x75881dfc), + TOBN(0xb3a6b02e, 0xc1e2f2ef), TOBN(0x193d2bbb, 0xc605a6c5), + TOBN(0x325ffeee, 0x339a0b2d), TOBN(0x27b6a724, 0x9e0c8846), + TOBN(0xe4050f1c, 0xf1c367ca), TOBN(0x9bc85a9b, 0xc90fbc7d), + TOBN(0xa373c4a2, 0xe1a11032), TOBN(0xb64232b7, 0xad0393a9), + TOBN(0xf5577eb0, 0x167dad29), TOBN(0x1604f301, 0x94b78ab2), + TOBN(0x0baa94af, 0xe829348b), TOBN(0x77fbd8dd, 0x41654342), + TOBN(0xdab50ea5, 0xb964e39a), TOBN(0xd4c29e3c, 0xd0d3c76e), + TOBN(0x80dae67c, 0x56d11964), TOBN(0x7307a8bf, 0xe5ffcc2f), + TOBN(0x65bbc1aa, 0x91708c3b), TOBN(0xa151e62c, 0x28bf0eeb), + TOBN(0x6cb53381, 0x6fa34db7), TOBN(0x5139e05c, 0xa29403a8), + TOBN(0x6ff651b4, 0x94a7cd2e), TOBN(0x5671ffd1, 0x0699336c), + TOBN(0x6f5fd2cc, 0x979a896a), TOBN(0x11e893a8, 0xd8148cef), + TOBN(0x988906a1, 0x65cf7b10), TOBN(0x81b67178, 0xc50d8485), + TOBN(0x7c0deb35, 0x8a35b3de), TOBN(0x423ac855, 0xc1d29799), + TOBN(0xaf580d87, 0xdac50b74), TOBN(0x28b2b89f, 0x5869734c), + TOBN(0x99a3b936, 0x874e28fb), TOBN(0xbb2c9190, 0x25f3f73a), + TOBN(0x199f6918, 0x84a9d5b7), TOBN(0x7ebe2325, 0x7e770374), + TOBN(0xf442e107, 0x0738efe2), TOBN(0xcf9f3f56, 0xcf9082d2), + TOBN(0x719f69e1, 0x09618708), TOBN(0xcc9e8364, 0xc183f9b1), + TOBN(0xec203a95, 0x366a21af), TOBN(0x6aec5d6d, 0x068b141f), + TOBN(0xee2df78a, 0x994f04e9), TOBN(0xb39ccae8, 0x271245b0), + TOBN(0xb875a4a9, 0x97e43f4f), TOBN(0x507dfe11, 0xdb2cea98), + TOBN(0x4fbf81cb, 0x489b03e9), TOBN(0xdb86ec5b, 0x6ec414fa), + TOBN(0xfad444f9, 0xf51b3ae5), TOBN(0xca7d33d6, 0x1914e3fe), + TOBN(0xa9c32f5c, 0x0ae6c4d0), TOBN(0xa9ca1d1e, 0x73969568), + TOBN(0x98043c31, 0x1aa7467e), TOBN(0xe832e75c, 0xe21b5ac6), + TOBN(0x314b7aea, 0x5232123d), TOBN(0x08307c8c, 0x65ae86db), + TOBN(0x06e7165c, 0xaa4668ed), TOBN(0xb170458b, 0xb4d3ec39), + TOBN(0x4d2e3ec6, 0xc19bb986), TOBN(0xc5f34846, 0xae0304ed), + TOBN(0x917695a0, 0x6c9f9722), TOBN(0x6c7f7317, 0x4cab1c0a), + TOBN(0x6295940e, 0x9d6d2e8b), TOBN(0xd318b8c1, 0x549f7c97), + TOBN(0x22453204, 0x97713885), TOBN(0x468d834b, 0xa8a440fe), + TOBN(0xd81fe5b2, 0xbfba796e), TOBN(0x152364db, 0x6d71f116), + TOBN(0xbb8c7c59, 0xb5b66e53), TOBN(0x0b12c61b, 0x2641a192), + TOBN(0x31f14802, 0xfcf0a7fd), TOBN(0x42fd0789, 0x5488b01e), + TOBN(0x71d78d6d, 0x9952b498), TOBN(0x8eb572d9, 0x07ac5201), + TOBN(0xe0a2a44c, 0x4d194a88), TOBN(0xd2b63fd9, 0xba017e66), + TOBN(0x78efc6c8, 0xf888aefc), TOBN(0xb76f6bda, 0x4a881a11), + TOBN(0x187f314b, 0xb46c2397), TOBN(0x004cf566, 0x5ded2819), + TOBN(0xa9ea5704, 0x38764d34), TOBN(0xbba45217, 0x78084709), + TOBN(0x06474571, 0x1171121e), TOBN(0xad7b7eb1, 0xe7c9b671), + TOBN(0xdacfbc40, 0x730f7507), TOBN(0x178cd8c6, 0xc7ad7bd1), + TOBN(0xbf0be101, 0xb2a67238), TOBN(0x3556d367, 0xaf9c14f2), + TOBN(0x104b7831, 0xa5662075), TOBN(0x58ca59bb, 0x79d9e60a), + TOBN(0x4bc45392, 0xa569a73b), TOBN(0x517a52e8, 0x5698f6c9), + TOBN(0x85643da5, 0xaeadd755), TOBN(0x1aed0cd5, 0x2a581b84), + TOBN(0xb9b4ff84, 0x80af1372), TOBN(0x244c3113, 0xf1ba5d1f), + TOBN(0x2a5dacbe, 0xf5f98d31), TOBN(0x2c3323e8, 0x4375bc2a), + TOBN(0x17a3ab4a, 0x5594b1dd), TOBN(0xa1928bfb, 0xceb4797e), + TOBN(0xe83af245, 0xe4886a19), TOBN(0x8979d546, 0x72b5a74a), + TOBN(0xa0f726bc, 0x19f9e967), TOBN(0xd9d03152, 0xe8fbbf4e), + TOBN(0xcfd6f51d, 0xb7707d40), TOBN(0x633084d9, 0x63f6e6e0), + TOBN(0xedcd9cdc, 0x55667eaf), TOBN(0x73b7f92b, 0x2e44d56f), + TOBN(0xfb2e39b6, 0x4e962b14), TOBN(0x7d408f6e, 0xf671fcbf), + TOBN(0xcc634ddc, 0x164a89bb), TOBN(0x74a42bb2, 0x3ef3bd05), + TOBN(0x1280dbb2, 0x428decbb), TOBN(0x6103f6bb, 0x402c8596), + TOBN(0xfa2bf581, 0x355a5752), TOBN(0x562f96a8, 0x00946674), + TOBN(0x4e4ca16d, 0x6da0223b), TOBN(0xfe47819f, 0x28d3aa25), + TOBN(0x9eea3075, 0xf8dfcf8a), TOBN(0xa284f0aa, 0x95669825), + TOBN(0xb3fca250, 0x867d3fd8), TOBN(0x20757b5f, 0x269d691e), + TOBN(0xf2c24020, 0x93b8a5de), TOBN(0xd3f93359, 0xebc06da6), + TOBN(0x1178293e, 0xb2739c33), TOBN(0xd2a3e770, 0xbcd686e5), + TOBN(0xa76f49f4, 0xcd941534), TOBN(0x0d37406b, 0xe3c71c0e), + TOBN(0x172d9397, 0x3b97f7e3), TOBN(0xec17e239, 0xbd7fd0de), + TOBN(0xe3290551, 0x6f496ba2), TOBN(0x6a693172, 0x36ad50e7), + TOBN(0xc4e539a2, 0x83e7eff5), TOBN(0x752737e7, 0x18e1b4cf), + TOBN(0xa2f7932c, 0x68af43ee), TOBN(0x5502468e, 0x703d00bd), + TOBN(0xe5dc978f, 0x2fb061f5), TOBN(0xc9a1904a, 0x28c815ad), + TOBN(0xd3af538d, 0x470c56a4), TOBN(0x159abc5f, 0x193d8ced), + TOBN(0x2a37245f, 0x20108ef3), TOBN(0xfa17081e, 0x223f7178), + TOBN(0x27b0fb2b, 0x10c8c0f5), TOBN(0x2102c3ea, 0x40650547), + TOBN(0x594564df, 0x8ac3bfa7), TOBN(0x98102033, 0x509dad96), + TOBN(0x6989643f, 0xf1d18a13), TOBN(0x35eebd91, 0xd7fc5af0), + TOBN(0x078d096a, 0xfaeaafd8), TOBN(0xb7a89341, 0xdef3de98), + TOBN(0x2a206e8d, 0xecf2a73a), TOBN(0x066a6397, 0x8e551994), + TOBN(0x3a6a088a, 0xb98d53a2), TOBN(0x0ce7c67c, 0x2d1124aa), + TOBN(0x48cec671, 0x759a113c), TOBN(0xe3b373d3, 0x4f6f67fa), + TOBN(0x5455d479, 0xfd36727b), TOBN(0xe5a428ee, 0xa13c0d81), + TOBN(0xb853dbc8, 0x1c86682b), TOBN(0xb78d2727, 0xb8d02b2a), + TOBN(0xaaf69bed, 0x8ebc329a), TOBN(0xdb6b40b3, 0x293b2148), + TOBN(0xe42ea77d, 0xb8c4961f), TOBN(0xb1a12f7c, 0x20e5e0ab), + TOBN(0xa0ec5274, 0x79e8b05e), TOBN(0x68027391, 0xfab60a80), + TOBN(0x6bfeea5f, 0x16b1bd5e), TOBN(0xf957e420, 0x4de30ad3), + TOBN(0xcbaf664e, 0x6a353b9e), TOBN(0x5c873312, 0x26d14feb), + TOBN(0x4e87f98c, 0xb65f57cb), TOBN(0xdb60a621, 0x5e0cdd41), + TOBN(0x67c16865, 0xa6881440), TOBN(0x1093ef1a, 0x46ab52aa), + TOBN(0xc095afb5, 0x3f4ece64), TOBN(0x6a6bb02e, 0x7604551a), + TOBN(0x55d44b4e, 0x0b26b8cd), TOBN(0xe5f9a999, 0xf971268a), + TOBN(0xc08ec425, 0x11a7de84), TOBN(0x83568095, 0xfda469dd), + TOBN(0x737bfba1, 0x6c6c90a2), TOBN(0x1cb9c4a0, 0xbe229831), + TOBN(0x93bccbba, 0xbb2eec64), TOBN(0xa0c23b64, 0xda03adbe), + TOBN(0x5f7aa00a, 0xe0e86ac4), TOBN(0x470b941e, 0xfc1401e6), + TOBN(0x5ad8d679, 0x9df43574), TOBN(0x4ccfb8a9, 0x0f65d810), + TOBN(0x1bce80e3, 0xaa7fbd81), TOBN(0x273291ad, 0x9508d20a), + TOBN(0xf5c4b46b, 0x42a92806), TOBN(0x810684ec, 0xa86ab44a), + TOBN(0x4591640b, 0xca0bc9f8), TOBN(0xb5efcdfc, 0x5c4b6054), + TOBN(0x16fc8907, 0x6e9edd12), TOBN(0xe29d0b50, 0xd4d792f9), + TOBN(0xa45fd01c, 0x9b03116d), TOBN(0x85035235, 0xc81765a4), + TOBN(0x1fe2a9b2, 0xb4b4b67c), TOBN(0xc1d10df0, 0xe8020604), + TOBN(0x9d64abfc, 0xbc8058d8), TOBN(0x8943b9b2, 0x712a0fbb), + TOBN(0x90eed914, 0x3b3def04), TOBN(0x85ab3aa2, 0x4ce775ff), + TOBN(0x605fd4ca, 0x7bbc9040), TOBN(0x8b34a564, 0xe2c75dfb), + TOBN(0x41ffc94a, 0x10358560), TOBN(0x2d8a5072, 0x9e5c28aa), + TOBN(0xe915a0fc, 0x4cc7eb15), TOBN(0xe9efab05, 0x8f6d0f5d), + TOBN(0xdbab47a9, 0xd19e9b91), TOBN(0x8cfed745, 0x0276154c), + TOBN(0x154357ae, 0x2cfede0d), TOBN(0x520630df, 0x19f5a4ef), + TOBN(0x25759f7c, 0xe382360f), TOBN(0xb6db05c9, 0x88bf5857), + TOBN(0x2917d61d, 0x6c58d46c), TOBN(0x14f8e491, 0xfd20cb7a), + TOBN(0xb68a727a, 0x11c20340), TOBN(0x0386f86f, 0xaf7ccbb6), + TOBN(0x5c8bc6cc, 0xfee09a20), TOBN(0x7d76ff4a, 0xbb7eea35), + TOBN(0xa7bdebe7, 0xdb15be7a), TOBN(0x67a08054, 0xd89f0302), + TOBN(0x56bf0ea9, 0xc1193364), TOBN(0xc8244467, 0x62837ebe), + TOBN(0x32bd8e8b, 0x20d841b8), TOBN(0x127a0548, 0xdbb8a54f), + TOBN(0x83dd4ca6, 0x63b20236), TOBN(0x87714718, 0x203491fa), + TOBN(0x4dabcaaa, 0xaa8a5288), TOBN(0x91cc0c8a, 0xaf23a1c9), + TOBN(0x34c72c6a, 0x3f220e0c), TOBN(0xbcc20bdf, 0x1232144a), + TOBN(0x6e2f42da, 0xa20ede1b), TOBN(0xc441f00c, 0x74a00515), + TOBN(0xbf46a5b6, 0x734b8c4b), TOBN(0x57409503, 0x7b56c9a4), + TOBN(0x9f735261, 0xe4585d45), TOBN(0x9231faed, 0x6734e642), + TOBN(0x1158a176, 0xbe70ee6c), TOBN(0x35f1068d, 0x7c3501bf), + TOBN(0x6beef900, 0xa2d26115), TOBN(0x649406f2, 0xef0afee3), + TOBN(0x3f43a60a, 0xbc2420a1), TOBN(0x509002a7, 0xd5aee4ac), + TOBN(0xb46836a5, 0x3ff3571b), TOBN(0x24f98b78, 0x837927c1), + TOBN(0x6254256a, 0x4533c716), TOBN(0xf27abb0b, 0xd07ee196), + TOBN(0xd7cf64fc, 0x5c6d5bfd), TOBN(0x6915c751, 0xf0cd7a77), + TOBN(0xd9f59012, 0x8798f534), TOBN(0x772b0da8, 0xf81d8b5f), + TOBN(0x1244260c, 0x2e03fa69), TOBN(0x36cf0e3a, 0x3be1a374), + TOBN(0x6e7c1633, 0xef06b960), TOBN(0xa71a4c55, 0x671f90f6), + TOBN(0x7a941251, 0x33c673db), TOBN(0xc0bea510, 0x73e8c131), + TOBN(0x61a8a699, 0xd4f6c734), TOBN(0x25e78c88, 0x341ed001), + TOBN(0x5c18acf8, 0x8e2f7d90), TOBN(0xfdbf33d7, 0x77be32cd), + TOBN(0x0a085cd7, 0xd2eb5ee9), TOBN(0x2d702cfb, 0xb3201115), + TOBN(0xb6e0ebdb, 0x85c88ce8), TOBN(0x23a3ce3c, 0x1e01d617), + TOBN(0x3041618e, 0x567333ac), TOBN(0x9dd0fd8f, 0x157edb6b), + TOBN(0x27f74702, 0xb57872b8), TOBN(0x2ef26b4f, 0x657d5fe1), + TOBN(0x95426f0a, 0x57cf3d40), TOBN(0x847e2ad1, 0x65a6067a), + TOBN(0xd474d9a0, 0x09996a74), TOBN(0x16a56acd, 0x2a26115c), + TOBN(0x02a615c3, 0xd16f4d43), TOBN(0xcc3fc965, 0xaadb85b7), + TOBN(0x386bda73, 0xce07d1b0), TOBN(0xd82910c2, 0x58ad4178), + TOBN(0x124f82cf, 0xcd2617f4), TOBN(0xcc2f5e8d, 0xef691770), + TOBN(0x82702550, 0xb8c30ccc), TOBN(0x7b856aea, 0x1a8e575a), + TOBN(0xbb822fef, 0xb1ab9459), TOBN(0x085928bc, 0xec24e38e), + TOBN(0x5d0402ec, 0xba8f4b4d), TOBN(0xc07cd4ba, 0x00b4d58b), + TOBN(0x5d8dffd5, 0x29227e7a), TOBN(0x61d44d0c, 0x31bf386f), + TOBN(0xe486dc2b, 0x135e6f4d), TOBN(0x680962eb, 0xe79410ef), + TOBN(0xa61bd343, 0xf10088b5), TOBN(0x6aa76076, 0xe2e28686), + TOBN(0x80463d11, 0x8fb98871), TOBN(0xcb26f5c3, 0xbbc76aff), + TOBN(0xd4ab8edd, 0xfbe03614), TOBN(0xc8eb579b, 0xc0cf2dee), + TOBN(0xcc004c15, 0xc93bae41), TOBN(0x46fbae5d, 0x3aeca3b2), + TOBN(0x671235cf, 0x0f1e9ab1), TOBN(0xadfba934, 0x9ec285c1), + TOBN(0x88ded013, 0xf216c980), TOBN(0xc8ac4fb8, 0xf79e0bc1), + TOBN(0xa29b89c6, 0xfb97a237), TOBN(0xb697b780, 0x9922d8e7), + TOBN(0x3142c639, 0xddb945b5), TOBN(0x447b06c7, 0xe094c3a9), + TOBN(0xcdcb3642, 0x72266c90), TOBN(0x633aad08, 0xa9385046), + TOBN(0xa36c936b, 0xb57c6477), TOBN(0x871f8b64, 0xe94dbcc6), + TOBN(0x28d0fb62, 0xa591a67b), TOBN(0x9d40e081, 0xc1d926f5), + TOBN(0x3111eaf6, 0xf2d84b5a), TOBN(0x228993f9, 0xa565b644), + TOBN(0x0ccbf592, 0x2c83188b), TOBN(0xf87b30ab, 0x3df3e197), + TOBN(0xb8658b31, 0x7642bca8), TOBN(0x1a032d7f, 0x52800f17), + TOBN(0x051dcae5, 0x79bf9445), TOBN(0xeba6b8ee, 0x54a2e253), + TOBN(0x5c8b9cad, 0xd4485692), TOBN(0x84bda40e, 0x8986e9be), + TOBN(0xd16d16a4, 0x2f0db448), TOBN(0x8ec80050, 0xa14d4188), + TOBN(0xb2b26107, 0x98fa7aaa), TOBN(0x41209ee4, 0xf073aa4e), + TOBN(0xf1570359, 0xf2d6b19b), TOBN(0xcbe6868c, 0xfc577caf), + TOBN(0x186c4bdc, 0x32c04dd3), TOBN(0xa6c35fae, 0xcfeee397), + TOBN(0xb4a1b312, 0xf086c0cf), TOBN(0xe0a5ccc6, 0xd9461fe2), + TOBN(0xc32278aa, 0x1536189f), TOBN(0x1126c55f, 0xba6df571), + TOBN(0x0f71a602, 0xb194560e), TOBN(0x8b2d7405, 0x324bd6e1), + TOBN(0x8481939e, 0x3738be71), TOBN(0xb5090b1a, 0x1a4d97a9), + TOBN(0x116c65a3, 0xf05ba915), TOBN(0x21863ad3, 0xaae448aa), + TOBN(0xd24e2679, 0xa7aae5d3), TOBN(0x7076013d, 0x0de5c1c4), + TOBN(0x2d50f8ba, 0xbb05b629), TOBN(0x73c1abe2, 0x6e66efbb), + TOBN(0xefd4b422, 0xf2488af7), TOBN(0xe4105d02, 0x663ba575), + TOBN(0x7eb60a8b, 0x53a69457), TOBN(0x62210008, 0xc945973b), + TOBN(0xfb255478, 0x77a50ec6), TOBN(0xbf0392f7, 0x0a37a72c), + TOBN(0xa0a7a19c, 0x4be18e7a), TOBN(0x90d8ea16, 0x25b1e0af), + TOBN(0x7582a293, 0xef953f57), TOBN(0x90a64d05, 0xbdc5465a), + TOBN(0xca79c497, 0xe2510717), TOBN(0x560dbb7c, 0x18cb641f), + TOBN(0x1d8e3286, 0x4b66abfb), TOBN(0xd26f52e5, 0x59030900), + TOBN(0x1ee3f643, 0x5584941a), TOBN(0x6d3b3730, 0x569f5958), + TOBN(0x9ff2a62f, 0x4789dba5), TOBN(0x91fcb815, 0x72b5c9b7), + TOBN(0xf446cb7d, 0x6c8f9a0e), TOBN(0x48f625c1, 0x39b7ecb5), + TOBN(0xbabae801, 0x1c6219b8), TOBN(0xe7a562d9, 0x28ac2f23), + TOBN(0xe1b48732, 0x26e20588), TOBN(0x06ee1cad, 0x775af051), + TOBN(0xda29ae43, 0xfaff79f7), TOBN(0xc141a412, 0x652ee9e0), + TOBN(0x1e127f6f, 0x195f4bd0), TOBN(0x29c6ab4f, 0x072f34f8), + TOBN(0x7b7c1477, 0x30448112), TOBN(0x82b51af1, 0xe4a38656), + TOBN(0x2bf2028a, 0x2f315010), TOBN(0xc9a4a01f, 0x6ea88cd4), + TOBN(0xf63e95d8, 0x257e5818), TOBN(0xdd8efa10, 0xb4519b16), + TOBN(0xed8973e0, 0x0da910bf), TOBN(0xed49d077, 0x5c0fe4a9), + TOBN(0xac3aac5e, 0xb7caee1e), TOBN(0x1033898d, 0xa7f4da57), + TOBN(0x42145c0e, 0x5c6669b9), TOBN(0x42daa688, 0xc1aa2aa0), + TOBN(0x629cc15c, 0x1a1d885a), TOBN(0x25572ec0, 0xf4b76817), + TOBN(0x8312e435, 0x9c8f8f28), TOBN(0x8107f8cd, 0x81965490), + TOBN(0x516ff3a3, 0x6fa6110c), TOBN(0x74fb1eb1, 0xfb93561f), + TOBN(0x6c0c9047, 0x8457522b), TOBN(0xcfd32104, 0x6bb8bdc6), + TOBN(0x2d6884a2, 0xcc80ad57), TOBN(0x7c27fc35, 0x86a9b637), + TOBN(0x3461baed, 0xadf4e8cd), TOBN(0x1d56251a, 0x617242f0), + TOBN(0x0b80d209, 0xc955bef4), TOBN(0xdf02cad2, 0x06adb047), + TOBN(0xf0d7cb91, 0x5ec74fee), TOBN(0xd2503375, 0x1111ba44), + TOBN(0x9671755e, 0xdf53cb36), TOBN(0x54dcb612, 0x3368551b), + TOBN(0x66d69aac, 0xc8a025a4), TOBN(0x6be946c6, 0xe77ef445), + TOBN(0x719946d1, 0xa995e094), TOBN(0x65e848f6, 0xe51e04d8), + TOBN(0xe62f3300, 0x6a1e3113), TOBN(0x1541c7c1, 0x501de503), + TOBN(0x4daac9fa, 0xf4acfade), TOBN(0x0e585897, 0x44cd0b71), + TOBN(0x544fd869, 0x0a51cd77), TOBN(0x60fc20ed, 0x0031016d), + TOBN(0x58b404ec, 0xa4276867), TOBN(0x46f6c3cc, 0x34f34993), + TOBN(0x477ca007, 0xc636e5bd), TOBN(0x8018f5e5, 0x7c458b47), + TOBN(0xa1202270, 0xe47b668f), TOBN(0xcef48ccd, 0xee14f203), + TOBN(0x23f98bae, 0x62ff9b4d), TOBN(0x55acc035, 0xc589eddd), + TOBN(0x3fe712af, 0x64db4444), TOBN(0x19e9d634, 0xbecdd480), + TOBN(0xe08bc047, 0xa930978a), TOBN(0x2dbf24ec, 0xa1280733), + TOBN(0x3c0ae38c, 0x2cd706b2), TOBN(0x5b012a5b, 0x359017b9), + TOBN(0x3943c38c, 0x72e0f5ae), TOBN(0x786167ea, 0x57176fa3), + TOBN(0xe5f9897d, 0x594881dc), TOBN(0x6b5efad8, 0xcfb820c1), + TOBN(0xb2179093, 0xd55018de), TOBN(0x39ad7d32, 0x0bac56ce), + TOBN(0xb55122e0, 0x2cfc0e81), TOBN(0x117c4661, 0xf6d89daa), + TOBN(0x362d01e1, 0xcb64fa09), TOBN(0x6a309b4e, 0x3e9c4ddd), + TOBN(0xfa979fb7, 0xabea49b1), TOBN(0xb4b1d27d, 0x10e2c6c5), + TOBN(0xbd61c2c4, 0x23afde7a), TOBN(0xeb6614f8, 0x9786d358), + TOBN(0x4a5d816b, 0x7f6f7459), TOBN(0xe431a44f, 0x09360e7b), + TOBN(0x8c27a032, 0xc309914c), TOBN(0xcea5d68a, 0xcaede3d8), + TOBN(0x3668f665, 0x3a0a3f95), TOBN(0x89369416, 0x7ceba27b), + TOBN(0x89981fad, 0xe4728fe9), TOBN(0x7102c8a0, 0x8a093562), + TOBN(0xbb80310e, 0x235d21c8), TOBN(0x505e55d1, 0xbefb7f7b), + TOBN(0xa0a90811, 0x12958a67), TOBN(0xd67e106a, 0x4d851fef), + TOBN(0xb84011a9, 0x431dd80e), TOBN(0xeb7c7cca, 0x73306cd9), + TOBN(0x20fadd29, 0xd1b3b730), TOBN(0x83858b5b, 0xfe37b3d3), + TOBN(0xbf4cd193, 0xb6251d5c), TOBN(0x1cca1fd3, 0x1352d952), + TOBN(0xc66157a4, 0x90fbc051), TOBN(0x7990a638, 0x89b98636),} + , + {TOBN(0xe5aa692a, 0x87dec0e1), TOBN(0x010ded8d, 0xf7b39d00), + TOBN(0x7b1b80c8, 0x54cfa0b5), TOBN(0x66beb876, 0xa0f8ea28), + TOBN(0x50d7f531, 0x3476cd0e), TOBN(0xa63d0e65, 0xb08d3949), + TOBN(0x1a09eea9, 0x53479fc6), TOBN(0x82ae9891, 0xf499e742), + TOBN(0xab58b910, 0x5ca7d866), TOBN(0x582967e2, 0x3adb3b34), + TOBN(0x89ae4447, 0xcceac0bc), TOBN(0x919c667c, 0x7bf56af5), + TOBN(0x9aec17b1, 0x60f5dcd7), TOBN(0xec697b9f, 0xddcaadbc), + TOBN(0x0b98f341, 0x463467f5), TOBN(0xb187f1f7, 0xa967132f), + TOBN(0x90fe7a1d, 0x214aeb18), TOBN(0x1506af3c, 0x741432f7), + TOBN(0xbb5565f9, 0xe591a0c4), TOBN(0x10d41a77, 0xb44f1bc3), + TOBN(0xa09d65e4, 0xa84bde96), TOBN(0x42f060d8, 0xf20a6a1c), + TOBN(0x652a3bfd, 0xf27f9ce7), TOBN(0xb6bdb65c, 0x3b3d739f), + TOBN(0xeb5ddcb6, 0xec7fae9f), TOBN(0x995f2714, 0xefb66e5a), + TOBN(0xdee95d8e, 0x69445d52), TOBN(0x1b6c2d46, 0x09e27620), + TOBN(0x32621c31, 0x8129d716), TOBN(0xb03909f1, 0x0958c1aa), + TOBN(0x8c468ef9, 0x1af4af63), TOBN(0x162c429f, 0xfba5cdf6), + TOBN(0x2f682343, 0x753b9371), TOBN(0x29cab45a, 0x5f1f9cd7), + TOBN(0x571623ab, 0xb245db96), TOBN(0xc507db09, 0x3fd79999), + TOBN(0x4e2ef652, 0xaf036c32), TOBN(0x86f0cc78, 0x05018e5c), + TOBN(0xc10a73d4, 0xab8be350), TOBN(0x6519b397, 0x7e826327), + TOBN(0xe8cb5eef, 0x9c053df7), TOBN(0x8de25b37, 0xb300ea6f), + TOBN(0xdb03fa92, 0xc849cffb), TOBN(0x242e43a7, 0xe84169bb), + TOBN(0xe4fa51f4, 0xdd6f958e), TOBN(0x6925a77f, 0xf4445a8d), + TOBN(0xe6e72a50, 0xe90d8949), TOBN(0xc66648e3, 0x2b1f6390), + TOBN(0xb2ab1957, 0x173e460c), TOBN(0x1bbbce75, 0x30704590), + TOBN(0xc0a90dbd, 0xdb1c7162), TOBN(0x505e399e, 0x15cdd65d), + TOBN(0x68434dcb, 0x57797ab7), TOBN(0x60ad35ba, 0x6a2ca8e8), + TOBN(0x4bfdb1e0, 0xde3336c1), TOBN(0xbbef99eb, 0xd8b39015), + TOBN(0x6c3b96f3, 0x1711ebec), TOBN(0x2da40f1f, 0xce98fdc4), + TOBN(0xb99774d3, 0x57b4411f), TOBN(0x87c8bdf4, 0x15b65bb6), + TOBN(0xda3a89e3, 0xc2eef12d), TOBN(0xde95bb9b, 0x3c7471f3), + TOBN(0x600f225b, 0xd812c594), TOBN(0x54907c5d, 0x2b75a56b), + TOBN(0xa93cc5f0, 0x8db60e35), TOBN(0x743e3cd6, 0xfa833319), + TOBN(0x7dad5c41, 0xf81683c9), TOBN(0x70c1e7d9, 0x9c34107e), + TOBN(0x0edc4a39, 0xa6be0907), TOBN(0x36d47035, 0x86d0b7d3), + TOBN(0x8c76da03, 0x272bfa60), TOBN(0x0b4a07ea, 0x0f08a414), + TOBN(0x699e4d29, 0x45c1dd53), TOBN(0xcadc5898, 0x231debb5), + TOBN(0xdf49fcc7, 0xa77f00e0), TOBN(0x93057bbf, 0xa73e5a0e), + TOBN(0x2f8b7ecd, 0x027a4cd1), TOBN(0x114734b3, 0xc614011a), + TOBN(0xe7a01db7, 0x67677c68), TOBN(0x89d9be5e, 0x7e273f4f), + TOBN(0xd225cb2e, 0x089808ef), TOBN(0xf1f7a27d, 0xd59e4107), + TOBN(0x53afc761, 0x8211b9c9), TOBN(0x0361bc67, 0xe6819159), + TOBN(0x2a865d0b, 0x7f071426), TOBN(0x6a3c1810, 0xe7072567), + TOBN(0x3e3bca1e, 0x0d6bcabd), TOBN(0xa1b02bc1, 0x408591bc), + TOBN(0xe0deee59, 0x31fba239), TOBN(0xf47424d3, 0x98bd91d1), + TOBN(0x0f8886f4, 0x071a3c1d), TOBN(0x3f7d41e8, 0xa819233b), + TOBN(0x708623c2, 0xcf6eb998), TOBN(0x86bb49af, 0x609a287f), + TOBN(0x942bb249, 0x63c90762), TOBN(0x0ef6eea5, 0x55a9654b), + TOBN(0x5f6d2d72, 0x36f5defe), TOBN(0xfa9922dc, 0x56f99176), + TOBN(0x6c8c5ece, 0xf78ce0c7), TOBN(0x7b44589d, 0xbe09b55e), + TOBN(0xe11b3bca, 0x9ea83770), TOBN(0xd7fa2c7f, 0x2ab71547), + TOBN(0x2a3dd6fa, 0x2a1ddcc0), TOBN(0x09acb430, 0x5a7b7707), + TOBN(0x4add4a2e, 0x649d4e57), TOBN(0xcd53a2b0, 0x1917526e), + TOBN(0xc5262330, 0x20b44ac4), TOBN(0x4028746a, 0xbaa2c31d), + TOBN(0x51318390, 0x64291d4c), TOBN(0xbf48f151, 0xee5ad909), + TOBN(0xcce57f59, 0x7b185681), TOBN(0x7c3ac1b0, 0x4854d442), + TOBN(0x65587dc3, 0xc093c171), TOBN(0xae7acb24, 0x24f42b65), + TOBN(0x5a338adb, 0x955996cb), TOBN(0xc8e65675, 0x6051f91b), + TOBN(0x66711fba, 0x28b8d0b1), TOBN(0x15d74137, 0xb6c10a90), + TOBN(0x70cdd7eb, 0x3a232a80), TOBN(0xc9e2f07f, 0x6191ed24), + TOBN(0xa80d1db6, 0xf79588c0), TOBN(0xfa52fc69, 0xb55768cc), + TOBN(0x0b4df1ae, 0x7f54438a), TOBN(0x0cadd1a7, 0xf9b46a4f), + TOBN(0xb40ea6b3, 0x1803dd6f), TOBN(0x488e4fa5, 0x55eaae35), + TOBN(0x9f047d55, 0x382e4e16), TOBN(0xc9b5b7e0, 0x2f6e0c98), + TOBN(0x6b1bd2d3, 0x95762649), TOBN(0xa9604ee7, 0xc7aea3f6), + TOBN(0x3646ff27, 0x6dc6f896), TOBN(0x9bf0e7f5, 0x2860bad1), + TOBN(0x2d92c821, 0x7cb44b92), TOBN(0xa2f5ce63, 0xaea9c182), + TOBN(0xd0a2afb1, 0x9154a5fd), TOBN(0x482e474c, 0x95801da6), + TOBN(0xc19972d0, 0xb611c24b), TOBN(0x1d468e65, 0x60a8f351), + TOBN(0xeb758069, 0x7bcf6421), TOBN(0xec9dd0ee, 0x88fbc491), + TOBN(0x5b59d2bf, 0x956c2e32), TOBN(0x73dc6864, 0xdcddf94e), + TOBN(0xfd5e2321, 0xbcee7665), TOBN(0xa7b4f8ef, 0x5e9a06c4), + TOBN(0xfba918dd, 0x7280f855), TOBN(0xbbaac260, 0x8baec688), + TOBN(0xa3b3f00f, 0x33400f42), TOBN(0x3d2dba29, 0x66f2e6e4), + TOBN(0xb6f71a94, 0x98509375), TOBN(0x8f33031f, 0xcea423cc), + TOBN(0x009b8dd0, 0x4807e6fb), TOBN(0x5163cfe5, 0x5cdb954c), + TOBN(0x03cc8f17, 0xcf41c6e8), TOBN(0xf1f03c2a, 0x037b925c), + TOBN(0xc39c19cc, 0x66d2427c), TOBN(0x823d24ba, 0x7b6c18e4), + TOBN(0x32ef9013, 0x901f0b4f), TOBN(0x684360f1, 0xf8941c2e), + TOBN(0x0ebaff52, 0x2c28092e), TOBN(0x7891e4e3, 0x256c932f), + TOBN(0x51264319, 0xac445e3d), TOBN(0x553432e7, 0x8ea74381), + TOBN(0xe6eeaa69, 0x67e9c50a), TOBN(0x27ced284, 0x62e628c7), + TOBN(0x3f96d375, 0x7a4afa57), TOBN(0xde0a14c3, 0xe484c150), + TOBN(0x364a24eb, 0x38bd9923), TOBN(0x1df18da0, 0xe5177422), + TOBN(0x174e8f82, 0xd8d38a9b), TOBN(0x2e97c600, 0xe7de1391), + TOBN(0xc5709850, 0xa1c175dd), TOBN(0x969041a0, 0x32ae5035), + TOBN(0xcbfd533b, 0x76a2086b), TOBN(0xd6bba71b, 0xd7c2e8fe), + TOBN(0xb2d58ee6, 0x099dfb67), TOBN(0x3a8b342d, 0x064a85d9), + TOBN(0x3bc07649, 0x522f9be3), TOBN(0x690c075b, 0xdf1f49a8), + TOBN(0x80e1aee8, 0x3854ec42), TOBN(0x2a7dbf44, 0x17689dc7), + TOBN(0xc004fc0e, 0x3faf4078), TOBN(0xb2f02e9e, 0xdf11862c), + TOBN(0xf10a5e0f, 0xa0a1b7b3), TOBN(0x30aca623, 0x8936ec80), + TOBN(0xf83cbf05, 0x02f40d9a), TOBN(0x4681c468, 0x2c318a4d), + TOBN(0x98575618, 0x0e9c2674), TOBN(0xbe79d046, 0x1847092e), + TOBN(0xaf1e480a, 0x78bd01e0), TOBN(0x6dd359e4, 0x72a51db9), + TOBN(0x62ce3821, 0xe3afbab6), TOBN(0xc5cee5b6, 0x17733199), + TOBN(0xe08b30d4, 0x6ffd9fbb), TOBN(0x6e5bc699, 0x36c610b7), + TOBN(0xf343cff2, 0x9ce262cf), TOBN(0xca2e4e35, 0x68b914c1), + TOBN(0x011d64c0, 0x16de36c5), TOBN(0xe0b10fdd, 0x42e2b829), + TOBN(0x78942981, 0x6685aaf8), TOBN(0xe7511708, 0x230ede97), + TOBN(0x671ed8fc, 0x3b922bf8), TOBN(0xe4d8c0a0, 0x4c29b133), + TOBN(0x87eb1239, 0x3b6e99c4), TOBN(0xaff3974c, 0x8793beba), + TOBN(0x03749405, 0x2c18df9b), TOBN(0xc5c3a293, 0x91007139), + TOBN(0x6a77234f, 0xe37a0b95), TOBN(0x02c29a21, 0xb661c96b), + TOBN(0xc3aaf1d6, 0x141ecf61), TOBN(0x9195509e, 0x3bb22f53), + TOBN(0x29597404, 0x22d51357), TOBN(0x1b083822, 0x537bed60), + TOBN(0xcd7d6e35, 0xe07289f0), TOBN(0x1f94c48c, 0x6dd86eff), + TOBN(0xc8bb1f82, 0xeb0f9cfa), TOBN(0x9ee0b7e6, 0x1b2eb97d), + TOBN(0x5a52fe2e, 0x34d74e31), TOBN(0xa352c310, 0x3bf79ab6), + TOBN(0x97ff6c5a, 0xabfeeb8f), TOBN(0xbfbe8fef, 0xf5c97305), + TOBN(0xd6081ce6, 0xa7904608), TOBN(0x1f812f3a, 0xc4fca249), + TOBN(0x9b24bc9a, 0xb9e5e200), TOBN(0x91022c67, 0x38012ee8), + TOBN(0xe83d9c5d, 0x30a713a1), TOBN(0x4876e3f0, 0x84ef0f93), + TOBN(0xc9777029, 0xc1fbf928), TOBN(0xef7a6bb3, 0xbce7d2a4), + TOBN(0xb8067228, 0xdfa2a659), TOBN(0xd5cd3398, 0xd877a48f), + TOBN(0xbea4fd8f, 0x025d0f3f), TOBN(0xd67d2e35, 0x2eae7c2b), + TOBN(0x184de7d7, 0xcc5f4394), TOBN(0xb5551b5c, 0x4536e142), + TOBN(0x2e89b212, 0xd34aa60a), TOBN(0x14a96fea, 0xf50051d5), + TOBN(0x4e21ef74, 0x0d12bb0b), TOBN(0xc522f020, 0x60b9677e), + TOBN(0x8b12e467, 0x2df7731d), TOBN(0x39f80382, 0x7b326d31), + TOBN(0xdfb8630c, 0x39024a94), TOBN(0xaacb96a8, 0x97319452), + TOBN(0xd68a3961, 0xeda3867c), TOBN(0x0c58e2b0, 0x77c4ffca), + TOBN(0x3d545d63, 0x4da919fa), TOBN(0xef79b69a, 0xf15e2289), + TOBN(0x54bc3d3d, 0x808bab10), TOBN(0xc8ab3007, 0x45f82c37), + TOBN(0xc12738b6, 0x7c4a658a), TOBN(0xb3c47639, 0x40e72182), + TOBN(0x3b77be46, 0x8798e44f), TOBN(0xdc047df2, 0x17a7f85f), + TOBN(0x2439d4c5, 0x5e59d92d), TOBN(0xcedca475, 0xe8e64d8d), + TOBN(0xa724cd0d, 0x87ca9b16), TOBN(0x35e4fd59, 0xa5540dfe), + TOBN(0xf8c1ff18, 0xe4bcf6b1), TOBN(0x856d6285, 0x295018fa), + TOBN(0x433f665c, 0x3263c949), TOBN(0xa6a76dd6, 0xa1f21409), + TOBN(0x17d32334, 0xcc7b4f79), TOBN(0xa1d03122, 0x06720e4a), + TOBN(0xadb6661d, 0x81d9bed5), TOBN(0xf0d6fb02, 0x11db15d1), + TOBN(0x7fd11ad5, 0x1fb747d2), TOBN(0xab50f959, 0x3033762b), + TOBN(0x2a7e711b, 0xfbefaf5a), TOBN(0xc7393278, 0x3fef2bbf), + TOBN(0xe29fa244, 0x0df6f9be), TOBN(0x9092757b, 0x71efd215), + TOBN(0xee60e311, 0x4f3d6fd9), TOBN(0x338542d4, 0x0acfb78b), + TOBN(0x44a23f08, 0x38961a0f), TOBN(0x1426eade, 0x986987ca), + TOBN(0x36e6ee2e, 0x4a863cc6), TOBN(0x48059420, 0x628b8b79), + TOBN(0x30303ad8, 0x7396e1de), TOBN(0x5c8bdc48, 0x38c5aad1), + TOBN(0x3e40e11f, 0x5c8f5066), TOBN(0xabd6e768, 0x8d246bbd), + TOBN(0x68aa40bb, 0x23330a01), TOBN(0xd23f5ee4, 0xc34eafa0), + TOBN(0x3bbee315, 0x5de02c21), TOBN(0x18dd4397, 0xd1d8dd06), + TOBN(0x3ba1939a, 0x122d7b44), TOBN(0xe6d3b40a, 0xa33870d6), + TOBN(0x8e620f70, 0x1c4fe3f8), TOBN(0xf6bba1a5, 0xd3a50cbf), + TOBN(0x4a78bde5, 0xcfc0aee0), TOBN(0x847edc46, 0xc08c50bd), + TOBN(0xbaa2439c, 0xad63c9b2), TOBN(0xceb4a728, 0x10fc2acb), + TOBN(0xa419e40e, 0x26da033d), TOBN(0x6cc3889d, 0x03e02683), + TOBN(0x1cd28559, 0xfdccf725), TOBN(0x0fd7e0f1, 0x8d13d208), + TOBN(0x01b9733b, 0x1f0df9d4), TOBN(0x8cc2c5f3, 0xa2b5e4f3), + TOBN(0x43053bfa, 0x3a304fd4), TOBN(0x8e87665c, 0x0a9f1aa7), + TOBN(0x087f29ec, 0xd73dc965), TOBN(0x15ace455, 0x3e9023db), + TOBN(0x2370e309, 0x2bce28b4), TOBN(0xf9723442, 0xb6b1e84a), + TOBN(0xbeee662e, 0xb72d9f26), TOBN(0xb19396de, 0xf0e47109), + TOBN(0x85b1fa73, 0xe13289d0), TOBN(0x436cf77e, 0x54e58e32), + TOBN(0x0ec833b3, 0xe990ef77), TOBN(0x7373e3ed, 0x1b11fc25), + TOBN(0xbe0eda87, 0x0fc332ce), TOBN(0xced04970, 0x8d7ea856), + TOBN(0xf85ff785, 0x7e977ca0), TOBN(0xb66ee8da, 0xdfdd5d2b), + TOBN(0xf5e37950, 0x905af461), TOBN(0x587b9090, 0x966d487c), + TOBN(0x6a198a1b, 0x32ba0127), TOBN(0xa7720e07, 0x141615ac), + TOBN(0xa23f3499, 0x996ef2f2), TOBN(0xef5f64b4, 0x470bcb3d), + TOBN(0xa526a962, 0x92b8c559), TOBN(0x0c14aac0, 0x69740a0f), + TOBN(0x0d41a9e3, 0xa6bdc0a5), TOBN(0x97d52106, 0x9c48aef4), + TOBN(0xcf16bd30, 0x3e7c253b), TOBN(0xcc834b1a, 0x47fdedc1), + TOBN(0x7362c6e5, 0x373aab2e), TOBN(0x264ed85e, 0xc5f590ff), + TOBN(0x7a46d9c0, 0x66d41870), TOBN(0xa50c20b1, 0x4787ba09), + TOBN(0x185e7e51, 0xe3d44635), TOBN(0xb3b3e080, 0x31e2d8dc), + TOBN(0xbed1e558, 0xa179e9d9), TOBN(0x2daa3f79, 0x74a76781), + TOBN(0x4372baf2, 0x3a40864f), TOBN(0x46900c54, 0x4fe75cb5), + TOBN(0xb95f171e, 0xf76765d0), TOBN(0x4ad726d2, 0x95c87502), + TOBN(0x2ec769da, 0x4d7c99bd), TOBN(0x5e2ddd19, 0xc36cdfa8), + TOBN(0xc22117fc, 0xa93e6dea), TOBN(0xe8a2583b, 0x93771123), + TOBN(0xbe2f6089, 0xfa08a3a2), TOBN(0x4809d5ed, 0x8f0e1112), + TOBN(0x3b414aa3, 0xda7a095e), TOBN(0x9049acf1, 0x26f5aadd), + TOBN(0x78d46a4d, 0x6be8b84a), TOBN(0xd66b1963, 0xb732b9b3), + TOBN(0x5c2ac2a0, 0xde6e9555), TOBN(0xcf52d098, 0xb5bd8770), + TOBN(0x15a15fa6, 0x0fd28921), TOBN(0x56ccb81e, 0x8b27536d), + TOBN(0x0f0d8ab8, 0x9f4ccbb8), TOBN(0xed5f44d2, 0xdb221729), + TOBN(0x43141988, 0x00bed10c), TOBN(0xc94348a4, 0x1d735b8b), + TOBN(0x79f3e9c4, 0x29ef8479), TOBN(0x4c13a4e3, 0x614c693f), + TOBN(0x32c9af56, 0x8e143a14), TOBN(0xbc517799, 0xe29ac5c4), + TOBN(0x05e17992, 0x2774856f), TOBN(0x6e52fb05, 0x6c1bf55f), + TOBN(0xaeda4225, 0xe4f19e16), TOBN(0x70f4728a, 0xaf5ccb26), + TOBN(0x5d2118d1, 0xb2947f22), TOBN(0xc827ea16, 0x281d6fb9), + TOBN(0x8412328d, 0x8cf0eabd), TOBN(0x45ee9fb2, 0x03ef9dcf), + TOBN(0x8e700421, 0xbb937d63), TOBN(0xdf8ff2d5, 0xcc4b37a6), + TOBN(0xa4c0d5b2, 0x5ced7b68), TOBN(0x6537c1ef, 0xc7308f59), + TOBN(0x25ce6a26, 0x3b37f8e8), TOBN(0x170e9a9b, 0xdeebc6ce), + TOBN(0xdd037952, 0x8728d72c), TOBN(0x445b0e55, 0x850154bc), + TOBN(0x4b7d0e06, 0x83a7337b), TOBN(0x1e3416d4, 0xffecf249), + TOBN(0x24840eff, 0x66a2b71f), TOBN(0xd0d9a50a, 0xb37cc26d), + TOBN(0xe2198150, 0x6fe28ef7), TOBN(0x3cc5ef16, 0x23324c7f), + TOBN(0x220f3455, 0x769b5263), TOBN(0xe2ade2f1, 0xa10bf475), + TOBN(0x28cd20fa, 0x458d3671), TOBN(0x1549722c, 0x2dc4847b), + TOBN(0x6dd01e55, 0x591941e3), TOBN(0x0e6fbcea, 0x27128ccb), + TOBN(0xae1a1e6b, 0x3bef0262), TOBN(0xfa8c472c, 0x8f54e103), + TOBN(0x7539c0a8, 0x72c052ec), TOBN(0xd7b27369, 0x5a3490e9), + TOBN(0x143fe1f1, 0x71684349), TOBN(0x36b4722e, 0x32e19b97), + TOBN(0xdc059227, 0x90980aff), TOBN(0x175c9c88, 0x9e13d674), + TOBN(0xa7de5b22, 0x6e6bfdb1), TOBN(0x5ea5b7b2, 0xbedb4b46), + TOBN(0xd5570191, 0xd34a6e44), TOBN(0xfcf60d2e, 0xa24ff7e6), + TOBN(0x614a392d, 0x677819e1), TOBN(0x7be74c7e, 0xaa5a29e8), + TOBN(0xab50fece, 0x63c85f3f), TOBN(0xaca2e2a9, 0x46cab337), + TOBN(0x7f700388, 0x122a6fe3), TOBN(0xdb69f703, 0x882a04a8), + TOBN(0x9a77935d, 0xcf7aed57), TOBN(0xdf16207c, 0x8d91c86f), + TOBN(0x2fca49ab, 0x63ed9998), TOBN(0xa3125c44, 0xa77ddf96), + TOBN(0x05dd8a86, 0x24344072), TOBN(0xa023dda2, 0xfec3fb56), + TOBN(0x421b41fc, 0x0c743032), TOBN(0x4f2120c1, 0x5e438639), + TOBN(0xfb7cae51, 0xc83c1b07), TOBN(0xb2370caa, 0xcac2171a), + TOBN(0x2eb2d962, 0x6cc820fb), TOBN(0x59feee5c, 0xb85a44bf), + TOBN(0x94620fca, 0x5b6598f0), TOBN(0x6b922cae, 0x7e314051), + TOBN(0xff8745ad, 0x106bed4e), TOBN(0x546e71f5, 0xdfa1e9ab), + TOBN(0x935c1e48, 0x1ec29487), TOBN(0x9509216c, 0x4d936530), + TOBN(0xc7ca3067, 0x85c9a2db), TOBN(0xd6ae5152, 0x6be8606f), + TOBN(0x09dbcae6, 0xe14c651d), TOBN(0xc9536e23, 0x9bc32f96), + TOBN(0xa90535a9, 0x34521b03), TOBN(0xf39c526c, 0x878756ff), + TOBN(0x383172ec, 0x8aedf03c), TOBN(0x20a8075e, 0xefe0c034), + TOBN(0xf22f9c62, 0x64026422), TOBN(0x8dd10780, 0x24b9d076), + TOBN(0x944c742a, 0x3bef2950), TOBN(0x55b9502e, 0x88a2b00b), + TOBN(0xa59e14b4, 0x86a09817), TOBN(0xa39dd3ac, 0x47bb4071), + TOBN(0x55137f66, 0x3be0592f), TOBN(0x07fcafd4, 0xc9e63f5b), + TOBN(0x963652ee, 0x346eb226), TOBN(0x7dfab085, 0xec2facb7), + TOBN(0x273bf2b8, 0x691add26), TOBN(0x30d74540, 0xf2b46c44), + TOBN(0x05e8e73e, 0xf2c2d065), TOBN(0xff9b8a00, 0xd42eeac9), + TOBN(0x2fcbd205, 0x97209d22), TOBN(0xeb740ffa, 0xde14ea2c), + TOBN(0xc71ff913, 0xa8aef518), TOBN(0x7bfc74bb, 0xfff4cfa2), + TOBN(0x1716680c, 0xb6b36048), TOBN(0x121b2cce, 0x9ef79af1), + TOBN(0xbff3c836, 0xa01eb3d3), TOBN(0x50eb1c6a, 0x5f79077b), + TOBN(0xa48c32d6, 0xa004bbcf), TOBN(0x47a59316, 0x7d64f61d), + TOBN(0x6068147f, 0x93102016), TOBN(0x12c5f654, 0x94d12576), + TOBN(0xefb071a7, 0xc9bc6b91), TOBN(0x7c2da0c5, 0x6e23ea95), + TOBN(0xf4fd45b6, 0xd4a1dd5d), TOBN(0x3e7ad9b6, 0x9122b13c), + TOBN(0x342ca118, 0xe6f57a48), TOBN(0x1c2e94a7, 0x06f8288f), + TOBN(0x99e68f07, 0x5a97d231), TOBN(0x7c80de97, 0x4d838758), + TOBN(0xbce0f5d0, 0x05872727), TOBN(0xbe5d95c2, 0x19c4d016), + TOBN(0x921d5cb1, 0x9c2492ee), TOBN(0x42192dc1, 0x404d6fb3), + TOBN(0x4c84dcd1, 0x32f988d3), TOBN(0xde26d61f, 0xa17b8e85), + TOBN(0xc466dcb6, 0x137c7408), TOBN(0x9a38d7b6, 0x36a266da), + TOBN(0x7ef5cb06, 0x83bebf1b), TOBN(0xe5cdcbbf, 0x0fd014e3), + TOBN(0x30aa376d, 0xf65965a0), TOBN(0x60fe88c2, 0xebb3e95e), + TOBN(0x33fd0b61, 0x66ee6f20), TOBN(0x8827dcdb, 0x3f41f0a0), + TOBN(0xbf8a9d24, 0x0c56c690), TOBN(0x40265dad, 0xddb7641d), + TOBN(0x522b05bf, 0x3a6b662b), TOBN(0x466d1dfe, 0xb1478c9b), + TOBN(0xaa616962, 0x1484469b), TOBN(0x0db60549, 0x02df8f9f), + TOBN(0xc37bca02, 0x3cb8bf51), TOBN(0x5effe346, 0x21371ce8), + TOBN(0xe8f65264, 0xff112c32), TOBN(0x8a9c736d, 0x7b971fb2), + TOBN(0xa4f19470, 0x7b75080d), TOBN(0xfc3f2c5a, 0x8839c59b), + TOBN(0x1d6c777e, 0x5aeb49c2), TOBN(0xf3db034d, 0xda1addfe), + TOBN(0xd76fee5a, 0x5535affc), TOBN(0x0853ac70, 0xb92251fd), + TOBN(0x37e3d594, 0x8b2a29d5), TOBN(0x28f1f457, 0x4de00ddb), + TOBN(0x8083c1b5, 0xf42c328b), TOBN(0xd8ef1d8f, 0xe493c73b), + TOBN(0x96fb6260, 0x41dc61bd), TOBN(0xf74e8a9d, 0x27ee2f8a), + TOBN(0x7c605a80, 0x2c946a5d), TOBN(0xeed48d65, 0x3839ccfd), + TOBN(0x9894344f, 0x3a29467a), TOBN(0xde81e949, 0xc51eba6d), + TOBN(0xdaea066b, 0xa5e5c2f2), TOBN(0x3fc8a614, 0x08c8c7b3), + TOBN(0x7adff88f, 0x06d0de9f), TOBN(0xbbc11cf5, 0x3b75ce0a), + TOBN(0x9fbb7acc, 0xfbbc87d5), TOBN(0xa1458e26, 0x7badfde2)} + , + {TOBN(0x1cb43668, 0xe039c256), TOBN(0x5f26fb8b, 0x7c17fd5d), + TOBN(0xeee426af, 0x79aa062b), TOBN(0x072002d0, 0xd78fbf04), + TOBN(0x4c9ca237, 0xe84fb7e3), TOBN(0xb401d8a1, 0x0c82133d), + TOBN(0xaaa52592, 0x6d7e4181), TOBN(0xe9430833, 0x73dbb152), + TOBN(0xf92dda31, 0xbe24319a), TOBN(0x03f7d28b, 0xe095a8e7), + TOBN(0xa52fe840, 0x98782185), TOBN(0x276ddafe, 0x29c24dbc), + TOBN(0x80cd5496, 0x1d7a64eb), TOBN(0xe4360889, 0x7f1dbe42), + TOBN(0x2f81a877, 0x8438d2d5), TOBN(0x7e4d52a8, 0x85169036), + TOBN(0x19e3d5b1, 0x1d59715d), TOBN(0xc7eaa762, 0xd788983e), + TOBN(0xe5a730b0, 0xabf1f248), TOBN(0xfbab8084, 0xfae3fd83), + TOBN(0x65e50d21, 0x53765b2f), TOBN(0xbdd4e083, 0xfa127f3d), + TOBN(0x9cf3c074, 0x397b1b10), TOBN(0x59f8090c, 0xb1b59fd3), + TOBN(0x7b15fd9d, 0x615faa8f), TOBN(0x8fa1eb40, 0x968554ed), + TOBN(0x7bb4447e, 0x7aa44882), TOBN(0x2bb2d0d1, 0x029fff32), + TOBN(0x075e2a64, 0x6caa6d2f), TOBN(0x8eb879de, 0x22e7351b), + TOBN(0xbcd5624e, 0x9a506c62), TOBN(0x218eaef0, 0xa87e24dc), + TOBN(0x37e56847, 0x44ddfa35), TOBN(0x9ccfc5c5, 0xdab3f747), + TOBN(0x9ac1df3f, 0x1ee96cf4), TOBN(0x0c0571a1, 0x3b480b8f), + TOBN(0x2fbeb3d5, 0x4b3a7b3c), TOBN(0x35c03669, 0x5dcdbb99), + TOBN(0x52a0f5dc, 0xb2415b3a), TOBN(0xd57759b4, 0x4413ed9a), + TOBN(0x1fe647d8, 0x3d30a2c5), TOBN(0x0857f77e, 0xf78a81dc), + TOBN(0x11d5a334, 0x131a4a9b), TOBN(0xc0a94af9, 0x29d393f5), + TOBN(0xbc3a5c0b, 0xdaa6ec1a), TOBN(0xba9fe493, 0x88d2d7ed), + TOBN(0xbb4335b4, 0xbb614797), TOBN(0x991c4d68, 0x72f83533), + TOBN(0x53258c28, 0xd2f01cb3), TOBN(0x93d6eaa3, 0xd75db0b1), + TOBN(0x419a2b0d, 0xe87d0db4), TOBN(0xa1e48f03, 0xd8fe8493), + TOBN(0xf747faf6, 0xc508b23a), TOBN(0xf137571a, 0x35d53549), + TOBN(0x9f5e58e2, 0xfcf9b838), TOBN(0xc7186cee, 0xa7fd3cf5), + TOBN(0x77b868ce, 0xe978a1d3), TOBN(0xe3a68b33, 0x7ab92d04), + TOBN(0x51029794, 0x87a5b862), TOBN(0x5f0606c3, 0x3a61d41d), + TOBN(0x2814be27, 0x6f9326f1), TOBN(0x2f521c14, 0xc6fe3c2e), + TOBN(0x17464d7d, 0xacdf7351), TOBN(0x10f5f9d3, 0x777f7e44), + TOBN(0xce8e616b, 0x269fb37d), TOBN(0xaaf73804, 0x7de62de5), + TOBN(0xaba11175, 0x4fdd4153), TOBN(0x515759ba, 0x3770b49b), + TOBN(0x8b09ebf8, 0xaa423a61), TOBN(0x592245a1, 0xcd41fb92), + TOBN(0x1cba8ec1, 0x9b4c8936), TOBN(0xa87e91e3, 0xaf36710e), + TOBN(0x1fd84ce4, 0x3d34a2e3), TOBN(0xee3759ce, 0xb43b5d61), + TOBN(0x895bc78c, 0x619186c7), TOBN(0xf19c3809, 0xcbb9725a), + TOBN(0xc0be21aa, 0xde744b1f), TOBN(0xa7d222b0, 0x60f8056b), + TOBN(0x74be6157, 0xb23efe11), TOBN(0x6fab2b4f, 0x0cd68253), + TOBN(0xad33ea5f, 0x4bf1d725), TOBN(0x9c1d8ee2, 0x4f6c950f), + TOBN(0x544ee78a, 0xa377af06), TOBN(0x54f489bb, 0x94a113e1), + TOBN(0x8f11d634, 0x992fb7e8), TOBN(0x0169a7aa, 0xa2a44347), + TOBN(0x1d49d4af, 0x95020e00), TOBN(0x95945722, 0xe08e120b), + TOBN(0xb6e33878, 0xa4d32282), TOBN(0xe36e029d, 0x48020ae7), + TOBN(0xe05847fb, 0x37a9b750), TOBN(0xf876812c, 0xb29e3819), + TOBN(0x84ad138e, 0xd23a17f0), TOBN(0x6d7b4480, 0xf0b3950e), + TOBN(0xdfa8aef4, 0x2fd67ae0), TOBN(0x8d3eea24, 0x52333af6), + TOBN(0x0d052075, 0xb15d5acc), TOBN(0xc6d9c79f, 0xbd815bc4), + TOBN(0x8dcafd88, 0xdfa36cf2), TOBN(0x908ccbe2, 0x38aa9070), + TOBN(0x638722c4, 0xba35afce), TOBN(0x5a3da8b0, 0xfd6abf0b), + TOBN(0x2dce252c, 0xc9c335c1), TOBN(0x84e7f0de, 0x65aa799b), + TOBN(0x2101a522, 0xb99a72cb), TOBN(0x06de6e67, 0x87618016), + TOBN(0x5ff8c7cd, 0xe6f3653e), TOBN(0x0a821ab5, 0xc7a6754a), + TOBN(0x7e3fa52b, 0x7cb0b5a2), TOBN(0xa7fb121c, 0xc9048790), + TOBN(0x1a725020, 0x06ce053a), TOBN(0xb490a31f, 0x04e929b0), + TOBN(0xe17be47d, 0x62dd61ad), TOBN(0x781a961c, 0x6be01371), + TOBN(0x1063bfd3, 0xdae3cbba), TOBN(0x35647406, 0x7f73c9ba), + TOBN(0xf50e957b, 0x2736a129), TOBN(0xa6313702, 0xed13f256), + TOBN(0x9436ee65, 0x3a19fcc5), TOBN(0xcf2bdb29, 0xe7a4c8b6), + TOBN(0xb06b1244, 0xc5f95cd8), TOBN(0xda8c8af0, 0xf4ab95f4), + TOBN(0x1bae59c2, 0xb9e5836d), TOBN(0x07d51e7e, 0x3acffffc), + TOBN(0x01e15e6a, 0xc2ccbcda), TOBN(0x3bc1923f, 0x8528c3e0), + TOBN(0x43324577, 0xa49fead4), TOBN(0x61a1b884, 0x2aa7a711), + TOBN(0xf9a86e08, 0x700230ef), TOBN(0x0af585a1, 0xbd19adf8), + TOBN(0x7645f361, 0xf55ad8f2), TOBN(0x6e676223, 0x46c3614c), + TOBN(0x23cb257c, 0x4e774d3f), TOBN(0x82a38513, 0xac102d1b), + TOBN(0x9bcddd88, 0x7b126aa5), TOBN(0xe716998b, 0xeefd3ee4), + TOBN(0x4239d571, 0xfb167583), TOBN(0xdd011c78, 0xd16c8f8a), + TOBN(0x271c2895, 0x69a27519), TOBN(0x9ce0a3b7, 0xd2d64b6a), + TOBN(0x8c977289, 0xd5ec6738), TOBN(0xa3b49f9a, 0x8840ef6b), + TOBN(0x808c14c9, 0x9a453419), TOBN(0x5c00295b, 0x0cf0a2d5), + TOBN(0x524414fb, 0x1d4bcc76), TOBN(0xb07691d2, 0x459a88f1), + TOBN(0x77f43263, 0xf70d110f), TOBN(0x64ada5e0, 0xb7abf9f3), + TOBN(0xafd0f94e, 0x5b544cf5), TOBN(0xb4a13a15, 0xfd2713fe), + TOBN(0xb99b7d6e, 0x250c74f4), TOBN(0x097f2f73, 0x20324e45), + TOBN(0x994b37d8, 0xaffa8208), TOBN(0xc3c31b0b, 0xdc29aafc), + TOBN(0x3da74651, 0x7a3a607f), TOBN(0xd8e1b8c1, 0xfe6955d6), + TOBN(0x716e1815, 0xc8418682), TOBN(0x541d487f, 0x7dc91d97), + TOBN(0x48a04669, 0xc6996982), TOBN(0xf39cab15, 0x83a6502e), + TOBN(0x025801a0, 0xe68db055), TOBN(0xf3569758, 0xba3338d5), + TOBN(0xb0c8c0aa, 0xee2afa84), TOBN(0x4f6985d3, 0xfb6562d1), + TOBN(0x351f1f15, 0x132ed17a), TOBN(0x510ed0b4, 0xc04365fe), + TOBN(0xa3f98138, 0xe5b1f066), TOBN(0xbc9d95d6, 0x32df03dc), + TOBN(0xa83ccf6e, 0x19abd09e), TOBN(0x0b4097c1, 0x4ff17edb), + TOBN(0x58a5c478, 0xd64a06ce), TOBN(0x2ddcc3fd, 0x544a58fd), + TOBN(0xd449503d, 0x9e8153b8), TOBN(0x3324fd02, 0x7774179b), + TOBN(0xaf5d47c8, 0xdbd9120c), TOBN(0xeb860162, 0x34fa94db), + TOBN(0x5817bdd1, 0x972f07f4), TOBN(0xe5579e2e, 0xd27bbceb), + TOBN(0x86847a1f, 0x5f11e5a6), TOBN(0xb39ed255, 0x7c3cf048), + TOBN(0xe1076417, 0xa2f62e55), TOBN(0x6b9ab38f, 0x1bcf82a2), + TOBN(0x4bb7c319, 0x7aeb29f9), TOBN(0xf6d17da3, 0x17227a46), + TOBN(0xab53ddbd, 0x0f968c00), TOBN(0xa03da7ec, 0x000c880b), + TOBN(0x7b239624, 0x6a9ad24d), TOBN(0x612c0401, 0x01ec60d0), + TOBN(0x70d10493, 0x109f5df1), TOBN(0xfbda4030, 0x80af7550), + TOBN(0x30b93f95, 0xc6b9a9b3), TOBN(0x0c74ec71, 0x007d9418), + TOBN(0x94175564, 0x6edb951f), TOBN(0x5f4a9d78, 0x7f22c282), + TOBN(0xb7870895, 0xb38d1196), TOBN(0xbc593df3, 0xa228ce7c), + TOBN(0xc78c5bd4, 0x6af3641a), TOBN(0x7802200b, 0x3d9b3dcc), + TOBN(0x0dc73f32, 0x8be33304), TOBN(0x847ed87d, 0x61ffb79a), + TOBN(0xf85c974e, 0x6d671192), TOBN(0x1e14100a, 0xde16f60f), + TOBN(0x45cb0d5a, 0x95c38797), TOBN(0x18923bba, 0x9b022da4), + TOBN(0xef2be899, 0xbbe7e86e), TOBN(0x4a1510ee, 0x216067bf), + TOBN(0xd98c8154, 0x84d5ce3e), TOBN(0x1af777f0, 0xf92a2b90), + TOBN(0x9fbcb400, 0x4ef65724), TOBN(0x3e04a4c9, 0x3c0ca6fe), + TOBN(0xfb3e2cb5, 0x55002994), TOBN(0x1f3a93c5, 0x5363ecab), + TOBN(0x1fe00efe, 0x3923555b), TOBN(0x744bedd9, 0x1e1751ea), + TOBN(0x3fb2db59, 0x6ab69357), TOBN(0x8dbd7365, 0xf5e6618b), + TOBN(0x99d53099, 0xdf1ea40e), TOBN(0xb3f24a0b, 0x57d61e64), + TOBN(0xd088a198, 0x596eb812), TOBN(0x22c8361b, 0x5762940b), + TOBN(0x66f01f97, 0xf9c0d95c), TOBN(0x88461172, 0x8e43cdae), + TOBN(0x11599a7f, 0xb72b15c3), TOBN(0x135a7536, 0x420d95cc), + TOBN(0x2dcdf0f7, 0x5f7ae2f6), TOBN(0x15fc6e1d, 0xd7fa6da2), + TOBN(0x81ca829a, 0xd1d441b6), TOBN(0x84c10cf8, 0x04a106b6), + TOBN(0xa9b26c95, 0xa73fbbd0), TOBN(0x7f24e0cb, 0x4d8f6ee8), + TOBN(0x48b45937, 0x1e25a043), TOBN(0xf8a74fca, 0x036f3dfe), + TOBN(0x1ed46585, 0xc9f84296), TOBN(0x7fbaa8fb, 0x3bc278b0), + TOBN(0xa8e96cd4, 0x6c4fcbd0), TOBN(0x940a1202, 0x73b60a5f), + TOBN(0x34aae120, 0x55a4aec8), TOBN(0x550e9a74, 0xdbd742f0), + TOBN(0x794456d7, 0x228c68ab), TOBN(0x492f8868, 0xa4e25ec6), + TOBN(0x682915ad, 0xb2d8f398), TOBN(0xf13b51cc, 0x5b84c953), + TOBN(0xcda90ab8, 0x5bb917d6), TOBN(0x4b615560, 0x4ea3dee1), + TOBN(0x578b4e85, 0x0a52c1c8), TOBN(0xeab1a695, 0x20b75fc4), + TOBN(0x60c14f3c, 0xaa0bb3c6), TOBN(0x220f448a, 0xb8216094), + TOBN(0x4fe7ee31, 0xb0e63d34), TOBN(0xf4600572, 0xa9e54fab), + TOBN(0xc0493334, 0xd5e7b5a4), TOBN(0x8589fb92, 0x06d54831), + TOBN(0xaa70f5cc, 0x6583553a), TOBN(0x0879094a, 0xe25649e5), + TOBN(0xcc904507, 0x10044652), TOBN(0xebb0696d, 0x02541c4f), + TOBN(0x5a171fde, 0xb9718710), TOBN(0x38f1bed8, 0xf374a9f5), + TOBN(0xc8c582e1, 0xba39bdc1), TOBN(0xfc457b0a, 0x908cc0ce), + TOBN(0x9a187fd4, 0x883841e2), TOBN(0x8ec25b39, 0x38725381), + TOBN(0x2553ed05, 0x96f84395), TOBN(0x095c7661, 0x6f6c6897), + TOBN(0x917ac85c, 0x4bdc5610), TOBN(0xb2885fe4, 0x179eb301), + TOBN(0x5fc65547, 0x8b78bdcc), TOBN(0x4a9fc893, 0xe59e4699), + TOBN(0xbb7ff0cd, 0x3ce299af), TOBN(0x195be9b3, 0xadf38b20), + TOBN(0x6a929c87, 0xd38ddb8f), TOBN(0x55fcc99c, 0xb21a51b9), + TOBN(0x2b695b4c, 0x721a4593), TOBN(0xed1e9a15, 0x768eaac2), + TOBN(0xfb63d71c, 0x7489f914), TOBN(0xf98ba31c, 0x78118910), + TOBN(0x80291373, 0x9b128eb4), TOBN(0x7801214e, 0xd448af4a), + TOBN(0xdbd2e22b, 0x55418dd3), TOBN(0xeffb3c0d, 0xd3998242), + TOBN(0xdfa6077c, 0xc7bf3827), TOBN(0xf2165bcb, 0x47f8238f), + TOBN(0xfe37cf68, 0x8564d554), TOBN(0xe5f825c4, 0x0a81fb98), + TOBN(0x43cc4f67, 0xffed4d6f), TOBN(0xbc609578, 0xb50a34b0), + TOBN(0x8aa8fcf9, 0x5041faf1), TOBN(0x5659f053, 0x651773b6), + TOBN(0xe87582c3, 0x6044d63b), TOBN(0xa6089409, 0x0cdb0ca0), + TOBN(0x8c993e0f, 0xbfb2bcf6), TOBN(0xfc64a719, 0x45985cfc), + TOBN(0x15c4da80, 0x83dbedba), TOBN(0x804ae112, 0x2be67df7), + TOBN(0xda4c9658, 0xa23defde), TOBN(0x12002ddd, 0x5156e0d3), + TOBN(0xe68eae89, 0x5dd21b96), TOBN(0x8b99f28b, 0xcf44624d), + TOBN(0x0ae00808, 0x1ec8897a), TOBN(0xdd0a9303, 0x6712f76e), + TOBN(0x96237522, 0x4e233de4), TOBN(0x192445b1, 0x2b36a8a5), + TOBN(0xabf9ff74, 0x023993d9), TOBN(0x21f37bf4, 0x2aad4a8f), + TOBN(0x340a4349, 0xf8bd2bbd), TOBN(0x1d902cd9, 0x4868195d), + TOBN(0x3d27bbf1, 0xe5fdb6f1), TOBN(0x7a5ab088, 0x124f9f1c), + TOBN(0xc466ab06, 0xf7a09e03), TOBN(0x2f8a1977, 0x31f2c123), + TOBN(0xda355dc7, 0x041b6657), TOBN(0xcb840d12, 0x8ece2a7c), + TOBN(0xb600ad9f, 0x7db32675), TOBN(0x78fea133, 0x07a06f1b), + TOBN(0x5d032269, 0xb31f6094), TOBN(0x07753ef5, 0x83ec37aa), + TOBN(0x03485aed, 0x9c0bea78), TOBN(0x41bb3989, 0xbc3f4524), + TOBN(0x09403761, 0x697f726d), TOBN(0x6109beb3, 0xdf394820), + TOBN(0x804111ea, 0x3b6d1145), TOBN(0xb6271ea9, 0xa8582654), + TOBN(0x619615e6, 0x24e66562), TOBN(0xa2554945, 0xd7b6ad9c), + TOBN(0xd9c4985e, 0x99bfe35f), TOBN(0x9770ccc0, 0x7b51cdf6), + TOBN(0x7c327013, 0x92881832), TOBN(0x8777d45f, 0x286b26d1), + TOBN(0x9bbeda22, 0xd847999d), TOBN(0x03aa33b6, 0xc3525d32), + TOBN(0x4b7b96d4, 0x28a959a1), TOBN(0xbb3786e5, 0x31e5d234), + TOBN(0xaeb5d3ce, 0x6961f247), TOBN(0x20aa85af, 0x02f93d3f), + TOBN(0x9cd1ad3d, 0xd7a7ae4f), TOBN(0xbf6688f0, 0x781adaa8), + TOBN(0xb1b40e86, 0x7469cead), TOBN(0x1904c524, 0x309fca48), + TOBN(0x9b7312af, 0x4b54bbc7), TOBN(0xbe24bf8f, 0x593affa2), + TOBN(0xbe5e0790, 0xbd98764b), TOBN(0xa0f45f17, 0xa26e299e), + TOBN(0x4af0d2c2, 0x6b8fe4c7), TOBN(0xef170db1, 0x8ae8a3e6), + TOBN(0x0e8d61a0, 0x29e0ccc1), TOBN(0xcd53e87e, 0x60ad36ca), + TOBN(0x328c6623, 0xc8173822), TOBN(0x7ee1767d, 0xa496be55), + TOBN(0x89f13259, 0x648945af), TOBN(0x9e45a5fd, 0x25c8009c), + TOBN(0xaf2febd9, 0x1f61ab8c), TOBN(0x43f6bc86, 0x8a275385), + TOBN(0x87792348, 0xf2142e79), TOBN(0x17d89259, 0xc6e6238a), + TOBN(0x7536d2f6, 0x4a839d9b), TOBN(0x1f428fce, 0x76a1fbdc), + TOBN(0x1c109601, 0x0db06dfe), TOBN(0xbfc16bc1, 0x50a3a3cc), + TOBN(0xf9cbd9ec, 0x9b30f41b), TOBN(0x5b5da0d6, 0x00138cce), + TOBN(0xec1d0a48, 0x56ef96a7), TOBN(0xb47eb848, 0x982bf842), + TOBN(0x66deae32, 0xec3f700d), TOBN(0x4e43c42c, 0xaa1181e0), + TOBN(0xa1d72a31, 0xd1a4aa2a), TOBN(0x440d4668, 0xc004f3ce), + TOBN(0x0d6a2d3b, 0x45fe8a7a), TOBN(0x820e52e2, 0xfb128365), + TOBN(0x29ac5fcf, 0x25e51b09), TOBN(0x180cd2bf, 0x2023d159), + TOBN(0xa9892171, 0xa1ebf90e), TOBN(0xf97c4c87, 0x7c132181), + TOBN(0x9f1dc724, 0xc03dbb7e), TOBN(0xae043765, 0x018cbbe4), + TOBN(0xfb0b2a36, 0x0767d153), TOBN(0xa8e2f4d6, 0x249cbaeb), + TOBN(0x172a5247, 0xd95ea168), TOBN(0x1758fada, 0x2970764a), + TOBN(0xac803a51, 0x1d978169), TOBN(0x299cfe2e, 0xde77e01b), + TOBN(0x652a1e17, 0xb0a98927), TOBN(0x2e26e1d1, 0x20014495), + TOBN(0x7ae0af9f, 0x7175b56a), TOBN(0xc2e22a80, 0xd64b9f95), + TOBN(0x4d0ff9fb, 0xd90a060a), TOBN(0x496a27db, 0xbaf38085), + TOBN(0x32305401, 0xda776bcf), TOBN(0xb8cdcef6, 0x725f209e), + TOBN(0x61ba0f37, 0x436a0bba), TOBN(0x263fa108, 0x76860049), + TOBN(0x92beb98e, 0xda3542cf), TOBN(0xa2d4d14a, 0xd5849538), + TOBN(0x989b9d68, 0x12e9a1bc), TOBN(0x61d9075c, 0x5f6e3268), + TOBN(0x352c6aa9, 0x99ace638), TOBN(0xde4e4a55, 0x920f43ff), + TOBN(0xe5e4144a, 0xd673c017), TOBN(0x667417ae, 0x6f6e05ea), + TOBN(0x613416ae, 0xdcd1bd56), TOBN(0x5eb36201, 0x86693711), + TOBN(0x2d7bc504, 0x3a1aa914), TOBN(0x175a1299, 0x76dc5975), + TOBN(0xe900e0f2, 0x3fc8125c), TOBN(0x569ef68c, 0x11198875), + TOBN(0x9012db63, 0x63a113b4), TOBN(0xe3bd3f56, 0x98835766), + TOBN(0xa5c94a52, 0x76412dea), TOBN(0xad9e2a09, 0xaa735e5c), + TOBN(0x405a984c, 0x508b65e9), TOBN(0xbde4a1d1, 0x6df1a0d1), + TOBN(0x1a9433a1, 0xdfba80da), TOBN(0xe9192ff9, 0x9440ad2e), + TOBN(0x9f649696, 0x5099fe92), TOBN(0x25ddb65c, 0x0b27a54a), + TOBN(0x178279dd, 0xc590da61), TOBN(0x5479a999, 0xfbde681a), + TOBN(0xd0e84e05, 0x013fe162), TOBN(0xbe11dc92, 0x632d471b), + TOBN(0xdf0b0c45, 0xfc0e089f), TOBN(0x04fb15b0, 0x4c144025), + TOBN(0xa61d5fc2, 0x13c99927), TOBN(0xa033e9e0, 0x3de2eb35), + TOBN(0xf8185d5c, 0xb8dacbb4), TOBN(0x9a88e265, 0x8644549d), + TOBN(0xf717af62, 0x54671ff6), TOBN(0x4bd4241b, 0x5fa58603), + TOBN(0x06fba40b, 0xe67773c0), TOBN(0xc1d933d2, 0x6a2847e9), + TOBN(0xf4f5acf3, 0x689e2c70), TOBN(0x92aab0e7, 0x46bafd31), + TOBN(0x798d76aa, 0x3473f6e5), TOBN(0xcc6641db, 0x93141934), + TOBN(0xcae27757, 0xd31e535e), TOBN(0x04cc43b6, 0x87c2ee11), + TOBN(0x8d1f9675, 0x2e029ffa), TOBN(0xc2150672, 0xe4cc7a2c), + TOBN(0x3b03c1e0, 0x8d68b013), TOBN(0xa9d6816f, 0xedf298f3), + TOBN(0x1bfbb529, 0xa2804464), TOBN(0x95a52fae, 0x5db22125), + TOBN(0x55b32160, 0x0e1cb64e), TOBN(0x004828f6, 0x7e7fc9fe), + TOBN(0x13394b82, 0x1bb0fb93), TOBN(0xb6293a2d, 0x35f1a920), + TOBN(0xde35ef21, 0xd145d2d9), TOBN(0xbe6225b3, 0xbb8fa603), + TOBN(0x00fc8f6b, 0x32cf252d), TOBN(0xa28e52e6, 0x117cf8c2), + TOBN(0x9d1dc89b, 0x4c371e6d), TOBN(0xcebe0675, 0x36ef0f28), + TOBN(0x5de05d09, 0xa4292f81), TOBN(0xa8303593, 0x353e3083), + TOBN(0xa1715b0a, 0x7e37a9bb), TOBN(0x8c56f61e, 0x2b8faec3), + TOBN(0x52507431, 0x33c9b102), TOBN(0x0130cefc, 0xa44431f0), + TOBN(0x56039fa0, 0xbd865cfb), TOBN(0x4b03e578, 0xbc5f1dd7), + TOBN(0x40edf2e4, 0xbabe7224), TOBN(0xc752496d, 0x3a1988f6), + TOBN(0xd1572d3b, 0x564beb6b), TOBN(0x0db1d110, 0x39a1c608), + TOBN(0x568d1934, 0x16f60126), TOBN(0x05ae9668, 0xf354af33), + TOBN(0x19de6d37, 0xc92544f2), TOBN(0xcc084353, 0xa35837d5), + TOBN(0xcbb6869c, 0x1a514ece), TOBN(0xb633e728, 0x2e1d1066), + TOBN(0xf15dd69f, 0x936c581c), TOBN(0x96e7b8ce, 0x7439c4f9), + TOBN(0x5e676f48, 0x2e448a5b), TOBN(0xb2ca7d5b, 0xfd916bbb), + TOBN(0xd55a2541, 0xf5024025), TOBN(0x47bc5769, 0xe4c2d937), + TOBN(0x7d31b92a, 0x0362189f), TOBN(0x83f3086e, 0xef7816f9), + TOBN(0xf9f46d94, 0xb587579a), TOBN(0xec2d22d8, 0x30e76c5f), + TOBN(0x27d57461, 0xb000ffcf), TOBN(0xbb7e65f9, 0x364ffc2c), + TOBN(0x7c7c9477, 0x6652a220), TOBN(0x61618f89, 0xd696c981), + TOBN(0x5021701d, 0x89effff3), TOBN(0xf2c8ff8e, 0x7c314163), + TOBN(0x2da413ad, 0x8efb4d3e), TOBN(0x937b5adf, 0xce176d95), + TOBN(0x22867d34, 0x2a67d51c), TOBN(0x262b9b10, 0x18eb3ac9), + TOBN(0x4e314fe4, 0xc43ff28b), TOBN(0x76476627, 0x6a664e7a), + TOBN(0x3e90e40b, 0xb7a565c2), TOBN(0x8588993a, 0xc1acf831), + TOBN(0xd7b501d6, 0x8f938829), TOBN(0x996627ee, 0x3edd7d4c), + TOBN(0x37d44a62, 0x90cd34c7), TOBN(0xa8327499, 0xf3833e8d), + TOBN(0x2e18917d, 0x4bf50353), TOBN(0x85dd726b, 0x556765fb), + TOBN(0x54fe65d6, 0x93d5ab66), TOBN(0x3ddbaced, 0x915c25fe), + TOBN(0xa799d9a4, 0x12f22e85), TOBN(0xe2a24867, 0x6d06f6bc), + TOBN(0xf4f1ee56, 0x43ca1637), TOBN(0xfda2828b, 0x61ece30a), + TOBN(0x758c1a3e, 0xa2dee7a6), TOBN(0xdcde2f3c, 0x734b2284), + TOBN(0xaba445d2, 0x4eaba6ad), TOBN(0x35aaf668, 0x76cee0a7), + TOBN(0x7e0b04a9, 0xe5aa049a), TOBN(0xe74083ad, 0x91103e84), + TOBN(0xbeb183ce, 0x40afecc3), TOBN(0x6b89de9f, 0xea043f7a),} + , + {TOBN(0x0e299d23, 0xfe67ba66), TOBN(0x91450760, 0x93cf2f34), + TOBN(0xf45b5ea9, 0x97fcf913), TOBN(0x5be00843, 0x8bd7ddda), + TOBN(0x358c3e05, 0xd53ff04d), TOBN(0xbf7ccdc3, 0x5de91ef7), + TOBN(0xad684dbf, 0xb69ec1a0), TOBN(0x367e7cf2, 0x801fd997), + TOBN(0x0ca1f3b7, 0xb0dc8595), TOBN(0x27de4608, 0x9f1d9f2e), + TOBN(0x1af3bf39, 0xbadd82a7), TOBN(0x79356a79, 0x65862448), + TOBN(0xc0602345, 0xf5f9a052), TOBN(0x1a8b0f89, 0x139a42f9), + TOBN(0xb53eee42, 0x844d40fc), TOBN(0x93b0bfe5, 0x4e5b6368), + TOBN(0x5434dd02, 0xc024789c), TOBN(0x90dca9ea, 0x41b57bfc), + TOBN(0x8aa898e2, 0x243398df), TOBN(0xf607c834, 0x894a94bb), + TOBN(0xbb07be97, 0xc2c99b76), TOBN(0x6576ba67, 0x18c29302), + TOBN(0x3d79efcc, 0xe703a88c), TOBN(0xf259ced7, 0xb6a0d106), + TOBN(0x0f893a5d, 0xc8de610b), TOBN(0xe8c515fb, 0x67e223ce), + TOBN(0x7774bfa6, 0x4ead6dc5), TOBN(0x89d20f95, 0x925c728f), + TOBN(0x7a1e0966, 0x098583ce), TOBN(0xa2eedb94, 0x93f2a7d7), + TOBN(0x1b282097, 0x4c304d4a), TOBN(0x0842e3da, 0xc077282d), + TOBN(0xe4d972a3, 0x3b9e2d7b), TOBN(0x7cc60b27, 0xc48218ff), + TOBN(0x8fc70838, 0x84149d91), TOBN(0x5c04346f, 0x2f461ecc), + TOBN(0xebe9fdf2, 0x614650a9), TOBN(0x5e35b537, 0xc1f666ac), + TOBN(0x645613d1, 0x88babc83), TOBN(0x88cace3a, 0xc5e1c93e), + TOBN(0x209ca375, 0x3de92e23), TOBN(0xccb03cc8, 0x5fbbb6e3), + TOBN(0xccb90f03, 0xd7b1487e), TOBN(0xfa9c2a38, 0xc710941f), + TOBN(0x756c3823, 0x6724ceed), TOBN(0x3a902258, 0x192d0323), + TOBN(0xb150e519, 0xea5e038e), TOBN(0xdcba2865, 0xc7427591), + TOBN(0xe549237f, 0x78890732), TOBN(0xc443bef9, 0x53fcb4d9), + TOBN(0x9884d8a6, 0xeb3480d6), TOBN(0x8a35b6a1, 0x3048b186), + TOBN(0xb4e44716, 0x65e9a90a), TOBN(0x45bf380d, 0x653006c0), + TOBN(0x8f3f820d, 0x4fe9ae3b), TOBN(0x244a35a0, 0x979a3b71), + TOBN(0xa1010e9d, 0x74cd06ff), TOBN(0x9c17c7df, 0xaca3eeac), + TOBN(0x74c86cd3, 0x8063aa2b), TOBN(0x8595c4b3, 0x734614ff), + TOBN(0xa3de00ca, 0x990f62cc), TOBN(0xd9bed213, 0xca0c3be5), + TOBN(0x7886078a, 0xdf8ce9f5), TOBN(0xddb27ce3, 0x5cd44444), + TOBN(0xed374a66, 0x58926ddd), TOBN(0x138b2d49, 0x908015b8), + TOBN(0x886c6579, 0xde1f7ab8), TOBN(0x888b9aa0, 0xc3020b7a), + TOBN(0xd3ec034e, 0x3a96e355), TOBN(0xba65b0b8, 0xf30fbe9a), + TOBN(0x064c8e50, 0xff21367a), TOBN(0x1f508ea4, 0x0b04b46e), + TOBN(0x98561a49, 0x747c866c), TOBN(0xbbb1e5fe, 0x0518a062), + TOBN(0x20ff4e8b, 0xecdc3608), TOBN(0x7f55cded, 0x20184027), + TOBN(0x8d73ec95, 0xf38c85f0), TOBN(0x5b589fdf, 0x8bc3b8c3), + TOBN(0xbe95dd98, 0x0f12b66f), TOBN(0xf5bd1a09, 0x0e338e01), + TOBN(0x65163ae5, 0x5e915918), TOBN(0x6158d6d9, 0x86f8a46b), + TOBN(0x8466b538, 0xeeebf99c), TOBN(0xca8761f6, 0xbca477ef), + TOBN(0xaf3449c2, 0x9ebbc601), TOBN(0xef3b0f41, 0xe0c3ae2f), + TOBN(0xaa6c577d, 0x5de63752), TOBN(0xe9166601, 0x64682a51), + TOBN(0x5a3097be, 0xfc15aa1e), TOBN(0x40d12548, 0xb54b0745), + TOBN(0x5bad4706, 0x519a5f12), TOBN(0xed03f717, 0xa439dee6), + TOBN(0x0794bb6c, 0x4a02c499), TOBN(0xf725083d, 0xcffe71d2), + TOBN(0x2cad7519, 0x0f3adcaf), TOBN(0x7f68ea1c, 0x43729310), + TOBN(0xe747c8c7, 0xb7ffd977), TOBN(0xec104c35, 0x80761a22), + TOBN(0x8395ebaf, 0x5a3ffb83), TOBN(0xfb3261f4, 0xe4b63db7), + TOBN(0x53544960, 0xd883e544), TOBN(0x13520d70, 0x8cc2eeb8), + TOBN(0x08f6337b, 0xd3d65f99), TOBN(0x83997db2, 0x781cf95b), + TOBN(0xce6ff106, 0x0dbd2c01), TOBN(0x4f8eea6b, 0x1f9ce934), + TOBN(0x546f7c4b, 0x0e993921), TOBN(0x6236a324, 0x5e753fc7), + TOBN(0x65a41f84, 0xa16022e9), TOBN(0x0c18d878, 0x43d1dbb2), + TOBN(0x73c55640, 0x2d4cef9c), TOBN(0xa0428108, 0x70444c74), + TOBN(0x68e4f15e, 0x9afdfb3c), TOBN(0x49a56143, 0x5bdfb6df), + TOBN(0xa9bc1bd4, 0x5f823d97), TOBN(0xbceb5970, 0xea111c2a), + TOBN(0x366b455f, 0xb269bbc4), TOBN(0x7cd85e1e, 0xe9bc5d62), + TOBN(0xc743c41c, 0x4f18b086), TOBN(0xa4b40990, 0x95294fb9), + TOBN(0x9c7c581d, 0x26ee8382), TOBN(0xcf17dcc5, 0x359d638e), + TOBN(0xee8273ab, 0xb728ae3d), TOBN(0x1d112926, 0xf821f047), + TOBN(0x11498477, 0x50491a74), TOBN(0x687fa761, 0xfde0dfb9), + TOBN(0x2c258022, 0x7ea435ab), TOBN(0x6b8bdb94, 0x91ce7e3f), + TOBN(0x4c5b5dc9, 0x3bf834aa), TOBN(0x04371819, 0x4f6c7e4b), + TOBN(0xc284e00a, 0x3736bcad), TOBN(0x0d881118, 0x21ae8f8d), + TOBN(0xf9cf0f82, 0xf48c8e33), TOBN(0xa11fd075, 0xa1bf40db), + TOBN(0xdceab0de, 0xdc2733e5), TOBN(0xc560a8b5, 0x8e986bd7), + TOBN(0x48dd1fe2, 0x3929d097), TOBN(0x3885b290, 0x92f188f1), + TOBN(0x0f2ae613, 0xda6fcdac), TOBN(0x9054303e, 0xb662a46c), + TOBN(0xb6871e44, 0x0738042a), TOBN(0x98e6a977, 0xbdaf6449), + TOBN(0xd8bc0650, 0xd1c9df1b), TOBN(0xef3d6451, 0x36e098f9), + TOBN(0x03fbae82, 0xb6d72d28), TOBN(0x77ca9db1, 0xf5d84080), + TOBN(0x8a112cff, 0xa58efc1c), TOBN(0x518d761c, 0xc564cb4a), + TOBN(0x69b5740e, 0xf0d1b5ce), TOBN(0x717039cc, 0xe9eb1785), + TOBN(0x3fe29f90, 0x22f53382), TOBN(0x8e54ba56, 0x6bc7c95c), + TOBN(0x9c806d8a, 0xf7f91d0f), TOBN(0x3b61b0f1, 0xa82a5728), + TOBN(0x4640032d, 0x94d76754), TOBN(0x273eb5de, 0x47d834c6), + TOBN(0x2988abf7, 0x7b4e4d53), TOBN(0xb7ce66bf, 0xde401777), + TOBN(0x9fba6b32, 0x715071b3), TOBN(0x82413c24, 0xad3a1a98), + TOBN(0x5b7fc8c4, 0xe0e8ad93), TOBN(0xb5679aee, 0x5fab868d), + TOBN(0xb1f9d2fa, 0x2b3946f3), TOBN(0x458897dc, 0x5685b50a), + TOBN(0x1e98c930, 0x89d0caf3), TOBN(0x39564c5f, 0x78642e92), + TOBN(0x1b77729a, 0x0dbdaf18), TOBN(0xf9170722, 0x579e82e6), + TOBN(0x680c0317, 0xe4515fa5), TOBN(0xf85cff84, 0xfb0c790f), + TOBN(0xc7a82aab, 0x6d2e0765), TOBN(0x7446bca9, 0x35c82b32), + TOBN(0x5de607aa, 0x6d63184f), TOBN(0x7c1a46a8, 0x262803a6), + TOBN(0xd218313d, 0xaebe8035), TOBN(0x92113ffd, 0xc73c51f8), + TOBN(0x4b38e083, 0x12e7e46c), TOBN(0x69d0a37a, 0x56126bd5), + TOBN(0xfb3f324b, 0x73c07e04), TOBN(0xa0c22f67, 0x8fda7267), + TOBN(0x8f2c0051, 0x4d2c7d8f), TOBN(0xbc45ced3, 0xcbe2cae5), + TOBN(0xe1c6cf07, 0xa8f0f277), TOBN(0xbc392312, 0x1eb99a98), + TOBN(0x75537b7e, 0x3cc8ac85), TOBN(0x8d725f57, 0xdd02753b), + TOBN(0xfd05ff64, 0xb737df2f), TOBN(0x55fe8712, 0xf6d2531d), + TOBN(0x57ce04a9, 0x6ab6b01c), TOBN(0x69a02a89, 0x7cd93724), + TOBN(0x4f82ac35, 0xcf86699b), TOBN(0x8242d3ad, 0x9cb4b232), + TOBN(0x713d0f65, 0xd62105e5), TOBN(0xbb222bfa, 0x2d29be61), + TOBN(0xf2f9a79e, 0x6cfbef09), TOBN(0xfc24d8d3, 0xd5d6782f), + TOBN(0x5db77085, 0xd4129967), TOBN(0xdb81c3cc, 0xdc3c2a43), + TOBN(0x9d655fc0, 0x05d8d9a3), TOBN(0x3f5d057a, 0x54298026), + TOBN(0x1157f56d, 0x88c54694), TOBN(0xb26baba5, 0x9b09573e), + TOBN(0x2cab03b0, 0x22adffd1), TOBN(0x60a412c8, 0xdd69f383), + TOBN(0xed76e98b, 0x54b25039), TOBN(0xd4ee67d3, 0x687e714d), + TOBN(0x87739648, 0x7b00b594), TOBN(0xce419775, 0xc9ef709b), + TOBN(0x40f76f85, 0x1c203a40), TOBN(0x30d352d6, 0xeafd8f91), + TOBN(0xaf196d3d, 0x95578dd2), TOBN(0xea4bb3d7, 0x77cc3f3d), + TOBN(0x42a5bd03, 0xb98e782b), TOBN(0xac958c40, 0x0624920d), + TOBN(0xb838134c, 0xfc56fcc8), TOBN(0x86ec4ccf, 0x89572e5e), + TOBN(0x69c43526, 0x9be47be0), TOBN(0x323b7dd8, 0xcb28fea1), + TOBN(0xfa5538ba, 0x3a6c67e5), TOBN(0xef921d70, 0x1d378e46), + TOBN(0xf92961fc, 0x3c4b880e), TOBN(0x3f6f914e, 0x98940a67), + TOBN(0xa990eb0a, 0xfef0ff39), TOBN(0xa6c2920f, 0xf0eeff9c), + TOBN(0xca804166, 0x51b8d9a3), TOBN(0x42531bc9, 0x0ffb0db1), + TOBN(0x72ce4718, 0xaa82e7ce), TOBN(0x6e199913, 0xdf574741), + TOBN(0xd5f1b13d, 0xd5d36946), TOBN(0x8255dc65, 0xf68f0194), + TOBN(0xdc9df4cd, 0x8710d230), TOBN(0x3453c20f, 0x138c1988), + TOBN(0x9af98dc0, 0x89a6ef01), TOBN(0x4dbcc3f0, 0x9857df85), + TOBN(0x34805601, 0x5c1ad924), TOBN(0x40448da5, 0xd0493046), + TOBN(0xf629926d, 0x4ee343e2), TOBN(0x6343f1bd, 0x90e8a301), + TOBN(0xefc93491, 0x40815b3f), TOBN(0xf882a423, 0xde8f66fb), + TOBN(0x3a12d5f4, 0xe7db9f57), TOBN(0x7dfba38a, 0x3c384c27), + TOBN(0x7a904bfd, 0x6fc660b1), TOBN(0xeb6c5db3, 0x2773b21c), + TOBN(0xc350ee66, 0x1cdfe049), TOBN(0x9baac0ce, 0x44540f29), + TOBN(0xbc57b6ab, 0xa5ec6aad), TOBN(0x167ce8c3, 0x0a7c1baa), + TOBN(0xb23a03a5, 0x53fb2b56), TOBN(0x6ce141e7, 0x4e057f78), + TOBN(0x796525c3, 0x89e490d9), TOBN(0x0bc95725, 0xa31a7e75), + TOBN(0x1ec56791, 0x1220fd06), TOBN(0x716e3a3c, 0x408b0bd6), + TOBN(0x31cd6bf7, 0xe8ebeba9), TOBN(0xa7326ca6, 0xbee6b670), + TOBN(0x3d9f851c, 0xcd090c43), TOBN(0x561e8f13, 0xf12c3988), + TOBN(0x50490b6a, 0x904b7be4), TOBN(0x61690ce1, 0x0410737b), + TOBN(0x299e9a37, 0x0f009052), TOBN(0x258758f0, 0xf026092e), + TOBN(0x9fa255f3, 0xfdfcdc0f), TOBN(0xdbc9fb1f, 0xc0e1bcd2), + TOBN(0x35f9dd6e, 0x24651840), TOBN(0xdca45a84, 0xa5c59abc), + TOBN(0x103d396f, 0xecca4938), TOBN(0x4532da0a, 0xb97b3f29), + TOBN(0xc4135ea5, 0x1999a6bf), TOBN(0x3aa9505a, 0x5e6bf2ee), + TOBN(0xf77cef06, 0x3f5be093), TOBN(0x97d1a0f8, 0xa943152e), + TOBN(0x2cb0ebba, 0x2e1c21dd), TOBN(0xf41b29fc, 0x2c6797c4), + TOBN(0xc6e17321, 0xb300101f), TOBN(0x4422b0e9, 0xd0d79a89), + TOBN(0x49e4901c, 0x92f1bfc4), TOBN(0x06ab1f8f, 0xe1e10ed9), + TOBN(0x84d35577, 0xdb2926b8), TOBN(0xca349d39, 0x356e8ec2), + TOBN(0x70b63d32, 0x343bf1a9), TOBN(0x8fd3bd28, 0x37d1a6b1), + TOBN(0x0454879c, 0x316865b4), TOBN(0xee959ff6, 0xc458efa2), + TOBN(0x0461dcf8, 0x9706dc3f), TOBN(0x737db0e2, 0x164e4b2e), + TOBN(0x09262680, 0x2f8843c8), TOBN(0x54498bbc, 0x7745e6f6), + TOBN(0x359473fa, 0xa29e24af), TOBN(0xfcc3c454, 0x70aa87a1), + TOBN(0xfd2c4bf5, 0x00573ace), TOBN(0xb65b514e, 0x28dd1965), + TOBN(0xe46ae7cf, 0x2193e393), TOBN(0x60e9a4e1, 0xf5444d97), + TOBN(0xe7594e96, 0x00ff38ed), TOBN(0x43d84d2f, 0x0a0e0f02), + TOBN(0x8b6db141, 0xee398a21), TOBN(0xb88a56ae, 0xe3bcc5be), + TOBN(0x0a1aa52f, 0x373460ea), TOBN(0x20da1a56, 0x160bb19b), + TOBN(0xfb54999d, 0x65bf0384), TOBN(0x71a14d24, 0x5d5a180e), + TOBN(0xbc44db7b, 0x21737b04), TOBN(0xd84fcb18, 0x01dd8e92), + TOBN(0x80de937b, 0xfa44b479), TOBN(0x53505499, 0x5c98fd4f), + TOBN(0x1edb12ab, 0x28f08727), TOBN(0x4c58b582, 0xa5f3ef53), + TOBN(0xbfb236d8, 0x8327f246), TOBN(0xc3a3bfaa, 0x4d7df320), + TOBN(0xecd96c59, 0xb96024f2), TOBN(0xfc293a53, 0x7f4e0433), + TOBN(0x5341352b, 0x5acf6e10), TOBN(0xc50343fd, 0xafe652c3), + TOBN(0x4af3792d, 0x18577a7f), TOBN(0xe1a4c617, 0xaf16823d), + TOBN(0x9b26d0cd, 0x33425d0a), TOBN(0x306399ed, 0x9b7bc47f), + TOBN(0x2a792f33, 0x706bb20b), TOBN(0x31219614, 0x98111055), + TOBN(0x864ec064, 0x87f5d28b), TOBN(0x11392d91, 0x962277fd), + TOBN(0xb5aa7942, 0xbb6aed5f), TOBN(0x080094dc, 0x47e799d9), + TOBN(0x4afa588c, 0x208ba19b), TOBN(0xd3e7570f, 0x8512f284), + TOBN(0xcbae64e6, 0x02f5799a), TOBN(0xdeebe7ef, 0x514b9492), + TOBN(0x30300f98, 0xe5c298ff), TOBN(0x17f561be, 0x3678361f), + TOBN(0xf52ff312, 0x98cb9a16), TOBN(0x6233c3bc, 0x5562d490), + TOBN(0x7bfa15a1, 0x92e3a2cb), TOBN(0x961bcfd1, 0xe6365119), + TOBN(0x3bdd29bf, 0x2c8c53b1), TOBN(0x739704df, 0x822844ba), + TOBN(0x7dacfb58, 0x7e7b754b), TOBN(0x23360791, 0xa806c9b9), + TOBN(0xe7eb88c9, 0x23504452), TOBN(0x2983e996, 0x852c1783), + TOBN(0xdd4ae529, 0x958d881d), TOBN(0x026bae03, 0x262c7b3c), + TOBN(0x3a6f9193, 0x960b52d1), TOBN(0xd0980f90, 0x92696cfb), + TOBN(0x4c1f428c, 0xd5f30851), TOBN(0x94dfed27, 0x2a4f6630), + TOBN(0x4df53772, 0xfc5d48a4), TOBN(0xdd2d5a2f, 0x933260ce), + TOBN(0x574115bd, 0xd44cc7a5), TOBN(0x4ba6b20d, 0xbd12533a), + TOBN(0x30e93cb8, 0x243057c9), TOBN(0x794c486a, 0x14de320e), + TOBN(0xe925d4ce, 0xf21496e4), TOBN(0xf951d198, 0xec696331), + TOBN(0x9810e2de, 0x3e8d812f), TOBN(0xd0a47259, 0x389294ab), + TOBN(0x513ba2b5, 0x0e3bab66), TOBN(0x462caff5, 0xabad306f), + TOBN(0xe2dc6d59, 0xaf04c49e), TOBN(0x1aeb8750, 0xe0b84b0b), + TOBN(0xc034f12f, 0x2f7d0ca2), TOBN(0x6d2e8128, 0xe06acf2f), + TOBN(0x801f4f83, 0x21facc2f), TOBN(0xa1170c03, 0xf40ef607), + TOBN(0xfe0a1d4f, 0x7805a99c), TOBN(0xbde56a36, 0xcc26aba5), + TOBN(0x5b1629d0, 0x35531f40), TOBN(0xac212c2b, 0x9afa6108), + TOBN(0x30a06bf3, 0x15697be5), TOBN(0x6f0545dc, 0x2c63c7c1), + TOBN(0x5d8cb842, 0x7ccdadaf), TOBN(0xd52e379b, 0xac7015bb), + TOBN(0xc4f56147, 0xf462c23e), TOBN(0xd44a4298, 0x46bc24b0), + TOBN(0xbc73d23a, 0xe2856d4f), TOBN(0x61cedd8c, 0x0832bcdf), + TOBN(0x60953556, 0x99f241d7), TOBN(0xee4adbd7, 0x001a349d), + TOBN(0x0b35bf6a, 0xaa89e491), TOBN(0x7f0076f4, 0x136f7546), + TOBN(0xd19a18ba, 0x9264da3d), TOBN(0x6eb2d2cd, 0x62a7a28b), + TOBN(0xcdba941f, 0x8761c971), TOBN(0x1550518b, 0xa3be4a5d), + TOBN(0xd0e8e2f0, 0x57d0b70c), TOBN(0xeea8612e, 0xcd133ba3), + TOBN(0x814670f0, 0x44416aec), TOBN(0x424db6c3, 0x30775061), + TOBN(0xd96039d1, 0x16213fd1), TOBN(0xc61e7fa5, 0x18a3478f), + TOBN(0xa805bdcc, 0xcb0c5021), TOBN(0xbdd6f3a8, 0x0cc616dd), + TOBN(0x06009667, 0x5d97f7e2), TOBN(0x31db0fc1, 0xaf0bf4b6), + TOBN(0x23680ed4, 0x5491627a), TOBN(0xb99a3c66, 0x7d741fb1), + TOBN(0xe9bb5f55, 0x36b1ff92), TOBN(0x29738577, 0x512b388d), + TOBN(0xdb8a2ce7, 0x50fcf263), TOBN(0x385346d4, 0x6c4f7b47), + TOBN(0xbe86c5ef, 0x31631f9e), TOBN(0xbf91da21, 0x03a57a29), + TOBN(0xc3b1f796, 0x7b23f821), TOBN(0x0f7d00d2, 0x770db354), + TOBN(0x8ffc6c3b, 0xd8fe79da), TOBN(0xcc5e8c40, 0xd525c996), + TOBN(0x4640991d, 0xcfff632a), TOBN(0x64d97e8c, 0x67112528), + TOBN(0xc232d973, 0x02f1cd1e), TOBN(0xce87eacb, 0x1dd212a4), + TOBN(0x6e4c8c73, 0xe69802f7), TOBN(0x12ef0290, 0x1fffddbd), + TOBN(0x941ec74e, 0x1bcea6e2), TOBN(0xd0b54024, 0x3cb92cbb), + TOBN(0x809fb9d4, 0x7e8f9d05), TOBN(0x3bf16159, 0xf2992aae), + TOBN(0xad40f279, 0xf8a7a838), TOBN(0x11aea631, 0x05615660), + TOBN(0xbf52e6f1, 0xa01f6fa1), TOBN(0xef046995, 0x3dc2aec9), + TOBN(0x785dbec9, 0xd8080711), TOBN(0xe1aec60a, 0x9fdedf76), + TOBN(0xece797b5, 0xfa21c126), TOBN(0xc66e898f, 0x05e52732), + TOBN(0x39bb69c4, 0x08811fdb), TOBN(0x8bfe1ef8, 0x2fc7f082), + TOBN(0xc8e7a393, 0x174f4138), TOBN(0xfba8ad1d, 0xd58d1f98), + TOBN(0xbc21d0ce, 0xbfd2fd5b), TOBN(0x0b839a82, 0x6ee60d61), + TOBN(0xaacf7658, 0xafd22253), TOBN(0xb526bed8, 0xaae396b3), + TOBN(0xccc1bbc2, 0x38564464), TOBN(0x9e3ff947, 0x8c45bc73), + TOBN(0xcde9bca3, 0x58188a78), TOBN(0x138b8ee0, 0xd73bf8f7), + TOBN(0x5c7e234c, 0x4123c489), TOBN(0x66e69368, 0xfa643297), + TOBN(0x0629eeee, 0x39a15fa3), TOBN(0x95fab881, 0xa9e2a927), + TOBN(0xb2497007, 0xeafbb1e1), TOBN(0xd75c9ce6, 0xe75b7a93), + TOBN(0x3558352d, 0xefb68d78), TOBN(0xa2f26699, 0x223f6396), + TOBN(0xeb911ecf, 0xe469b17a), TOBN(0x62545779, 0xe72d3ec2), + TOBN(0x8ea47de7, 0x82cb113f), TOBN(0xebe4b086, 0x4e1fa98d), + TOBN(0xec2d5ed7, 0x8cdfedb1), TOBN(0xa535c077, 0xfe211a74), + TOBN(0x9678109b, 0x11d244c5), TOBN(0xf17c8bfb, 0xbe299a76), + TOBN(0xb651412e, 0xfb11fbc4), TOBN(0xea0b5482, 0x94ab3f65), + TOBN(0xd8dffd95, 0x0cf78243), TOBN(0x2e719e57, 0xce0361d4), + TOBN(0x9007f085, 0x304ddc5b), TOBN(0x095e8c6d, 0x4daba2ea), + TOBN(0x5a33cdb4, 0x3f9d28a9), TOBN(0x85b95cd8, 0xe2283003), + TOBN(0xbcd6c819, 0xb9744733), TOBN(0x29c5f538, 0xfc7f5783), + TOBN(0x6c49b2fa, 0xd59038e4), TOBN(0x68349cc1, 0x3bbe1018), + TOBN(0xcc490c1d, 0x21830ee5), TOBN(0x36f9c4ee, 0xe9bfa297), + TOBN(0x58fd7294, 0x48de1a94), TOBN(0xaadb13a8, 0x4e8f2cdc), + TOBN(0x515eaaa0, 0x81313dba), TOBN(0xc76bb468, 0xc2152dd8), + TOBN(0x357f8d75, 0xa653dbf8), TOBN(0xe4d8c4d1, 0xb14ac143), + TOBN(0xbdb8e675, 0xb055cb40), TOBN(0x898f8e7b, 0x977b5167), + TOBN(0xecc65651, 0xb82fb863), TOBN(0x56544814, 0x6d88f01f), + TOBN(0xb0928e95, 0x263a75a9), TOBN(0xcfb6836f, 0x1a22fcda), + TOBN(0x651d14db, 0x3f3bd37c), TOBN(0x1d3837fb, 0xb6ad4664), + TOBN(0x7c5fb538, 0xff4f94ab), TOBN(0x7243c712, 0x6d7fb8f2), + TOBN(0xef13d60c, 0xa85c5287), TOBN(0x18cfb7c7, 0x4bb8dd1b), + TOBN(0x82f9bfe6, 0x72908219), TOBN(0x35c4592b, 0x9d5144ab), + TOBN(0x52734f37, 0x9cf4b42f), TOBN(0x6bac55e7, 0x8c60ddc4), + TOBN(0xb5cd811e, 0x94dea0f6), TOBN(0x259ecae4, 0xe18cc1a3), + TOBN(0x6a0e836e, 0x15e660f8), TOBN(0x6c639ea6, 0x0e02bff2), + TOBN(0x8721b8cb, 0x7e1026fd), TOBN(0x9e73b50b, 0x63261942), + TOBN(0xb8c70974, 0x77f01da3), TOBN(0x1839e6a6, 0x8268f57f), + TOBN(0x571b9415, 0x5150b805), TOBN(0x1892389e, 0xf92c7097), + TOBN(0x8d69c18e, 0x4a084b95), TOBN(0x7014c512, 0xbe5b495c), + TOBN(0x4780db36, 0x1b07523c), TOBN(0x2f6219ce, 0x2c1c64fa), + TOBN(0xc38b81b0, 0x602c105a), TOBN(0xab4f4f20, 0x5dc8e360), + TOBN(0x20d3c982, 0xcf7d62d2), TOBN(0x1f36e29d, 0x23ba8150), + TOBN(0x48ae0bf0, 0x92763f9e), TOBN(0x7a527e6b, 0x1d3a7007), + TOBN(0xb4a89097, 0x581a85e3), TOBN(0x1f1a520f, 0xdc158be5), + TOBN(0xf98db37d, 0x167d726e), TOBN(0x8802786e, 0x1113e862)} + , + {TOBN(0xefb2149e, 0x36f09ab0), TOBN(0x03f163ca, 0x4a10bb5b), + TOBN(0xd0297045, 0x06e20998), TOBN(0x56f0af00, 0x1b5a3bab), + TOBN(0x7af4cfec, 0x70880e0d), TOBN(0x7332a66f, 0xbe3d913f), + TOBN(0x32e6c84a, 0x7eceb4bd), TOBN(0xedc4a79a, 0x9c228f55), + TOBN(0xc37c7dd0, 0xc55c4496), TOBN(0xa6a96357, 0x25bbabd2), + TOBN(0x5b7e63f2, 0xadd7f363), TOBN(0x9dce3782, 0x2e73f1df), + TOBN(0xe1e5a16a, 0xb2b91f71), TOBN(0xe4489823, 0x5ba0163c), + TOBN(0xf2759c32, 0xf6e515ad), TOBN(0xa5e2f1f8, 0x8615eecf), + TOBN(0x74519be7, 0xabded551), TOBN(0x03d358b8, 0xc8b74410), + TOBN(0x4d00b10b, 0x0e10d9a9), TOBN(0x6392b0b1, 0x28da52b7), + TOBN(0x6744a298, 0x0b75c904), TOBN(0xc305b0ae, 0xa8f7f96c), + TOBN(0x042e421d, 0x182cf932), TOBN(0xf6fc5d50, 0x9e4636ca), + TOBN(0x795847c9, 0xd64cc78c), TOBN(0x6c50621b, 0x9b6cb27b), + TOBN(0x07099bf8, 0xdf8022ab), TOBN(0x48f862eb, 0xc04eda1d), + TOBN(0xd12732ed, 0xe1603c16), TOBN(0x19a80e0f, 0x5c9a9450), + TOBN(0xe2257f54, 0xb429b4fc), TOBN(0x66d3b2c6, 0x45460515), + TOBN(0x6ca4f87e, 0x822e37be), TOBN(0x73f237b4, 0x253bda4e), + TOBN(0xf747f3a2, 0x41190aeb), TOBN(0xf06fa36f, 0x804cf284), + TOBN(0x0a6bbb6e, 0xfc621c12), TOBN(0x5d624b64, 0x40b80ec6), + TOBN(0x4b072425, 0x7ba556f3), TOBN(0x7fa0c354, 0x3e2d20a8), + TOBN(0xe921fa31, 0xe3229d41), TOBN(0xa929c652, 0x94531bd4), + TOBN(0x84156027, 0xa6d38209), TOBN(0xf3d69f73, 0x6bdb97bd), + TOBN(0x8906d19a, 0x16833631), TOBN(0x68a34c2e, 0x03d51be3), + TOBN(0xcb59583b, 0x0e511cd8), TOBN(0x99ce6bfd, 0xfdc132a8), + TOBN(0x3facdaaa, 0xffcdb463), TOBN(0x658bbc1a, 0x34a38b08), + TOBN(0x12a801f8, 0xf1a9078d), TOBN(0x1567bcf9, 0x6ab855de), + TOBN(0xe08498e0, 0x3572359b), TOBN(0xcf0353e5, 0x8659e68b), + TOBN(0xbb86e9c8, 0x7d23807c), TOBN(0xbc08728d, 0x2198e8a2), + TOBN(0x8de2b7bc, 0x453cadd6), TOBN(0x203900a7, 0xbc0bc1f8), + TOBN(0xbcd86e47, 0xa6abd3af), TOBN(0x911cac12, 0x8502effb), + TOBN(0x2d550242, 0xec965469), TOBN(0x0e9f7692, 0x29e0017e), + TOBN(0x633f078f, 0x65979885), TOBN(0xfb87d449, 0x4cf751ef), + TOBN(0xe1790e4b, 0xfc25419a), TOBN(0x36467203, 0x4bff3cfd), + TOBN(0xc8db6386, 0x25b6e83f), TOBN(0x6cc69f23, 0x6cad6fd2), + TOBN(0x0219e45a, 0x6bc68bb9), TOBN(0xe43d79b6, 0x297f7334), + TOBN(0x7d445368, 0x465dc97c), TOBN(0x4b9eea32, 0x2a0b949a), + TOBN(0x1b96c6ba, 0x6102d021), TOBN(0xeaafac78, 0x2f4461ea), + TOBN(0xd4b85c41, 0xc49f19a8), TOBN(0x275c28e4, 0xcf538875), + TOBN(0x35451a9d, 0xdd2e54e0), TOBN(0x6991adb5, 0x0605618b), + TOBN(0x5b8b4bcd, 0x7b36cd24), TOBN(0x372a4f8c, 0x56f37216), + TOBN(0xc890bd73, 0xa6a5da60), TOBN(0x6f083da0, 0xdc4c9ff0), + TOBN(0xf4e14d94, 0xf0536e57), TOBN(0xf9ee1eda, 0xaaec8243), + TOBN(0x571241ec, 0x8bdcf8e7), TOBN(0xa5db8271, 0x0b041e26), + TOBN(0x9a0b9a99, 0xe3fff040), TOBN(0xcaaf21dd, 0x7c271202), + TOBN(0xb4e2b2e1, 0x4f0dd2e8), TOBN(0xe77e7c4f, 0x0a377ac7), + TOBN(0x69202c3f, 0x0d7a2198), TOBN(0xf759b7ff, 0x28200eb8), + TOBN(0xc87526ed, 0xdcfe314e), TOBN(0xeb84c524, 0x53d5cf99), + TOBN(0xb1b52ace, 0x515138b6), TOBN(0x5aa7ff8c, 0x23fca3f4), + TOBN(0xff0b13c3, 0xb9791a26), TOBN(0x960022da, 0xcdd58b16), + TOBN(0xdbd55c92, 0x57aad2de), TOBN(0x3baaaaa3, 0xf30fe619), + TOBN(0x9a4b2346, 0x0d881efd), TOBN(0x506416c0, 0x46325e2a), + TOBN(0x91381e76, 0x035c18d4), TOBN(0xb3bb68be, 0xf27817b0), + TOBN(0x15bfb8bf, 0x5116f937), TOBN(0x7c64a586, 0xc1268943), + TOBN(0x71e25cc3, 0x8419a2c8), TOBN(0x9fd6b0c4, 0x8335f463), + TOBN(0x4bf0ba3c, 0xe8ee0e0e), TOBN(0x6f6fba60, 0x298c21fa), + TOBN(0x57d57b39, 0xae66bee0), TOBN(0x292d5130, 0x22672544), + TOBN(0xf451105d, 0xbab093b3), TOBN(0x012f59b9, 0x02839986), + TOBN(0x8a915802, 0x3474a89c), TOBN(0x048c919c, 0x2de03e97), + TOBN(0xc476a2b5, 0x91071cd5), TOBN(0x791ed89a, 0x034970a5), + TOBN(0x89bd9042, 0xe1b7994b), TOBN(0x8eaf5179, 0xa1057ffd), + TOBN(0x6066e2a2, 0xd551ee10), TOBN(0x87a8f1d8, 0x727e09a6), + TOBN(0x00d08bab, 0x2c01148d), TOBN(0x6da8e4f1, 0x424f33fe), + TOBN(0x466d17f0, 0xcf9a4e71), TOBN(0xff502010, 0x3bf5cb19), + TOBN(0xdccf97d8, 0xd062ecc0), TOBN(0x80c0d9af, 0x81d80ac4), + TOBN(0xe87771d8, 0x033f2876), TOBN(0xb0186ec6, 0x7d5cc3db), + TOBN(0x58e8bb80, 0x3bc9bc1d), TOBN(0x4d1395cc, 0x6f6ef60e), + TOBN(0xa73c62d6, 0x186244a0), TOBN(0x918e5f23, 0x110a5b53), + TOBN(0xed4878ca, 0x741b7eab), TOBN(0x3038d71a, 0xdbe03e51), + TOBN(0x840204b7, 0xa93c3246), TOBN(0x21ab6069, 0xa0b9b4cd), + TOBN(0xf5fa6e2b, 0xb1d64218), TOBN(0x1de6ad0e, 0xf3d56191), + TOBN(0x570aaa88, 0xff1929c7), TOBN(0xc6df4c6b, 0x640e87b5), + TOBN(0xde8a74f2, 0xc65f0ccc), TOBN(0x8b972fd5, 0xe6f6cc01), + TOBN(0x3fff36b6, 0x0b846531), TOBN(0xba7e45e6, 0x10a5e475), + TOBN(0x84a1d10e, 0x4145b6c5), TOBN(0xf1f7f91a, 0x5e046d9d), + TOBN(0x0317a692, 0x44de90d7), TOBN(0x951a1d4a, 0xf199c15e), + TOBN(0x91f78046, 0xc9d73deb), TOBN(0x74c82828, 0xfab8224f), + TOBN(0xaa6778fc, 0xe7560b90), TOBN(0xb4073e61, 0xa7e824ce), + TOBN(0xff0d693c, 0xd642eba8), TOBN(0x7ce2e57a, 0x5dccef38), + TOBN(0x89c2c789, 0x1df1ad46), TOBN(0x83a06922, 0x098346fd), + TOBN(0x2d715d72, 0xda2fc177), TOBN(0x7b6dd71d, 0x85b6cf1d), + TOBN(0xc60a6d0a, 0x73fa9cb0), TOBN(0xedd3992e, 0x328bf5a9), + TOBN(0xc380ddd0, 0x832c8c82), TOBN(0xd182d410, 0xa2a0bf50), + TOBN(0x7d9d7438, 0xd9a528db), TOBN(0xe8b1a0e9, 0xcaf53994), + TOBN(0xddd6e5fe, 0x0e19987c), TOBN(0xacb8df03, 0x190b059d), + TOBN(0x53703a32, 0x8300129f), TOBN(0x1f637662, 0x68c43bfd), + TOBN(0xbcbd1913, 0x00e54051), TOBN(0x812fcc62, 0x7bf5a8c5), + TOBN(0x3f969d5f, 0x29fb85da), TOBN(0x72f4e00a, 0x694759e8), + TOBN(0x426b6e52, 0x790726b7), TOBN(0x617bbc87, 0x3bdbb209), + TOBN(0x511f8bb9, 0x97aee317), TOBN(0x812a4096, 0xe81536a8), + TOBN(0x137dfe59, 0x3ac09b9b), TOBN(0x0682238f, 0xba8c9a7a), + TOBN(0x7072ead6, 0xaeccb4bd), TOBN(0x6a34e9aa, 0x692ba633), + TOBN(0xc82eaec2, 0x6fff9d33), TOBN(0xfb753512, 0x1d4d2b62), + TOBN(0x1a0445ff, 0x1d7aadab), TOBN(0x65d38260, 0xd5f6a67c), + TOBN(0x6e62fb08, 0x91cfb26f), TOBN(0xef1e0fa5, 0x5c7d91d6), + TOBN(0x47e7c7ba, 0x33db72cd), TOBN(0x017cbc09, 0xfa7c74b2), + TOBN(0x3c931590, 0xf50a503c), TOBN(0xcac54f60, 0x616baa42), + TOBN(0x9b6cd380, 0xb2369f0f), TOBN(0x97d3a70d, 0x23c76151), + TOBN(0x5f9dd6fc, 0x9862a9c6), TOBN(0x044c4ab2, 0x12312f51), + TOBN(0x035ea0fd, 0x834a2ddc), TOBN(0x49e6b862, 0xcc7b826d), + TOBN(0xb03d6883, 0x62fce490), TOBN(0x62f2497a, 0xb37e36e9), + TOBN(0x04b005b6, 0xc6458293), TOBN(0x36bb5276, 0xe8d10af7), + TOBN(0xacf2dc13, 0x8ee617b8), TOBN(0x470d2d35, 0xb004b3d4), + TOBN(0x06790832, 0xfeeb1b77), TOBN(0x2bb75c39, 0x85657f9c), + TOBN(0xd70bd4ed, 0xc0f60004), TOBN(0xfe797ecc, 0x219b018b), + TOBN(0x9b5bec2a, 0x753aebcc), TOBN(0xdaf9f3dc, 0xc939eca5), + TOBN(0xd6bc6833, 0xd095ad09), TOBN(0x98abdd51, 0xdaa4d2fc), + TOBN(0xd9840a31, 0x8d168be5), TOBN(0xcf7c10e0, 0x2325a23c), + TOBN(0xa5c02aa0, 0x7e6ecfaf), TOBN(0x2462e7e6, 0xb5bfdf18), + TOBN(0xab2d8a8b, 0xa0cc3f12), TOBN(0x68dd485d, 0xbc672a29), + TOBN(0x72039752, 0x596f2cd3), TOBN(0x5d3eea67, 0xa0cf3d8d), + TOBN(0x810a1a81, 0xe6602671), TOBN(0x8f144a40, 0x14026c0c), + TOBN(0xbc753a6d, 0x76b50f85), TOBN(0xc4dc21e8, 0x645cd4a4), + TOBN(0xc5262dea, 0x521d0378), TOBN(0x802b8e0e, 0x05011c6f), + TOBN(0x1ba19cbb, 0x0b4c19ea), TOBN(0x21db64b5, 0xebf0aaec), + TOBN(0x1f394ee9, 0x70342f9d), TOBN(0x93a10aee, 0x1bc44a14), + TOBN(0xa7eed31b, 0x3efd0baa), TOBN(0x6e7c824e, 0x1d154e65), + TOBN(0xee23fa81, 0x9966e7ee), TOBN(0x64ec4aa8, 0x05b7920d), + TOBN(0x2d44462d, 0x2d90aad4), TOBN(0xf44dd195, 0xdf277ad5), + TOBN(0x8d6471f1, 0xbb46b6a1), TOBN(0x1e65d313, 0xfd885090), + TOBN(0x33a800f5, 0x13a977b4), TOBN(0xaca9d721, 0x0797e1ef), + TOBN(0x9a5a85a0, 0xfcff6a17), TOBN(0x9970a3f3, 0x1eca7cee), + TOBN(0xbb9f0d6b, 0xc9504be3), TOBN(0xe0c504be, 0xadd24ee2), + TOBN(0x7e09d956, 0x77fcc2f4), TOBN(0xef1a5227, 0x65bb5fc4), + TOBN(0x145d4fb1, 0x8b9286aa), TOBN(0x66fd0c5d, 0x6649028b), + TOBN(0x98857ceb, 0x1bf4581c), TOBN(0xe635e186, 0xaca7b166), + TOBN(0x278ddd22, 0x659722ac), TOBN(0xa0903c4c, 0x1db68007), + TOBN(0x366e4589, 0x48f21402), TOBN(0x31b49c14, 0xb96abda2), + TOBN(0x329c4b09, 0xe0403190), TOBN(0x97197ca3, 0xd29f43fe), + TOBN(0x8073dd1e, 0x274983d8), TOBN(0xda1a3bde, 0x55717c8f), + TOBN(0xfd3d4da2, 0x0361f9d1), TOBN(0x1332d081, 0x4c7de1ce), + TOBN(0x9b7ef7a3, 0xaa6d0e10), TOBN(0x17db2e73, 0xf54f1c4a), + TOBN(0xaf3dffae, 0x4cd35567), TOBN(0xaaa2f406, 0xe56f4e71), + TOBN(0x8966759e, 0x7ace3fc7), TOBN(0x9594eacf, 0x45a8d8c6), + TOBN(0x8de3bd8b, 0x91834e0e), TOBN(0xafe4ca53, 0x548c0421), + TOBN(0xfdd7e856, 0xe6ee81c6), TOBN(0x8f671beb, 0x6b891a3a), + TOBN(0xf7a58f2b, 0xfae63829), TOBN(0x9ab186fb, 0x9c11ac9f), + TOBN(0x8d6eb369, 0x10b5be76), TOBN(0x046b7739, 0xfb040bcd), + TOBN(0xccb4529f, 0xcb73de88), TOBN(0x1df0fefc, 0xcf26be03), + TOBN(0xad7757a6, 0xbcfcd027), TOBN(0xa8786c75, 0xbb3165ca), + TOBN(0xe9db1e34, 0x7e99a4d9), TOBN(0x99ee86df, 0xb06c504b), + TOBN(0x5b7c2ddd, 0xc15c9f0a), TOBN(0xdf87a734, 0x4295989e), + TOBN(0x59ece47c, 0x03d08fda), TOBN(0xb074d3dd, 0xad5fc702), + TOBN(0x20407903, 0x51a03776), TOBN(0x2bb1f77b, 0x2a608007), + TOBN(0x25c58f4f, 0xe1153185), TOBN(0xe6df62f6, 0x766e6447), + TOBN(0xefb3d1be, 0xed51275a), TOBN(0x5de47dc7, 0x2f0f483f), + TOBN(0x7932d98e, 0x97c2bedf), TOBN(0xd5c11927, 0x0219f8a1), + TOBN(0x9d751200, 0xa73a294e), TOBN(0x5f88434a, 0x9dc20172), + TOBN(0xd28d9fd3, 0xa26f506a), TOBN(0xa890cd31, 0x9d1dcd48), + TOBN(0x0aebaec1, 0x70f4d3b4), TOBN(0xfd1a1369, 0x0ffc8d00), + TOBN(0xb9d9c240, 0x57d57838), TOBN(0x45929d26, 0x68bac361), + TOBN(0x5a2cd060, 0x25b15ca6), TOBN(0x4b3c83e1, 0x6e474446), + TOBN(0x1aac7578, 0xee1e5134), TOBN(0xa418f5d6, 0xc91e2f41), + TOBN(0x6936fc8a, 0x213ed68b), TOBN(0x860ae7ed, 0x510a5224), + TOBN(0x63660335, 0xdef09b53), TOBN(0x641b2897, 0xcd79c98d), + TOBN(0x29bd38e1, 0x01110f35), TOBN(0x79c26f42, 0x648b1937), + TOBN(0x64dae519, 0x9d9164f4), TOBN(0xd85a2310, 0x0265c273), + TOBN(0x7173dd5d, 0x4b07e2b1), TOBN(0xd144c4cb, 0x8d9ea221), + TOBN(0xe8b04ea4, 0x1105ab14), TOBN(0x92dda542, 0xfe80d8f1), + TOBN(0xe9982fa8, 0xcf03dce6), TOBN(0x8b5ea965, 0x1a22cffc), + TOBN(0xf7f4ea7f, 0x3fad88c4), TOBN(0x62db773e, 0x6a5ba95c), + TOBN(0xd20f02fb, 0x93f24567), TOBN(0xfd46c69a, 0x315257ca), + TOBN(0x0ac74cc7, 0x8bcab987), TOBN(0x46f31c01, 0x5ceca2f5), + TOBN(0x40aedb59, 0x888b219e), TOBN(0xe50ecc37, 0xe1fccd02), + TOBN(0x1bcd9dad, 0x911f816c), TOBN(0x583cc1ec, 0x8db9b00c), + TOBN(0xf3cd2e66, 0xa483bf11), TOBN(0xfa08a6f5, 0xb1b2c169), + TOBN(0xf375e245, 0x4be9fa28), TOBN(0x99a7ffec, 0x5b6d011f), + TOBN(0x6a3ebddb, 0xc4ae62da), TOBN(0x6cea00ae, 0x374aef5d), + TOBN(0xab5fb98d, 0x9d4d05bc), TOBN(0x7cba1423, 0xd560f252), + TOBN(0x49b2cc21, 0x208490de), TOBN(0x1ca66ec3, 0xbcfb2879), + TOBN(0x7f1166b7, 0x1b6fb16f), TOBN(0xfff63e08, 0x65fe5db3), + TOBN(0xb8345abe, 0x8b2610be), TOBN(0xb732ed80, 0x39de3df4), + TOBN(0x0e24ed50, 0x211c32b4), TOBN(0xd10d8a69, 0x848ff27d), + TOBN(0xc1074398, 0xed4de248), TOBN(0xd7cedace, 0x10488927), + TOBN(0xa4aa6bf8, 0x85673e13), TOBN(0xb46bae91, 0x6daf30af), + TOBN(0x07088472, 0xfcef7ad8), TOBN(0x61151608, 0xd4b35e97), + TOBN(0xbcfe8f26, 0xdde29986), TOBN(0xeb84c4c7, 0xd5a34c79), + TOBN(0xc1eec55c, 0x164e1214), TOBN(0x891be86d, 0xa147bb03), + TOBN(0x9fab4d10, 0x0ba96835), TOBN(0xbf01e9b8, 0xa5c1ae9f), + TOBN(0x6b4de139, 0xb186ebc0), TOBN(0xd5c74c26, 0x85b91bca), + TOBN(0x5086a99c, 0xc2d93854), TOBN(0xeed62a7b, 0xa7a9dfbc), + TOBN(0x8778ed6f, 0x76b7618a), TOBN(0xbff750a5, 0x03b66062), + TOBN(0x4cb7be22, 0xb65186db), TOBN(0x369dfbf0, 0xcc3a6d13), + TOBN(0xc7dab26c, 0x7191a321), TOBN(0x9edac3f9, 0x40ed718e), + TOBN(0xbc142b36, 0xd0cfd183), TOBN(0xc8af82f6, 0x7c991693), + TOBN(0xb3d1e4d8, 0x97ce0b2a), TOBN(0xe6d7c87f, 0xc3a55cdf), + TOBN(0x35846b95, 0x68b81afe), TOBN(0x018d12af, 0xd3c239d8), + TOBN(0x2b2c6208, 0x01206e15), TOBN(0xe0e42453, 0xa3b882c6), + TOBN(0x854470a3, 0xa50162d5), TOBN(0x08157478, 0x7017a62a), + TOBN(0x18bd3fb4, 0x820357c7), TOBN(0x992039ae, 0x6f1458ad), + TOBN(0x9a1df3c5, 0x25b44aa1), TOBN(0x2d780357, 0xed3d5281), + TOBN(0x58cf7e4d, 0xc77ad4d4), TOBN(0xd49a7998, 0xf9df4fc4), + TOBN(0x4465a8b5, 0x1d71205e), TOBN(0xa0ee0ea6, 0x649254aa), + TOBN(0x4b5eeecf, 0xab7bd771), TOBN(0x6c873073, 0x35c262b9), + TOBN(0xdc5bd648, 0x3c9d61e7), TOBN(0x233d6d54, 0x321460d2), + TOBN(0xd20c5626, 0xfc195bcc), TOBN(0x25445958, 0x04d78b63), + TOBN(0xe03fcb3d, 0x17ec8ef3), TOBN(0x54b690d1, 0x46b8f781), + TOBN(0x82fa2c8a, 0x21230646), TOBN(0xf51aabb9, 0x084f418c), + TOBN(0xff4fbec1, 0x1a30ba43), TOBN(0x6a5acf73, 0x743c9df7), + TOBN(0x1da2b357, 0xd635b4d5), TOBN(0xc3de68dd, 0xecd5c1da), + TOBN(0xa689080b, 0xd61af0dd), TOBN(0xdea5938a, 0xd665bf99), + TOBN(0x0231d71a, 0xfe637294), TOBN(0x01968aa6, 0xa5a81cd8), + TOBN(0x11252d50, 0x048e63b5), TOBN(0xc446bc52, 0x6ca007e9), + TOBN(0xef8c50a6, 0x96d6134b), TOBN(0x9361fbf5, 0x9e09a05c), + TOBN(0xf17f85a6, 0xdca3291a), TOBN(0xb178d548, 0xff251a21), + TOBN(0x87f6374b, 0xa4df3915), TOBN(0x566ce1bf, 0x2fd5d608), + TOBN(0x425cba4d, 0x7de35102), TOBN(0x6b745f8f, 0x58c5d5e2), + TOBN(0x88402af6, 0x63122edf), TOBN(0x3190f9ed, 0x3b989a89), + TOBN(0x4ad3d387, 0xebba3156), TOBN(0xef385ad9, 0xc7c469a5), + TOBN(0xb08281de, 0x3f642c29), TOBN(0x20be0888, 0x910ffb88), + TOBN(0xf353dd4a, 0xd5292546), TOBN(0x3f1627de, 0x8377a262), + TOBN(0xa5faa013, 0xeefcd638), TOBN(0x8f3bf626, 0x74cc77c3), + TOBN(0x32618f65, 0xa348f55e), TOBN(0x5787c0dc, 0x9fefeb9e), + TOBN(0xf1673aa2, 0xd9a23e44), TOBN(0x88dfa993, 0x4e10690d), + TOBN(0x1ced1b36, 0x2bf91108), TOBN(0x9193ceca, 0x3af48649), + TOBN(0xfb34327d, 0x2d738fc5), TOBN(0x6697b037, 0x975fee6c), + TOBN(0x2f485da0, 0xc04079a5), TOBN(0x2cdf5735, 0x2feaa1ac), + TOBN(0x76944420, 0xbd55659e), TOBN(0x7973e32b, 0x4376090c), + TOBN(0x86bb4fe1, 0x163b591a), TOBN(0x10441aed, 0xc196f0ca), + TOBN(0x3b431f4a, 0x045ad915), TOBN(0x6c11b437, 0xa4afacb1), + TOBN(0x30b0c7db, 0x71fdbbd8), TOBN(0xb642931f, 0xeda65acd), + TOBN(0x4baae6e8, 0x9c92b235), TOBN(0xa73bbd0e, 0x6b3993a1), + TOBN(0xd06d60ec, 0x693dd031), TOBN(0x03cab91b, 0x7156881c), + TOBN(0xd615862f, 0x1db3574b), TOBN(0x485b0185, 0x64bb061a), + TOBN(0x27434988, 0xa0181e06), TOBN(0x2cd61ad4, 0xc1c0c757), + TOBN(0x3effed5a, 0x2ff9f403), TOBN(0x8dc98d8b, 0x62239029), + TOBN(0x2206021e, 0x1f17b70d), TOBN(0xafbec0ca, 0xbf510015), + TOBN(0x9fed7164, 0x80130dfa), TOBN(0x306dc2b5, 0x8a02dcf5), + TOBN(0x48f06620, 0xfeb10fc0), TOBN(0x78d1e1d5, 0x5a57cf51), + TOBN(0xadef8c5a, 0x192ef710), TOBN(0x88afbd4b, 0x3b7431f9), + TOBN(0x7e1f7407, 0x64250c9e), TOBN(0x6e31318d, 0xb58bec07), + TOBN(0xfd4fc4b8, 0x24f89b4e), TOBN(0x65a5dd88, 0x48c36a2a), + TOBN(0x4f1eccff, 0xf024baa7), TOBN(0x22a21cf2, 0xcba94650), + TOBN(0x95d29dee, 0x42a554f7), TOBN(0x828983a5, 0x002ec4ba), + TOBN(0x8112a1f7, 0x8badb73d), TOBN(0x79ea8897, 0xa27c1839), + TOBN(0x8969a5a7, 0xd065fd83), TOBN(0xf49af791, 0xb262a0bc), + TOBN(0xfcdea8b6, 0xaf2b5127), TOBN(0x10e913e1, 0x564c2dbc), + TOBN(0x51239d14, 0xbc21ef51), TOBN(0xe51c3ceb, 0x4ce57292), + TOBN(0x795ff068, 0x47bbcc3b), TOBN(0x86b46e1e, 0xbd7e11e6), + TOBN(0x0ea6ba23, 0x80041ef4), TOBN(0xd72fe505, 0x6262342e), + TOBN(0x8abc6dfd, 0x31d294d4), TOBN(0xbbe017a2, 0x1278c2c9), + TOBN(0xb1fcfa09, 0xb389328a), TOBN(0x322fbc62, 0xd01771b5), + TOBN(0x04c0d063, 0x60b045bf), TOBN(0xdb652edc, 0x10e52d01), + TOBN(0x50ef932c, 0x03ec6627), TOBN(0xde1b3b2d, 0xc1ee50e3), + TOBN(0x5ab7bdc5, 0xdc37a90d), TOBN(0xfea67213, 0x31e33a96), + TOBN(0x6482b5cb, 0x4f2999aa), TOBN(0x38476cc6, 0xb8cbf0dd), + TOBN(0x93ebfacb, 0x173405bb), TOBN(0x15cdafe7, 0xe52369ec), + TOBN(0xd42d5ba4, 0xd935b7db), TOBN(0x648b6004, 0x1c99a4cd), + TOBN(0x785101bd, 0xa3b5545b), TOBN(0x4bf2c38a, 0x9dd67faf), + TOBN(0xb1aadc63, 0x4442449c), TOBN(0xe0e9921a, 0x33ad4fb8), + TOBN(0x5c552313, 0xaa686d82), TOBN(0xdee635fa, 0x465d866c), + TOBN(0xbc3c224a, 0x18ee6e8a), TOBN(0xeed748a6, 0xed42e02f), + TOBN(0xe70f930a, 0xd474cd08), TOBN(0x774ea6ec, 0xfff24adf), + TOBN(0x03e2de1c, 0xf3480d4a), TOBN(0xf0d8edc7, 0xbc8acf1a), + TOBN(0xf23e3303, 0x68295a9c), TOBN(0xfadd5f68, 0xc546a97d), + TOBN(0x895597ad, 0x96f8acb1), TOBN(0xbddd49d5, 0x671bdae2), + TOBN(0x16fcd528, 0x21dd43f4), TOBN(0xa5a45412, 0x6619141a)} + , + {TOBN(0x8ce9b6bf, 0xc360e25a), TOBN(0xe6425195, 0x075a1a78), + TOBN(0x9dc756a8, 0x481732f4), TOBN(0x83c0440f, 0x5432b57a), + TOBN(0xc670b3f1, 0xd720281f), TOBN(0x2205910e, 0xd135e051), + TOBN(0xded14b0e, 0xdb052be7), TOBN(0x697b3d27, 0xc568ea39), + TOBN(0x2e599b9a, 0xfb3ff9ed), TOBN(0x28c2e0ab, 0x17f6515c), + TOBN(0x1cbee4fd, 0x474da449), TOBN(0x071279a4, 0x4f364452), + TOBN(0x97abff66, 0x01fbe855), TOBN(0x3ee394e8, 0x5fda51c4), + TOBN(0x190385f6, 0x67597c0b), TOBN(0x6e9fccc6, 0xa27ee34b), + TOBN(0x0b89de93, 0x14092ebb), TOBN(0xf17256bd, 0x428e240c), + TOBN(0xcf89a7f3, 0x93d2f064), TOBN(0x4f57841e, 0xe1ed3b14), + TOBN(0x4ee14405, 0xe708d855), TOBN(0x856aae72, 0x03f1c3d0), + TOBN(0xc8e5424f, 0xbdd7eed5), TOBN(0x3333e4ef, 0x73ab4270), + TOBN(0x3bc77ade, 0xdda492f8), TOBN(0xc11a3aea, 0x78297205), + TOBN(0x5e89a3e7, 0x34931b4c), TOBN(0x17512e2e, 0x9f5694bb), + TOBN(0x5dc349f3, 0x177bf8b6), TOBN(0x232ea4ba, 0x08c7ff3e), + TOBN(0x9c4f9d16, 0xf511145d), TOBN(0xccf109a3, 0x33b379c3), + TOBN(0xe75e7a88, 0xa1f25897), TOBN(0x7ac6961f, 0xa1b5d4d8), + TOBN(0xe3e10773, 0x08f3ed5c), TOBN(0x208a54ec, 0x0a892dfb), + TOBN(0xbe826e19, 0x78660710), TOBN(0x0cf70a97, 0x237df2c8), + TOBN(0x418a7340, 0xed704da5), TOBN(0xa3eeb9a9, 0x08ca33fd), + TOBN(0x49d96233, 0x169bca96), TOBN(0x04d286d4, 0x2da6aafb), + TOBN(0xc09606ec, 0xa0c2fa94), TOBN(0x8869d0d5, 0x23ff0fb3), + TOBN(0xa99937e5, 0xd0150d65), TOBN(0xa92e2503, 0x240c14c9), + TOBN(0x656bf945, 0x108e2d49), TOBN(0x152a733a, 0xa2f59e2b), + TOBN(0xb4323d58, 0x8434a920), TOBN(0xc0af8e93, 0x622103c5), + TOBN(0x667518ef, 0x938dbf9a), TOBN(0xa1843073, 0x83a9cdf2), + TOBN(0x350a94aa, 0x5447ab80), TOBN(0xe5e5a325, 0xc75a3d61), + TOBN(0x74ba507f, 0x68411a9e), TOBN(0x10581fc1, 0x594f70c5), + TOBN(0x60e28570, 0x80eb24a9), TOBN(0x7bedfb4d, 0x488e0cfd), + TOBN(0x721ebbd7, 0xc259cdb8), TOBN(0x0b0da855, 0xbc6390a9), + TOBN(0x2b4d04db, 0xde314c70), TOBN(0xcdbf1fbc, 0x6c32e846), + TOBN(0x33833eab, 0xb162fc9e), TOBN(0x9939b48b, 0xb0dd3ab7), + TOBN(0x5aaa98a7, 0xcb0c9c8c), TOBN(0x75105f30, 0x81c4375c), + TOBN(0xceee5057, 0x5ef1c90f), TOBN(0xb31e065f, 0xc23a17bf), + TOBN(0x5364d275, 0xd4b6d45a), TOBN(0xd363f3ad, 0x62ec8996), + TOBN(0xb5d21239, 0x4391c65b), TOBN(0x84564765, 0xebb41b47), + TOBN(0x20d18ecc, 0x37107c78), TOBN(0xacff3b6b, 0x570c2a66), + TOBN(0x22f975d9, 0x9bd0d845), TOBN(0xef0a0c46, 0xba178fa0), + TOBN(0x1a419651, 0x76b6028e), TOBN(0xc49ec674, 0x248612d4), + TOBN(0x5b6ac4f2, 0x7338af55), TOBN(0x06145e62, 0x7bee5a36), + TOBN(0x33e95d07, 0xe75746b5), TOBN(0x1c1e1f6d, 0xc40c78be), + TOBN(0x967833ef, 0x222ff8e2), TOBN(0x4bedcf6a, 0xb49180ad), + TOBN(0x6b37e9c1, 0x3d7a4c8a), TOBN(0x2748887c, 0x6ddfe760), + TOBN(0xf7055123, 0xaa3a5bbc), TOBN(0x954ff225, 0x7bbb8e74), + TOBN(0xc42b8ab1, 0x97c3dfb9), TOBN(0x55a549b0, 0xcf168154), + TOBN(0xad6748e7, 0xc1b50692), TOBN(0x2775780f, 0x6fc5cbcb), + TOBN(0x4eab80b8, 0xe1c9d7c8), TOBN(0x8c69dae1, 0x3fdbcd56), + TOBN(0x47e6b4fb, 0x9969eace), TOBN(0x002f1085, 0xa705cb5a), + TOBN(0x4e23ca44, 0x6d3fea55), TOBN(0xb4ae9c86, 0xf4810568), + TOBN(0x47bfb91b, 0x2a62f27d), TOBN(0x60deb4c9, 0xd9bac28c), + TOBN(0xa892d894, 0x7de6c34c), TOBN(0x4ee68259, 0x4494587d), + TOBN(0x914ee14e, 0x1a3f8a5b), TOBN(0xbb113eaa, 0x28700385), + TOBN(0x81ca03b9, 0x2115b4c9), TOBN(0x7c163d38, 0x8908cad1), + TOBN(0xc912a118, 0xaa18179a), TOBN(0xe09ed750, 0x886e3081), + TOBN(0xa676e3fa, 0x26f516ca), TOBN(0x753cacf7, 0x8e732f91), + TOBN(0x51592aea, 0x833da8b4), TOBN(0xc626f42f, 0x4cbea8aa), + TOBN(0xef9dc899, 0xa7b56eaf), TOBN(0x00c0e52c, 0x34ef7316), + TOBN(0x5b1e4e24, 0xfe818a86), TOBN(0x9d31e20d, 0xc538be47), + TOBN(0x22eb932d, 0x3ed68974), TOBN(0xe44bbc08, 0x7c4e87c4), + TOBN(0x4121086e, 0x0dde9aef), TOBN(0x8e6b9cff, 0x134f4345), + TOBN(0x96892c1f, 0x711b0eb9), TOBN(0xb905f2c8, 0x780ab954), + TOBN(0xace26309, 0xa20792db), TOBN(0xec8ac9b3, 0x0684e126), + TOBN(0x486ad8b6, 0xb40a2447), TOBN(0x60121fc1, 0x9fe3fb24), + TOBN(0x5626fccf, 0x1a8e3b3f), TOBN(0x4e568622, 0x6ad1f394), + TOBN(0xda7aae0d, 0x196aa5a1), TOBN(0xe0df8c77, 0x1041b5fb), + TOBN(0x451465d9, 0x26b318b7), TOBN(0xc29b6e55, 0x7ab136e9), + TOBN(0x2c2ab48b, 0x71148463), TOBN(0xb5738de3, 0x64454a76), + TOBN(0x54ccf9a0, 0x5a03abe4), TOBN(0x377c0296, 0x0427d58e), + TOBN(0x73f5f0b9, 0x2bb39c1f), TOBN(0x14373f2c, 0xe608d8c5), + TOBN(0xdcbfd314, 0x00fbb805), TOBN(0xdf18fb20, 0x83afdcfb), + TOBN(0x81a57f42, 0x42b3523f), TOBN(0xe958532d, 0x87f650fb), + TOBN(0xaa8dc8b6, 0x8b0a7d7c), TOBN(0x1b75dfb7, 0x150166be), + TOBN(0x90e4f7c9, 0x2d7d1413), TOBN(0x67e2d6b5, 0x9834f597), + TOBN(0x4fd4f4f9, 0xa808c3e8), TOBN(0xaf8237e0, 0xd5281ec1), + TOBN(0x25ab5fdc, 0x84687cee), TOBN(0xc5ded6b1, 0xa5b26c09), + TOBN(0x8e4a5aec, 0xc8ea7650), TOBN(0x23b73e5c, 0x14cc417f), + TOBN(0x2bfb4318, 0x3037bf52), TOBN(0xb61e6db5, 0x78c725d7), + TOBN(0x8efd4060, 0xbbb3e5d7), TOBN(0x2e014701, 0xdbac488e), + TOBN(0xac75cf9a, 0x360aa449), TOBN(0xb70cfd05, 0x79634d08), + TOBN(0xa591536d, 0xfffb15ef), TOBN(0xb2c37582, 0xd07c106c), + TOBN(0xb4293fdc, 0xf50225f9), TOBN(0xc52e175c, 0xb0e12b03), + TOBN(0xf649c3ba, 0xd0a8bf64), TOBN(0x745a8fef, 0xeb8ae3c6), + TOBN(0x30d7e5a3, 0x58321bc3), TOBN(0xb1732be7, 0x0bc4df48), + TOBN(0x1f217993, 0xe9ea5058), TOBN(0xf7a71cde, 0x3e4fd745), + TOBN(0x86cc533e, 0x894c5bbb), TOBN(0x6915c7d9, 0x69d83082), + TOBN(0xa6aa2d05, 0x5815c244), TOBN(0xaeeee592, 0x49b22ce5), + TOBN(0x89e39d13, 0x78135486), TOBN(0x3a275c1f, 0x16b76f2f), + TOBN(0xdb6bcc1b, 0xe036e8f5), TOBN(0x4df69b21, 0x5e4709f5), + TOBN(0xa188b250, 0x2d0f39aa), TOBN(0x622118bb, 0x15a85947), + TOBN(0x2ebf520f, 0xfde0f4fa), TOBN(0xa40e9f29, 0x4860e539), + TOBN(0x7b6a51eb, 0x22b57f0f), TOBN(0x849a33b9, 0x7e80644a), + TOBN(0x50e5d16f, 0x1cf095fe), TOBN(0xd754b54e, 0xec55f002), + TOBN(0x5cfbbb22, 0x236f4a98), TOBN(0x0b0c59e9, 0x066800bb), + TOBN(0x4ac69a8f, 0x5a9a7774), TOBN(0x2b33f804, 0xd6bec948), + TOBN(0xb3729295, 0x32e6c466), TOBN(0x68956d0f, 0x4e599c73), + TOBN(0xa47a249f, 0x155c31cc), TOBN(0x24d80f0d, 0xe1ce284e), + TOBN(0xcd821dfb, 0x988baf01), TOBN(0xe6331a7d, 0xdbb16647), + TOBN(0x1eb8ad33, 0x094cb960), TOBN(0x593cca38, 0xc91bbca5), + TOBN(0x384aac8d, 0x26567456), TOBN(0x40fa0309, 0xc04b6490), + TOBN(0x97834cd6, 0xdab6c8f6), TOBN(0x68a7318d, 0x3f91e55f), + TOBN(0xa00fd04e, 0xfc4d3157), TOBN(0xb56f8ab2, 0x2bf3bdea), + TOBN(0x014f5648, 0x4fa57172), TOBN(0x948c5860, 0x450abdb3), + TOBN(0x342b5df0, 0x0ebd4f08), TOBN(0x3e5168cd, 0x0e82938e), + TOBN(0x7aedc1ce, 0xb0df5dd0), TOBN(0x6bbbc6d9, 0xe5732516), + TOBN(0xc7bfd486, 0x605daaa6), TOBN(0x46fd72b7, 0xbb9a6c9e), + TOBN(0xe4847fb1, 0xa124fb89), TOBN(0x75959cbd, 0xa2d8ffbc), + TOBN(0x42579f65, 0xc8a588ee), TOBN(0x368c92e6, 0xb80b499d), + TOBN(0xea4ef6cd, 0x999a5df1), TOBN(0xaa73bb7f, 0x936fe604), + TOBN(0xf347a70d, 0x6457d188), TOBN(0x86eda86b, 0x8b7a388b), + TOBN(0xb7cdff06, 0x0ccd6013), TOBN(0xbeb1b6c7, 0xd0053fb2), + TOBN(0x0b022387, 0x99240a9f), TOBN(0x1bbb384f, 0x776189b2), + TOBN(0x8695e71e, 0x9066193a), TOBN(0x2eb50097, 0x06ffac7e), + TOBN(0x0654a9c0, 0x4a7d2caa), TOBN(0x6f3fb3d1, 0xa5aaa290), + TOBN(0x835db041, 0xff476e8f), TOBN(0x540b8b0b, 0xc42295e4), + TOBN(0xa5c73ac9, 0x05e214f5), TOBN(0x9a74075a, 0x56a0b638), + TOBN(0x2e4b1090, 0xce9e680b), TOBN(0x57a5b479, 0x6b8d9afa), + TOBN(0x0dca48e7, 0x26bfe65c), TOBN(0x097e391c, 0x7290c307), + TOBN(0x683c462e, 0x6669e72e), TOBN(0xf505be1e, 0x062559ac), + TOBN(0x5fbe3ea1, 0xe3a3035a), TOBN(0x6431ebf6, 0x9cd50da8), + TOBN(0xfd169d5c, 0x1f6407f2), TOBN(0x8d838a95, 0x60fce6b8), + TOBN(0x2a2bfa7f, 0x650006f0), TOBN(0xdfd7dad3, 0x50c0fbb2), + TOBN(0x92452495, 0xccf9ad96), TOBN(0x183bf494, 0xd95635f9), + TOBN(0x02d5df43, 0x4a7bd989), TOBN(0x505385cc, 0xa5431095), + TOBN(0xdd98e67d, 0xfd43f53e), TOBN(0xd61e1a6c, 0x500c34a9), + TOBN(0x5a4b46c6, 0x4a8a3d62), TOBN(0x8469c4d0, 0x247743d2), + TOBN(0x2bb3a13d, 0x88f7e433), TOBN(0x62b23a10, 0x01be5849), + TOBN(0xe83596b4, 0xa63d1a4c), TOBN(0x454e7fea, 0x7d183f3e), + TOBN(0x643fce61, 0x17afb01c), TOBN(0x4e65e5e6, 0x1c4c3638), + TOBN(0x41d85ea1, 0xef74c45b), TOBN(0x2cfbfa66, 0xae328506), + TOBN(0x98b078f5, 0x3ada7da9), TOBN(0xd985fe37, 0xec752fbb), + TOBN(0xeece68fe, 0x5a0148b4), TOBN(0x6f9a55c7, 0x2d78136d), + TOBN(0x232dccc4, 0xd2b729ce), TOBN(0xa27e0dfd, 0x90aafbc4), + TOBN(0x96474452, 0x12b4603e), TOBN(0xa876c551, 0x6b706d14), + TOBN(0xdf145fcf, 0x69a9d412), TOBN(0xe2ab75b7, 0x2d479c34), + TOBN(0x12df9a76, 0x1a23ff97), TOBN(0xc6138992, 0x5d359d10), + TOBN(0x6e51c7ae, 0xfa835f22), TOBN(0x69a79cb1, 0xc0fcc4d9), + TOBN(0xf57f350d, 0x594cc7e1), TOBN(0x3079ca63, 0x3350ab79), + TOBN(0x226fb614, 0x9aff594a), TOBN(0x35afec02, 0x6d59a62b), + TOBN(0x9bee46f4, 0x06ed2c6e), TOBN(0x58da1735, 0x7d939a57), + TOBN(0x44c50402, 0x8fd1797e), TOBN(0xd8853e7c, 0x5ccea6ca), + TOBN(0x4065508d, 0xa35fcd5f), TOBN(0x8965df8c, 0x495ccaeb), + TOBN(0x0f2da850, 0x12e1a962), TOBN(0xee471b94, 0xc1cf1cc4), + TOBN(0xcef19bc8, 0x0a08fb75), TOBN(0x704958f5, 0x81de3591), + TOBN(0x2867f8b2, 0x3aef4f88), TOBN(0x8d749384, 0xea9f9a5f), + TOBN(0x1b385537, 0x8c9049f4), TOBN(0x5be948f3, 0x7b92d8b6), + TOBN(0xd96f725d, 0xb6e2bd6b), TOBN(0x37a222bc, 0x958c454d), + TOBN(0xe7c61abb, 0x8809bf61), TOBN(0x46f07fbc, 0x1346f18d), + TOBN(0xfb567a7a, 0xe87c0d1c), TOBN(0x84a461c8, 0x7ef3d07a), + TOBN(0x0a5adce6, 0xd9278d98), TOBN(0x24d94813, 0x9dfc73e1), + TOBN(0x4f3528b6, 0x054321c3), TOBN(0x2e03fdde, 0x692ea706), + TOBN(0x10e60619, 0x47b533c0), TOBN(0x1a8bc73f, 0x2ca3c055), + TOBN(0xae58d4b2, 0x1bb62b8f), TOBN(0xb2045a73, 0x584a24e3), + TOBN(0x3ab3d5af, 0xbd76e195), TOBN(0x478dd1ad, 0x6938a810), + TOBN(0x6ffab393, 0x6ee3d5cb), TOBN(0xdfb693db, 0x22b361e4), + TOBN(0xf9694496, 0x51dbf1a7), TOBN(0xcab4b4ef, 0x08a2e762), + TOBN(0xe8c92f25, 0xd39bba9a), TOBN(0x850e61bc, 0xf1464d96), + TOBN(0xb7e830e3, 0xdc09508b), TOBN(0xfaf6d2cf, 0x74317655), + TOBN(0x72606ceb, 0xdf690355), TOBN(0x48bb92b3, 0xd0c3ded6), + TOBN(0x65b75484, 0x5c7cf892), TOBN(0xf6cd7ac9, 0xd5d5f01f), + TOBN(0xc2c30a59, 0x96401d69), TOBN(0x91268650, 0xed921878), + TOBN(0x380bf913, 0xb78c558f), TOBN(0x43c0baeb, 0xc8afdaa9), + TOBN(0x377f61d5, 0x54f169d3), TOBN(0xf8da07e3, 0xae5ff20b), + TOBN(0xb676c49d, 0xa8a90ea8), TOBN(0x81c1ff2b, 0x83a29b21), + TOBN(0x383297ac, 0x2ad8d276), TOBN(0x3001122f, 0xba89f982), + TOBN(0xe1d794be, 0x6718e448), TOBN(0x246c1482, 0x7c3e6e13), + TOBN(0x56646ef8, 0x5d26b5ef), TOBN(0x80f5091e, 0x88069cdd), + TOBN(0xc5992e2f, 0x724bdd38), TOBN(0x02e915b4, 0x8471e8c7), + TOBN(0x96ff320a, 0x0d0ff2a9), TOBN(0xbf886487, 0x4384d1a0), + TOBN(0xbbe1e6a6, 0xc93f72d6), TOBN(0xd5f75d12, 0xcad800ea), + TOBN(0xfa40a09f, 0xe7acf117), TOBN(0x32c8cdd5, 0x7581a355), + TOBN(0x74221992, 0x7023c499), TOBN(0xa8afe5d7, 0x38ec3901), + TOBN(0x5691afcb, 0xa90e83f0), TOBN(0x41bcaa03, 0x0b8f8eac), + TOBN(0xe38b5ff9, 0x8d2668d5), TOBN(0x0715281a, 0x7ad81965), + TOBN(0x1bc8fc7c, 0x03c6ce11), TOBN(0xcbbee6e2, 0x8b650436), + TOBN(0x06b00fe8, 0x0cdb9808), TOBN(0x17d6e066, 0xfe3ed315), + TOBN(0x2e9d38c6, 0x4d0b5018), TOBN(0xab8bfd56, 0x844dcaef), + TOBN(0x42894a59, 0x513aed8b), TOBN(0xf77f3b6d, 0x314bd07a), + TOBN(0xbbdecb8f, 0x8e42b582), TOBN(0xf10e2fa8, 0xd2390fe6), + TOBN(0xefb95022, 0x62a2f201), TOBN(0x4d59ea50, 0x50ee32b0), + TOBN(0xd87f7728, 0x6da789a8), TOBN(0xcf98a2cf, 0xf79492c4), + TOBN(0xf9577239, 0x720943c2), TOBN(0xba044cf5, 0x3990b9d0), + TOBN(0x5aa8e823, 0x95f2884a), TOBN(0x834de6ed, 0x0278a0af), + TOBN(0xc8e1ee9a, 0x5f25bd12), TOBN(0x9259ceaa, 0x6f7ab271), + TOBN(0x7e6d97a2, 0x77d00b76), TOBN(0x5c0c6eea, 0xa437832a), + TOBN(0x5232c20f, 0x5606b81d), TOBN(0xabd7b375, 0x0d991ee5), + TOBN(0x4d2bfe35, 0x8632d951), TOBN(0x78f85146, 0x98ed9364), + TOBN(0x951873f0, 0xf30c3282), TOBN(0x0da8ac80, 0xa789230b), + TOBN(0x3ac7789c, 0x5398967f), TOBN(0xa69b8f7f, 0xbdda0fb5), + TOBN(0xe5db7717, 0x6add8545), TOBN(0x1b71cb66, 0x72c49b66), + TOBN(0xd8560739, 0x68421d77), TOBN(0x03840fe8, 0x83e3afea), + TOBN(0xb391dad5, 0x1ec69977), TOBN(0xae243fb9, 0x307f6726), + TOBN(0xc88ac87b, 0xe8ca160c), TOBN(0x5174cced, 0x4ce355f4), + TOBN(0x98a35966, 0xe58ba37d), TOBN(0xfdcc8da2, 0x7817335d), + TOBN(0x5b752830, 0x83fbc7bf), TOBN(0x68e419d4, 0xd9c96984), + TOBN(0x409a39f4, 0x02a40380), TOBN(0x88940faf, 0x1fe977bc), + TOBN(0xc640a94b, 0x8f8edea6), TOBN(0x1e22cd17, 0xed11547d), + TOBN(0xe28568ce, 0x59ffc3e2), TOBN(0x60aa1b55, 0xc1dee4e7), + TOBN(0xc67497c8, 0x837cb363), TOBN(0x06fb438a, 0x105a2bf2), + TOBN(0x30357ec4, 0x500d8e20), TOBN(0x1ad9095d, 0x0670db10), + TOBN(0x7f589a05, 0xc73b7cfd), TOBN(0xf544607d, 0x880d6d28), + TOBN(0x17ba93b1, 0xa20ef103), TOBN(0xad859130, 0x6ba6577b), + TOBN(0x65c91cf6, 0x6fa214a0), TOBN(0xd7d49c6c, 0x27990da5), + TOBN(0xecd9ec8d, 0x20bb569d), TOBN(0xbd4b2502, 0xeeffbc33), + TOBN(0x2056ca5a, 0x6bed0467), TOBN(0x7916a1f7, 0x5b63728c), + TOBN(0xd4f9497d, 0x53a4f566), TOBN(0x89734664, 0x97b56810), + TOBN(0xf8e1da74, 0x0494a621), TOBN(0x82546a93, 0x8d011c68), + TOBN(0x1f3acb19, 0xc61ac162), TOBN(0x52f8fa9c, 0xabad0d3e), + TOBN(0x15356523, 0xb4b7ea43), TOBN(0x5a16ad61, 0xae608125), + TOBN(0xb0bcb87f, 0x4faed184), TOBN(0x5f236b1d, 0x5029f45f), + TOBN(0xd42c7607, 0x0bc6b1fc), TOBN(0xc644324e, 0x68aefce3), + TOBN(0x8e191d59, 0x5c5d8446), TOBN(0xc0208077, 0x13ae1979), + TOBN(0xadcaee55, 0x3ba59cc7), TOBN(0x20ed6d6b, 0xa2cb81ba), + TOBN(0x0952ba19, 0xb6efcffc), TOBN(0x60f12d68, 0x97c0b87c), + TOBN(0x4ee2c7c4, 0x9caa30bc), TOBN(0x767238b7, 0x97fbff4e), + TOBN(0xebc73921, 0x501b5d92), TOBN(0x3279e3df, 0xc2a37737), + TOBN(0x9fc12bc8, 0x6d197543), TOBN(0xfa94dc6f, 0x0a40db4e), + TOBN(0x7392b41a, 0x530ccbbd), TOBN(0x87c82146, 0xea823525), + TOBN(0xa52f984c, 0x05d98d0c), TOBN(0x2ae57d73, 0x5ef6974c), + TOBN(0x9377f7bf, 0x3042a6dd), TOBN(0xb1a007c0, 0x19647a64), + TOBN(0xfaa9079a, 0x0cca9767), TOBN(0x3d81a25b, 0xf68f72d5), + TOBN(0x752067f8, 0xff81578e), TOBN(0x78622150, 0x9045447d), + TOBN(0xc0c22fcf, 0x0505aa6f), TOBN(0x1030f0a6, 0x6bed1c77), + TOBN(0x31f29f15, 0x1f0bd739), TOBN(0x2d7989c7, 0xe6debe85), + TOBN(0x5c070e72, 0x8e677e98), TOBN(0x0a817bd3, 0x06e81fd5), + TOBN(0xc110d830, 0xb0f2ac95), TOBN(0x48d0995a, 0xab20e64e), + TOBN(0x0f3e00e1, 0x7729cd9a), TOBN(0x2a570c20, 0xdd556946), + TOBN(0x912dbcfd, 0x4e86214d), TOBN(0x2d014ee2, 0xcf615498), + TOBN(0x55e2b1e6, 0x3530d76e), TOBN(0xc5135ae4, 0xfd0fd6d1), + TOBN(0x0066273a, 0xd4f3049f), TOBN(0xbb8e9893, 0xe7087477), + TOBN(0x2dba1ddb, 0x14c6e5fd), TOBN(0xdba37886, 0x51f57e6c), + TOBN(0x5aaee0a6, 0x5a72f2cf), TOBN(0x1208bfbf, 0x7bea5642), + TOBN(0xf5c6aa3b, 0x67872c37), TOBN(0xd726e083, 0x43f93224), + TOBN(0x1854daa5, 0x061f1658), TOBN(0xc0016df1, 0xdf0cd2b3), + TOBN(0xc2a3f23e, 0x833d50de), TOBN(0x73b681d2, 0xbbbd3017), + TOBN(0x2f046dc4, 0x3ac343c0), TOBN(0x9c847e7d, 0x85716421), + TOBN(0xe1e13c91, 0x0917eed4), TOBN(0x3fc9eebd, 0x63a1b9c6), + TOBN(0x0f816a72, 0x7fe02299), TOBN(0x6335ccc2, 0x294f3319), + TOBN(0x3820179f, 0x4745c5be), TOBN(0xe647b782, 0x922f066e), + TOBN(0xc22e49de, 0x02cafb8a), TOBN(0x299bc2ff, 0xfcc2eccc), + TOBN(0x9a8feea2, 0x6e0e8282), TOBN(0xa627278b, 0xfe893205), + TOBN(0xa7e19733, 0x7933e47b), TOBN(0xf4ff6b13, 0x2e766402), + TOBN(0xa4d8be0a, 0x98440d9f), TOBN(0x658f5c2f, 0x38938808), + TOBN(0x90b75677, 0xc95b3b3e), TOBN(0xfa044269, 0x3137b6ff), + TOBN(0x077b039b, 0x43c47c29), TOBN(0xcca95dd3, 0x8a6445b2), + TOBN(0x0b498ba4, 0x2333fc4c), TOBN(0x274f8e68, 0xf736a1b1), + TOBN(0x6ca348fd, 0x5f1d4b2e), TOBN(0x24d3be78, 0xa8f10199), + TOBN(0x8535f858, 0xca14f530), TOBN(0xa6e7f163, 0x5b982e51), + TOBN(0x847c8512, 0x36e1bf62), TOBN(0xf6a7c58e, 0x03448418), + TOBN(0x583f3703, 0xf9374ab6), TOBN(0x864f9195, 0x6e564145), + TOBN(0x33bc3f48, 0x22526d50), TOBN(0x9f323c80, 0x1262a496), + TOBN(0xaa97a7ae, 0x3f046a9a), TOBN(0x70da183e, 0xdf8a039a), + TOBN(0x5b68f71c, 0x52aa0ba6), TOBN(0x9be0fe51, 0x21459c2d), + TOBN(0xc1e17eb6, 0xcbc613e5), TOBN(0x33131d55, 0x497ea61c), + TOBN(0x2f69d39e, 0xaf7eded5), TOBN(0x73c2f434, 0xde6af11b), + TOBN(0x4ca52493, 0xa4a375fa), TOBN(0x5f06787c, 0xb833c5c2), + TOBN(0x814e091f, 0x3e6e71cf), TOBN(0x76451f57, 0x8b746666)} + , + {TOBN(0x80f9bdef, 0x694db7e0), TOBN(0xedca8787, 0xb9fcddc6), + TOBN(0x51981c34, 0x03b8dce1), TOBN(0x4274dcf1, 0x70e10ba1), + TOBN(0xf72743b8, 0x6def6d1a), TOBN(0xd25b1670, 0xebdb1866), + TOBN(0xc4491e8c, 0x050c6f58), TOBN(0x2be2b2ab, 0x87fbd7f5), + TOBN(0x3e0e5c9d, 0xd111f8ec), TOBN(0xbcc33f8d, 0xb7c4e760), + TOBN(0x702f9a91, 0xbd392a51), TOBN(0x7da4a795, 0xc132e92d), + TOBN(0x1a0b0ae3, 0x0bb1151b), TOBN(0x54febac8, 0x02e32251), + TOBN(0xea3a5082, 0x694e9e78), TOBN(0xe58ffec1, 0xe4fe40b8), + TOBN(0xf85592fc, 0xd1e0cf9e), TOBN(0xdea75f0d, 0xc0e7b2e8), + TOBN(0xc04215cf, 0xc135584e), TOBN(0x174fc727, 0x2f57092a), + TOBN(0xe7277877, 0xeb930bea), TOBN(0x504caccb, 0x5eb02a5a), + TOBN(0xf9fe08f7, 0xf5241b9b), TOBN(0xe7fb62f4, 0x8d5ca954), + TOBN(0xfbb8349d, 0x29c4120b), TOBN(0x9f94391f, 0xc0d0d915), + TOBN(0xc4074fa7, 0x5410ba51), TOBN(0xa66adbf6, 0x150a5911), + TOBN(0xc164543c, 0x34bfca38), TOBN(0xe0f27560, 0xb9e1ccfc), + TOBN(0x99da0f53, 0xe820219c), TOBN(0xe8234498, 0xc6b4997a), + TOBN(0xcfb88b76, 0x9d4c5423), TOBN(0x9e56eb10, 0xb0521c49), + TOBN(0x418e0b5e, 0xbe8700a1), TOBN(0x00cbaad6, 0xf93cb58a), + TOBN(0xe923fbde, 0xd92a5e67), TOBN(0xca4979ac, 0x1f347f11), + TOBN(0x89162d85, 0x6bc0585b), TOBN(0xdd6254af, 0xac3c70e3), + TOBN(0x7b23c513, 0x516e19e4), TOBN(0x56e2e847, 0xc5c4d593), + TOBN(0x9f727d73, 0x5ce71ef6), TOBN(0x5b6304a6, 0xf79a44c5), + TOBN(0x6638a736, 0x3ab7e433), TOBN(0x1adea470, 0xfe742f83), + TOBN(0xe054b854, 0x5b7fc19f), TOBN(0xf935381a, 0xba1d0698), + TOBN(0x546eab2d, 0x799e9a74), TOBN(0x96239e0e, 0xa949f729), + TOBN(0xca274c6b, 0x7090055a), TOBN(0x835142c3, 0x9020c9b0), + TOBN(0xa405667a, 0xa2e8807f), TOBN(0x29f2c085, 0x1aa3d39e), + TOBN(0xcc555d64, 0x42fc72f5), TOBN(0xe856e0e7, 0xfbeacb3c), + TOBN(0xb5504f9d, 0x918e4936), TOBN(0x65035ef6, 0xb2513982), + TOBN(0x0553a0c2, 0x6f4d9cb9), TOBN(0x6cb10d56, 0xbea85509), + TOBN(0x48d957b7, 0xa242da11), TOBN(0x16a4d3dd, 0x672b7268), + TOBN(0x3d7e637c, 0x8502a96b), TOBN(0x27c7032b, 0x730d463b), + TOBN(0xbdc02b18, 0xe4136a14), TOBN(0xbacf969d, 0x678e32bf), + TOBN(0xc98d89a3, 0xdd9c3c03), TOBN(0x7b92420a, 0x23becc4f), + TOBN(0xd4b41f78, 0xc64d565c), TOBN(0x9f969d00, 0x10f28295), + TOBN(0xec7f7f76, 0xb13d051a), TOBN(0x08945e1e, 0xa92da585), + TOBN(0x55366b7d, 0x5846426f), TOBN(0xe7d09e89, 0x247d441d), + TOBN(0x510b404d, 0x736fbf48), TOBN(0x7fa003d0, 0xe784bd7d), + TOBN(0x25f7614f, 0x17fd9596), TOBN(0x49e0e0a1, 0x35cb98db), + TOBN(0x2c65957b, 0x2e83a76a), TOBN(0x5d40da8d, 0xcddbe0f8), + TOBN(0xf2b8c405, 0x050bad24), TOBN(0x8918426d, 0xc2aa4823), + TOBN(0x2aeab3dd, 0xa38365a7), TOBN(0x72031717, 0x7c91b690), + TOBN(0x8b00d699, 0x60a94120), TOBN(0x478a255d, 0xe99eaeec), + TOBN(0xbf656a5f, 0x6f60aafd), TOBN(0xdfd7cb75, 0x5dee77b3), + TOBN(0x37f68bb4, 0xa595939d), TOBN(0x03556479, 0x28740217), + TOBN(0x8e740e7c, 0x84ad7612), TOBN(0xd89bc843, 0x9044695f), + TOBN(0xf7f3da5d, 0x85a9184d), TOBN(0x562563bb, 0x9fc0b074), + TOBN(0x06d2e6aa, 0xf88a888e), TOBN(0x612d8643, 0x161fbe7c), + TOBN(0x465edba7, 0xf64085e7), TOBN(0xb230f304, 0x29aa8511), + TOBN(0x53388426, 0xcda2d188), TOBN(0x90885735, 0x4b666649), + TOBN(0x6f02ff9a, 0x652f54f6), TOBN(0x65c82294, 0x5fae2bf0), + TOBN(0x7816ade0, 0x62f5eee3), TOBN(0xdcdbdf43, 0xfcc56d70), + TOBN(0x9fb3bba3, 0x54530bb2), TOBN(0xbde3ef77, 0xcb0869ea), + TOBN(0x89bc9046, 0x0b431163), TOBN(0x4d03d7d2, 0xe4819a35), + TOBN(0x33ae4f9e, 0x43b6a782), TOBN(0x216db307, 0x9c88a686), + TOBN(0x91dd88e0, 0x00ffedd9), TOBN(0xb280da9f, 0x12bd4840), + TOBN(0x32a7cb8a, 0x1635e741), TOBN(0xfe14008a, 0x78be02a7), + TOBN(0x3fafb334, 0x1b7ae030), TOBN(0x7fd508e7, 0x5add0ce9), + TOBN(0x72c83219, 0xd607ad51), TOBN(0x0f229c0a, 0x8d40964a), + TOBN(0x1be2c336, 0x1c878da2), TOBN(0xe0c96742, 0xeab2ab86), + TOBN(0x458f8691, 0x3e538cd7), TOBN(0xa7001f6c, 0x8e08ad53), + TOBN(0x52b8c6e6, 0xbf5d15ff), TOBN(0x548234a4, 0x011215dd), + TOBN(0xff5a9d2d, 0x3d5b4045), TOBN(0xb0ffeeb6, 0x4a904190), + TOBN(0x55a3aca4, 0x48607f8b), TOBN(0x8cbd665c, 0x30a0672a), + TOBN(0x87f834e0, 0x42583068), TOBN(0x02da2aeb, 0xf3f6e683), + TOBN(0x6b763e5d, 0x05c12248), TOBN(0x7230378f, 0x65a8aefc), + TOBN(0x93bd80b5, 0x71e8e5ca), TOBN(0x53ab041c, 0xb3b62524), + TOBN(0x1b860513, 0x6c9c552e), TOBN(0xe84d402c, 0xd5524e66), + TOBN(0xa37f3573, 0xf37f5937), TOBN(0xeb0f6c7d, 0xd1e4fca5), + TOBN(0x2965a554, 0xac8ab0fc), TOBN(0x17fbf56c, 0x274676ac), + TOBN(0x2e2f6bd9, 0xacf7d720), TOBN(0x41fc8f88, 0x10224766), + TOBN(0x517a14b3, 0x85d53bef), TOBN(0xdae327a5, 0x7d76a7d1), + TOBN(0x6ad0a065, 0xc4818267), TOBN(0x33aa189b, 0x37c1bbc1), + TOBN(0x64970b52, 0x27392a92), TOBN(0x21699a1c, 0x2d1535ea), + TOBN(0xcd20779c, 0xc2d7a7fd), TOBN(0xe3186059, 0x99c83cf2), + TOBN(0x9b69440b, 0x72c0b8c7), TOBN(0xa81497d7, 0x7b9e0e4d), + TOBN(0x515d5c89, 0x1f5f82dc), TOBN(0x9a7f67d7, 0x6361079e), + TOBN(0xa8da81e3, 0x11a35330), TOBN(0xe44990c4, 0x4b18be1b), + TOBN(0xc7d5ed95, 0xaf103e59), TOBN(0xece8aba7, 0x8dac9261), + TOBN(0xbe82b099, 0x9394b8d3), TOBN(0x6830f09a, 0x16adfe83), + TOBN(0x250a29b4, 0x88172d01), TOBN(0x8b20bd65, 0xcaff9e02), + TOBN(0xb8a7661e, 0xe8a6329a), TOBN(0x4520304d, 0xd3fce920), + TOBN(0xae45da1f, 0x2b47f7ef), TOBN(0xe07f5288, 0x5bffc540), + TOBN(0xf7997009, 0x3464f874), TOBN(0x2244c2cd, 0xa6fa1f38), + TOBN(0x43c41ac1, 0x94d7d9b1), TOBN(0x5bafdd82, 0xc82e7f17), + TOBN(0xdf0614c1, 0x5fda0fca), TOBN(0x74b043a7, 0xa8ae37ad), + TOBN(0x3ba6afa1, 0x9e71734c), TOBN(0x15d5437e, 0x9c450f2e), + TOBN(0x4a5883fe, 0x67e242b1), TOBN(0x5143bdc2, 0x2c1953c2), + TOBN(0x542b8b53, 0xfc5e8920), TOBN(0x363bf9a8, 0x9a9cee08), + TOBN(0x02375f10, 0xc3486e08), TOBN(0x2037543b, 0x8c5e70d2), + TOBN(0x7109bccc, 0x625640b4), TOBN(0xcbc1051e, 0x8bc62c3b), + TOBN(0xf8455fed, 0x803f26ea), TOBN(0x6badceab, 0xeb372424), + TOBN(0xa2a9ce7c, 0x6b53f5f9), TOBN(0x64246595, 0x1b176d99), + TOBN(0xb1298d36, 0xb95c081b), TOBN(0x53505bb8, 0x1d9a9ee6), + TOBN(0x3f6f9e61, 0xf2ba70b0), TOBN(0xd07e16c9, 0x8afad453), + TOBN(0x9f1694bb, 0xe7eb4a6a), TOBN(0xdfebced9, 0x3cb0bc8e), + TOBN(0x92d3dcdc, 0x53868c8b), TOBN(0x174311a2, 0x386107a6), + TOBN(0x4109e07c, 0x689b4e64), TOBN(0x30e4587f, 0x2df3dcb6), + TOBN(0x841aea31, 0x0811b3b2), TOBN(0x6144d41d, 0x0cce43ea), + TOBN(0x464c4581, 0x2a9a7803), TOBN(0xd03d371f, 0x3e158930), + TOBN(0xc676d7f2, 0xb1f3390b), TOBN(0x9f7a1b8c, 0xa5b61272), + TOBN(0x4ebebfc9, 0xc2e127a9), TOBN(0x4602500c, 0x5dd997bf), + TOBN(0x7f09771c, 0x4711230f), TOBN(0x058eb37c, 0x020f09c1), + TOBN(0xab693d4b, 0xfee5e38b), TOBN(0x9289eb1f, 0x4653cbc0), + TOBN(0xbecf46ab, 0xd51b9cf5), TOBN(0xd2aa9c02, 0x9f0121af), + TOBN(0x36aaf7d2, 0xe90dc274), TOBN(0x909e4ea0, 0x48b95a3c), + TOBN(0xe6b70496, 0x6f32dbdb), TOBN(0x672188a0, 0x8b030b3e), + TOBN(0xeeffe5b3, 0xcfb617e2), TOBN(0x87e947de, 0x7c82709e), + TOBN(0xa44d2b39, 0x1770f5a7), TOBN(0xe4d4d791, 0x0e44eb82), + TOBN(0x42e69d1e, 0x3f69712a), TOBN(0xbf11c4d6, 0xac6a820e), + TOBN(0xb5e7f3e5, 0x42c4224c), TOBN(0xd6b4e81c, 0x449d941c), + TOBN(0x5d72bd16, 0x5450e878), TOBN(0x6a61e28a, 0xee25ac54), + TOBN(0x33272094, 0xe6f1cd95), TOBN(0x7512f30d, 0x0d18673f), + TOBN(0x32f7a4ca, 0x5afc1464), TOBN(0x2f095656, 0x6bbb977b), + TOBN(0x586f47ca, 0xa8226200), TOBN(0x02c868ad, 0x1ac07369), + TOBN(0x4ef2b845, 0xc613acbe), TOBN(0x43d7563e, 0x0386054c), + TOBN(0x54da9dc7, 0xab952578), TOBN(0xb5423df2, 0x26e84d0b), + TOBN(0xa8b64eeb, 0x9b872042), TOBN(0xac205782, 0x5990f6df), + TOBN(0x4ff696eb, 0x21f4c77a), TOBN(0x1a79c3e4, 0xaab273af), + TOBN(0x29bc922e, 0x9436b3f1), TOBN(0xff807ef8, 0xd6d9a27a), + TOBN(0x82acea3d, 0x778f22a0), TOBN(0xfb10b2e8, 0x5b5e7469), + TOBN(0xc0b16980, 0x2818ee7d), TOBN(0x011afff4, 0xc91c1a2f), + TOBN(0x95a6d126, 0xad124418), TOBN(0x31c081a5, 0xe72e295f), + TOBN(0x36bb283a, 0xf2f4db75), TOBN(0xd115540f, 0x7acef462), + TOBN(0xc7f3a8f8, 0x33f6746c), TOBN(0x21e46f65, 0xfea990ca), + TOBN(0x915fd5c5, 0xcaddb0a9), TOBN(0xbd41f016, 0x78614555), + TOBN(0x346f4434, 0x426ffb58), TOBN(0x80559436, 0x14dbc204), + TOBN(0xf3dd20fe, 0x5a969b7f), TOBN(0x9d59e956, 0xe899a39a), + TOBN(0xf1b0971c, 0x8ad4cf4b), TOBN(0x03448860, 0x2ffb8fb8), + TOBN(0xf071ac3c, 0x65340ba4), TOBN(0x408d0596, 0xb27fd758), + TOBN(0xe7c78ea4, 0x98c364b0), TOBN(0xa4aac4a5, 0x051e8ab5), + TOBN(0xb9e1d560, 0x485d9002), TOBN(0x9acd518a, 0x88844455), + TOBN(0xe4ca688f, 0xd06f56c0), TOBN(0xa48af70d, 0xdf027972), + TOBN(0x691f0f04, 0x5e9a609d), TOBN(0xa9dd82cd, 0xee61270e), + TOBN(0x8903ca63, 0xa0ef18d3), TOBN(0x9fb7ee35, 0x3d6ca3bd), + TOBN(0xa7b4a09c, 0xabf47d03), TOBN(0x4cdada01, 0x1c67de8e), + TOBN(0x52003749, 0x9355a244), TOBN(0xe77fd2b6, 0x4f2151a9), + TOBN(0x695d6cf6, 0x66b4efcb), TOBN(0xc5a0cacf, 0xda2cfe25), + TOBN(0x104efe5c, 0xef811865), TOBN(0xf52813e8, 0x9ea5cc3d), + TOBN(0x855683dc, 0x40b58dbc), TOBN(0x0338ecde, 0x175fcb11), + TOBN(0xf9a05637, 0x74921592), TOBN(0xb4f1261d, 0xb9bb9d31), + TOBN(0x551429b7, 0x4e9c5459), TOBN(0xbe182e6f, 0x6ea71f53), + TOBN(0xd3a3b07c, 0xdfc50573), TOBN(0x9ba1afda, 0x62be8d44), + TOBN(0x9bcfd2cb, 0x52ab65d3), TOBN(0xdf11d547, 0xa9571802), + TOBN(0x099403ee, 0x02a2404a), TOBN(0x497406f4, 0x21088a71), + TOBN(0x99479409, 0x5004ae71), TOBN(0xbdb42078, 0xa812c362), + TOBN(0x2b72a30f, 0xd8828442), TOBN(0x283add27, 0xfcb5ed1c), + TOBN(0xf7c0e200, 0x66a40015), TOBN(0x3e3be641, 0x08b295ef), + TOBN(0xac127dc1, 0xe038a675), TOBN(0x729deff3, 0x8c5c6320), + TOBN(0xb7df8fd4, 0xa90d2c53), TOBN(0x9b74b0ec, 0x681e7cd3), + TOBN(0x5cb5a623, 0xdab407e5), TOBN(0xcdbd3615, 0x76b340c6), + TOBN(0xa184415a, 0x7d28392c), TOBN(0xc184c1d8, 0xe96f7830), + TOBN(0xc3204f19, 0x81d3a80f), TOBN(0xfde0c841, 0xc8e02432), + TOBN(0x78203b3e, 0x8149e0c1), TOBN(0x5904bdbb, 0x08053a73), + TOBN(0x30fc1dd1, 0x101b6805), TOBN(0x43c223bc, 0x49aa6d49), + TOBN(0x9ed67141, 0x7a174087), TOBN(0x311469a0, 0xd5997008), + TOBN(0xb189b684, 0x5e43fc61), TOBN(0xf3282375, 0xe0d3ab57), + TOBN(0x4fa34b67, 0xb1181da8), TOBN(0x621ed0b2, 0x99ee52b8), + TOBN(0x9b178de1, 0xad990676), TOBN(0xd51de67b, 0x56d54065), + TOBN(0x2a2c27c4, 0x7538c201), TOBN(0x33856ec8, 0x38a40f5c), + TOBN(0x2522fc15, 0xbe6cdcde), TOBN(0x1e603f33, 0x9f0c6f89), + TOBN(0x7994edc3, 0x103e30a6), TOBN(0x033a00db, 0x220c853e), + TOBN(0xd3cfa409, 0xf7bb7fd7), TOBN(0x70f8781e, 0x462d18f6), + TOBN(0xbbd82980, 0x687fe295), TOBN(0x6eef4c32, 0x595669f3), + TOBN(0x86a9303b, 0x2f7e85c3), TOBN(0x5fce4621, 0x71988f9b), + TOBN(0x5b935bf6, 0xc138acb5), TOBN(0x30ea7d67, 0x25661212), + TOBN(0xef1eb5f4, 0xe51ab9a2), TOBN(0x0587c98a, 0xae067c78), + TOBN(0xb3ce1b3c, 0x77ca9ca6), TOBN(0x2a553d4d, 0x54b5f057), + TOBN(0xc7898236, 0x4da29ec2), TOBN(0xdbdd5d13, 0xb9c57316), + TOBN(0xc57d6e6b, 0x2cd80d47), TOBN(0x80b460cf, 0xfe9e7391), + TOBN(0x98648cab, 0xf963c31e), TOBN(0x67f9f633, 0xcc4d32fd), + TOBN(0x0af42a9d, 0xfdf7c687), TOBN(0x55f292a3, 0x0b015ea7), + TOBN(0x89e468b2, 0xcd21ab3d), TOBN(0xe504f022, 0xc393d392), + TOBN(0xab21e1d4, 0xa5013af9), TOBN(0xe3283f78, 0xc2c28acb), + TOBN(0xf38b35f6, 0x226bf99f), TOBN(0xe8354274, 0x0e291e69), + TOBN(0x61673a15, 0xb20c162d), TOBN(0xc101dc75, 0xb04fbdbe), + TOBN(0x8323b4c2, 0x255bd617), TOBN(0x6c969693, 0x6c2a9154), + TOBN(0xc6e65860, 0x62679387), TOBN(0x8e01db0c, 0xb8c88e23), + TOBN(0x33c42873, 0x893a5559), TOBN(0x7630f04b, 0x47a3e149), + TOBN(0xb5d80805, 0xddcf35f8), TOBN(0x582ca080, 0x77dfe732), + TOBN(0x2c7156e1, 0x0b1894a0), TOBN(0x92034001, 0xd81c68c0), + TOBN(0xed225d00, 0xc8b115b5), TOBN(0x237f9c22, 0x83b907f2), + TOBN(0x0ea2f32f, 0x4470e2c0), TOBN(0xb725f7c1, 0x58be4e95), + TOBN(0x0f1dcafa, 0xb1ae5463), TOBN(0x59ed5187, 0x1ba2fc04), + TOBN(0xf6e0f316, 0xd0115d4d), TOBN(0x5180b12f, 0xd3691599), + TOBN(0x157e32c9, 0x527f0a41), TOBN(0x7b0b081d, 0xa8e0ecc0), + TOBN(0x6dbaaa8a, 0xbf4f0dd0), TOBN(0x99b289c7, 0x4d252696), + TOBN(0x79b7755e, 0xdbf864fe), TOBN(0x6974e2b1, 0x76cad3ab), + TOBN(0x35dbbee2, 0x06ddd657), TOBN(0xe7cbdd11, 0x2ff3a96d), + TOBN(0x88381968, 0x076be758), TOBN(0x2d737e72, 0x08c91f5d), + TOBN(0x5f83ab62, 0x86ec3776), TOBN(0x98aa649d, 0x945fa7a1), + TOBN(0xf477ec37, 0x72ef0933), TOBN(0x66f52b1e, 0x098c17b1), + TOBN(0x9eec58fb, 0xd803738b), TOBN(0x91aaade7, 0xe4e86aa4), + TOBN(0x6b1ae617, 0xa5b51492), TOBN(0x63272121, 0xbbc45974), + TOBN(0x7e0e28f0, 0x862c5129), TOBN(0x0a8f79a9, 0x3321a4a0), + TOBN(0xe26d1664, 0x5041c88f), TOBN(0x0571b805, 0x53233e3a), + TOBN(0xd1b0ccde, 0xc9520711), TOBN(0x55a9e4ed, 0x3c8b84bf), + TOBN(0x9426bd39, 0xa1fef314), TOBN(0x4f5f638e, 0x6eb93f2b), + TOBN(0xba2a1ed3, 0x2bf9341b), TOBN(0xd63c1321, 0x4d42d5a9), + TOBN(0xd2964a89, 0x316dc7c5), TOBN(0xd1759606, 0xca511851), + TOBN(0xd8a9201f, 0xf9e6ed35), TOBN(0xb7b5ee45, 0x6736925a), + TOBN(0x0a83fbbc, 0x99581af7), TOBN(0x3076bc40, 0x64eeb051), + TOBN(0x5511c98c, 0x02dec312), TOBN(0x270de898, 0x238dcb78), + TOBN(0x2cf4cf9c, 0x539c08c9), TOBN(0xa70cb65e, 0x38d3b06e), + TOBN(0xb12ec10e, 0xcfe57bbd), TOBN(0x82c7b656, 0x35a0c2b5), + TOBN(0xddc7d5cd, 0x161c67bd), TOBN(0xe32e8985, 0xae3a32cc), + TOBN(0x7aba9444, 0xd11a5529), TOBN(0xe964ed02, 0x2427fa1a), + TOBN(0x1528392d, 0x24a1770a), TOBN(0xa152ce2c, 0x12c72fcd), + TOBN(0x714553a4, 0x8ec07649), TOBN(0x18b4c290, 0x459dd453), + TOBN(0xea32b714, 0x7b64b110), TOBN(0xb871bfa5, 0x2e6f07a2), + TOBN(0xb67112e5, 0x9e2e3c9b), TOBN(0xfbf250e5, 0x44aa90f6), + TOBN(0xf77aedb8, 0xbd539006), TOBN(0x3b0cdf9a, 0xd172a66f), + TOBN(0xedf69fea, 0xf8c51187), TOBN(0x05bb67ec, 0x741e4da7), + TOBN(0x47df0f32, 0x08114345), TOBN(0x56facb07, 0xbb9792b1), + TOBN(0xf3e007e9, 0x8f6229e4), TOBN(0x62d103f4, 0x526fba0f), + TOBN(0x4f33bef7, 0xb0339d79), TOBN(0x9841357b, 0xb59bfec1), + TOBN(0xfa8dbb59, 0xc34e6705), TOBN(0xc3c7180b, 0x7fdaa84c), + TOBN(0xf95872fc, 0xa4108537), TOBN(0x8750cc3b, 0x932a3e5a), + TOBN(0xb61cc69d, 0xb7275d7d), TOBN(0xffa0168b, 0x2e59b2e9), + TOBN(0xca032abc, 0x6ecbb493), TOBN(0x1d86dbd3, 0x2c9082d8), + TOBN(0xae1e0b67, 0xe28ef5ba), TOBN(0x2c9a4699, 0xcb18e169), + TOBN(0x0ecd0e33, 0x1e6bbd20), TOBN(0x571b360e, 0xaf5e81d2), + TOBN(0xcd9fea58, 0x101c1d45), TOBN(0x6651788e, 0x18880452), + TOBN(0xa9972635, 0x1f8dd446), TOBN(0x44bed022, 0xe37281d0), + TOBN(0x094b2b2d, 0x33da525d), TOBN(0xf193678e, 0x13144fd8), + TOBN(0xb8ab5ba4, 0xf4c1061d), TOBN(0x4343b5fa, 0xdccbe0f4), + TOBN(0xa8702371, 0x63812713), TOBN(0x47bf6d2d, 0xf7611d93), + TOBN(0x46729b8c, 0xbd21e1d7), TOBN(0x7484d4e0, 0xd629e77d), + TOBN(0x830e6eea, 0x60dbac1f), TOBN(0x23d8c484, 0xda06a2f7), + TOBN(0x896714b0, 0x50ca535b), TOBN(0xdc8d3644, 0xebd97a9b), + TOBN(0x106ef9fa, 0xb12177b4), TOBN(0xf79bf464, 0x534d5d9c), + TOBN(0x2537a349, 0xa6ab360b), TOBN(0xc7c54253, 0xa00c744f), + TOBN(0xb3c7a047, 0xe5911a76), TOBN(0x61ffa5c8, 0x647f1ee7), + TOBN(0x15aed36f, 0x8f56ab42), TOBN(0x6a0d41b0, 0xa3ff9ac9), + TOBN(0x68f469f5, 0xcc30d357), TOBN(0xbe9adf81, 0x6b72be96), + TOBN(0x1cd926fe, 0x903ad461), TOBN(0x7e89e38f, 0xcaca441b), + TOBN(0xf0f82de5, 0xfacf69d4), TOBN(0x363b7e76, 0x4775344c), + TOBN(0x6894f312, 0xb2e36d04), TOBN(0x3c6cb4fe, 0x11d1c9a5), + TOBN(0x85d9c339, 0x4008e1f2), TOBN(0x5e9a85ea, 0x249f326c), + TOBN(0xdc35c60a, 0x678c5e06), TOBN(0xc08b944f, 0x9f86fba9), + TOBN(0xde40c02c, 0x89f71f0f), TOBN(0xad8f3e31, 0xff3da3c0), + TOBN(0x3ea5096b, 0x42125ded), TOBN(0x13879cbf, 0xa7379183), + TOBN(0x6f4714a5, 0x6b306a0b), TOBN(0x359c2ea6, 0x67646c5e), + TOBN(0xfacf8943, 0x07726368), TOBN(0x07a58935, 0x65ff431e), + TOBN(0x24d661d1, 0x68754ab0), TOBN(0x801fce1d, 0x6f429a76), + TOBN(0xc068a85f, 0xa58ce769), TOBN(0xedc35c54, 0x5d5eca2b), + TOBN(0xea31276f, 0xa3f660d1), TOBN(0xa0184ebe, 0xb8fc7167), + TOBN(0x0f20f21a, 0x1d8db0ae), TOBN(0xd96d095f, 0x56c35e12), + TOBN(0xedf402b5, 0xf8c2a25b), TOBN(0x1bb772b9, 0x059204b6), + TOBN(0x50cbeae2, 0x19b4e34c), TOBN(0x93109d80, 0x3fa0845a), + TOBN(0x54f7ccf7, 0x8ef59fb5), TOBN(0x3b438fe2, 0x88070963), + TOBN(0x9e28c659, 0x31f3ba9b), TOBN(0x9cc31b46, 0xead9da92), + TOBN(0x3c2f0ba9, 0xb733aa5f), TOBN(0xdece47cb, 0xf05af235), + TOBN(0xf8e3f715, 0xa2ac82a5), TOBN(0xc97ba641, 0x2203f18a), + TOBN(0xc3af5504, 0x09c11060), TOBN(0x56ea2c05, 0x46af512d), + TOBN(0xfac28daf, 0xf3f28146), TOBN(0x87fab43a, 0x959ef494),} + , + {TOBN(0x09891641, 0xd4c5105f), TOBN(0x1ae80f8e, 0x6d7fbd65), + TOBN(0x9d67225f, 0xbee6bdb0), TOBN(0x3b433b59, 0x7fc4d860), + TOBN(0x44e66db6, 0x93e85638), TOBN(0xf7b59252, 0xe3e9862f), + TOBN(0xdb785157, 0x665c32ec), TOBN(0x702fefd7, 0xae362f50), + TOBN(0x3754475d, 0x0fefb0c3), TOBN(0xd48fb56b, 0x46d7c35d), + TOBN(0xa070b633, 0x363798a4), TOBN(0xae89f3d2, 0x8fdb98e6), + TOBN(0x970b89c8, 0x6363d14c), TOBN(0x89817521, 0x67abd27d), + TOBN(0x9bf7d474, 0x44d5a021), TOBN(0xb3083baf, 0xcac72aee), + TOBN(0x389741de, 0xbe949a44), TOBN(0x638e9388, 0x546a4fa5), + TOBN(0x3fe6419c, 0xa0047bdc), TOBN(0x7047f648, 0xaaea57ca), + TOBN(0x54e48a90, 0x41fbab17), TOBN(0xda8e0b28, 0x576bdba2), + TOBN(0xe807eebc, 0xc72afddc), TOBN(0x07d3336d, 0xf42577bf), + TOBN(0x62a8c244, 0xbfe20925), TOBN(0x91c19ac3, 0x8fdce867), + TOBN(0x5a96a5d5, 0xdd387063), TOBN(0x61d587d4, 0x21d324f6), + TOBN(0xe87673a2, 0xa37173ea), TOBN(0x23848008, 0x53778b65), + TOBN(0x10f8441e, 0x05bab43e), TOBN(0xfa11fe12, 0x4621efbe), + TOBN(0x047b772e, 0x81685d7b), TOBN(0x23f27d81, 0xbf34a976), + TOBN(0xc27608e2, 0x915f48ef), TOBN(0x3b0b43fa, 0xa521d5c3), + TOBN(0x7613fb26, 0x63ca7284), TOBN(0x7f5729b4, 0x1d4db837), + TOBN(0x87b14898, 0x583b526b), TOBN(0x00b732a6, 0xbbadd3d1), + TOBN(0x8e02f426, 0x2048e396), TOBN(0x436b50b6, 0x383d9de4), + TOBN(0xf78d3481, 0x471e85ad), TOBN(0x8b01ea6a, 0xd005c8d6), + TOBN(0xd3c7afee, 0x97015c07), TOBN(0x46cdf1a9, 0x4e3ba2ae), + TOBN(0x7a42e501, 0x83d3a1d2), TOBN(0xd54b5268, 0xb541dff4), + TOBN(0x3f24cf30, 0x4e23e9bc), TOBN(0x4387f816, 0x126e3624), + TOBN(0x26a46a03, 0x3b0b6d61), TOBN(0xaf1bc845, 0x8b2d777c), + TOBN(0x25c401ba, 0x527de79c), TOBN(0x0e1346d4, 0x4261bbb6), + TOBN(0x4b96c44b, 0x287b4bc7), TOBN(0x658493c7, 0x5254562f), + TOBN(0x23f949fe, 0xb8a24a20), TOBN(0x17ebfed1, 0xf52ca53f), + TOBN(0x9b691bbe, 0xbcfb4853), TOBN(0x5617ff6b, 0x6278a05d), + TOBN(0x241b34c5, 0xe3c99ebd), TOBN(0xfc64242e, 0x1784156a), + TOBN(0x4206482f, 0x695d67df), TOBN(0xb967ce0e, 0xee27c011), + TOBN(0x65db3751, 0x21c80b5d), TOBN(0x2e7a563c, 0xa31ecca0), + TOBN(0xe56ffc4e, 0x5238a07e), TOBN(0x3d6c2966, 0x32ced854), + TOBN(0xe99d7d1a, 0xaf70b885), TOBN(0xafc3bad9, 0x2d686459), + TOBN(0x9c78bf46, 0x0cc8ba5b), TOBN(0x5a439519, 0x18955aa3), + TOBN(0xf8b517a8, 0x5fe4e314), TOBN(0xe60234d0, 0xfcb8906f), + TOBN(0xffe542ac, 0xf2061b23), TOBN(0x287e191f, 0x6b4cb59c), + TOBN(0x21857ddc, 0x09d877d8), TOBN(0x1c23478c, 0x14678941), + TOBN(0xbbf0c056, 0xb6e05ea4), TOBN(0x82da4b53, 0xb01594fe), + TOBN(0xf7526791, 0xfadb8608), TOBN(0x049e832d, 0x7b74cdf6), + TOBN(0xa43581cc, 0xc2b90a34), TOBN(0x73639eb8, 0x9360b10c), + TOBN(0x4fba331f, 0xe1e4a71b), TOBN(0x6ffd6b93, 0x8072f919), + TOBN(0x6e53271c, 0x65679032), TOBN(0x67206444, 0xf14272ce), + TOBN(0xc0f734a3, 0xb2335834), TOBN(0x9526205a, 0x90ef6860), + TOBN(0xcb8be717, 0x04e2bb0d), TOBN(0x2418871e, 0x02f383fa), + TOBN(0xd7177681, 0x4082c157), TOBN(0xcc914ad0, 0x29c20073), + TOBN(0xf186c1eb, 0xe587e728), TOBN(0x6fdb3c22, 0x61bcd5fd), + TOBN(0x30d014a6, 0xf2f9f8e9), TOBN(0x963ece23, 0x4fec49d2), + TOBN(0x862025c5, 0x9605a8d9), TOBN(0x39874445, 0x19f8929a), + TOBN(0x01b6ff65, 0x12bf476a), TOBN(0x598a64d8, 0x09cf7d91), + TOBN(0xd7ec7749, 0x93be56ca), TOBN(0x10899785, 0xcbb33615), + TOBN(0xb8a092fd, 0x02eee3ad), TOBN(0xa86b3d35, 0x30145270), + TOBN(0x323d98c6, 0x8512b675), TOBN(0x4b8bc785, 0x62ebb40f), + TOBN(0x7d301f54, 0x413f9cde), TOBN(0xa5e4fb4f, 0x2bab5664), + TOBN(0x1d2b252d, 0x1cbfec23), TOBN(0xfcd576bb, 0xe177120d), + TOBN(0x04427d3e, 0x83731a34), TOBN(0x2bb9028e, 0xed836e8e), + TOBN(0xb36acff8, 0xb612ca7c), TOBN(0xb88fe5ef, 0xd3d9c73a), + TOBN(0xbe2a6bc6, 0xedea4eb3), TOBN(0x43b93133, 0x488eec77), + TOBN(0xf41ff566, 0xb17106e1), TOBN(0x469e9172, 0x654efa32), + TOBN(0xb4480f04, 0x41c23fa3), TOBN(0xb4712eb0, 0xc1989a2e), + TOBN(0x3ccbba0f, 0x93a29ca7), TOBN(0x6e205c14, 0xd619428c), + TOBN(0x90db7957, 0xb3641686), TOBN(0x0432691d, 0x45ac8b4e), + TOBN(0x07a759ac, 0xf64e0350), TOBN(0x0514d89c, 0x9c972517), + TOBN(0x1701147f, 0xa8e67fc3), TOBN(0x9e2e0b8b, 0xab2085be), + TOBN(0xd5651824, 0xac284e57), TOBN(0x890d4325, 0x74893664), + TOBN(0x8a7c5e6e, 0xc55e68a3), TOBN(0xbf12e90b, 0x4339c85a), + TOBN(0x31846b85, 0xf922b655), TOBN(0x9a54ce4d, 0x0bf4d700), + TOBN(0xd7f4e83a, 0xf1a14295), TOBN(0x916f955c, 0xb285d4f9), + TOBN(0xe57bb0e0, 0x99ffdaba), TOBN(0x28a43034, 0xeab0d152), + TOBN(0x0a36ffa2, 0xb8a9cef8), TOBN(0x5517407e, 0xb9ec051a), + TOBN(0x9c796096, 0xea68e672), TOBN(0x853db5fb, 0xfb3c77fb), + TOBN(0x21474ba9, 0xe864a51a), TOBN(0x6c267699, 0x6e8a1b8b), + TOBN(0x7c823626, 0x94120a28), TOBN(0xe61e9a48, 0x8383a5db), + TOBN(0x7dd75003, 0x9f84216d), TOBN(0xab020d07, 0xad43cd85), + TOBN(0x9437ae48, 0xda12c659), TOBN(0x6449c2eb, 0xe65452ad), + TOBN(0xcc7c4c1c, 0x2cf9d7c1), TOBN(0x1320886a, 0xee95e5ab), + TOBN(0xbb7b9056, 0xbeae170c), TOBN(0xc8a5b250, 0xdbc0d662), + TOBN(0x4ed81432, 0xc11d2303), TOBN(0x7da66912, 0x1f03769f), + TOBN(0x3ac7a5fd, 0x84539828), TOBN(0x14dada94, 0x3bccdd02), + TOBN(0x8b84c321, 0x7ef6b0d1), TOBN(0x52a9477a, 0x7c933f22), + TOBN(0x5ef6728a, 0xfd440b82), TOBN(0x5c3bd859, 0x6ce4bd5e), + TOBN(0x918b80f5, 0xf22c2d3e), TOBN(0x368d5040, 0xb7bb6cc5), + TOBN(0xb66142a1, 0x2695a11c), TOBN(0x60ac583a, 0xeb19ea70), + TOBN(0x317cbb98, 0x0eab2437), TOBN(0x8cc08c55, 0x5e2654c8), + TOBN(0xfe2d6520, 0xe6d8307f), TOBN(0xe9f147f3, 0x57428993), + TOBN(0x5f9c7d14, 0xd2fd6cf1), TOBN(0xa3ecd064, 0x2d4fcbb0), + TOBN(0xad83fef0, 0x8e7341f7), TOBN(0x643f23a0, 0x3a63115c), + TOBN(0xd38a78ab, 0xe65ab743), TOBN(0xbf7c75b1, 0x35edc89c), + TOBN(0x3dd8752e, 0x530df568), TOBN(0xf85c4a76, 0xe308c682), + TOBN(0x4c9955b2, 0xe68acf37), TOBN(0xa544df3d, 0xab32af85), + TOBN(0x4b8ec3f5, 0xa25cf493), TOBN(0x4d8f2764, 0x1a622feb), + TOBN(0x7bb4f7aa, 0xf0dcbc49), TOBN(0x7de551f9, 0x70bbb45b), + TOBN(0xcfd0f3e4, 0x9f2ca2e5), TOBN(0xece58709, 0x1f5c76ef), + TOBN(0x32920edd, 0x167d79ae), TOBN(0x039df8a2, 0xfa7d7ec1), + TOBN(0xf46206c0, 0xbb30af91), TOBN(0x1ff5e2f5, 0x22676b59), + TOBN(0x11f4a039, 0x6ea51d66), TOBN(0x506c1445, 0x807d7a26), + TOBN(0x60da5705, 0x755a9b24), TOBN(0x8fc8cc32, 0x1f1a319e), + TOBN(0x83642d4d, 0x9433d67d), TOBN(0x7fa5cb8f, 0x6a7dd296), + TOBN(0x576591db, 0x9b7bde07), TOBN(0x13173d25, 0x419716fb), + TOBN(0xea30599d, 0xd5b340ff), TOBN(0xfc6b5297, 0xb0fe76c5), + TOBN(0x1c6968c8, 0xab8f5adc), TOBN(0xf723c7f5, 0x901c928d), + TOBN(0x4203c321, 0x9773d402), TOBN(0xdf7c6aa3, 0x1b51dd47), + TOBN(0x3d49e37a, 0x552be23c), TOBN(0x57febee8, 0x0b5a6e87), + TOBN(0xc5ecbee4, 0x7bd8e739), TOBN(0x79d44994, 0xae63bf75), + TOBN(0x168bd00f, 0x38fb8923), TOBN(0x75d48ee4, 0xd0533130), + TOBN(0x554f77aa, 0xdb5cdf33), TOBN(0x3396e896, 0x3c696769), + TOBN(0x2fdddbf2, 0xd3fd674e), TOBN(0xbbb8f6ee, 0x99d0e3e5), + TOBN(0x51b90651, 0xcbae2f70), TOBN(0xefc4bc05, 0x93aaa8eb), + TOBN(0x8ecd8689, 0xdd1df499), TOBN(0x1aee99a8, 0x22f367a5), + TOBN(0x95d485b9, 0xae8274c5), TOBN(0x6c14d445, 0x7d30b39c), + TOBN(0xbafea90b, 0xbcc1ef81), TOBN(0x7c5f317a, 0xa459a2ed), + TOBN(0x01211075, 0x4ef44227), TOBN(0xa17bed6e, 0xdc20f496), + TOBN(0x0cdfe424, 0x819853cd), TOBN(0x13793298, 0xf71e2ce7), + TOBN(0x3c1f3078, 0xdbbe307b), TOBN(0x6dd1c20e, 0x76ee9936), + TOBN(0x23ee4b57, 0x423caa20), TOBN(0x4ac3793b, 0x8efb840e), + TOBN(0x934438eb, 0xed1f8ca0), TOBN(0x3e546658, 0x4ebb25a2), + TOBN(0xc415af0e, 0xc069896f), TOBN(0xc13eddb0, 0x9a5aa43d), + TOBN(0x7a04204f, 0xd49eb8f6), TOBN(0xd0d5bdfc, 0xd74f1670), + TOBN(0x3697e286, 0x56fc0558), TOBN(0x10207371, 0x01cebade), + TOBN(0x5f87e690, 0x0647a82b), TOBN(0x908e0ed4, 0x8f40054f), + TOBN(0xa9f633d4, 0x79853803), TOBN(0x8ed13c9a, 0x4a28b252), + TOBN(0x3e2ef676, 0x1f460f64), TOBN(0x53930b9b, 0x36d06336), + TOBN(0x347073ac, 0x8fc4979b), TOBN(0x84380e0e, 0x5ecd5597), + TOBN(0xe3b22c6b, 0xc4fe3c39), TOBN(0xba4a8153, 0x6c7bebdf), + TOBN(0xf23ab6b7, 0x25693459), TOBN(0x53bc3770, 0x14922b11), + TOBN(0x4645c8ab, 0x5afc60db), TOBN(0xaa022355, 0x20b9f2a3), + TOBN(0x52a2954c, 0xce0fc507), TOBN(0x8c2731bb, 0x7ce1c2e7), + TOBN(0xf39608ab, 0x18a0339d), TOBN(0xac7a658d, 0x3735436c), + TOBN(0xb22c2b07, 0xcd992b4f), TOBN(0x4e83daec, 0xf40dcfd4), + TOBN(0x8a34c7be, 0x2f39ea3e), TOBN(0xef0c005f, 0xb0a56d2e), + TOBN(0x62731f6a, 0x6edd8038), TOBN(0x5721d740, 0x4e3cb075), + TOBN(0x1ea41511, 0xfbeeee1b), TOBN(0xd1ef5e73, 0xef1d0c05), + TOBN(0x42feefd1, 0x73c07d35), TOBN(0xe530a00a, 0x8a329493), + TOBN(0x5d55b7fe, 0xf15ebfb0), TOBN(0x549de03c, 0xd322491a), + TOBN(0xf7b5f602, 0x745b3237), TOBN(0x3632a3a2, 0x1ab6e2b6), + TOBN(0x0d3bba89, 0x0ef59f78), TOBN(0x0dfc6443, 0xc9e52b9a), + TOBN(0x1dc79699, 0x72631447), TOBN(0xef033917, 0xb3be20b1), + TOBN(0x0c92735d, 0xb1383948), TOBN(0xc1fc29a2, 0xc0dd7d7d), + TOBN(0x6485b697, 0x403ed068), TOBN(0x13bfaab3, 0xaac93bdc), + TOBN(0x410dc6a9, 0x0deeaf52), TOBN(0xb003fb02, 0x4c641c15), + TOBN(0x1384978c, 0x5bc504c4), TOBN(0x37640487, 0x864a6a77), + TOBN(0x05991bc6, 0x222a77da), TOBN(0x62260a57, 0x5e47eb11), + TOBN(0xc7af6613, 0xf21b432c), TOBN(0x22f3acc9, 0xab4953e9), + TOBN(0x52934922, 0x8e41d155), TOBN(0x4d024568, 0x3ac059ef), + TOBN(0xb0201755, 0x4d884411), TOBN(0xce8055cf, 0xa59a178f), + TOBN(0xcd77d1af, 0xf6204549), TOBN(0xa0a00a3e, 0xc7066759), + TOBN(0x471071ef, 0x0272c229), TOBN(0x009bcf6b, 0xd3c4b6b0), + TOBN(0x2a2638a8, 0x22305177), TOBN(0xd51d59df, 0x41645bbf), + TOBN(0xa81142fd, 0xc0a7a3c0), TOBN(0xa17eca6d, 0x4c7063ee), + TOBN(0x0bb887ed, 0x60d9dcec), TOBN(0xd6d28e51, 0x20ad2455), + TOBN(0xebed6308, 0xa67102ba), TOBN(0x042c3114, 0x8bffa408), + TOBN(0xfd099ac5, 0x8aa68e30), TOBN(0x7a6a3d7c, 0x1483513e), + TOBN(0xffcc6b75, 0xba2d8f0c), TOBN(0x54dacf96, 0x1e78b954), + TOBN(0xf645696f, 0xa4a9af89), TOBN(0x3a411940, 0x06ac98ec), + TOBN(0x41b8b3f6, 0x22a67a20), TOBN(0x2d0b1e0f, 0x99dec626), + TOBN(0x27c89192, 0x40be34e8), TOBN(0xc7162b37, 0x91907f35), + TOBN(0x90188ec1, 0xa956702b), TOBN(0xca132f7d, 0xdf93769c), + TOBN(0x3ece44f9, 0x0e2025b4), TOBN(0x67aaec69, 0x0c62f14c), + TOBN(0xad741418, 0x22e3cc11), TOBN(0xcf9b75c3, 0x7ff9a50e), + TOBN(0x02fa2b16, 0x4d348272), TOBN(0xbd99d61a, 0x9959d56d), + TOBN(0xbc4f19db, 0x18762916), TOBN(0xcc7cce50, 0x49c1ac80), + TOBN(0x4d59ebaa, 0xd846bd83), TOBN(0x8775a9dc, 0xa9202849), + TOBN(0x07ec4ae1, 0x6e1f4ca9), TOBN(0x27eb5875, 0xba893f11), + TOBN(0x00284d51, 0x662cc565), TOBN(0x82353a6b, 0x0db4138d), + TOBN(0xd9c7aaaa, 0xaa32a594), TOBN(0xf5528b5e, 0xa5669c47), + TOBN(0xf3220231, 0x2f23c5ff), TOBN(0xe3e8147a, 0x6affa3a1), + TOBN(0xfb423d5c, 0x202ddda0), TOBN(0x3d6414ac, 0x6b871bd4), + TOBN(0x586f82e1, 0xa51a168a), TOBN(0xb712c671, 0x48ae5448), + TOBN(0x9a2e4bd1, 0x76233eb8), TOBN(0x0188223a, 0x78811ca9), + TOBN(0x553c5e21, 0xf7c18de1), TOBN(0x7682e451, 0xb27bb286), + TOBN(0x3ed036b3, 0x0e51e929), TOBN(0xf487211b, 0xec9cb34f), + TOBN(0x0d094277, 0x0c24efc8), TOBN(0x0349fd04, 0xbef737a4), + TOBN(0x6d1c9dd2, 0x514cdd28), TOBN(0x29c135ff, 0x30da9521), + TOBN(0xea6e4508, 0xf78b0b6f), TOBN(0x176f5dd2, 0x678c143c), + TOBN(0x08148418, 0x4be21e65), TOBN(0x27f7525c, 0xe7df38c4), + TOBN(0x1fb70e09, 0x748ab1a4), TOBN(0x9cba50a0, 0x5efe4433), + TOBN(0x7846c7a6, 0x15f75af2), TOBN(0x2a7c2c57, 0x5ee73ea8), + TOBN(0x42e566a4, 0x3f0a449a), TOBN(0x45474c3b, 0xad90fc3d), + TOBN(0x7447be3d, 0x8b61d057), TOBN(0x3e9d1cf1, 0x3a4ec092), + TOBN(0x1603e453, 0xf380a6e6), TOBN(0x0b86e431, 0x9b1437c2), + TOBN(0x7a4173f2, 0xef29610a), TOBN(0x8fa729a7, 0xf03d57f7), + TOBN(0x3e186f6e, 0x6c9c217e), TOBN(0xbe1d3079, 0x91919524), + TOBN(0x92a62a70, 0x153d4fb1), TOBN(0x32ed3e34, 0xd68c2f71), + TOBN(0xd785027f, 0x9eb1a8b7), TOBN(0xbc37eb77, 0xc5b22fe8), + TOBN(0x466b34f0, 0xb9d6a191), TOBN(0x008a89af, 0x9a05f816), + TOBN(0x19b028fb, 0x7d42c10a), TOBN(0x7fe8c92f, 0x49b3f6b8), + TOBN(0x58907cc0, 0xa5a0ade3), TOBN(0xb3154f51, 0x559d1a7c), + TOBN(0x5066efb6, 0xd9790ed6), TOBN(0xa77a0cbc, 0xa6aa793b), + TOBN(0x1a915f3c, 0x223e042e), TOBN(0x1c5def04, 0x69c5874b), + TOBN(0x0e830078, 0x73b6c1da), TOBN(0x55cf85d2, 0xfcd8557a), + TOBN(0x0f7c7c76, 0x0460f3b1), TOBN(0x87052acb, 0x46e58063), + TOBN(0x09212b80, 0x907eae66), TOBN(0x3cb068e0, 0x4d721c89), + TOBN(0xa87941ae, 0xdd45ac1c), TOBN(0xde8d5c0d, 0x0daa0dbb), + TOBN(0xda421fdc, 0xe3502e6e), TOBN(0xc8944201, 0x4d89a084), + TOBN(0x7307ba5e, 0xf0c24bfb), TOBN(0xda212beb, 0x20bde0ef), + TOBN(0xea2da24b, 0xf82ce682), TOBN(0x058d3816, 0x07f71fe4), + TOBN(0x35a02462, 0x5ffad8de), TOBN(0xcd7b05dc, 0xaadcefab), + TOBN(0xd442f8ed, 0x1d9f54ec), TOBN(0x8be3d618, 0xb2d3b5ca), + TOBN(0xe2220ed0, 0xe06b2ce2), TOBN(0x82699a5f, 0x1b0da4c0), + TOBN(0x3ff106f5, 0x71c0c3a7), TOBN(0x8f580f5a, 0x0d34180c), + TOBN(0x4ebb120e, 0x22d7d375), TOBN(0x5e5782cc, 0xe9513675), + TOBN(0x2275580c, 0x99c82a70), TOBN(0xe8359fbf, 0x15ea8c4c), + TOBN(0x53b48db8, 0x7b415e70), TOBN(0xaacf2240, 0x100c6014), + TOBN(0x9faaccf5, 0xe4652f1d), TOBN(0xbd6fdd2a, 0xd56157b2), + TOBN(0xa4f4fb1f, 0x6261ec50), TOBN(0x244e55ad, 0x476bcd52), + TOBN(0x881c9305, 0x047d320b), TOBN(0x1ca983d5, 0x6181263f), + TOBN(0x354e9a44, 0x278fb8ee), TOBN(0xad2dbc0f, 0x396e4964), + TOBN(0x723f3aa2, 0x9268b3de), TOBN(0x0d1ca29a, 0xe6e0609a), + TOBN(0x794866aa, 0x6cf44252), TOBN(0x0b59f3e3, 0x01af87ed), + TOBN(0xe234e5ff, 0x7f4a6c51), TOBN(0xa8768fd2, 0x61dc2f7e), + TOBN(0xdafc7332, 0x0a94d81f), TOBN(0xd7f84282, 0x06938ce1), + TOBN(0xae0b3c0e, 0x0546063e), TOBN(0x7fbadcb2, 0x5d61abc6), + TOBN(0xd5d7a2c9, 0x369ac400), TOBN(0xa5978d09, 0xae67d10c), + TOBN(0x290f211e, 0x4f85eaac), TOBN(0xe61e2ad1, 0xfacac681), + TOBN(0xae125225, 0x388384cd), TOBN(0xa7fb68e9, 0xccfde30f), + TOBN(0x7a59b936, 0x3daed4c2), TOBN(0x80a9aa40, 0x2606f789), + TOBN(0xb40c1ea5, 0xf6a6d90a), TOBN(0x948364d3, 0x514d5885), + TOBN(0x062ebc60, 0x70985182), TOBN(0xa6db5b0e, 0x33310895), + TOBN(0x64a12175, 0xe329c2f5), TOBN(0xc5f25bd2, 0x90ea237e), + TOBN(0x7915c524, 0x2d0a4c23), TOBN(0xeb5d26e4, 0x6bb3cc52), + TOBN(0x369a9116, 0xc09e2c92), TOBN(0x0c527f92, 0xcf182cf8), + TOBN(0x9e591938, 0x2aede0ac), TOBN(0xb2922208, 0x6cc34939), + TOBN(0x3c9d8962, 0x99a34361), TOBN(0x3c81836d, 0xc1905fe6), + TOBN(0x4bfeb57f, 0xa001ec5a), TOBN(0xe993f5bb, 0xa0dc5dba), + TOBN(0x47884109, 0x724a1380), TOBN(0x8a0369ab, 0x32fe9a04), + TOBN(0xea068d60, 0x8c927db8), TOBN(0xbf5f37cf, 0x94655741), + TOBN(0x47d402a2, 0x04b6c7ea), TOBN(0x4551c295, 0x6af259cb), + TOBN(0x698b71e7, 0xed77ee8b), TOBN(0xbddf7bd0, 0xf309d5c7), + TOBN(0x6201c22c, 0x34e780ca), TOBN(0xab04f7d8, 0x4c295ef4), + TOBN(0x1c947294, 0x4313a8ce), TOBN(0xe532e4ac, 0x92ca4cfe), + TOBN(0x89738f80, 0xd0a7a97a), TOBN(0xec088c88, 0xa580fd5b), + TOBN(0x612b1ecc, 0x42ce9e51), TOBN(0x8f9840fd, 0xb25fdd2a), + TOBN(0x3cda78c0, 0x01e7f839), TOBN(0x546b3d3a, 0xece05480), + TOBN(0x271719a9, 0x80d30916), TOBN(0x45497107, 0x584c20c4), + TOBN(0xaf8f9478, 0x5bc78608), TOBN(0x28c7d484, 0x277e2a4c), + TOBN(0xfce01767, 0x88a2ffe4), TOBN(0xdc506a35, 0x28e169a5), + TOBN(0x0ea10861, 0x7af9c93a), TOBN(0x1ed24361, 0x03fa0e08), + TOBN(0x96eaaa92, 0xa3d694e7), TOBN(0xc0f43b4d, 0xef50bc74), + TOBN(0xce6aa58c, 0x64114db4), TOBN(0x8218e8ea, 0x7c000fd4), + TOBN(0xac815dfb, 0x185f8844), TOBN(0xcd7e90cb, 0x1557abfb), + TOBN(0x23d16655, 0xafbfecdf), TOBN(0x80f3271f, 0x085cac4a), + TOBN(0x7fc39aa7, 0xd0e62f47), TOBN(0x88d519d1, 0x460a48e5), + TOBN(0x59559ac4, 0xd28f101e), TOBN(0x7981d9e9, 0xca9ae816), + TOBN(0x5c38652c, 0x9ac38203), TOBN(0x86eaf87f, 0x57657fe5), + TOBN(0x568fc472, 0xe21f5416), TOBN(0x2afff39c, 0xe7e597b5), + TOBN(0x3adbbb07, 0x256d4eab), TOBN(0x22598692, 0x8285ab89), + TOBN(0x35f8112a, 0x041caefe), TOBN(0x95df02e3, 0xa5064c8b), + TOBN(0x4d63356e, 0xc7004bf3), TOBN(0x230a08f4, 0xdb83c7de), + TOBN(0xca27b270, 0x8709a7b7), TOBN(0x0d1c4cc4, 0xcb9abd2d), + TOBN(0x8a0bc66e, 0x7550fee8), TOBN(0x369cd4c7, 0x9cf7247e), + TOBN(0x75562e84, 0x92b5b7e7), TOBN(0x8fed0da0, 0x5802af7b), + TOBN(0x6a7091c2, 0xe48fb889), TOBN(0x26882c13, 0x7b8a9d06), + TOBN(0xa2498663, 0x1b82a0e2), TOBN(0x844ed736, 0x3518152d), + TOBN(0x282f476f, 0xd86e27c7), TOBN(0xa04edaca, 0x04afefdc), + TOBN(0x8b256ebc, 0x6119e34d), TOBN(0x56a413e9, 0x0787d78b),} + , + {TOBN(0x82ee061d, 0x5a74be50), TOBN(0xe41781c4, 0xdea16ff5), + TOBN(0xe0b0c81e, 0x99bfc8a2), TOBN(0x624f4d69, 0x0b547e2d), + TOBN(0x3a83545d, 0xbdcc9ae4), TOBN(0x2573dbb6, 0x409b1e8e), + TOBN(0x482960c4, 0xa6c93539), TOBN(0xf01059ad, 0x5ae18798), + TOBN(0x715c9f97, 0x3112795f), TOBN(0xe8244437, 0x984e6ee1), + TOBN(0x55cb4858, 0xecb66bcd), TOBN(0x7c136735, 0xabaffbee), + TOBN(0x54661595, 0x5dbec38e), TOBN(0x51c0782c, 0x388ad153), + TOBN(0x9ba4c53a, 0xc6e0952f), TOBN(0x27e6782a, 0x1b21dfa8), + TOBN(0x682f903d, 0x4ed2dbc2), TOBN(0x0eba59c8, 0x7c3b2d83), + TOBN(0x8e9dc84d, 0x9c7e9335), TOBN(0x5f9b21b0, 0x0eb226d7), + TOBN(0xe33bd394, 0xaf267bae), TOBN(0xaa86cc25, 0xbe2e15ae), + TOBN(0x4f0bf67d, 0x6a8ec500), TOBN(0x5846aa44, 0xf9630658), + TOBN(0xfeb09740, 0xe2c2bf15), TOBN(0x627a2205, 0xa9e99704), + TOBN(0xec8d73d0, 0xc2fbc565), TOBN(0x223eed8f, 0xc20c8de8), + TOBN(0x1ee32583, 0xa8363b49), TOBN(0x1a0b6cb9, 0xc9c2b0a6), + TOBN(0x49f7c3d2, 0x90dbc85c), TOBN(0xa8dfbb97, 0x1ef4c1ac), + TOBN(0xafb34d4c, 0x65c7c2ab), TOBN(0x1d4610e7, 0xe2c5ea84), + TOBN(0x893f6d1b, 0x973c4ab5), TOBN(0xa3cdd7e9, 0x945ba5c4), + TOBN(0x60514983, 0x064417ee), TOBN(0x1459b23c, 0xad6bdf2b), + TOBN(0x23b2c341, 0x5cf726c3), TOBN(0x3a829635, 0x32d6354a), + TOBN(0x294f901f, 0xab192c18), TOBN(0xec5fcbfe, 0x7030164f), + TOBN(0xe2e2fcb7, 0xe2246ba6), TOBN(0x1e7c88b3, 0x221a1a0c), + TOBN(0x72c7dd93, 0xc92d88c5), TOBN(0x41c2148e, 0x1106fb59), + TOBN(0x547dd4f5, 0xa0f60f14), TOBN(0xed9b52b2, 0x63960f31), + TOBN(0x6c8349eb, 0xb0a5b358), TOBN(0xb154c5c2, 0x9e7e2ed6), + TOBN(0xcad5eccf, 0xeda462db), TOBN(0xf2d6dbe4, 0x2de66b69), + TOBN(0x426aedf3, 0x8665e5b2), TOBN(0x488a8513, 0x7b7f5723), + TOBN(0x15cc43b3, 0x8bcbb386), TOBN(0x27ad0af3, 0xd791d879), + TOBN(0xc16c236e, 0x846e364f), TOBN(0x7f33527c, 0xdea50ca0), + TOBN(0xc4810775, 0x0926b86d), TOBN(0x6c2a3609, 0x0598e70c), + TOBN(0xa6755e52, 0xf024e924), TOBN(0xe0fa07a4, 0x9db4afca), + TOBN(0x15c3ce7d, 0x66831790), TOBN(0x5b4ef350, 0xa6cbb0d6), + TOBN(0x2c4aafc4, 0xb6205969), TOBN(0x42563f02, 0xf6c7854f), + TOBN(0x016aced5, 0x1d983b48), TOBN(0xfeb356d8, 0x99949755), + TOBN(0x8c2a2c81, 0xd1a39bd7), TOBN(0x8f44340f, 0xe6934ae9), + TOBN(0x148cf91c, 0x447904da), TOBN(0x7340185f, 0x0f51a926), + TOBN(0x2f8f00fb, 0x7409ab46), TOBN(0x057e78e6, 0x80e289b2), + TOBN(0x03e5022c, 0xa888e5d1), TOBN(0x3c87111a, 0x9dede4e2), + TOBN(0x5b9b0e1c, 0x7809460b), TOBN(0xe751c852, 0x71c9abc7), + TOBN(0x8b944e28, 0xc7cc1dc9), TOBN(0x4f201ffa, 0x1d3cfa08), + TOBN(0x02fc905c, 0x3e6721ce), TOBN(0xd52d70da, 0xd0b3674c), + TOBN(0x5dc2e5ca, 0x18810da4), TOBN(0xa984b273, 0x5c69dd99), + TOBN(0x63b92527, 0x84de5ca4), TOBN(0x2f1c9872, 0xc852dec4), + TOBN(0x18b03593, 0xc2e3de09), TOBN(0x19d70b01, 0x9813dc2f), + TOBN(0x42806b2d, 0xa6dc1d29), TOBN(0xd3030009, 0xf871e144), + TOBN(0xa1feb333, 0xaaf49276), TOBN(0xb5583b9e, 0xc70bc04b), + TOBN(0x1db0be78, 0x95695f20), TOBN(0xfc841811, 0x89d012b5), + TOBN(0x6409f272, 0x05f61643), TOBN(0x40d34174, 0xd5883128), + TOBN(0xd79196f5, 0x67419833), TOBN(0x6059e252, 0x863b7b08), + TOBN(0x84da1817, 0x1c56700c), TOBN(0x5758ee56, 0xb28d3ec4), + TOBN(0x7da2771d, 0x013b0ea6), TOBN(0xfddf524b, 0x54c5e9b9), + TOBN(0x7df4faf8, 0x24305d80), TOBN(0x58f5c1bf, 0x3a97763f), + TOBN(0xa5af37f1, 0x7c696042), TOBN(0xd4cba22c, 0x4a2538de), + TOBN(0x211cb995, 0x9ea42600), TOBN(0xcd105f41, 0x7b069889), + TOBN(0xb1e1cf19, 0xddb81e74), TOBN(0x472f2d89, 0x5157b8ca), + TOBN(0x086fb008, 0xee9db885), TOBN(0x365cd570, 0x0f26d131), + TOBN(0x284b02bb, 0xa2be7053), TOBN(0xdcbbf7c6, 0x7ab9a6d6), + TOBN(0x4425559c, 0x20f7a530), TOBN(0x961f2dfa, 0x188767c8), + TOBN(0xe2fd9435, 0x70dc80c4), TOBN(0x104d6b63, 0xf0784120), + TOBN(0x7f592bc1, 0x53567122), TOBN(0xf6bc1246, 0xf688ad77), + TOBN(0x05214c05, 0x0f15dde9), TOBN(0xa47a76a8, 0x0d5f2b82), + TOBN(0xbb254d30, 0x62e82b62), TOBN(0x11a05fe0, 0x3ec955ee), + TOBN(0x7eaff46e, 0x9d529b36), TOBN(0x55ab1301, 0x8f9e3df6), + TOBN(0xc463e371, 0x99317698), TOBN(0xfd251438, 0xccda47ad), + TOBN(0xca9c3547, 0x23d695ea), TOBN(0x48ce626e, 0x16e589b5), + TOBN(0x6b5b64c7, 0xb187d086), TOBN(0xd02e1794, 0xb2207948), + TOBN(0x8b58e98f, 0x7198111d), TOBN(0x90ca6305, 0xdcf9c3cc), + TOBN(0x5691fe72, 0xf34089b0), TOBN(0x60941af1, 0xfc7c80ff), + TOBN(0xa09bc0a2, 0x22eb51e5), TOBN(0xc0bb7244, 0xaa9cf09a), + TOBN(0x36a8077f, 0x80159f06), TOBN(0x8b5c989e, 0xdddc560e), + TOBN(0x19d2f316, 0x512e1f43), TOBN(0x02eac554, 0xad08ff62), + TOBN(0x012ab84c, 0x07d20b4e), TOBN(0x37d1e115, 0xd6d4e4e1), + TOBN(0xb6443e1a, 0xab7b19a8), TOBN(0xf08d067e, 0xdef8cd45), + TOBN(0x63adf3e9, 0x685e03da), TOBN(0xcf15a10e, 0x4792b916), + TOBN(0xf44bcce5, 0xb738a425), TOBN(0xebe131d5, 0x9636b2fd), + TOBN(0x94068841, 0x7850d605), TOBN(0x09684eaa, 0xb40d749d), + TOBN(0x8c3c669c, 0x72ba075b), TOBN(0x89f78b55, 0xba469015), + TOBN(0x5706aade, 0x3e9f8ba8), TOBN(0x6d8bd565, 0xb32d7ed7), + TOBN(0x25f4e63b, 0x805f08d6), TOBN(0x7f48200d, 0xc3bcc1b5), + TOBN(0x4e801968, 0xb025d847), TOBN(0x74afac04, 0x87cbe0a8), + TOBN(0x43ed2c2b, 0x7e63d690), TOBN(0xefb6bbf0, 0x0223cdb8), + TOBN(0x4fec3cae, 0x2884d3fe), TOBN(0x065ecce6, 0xd75e25a4), + TOBN(0x6c2294ce, 0x69f79071), TOBN(0x0d9a8e5f, 0x044b8666), + TOBN(0x5009f238, 0x17b69d8f), TOBN(0x3c29f8fe, 0xc5dfdaf7), + TOBN(0x9067528f, 0xebae68c4), TOBN(0x5b385632, 0x30c5ba21), + TOBN(0x540df119, 0x1fdd1aec), TOBN(0xcf37825b, 0xcfba4c78), + TOBN(0x77eff980, 0xbeb11454), TOBN(0x40a1a991, 0x60c1b066), + TOBN(0xe8018980, 0xf889a1c7), TOBN(0xb9c52ae9, 0x76c24be0), + TOBN(0x05fbbcce, 0x45650ef4), TOBN(0xae000f10, 0x8aa29ac7), + TOBN(0x884b7172, 0x4f04c470), TOBN(0x7cd4fde2, 0x19bb5c25), + TOBN(0x6477b22a, 0xe8840869), TOBN(0xa8868859, 0x5fbd0686), + TOBN(0xf23cc02e, 0x1116dfba), TOBN(0x76cd563f, 0xd87d7776), + TOBN(0xe2a37598, 0xa9d82abf), TOBN(0x5f188ccb, 0xe6c170f5), + TOBN(0x81682200, 0x5066b087), TOBN(0xda22c212, 0xc7155ada), + TOBN(0x151e5d3a, 0xfbddb479), TOBN(0x4b606b84, 0x6d715b99), + TOBN(0x4a73b54b, 0xf997cb2e), TOBN(0x9a1bfe43, 0x3ecd8b66), + TOBN(0x1c312809, 0x2a67d48a), TOBN(0xcd6a671e, 0x031fa9e2), + TOBN(0xbec3312a, 0x0e43a34a), TOBN(0x1d935639, 0x55ef47d3), + TOBN(0x5ea02489, 0x8fea73ea), TOBN(0x8247b364, 0xa035afb2), + TOBN(0xb58300a6, 0x5265b54c), TOBN(0x3286662f, 0x722c7148), + TOBN(0xb77fd76b, 0xb4ec4c20), TOBN(0xf0a12fa7, 0x0f3fe3fd), + TOBN(0xf845bbf5, 0x41d8c7e8), TOBN(0xe4d969ca, 0x5ec10aa8), + TOBN(0x4c0053b7, 0x43e232a3), TOBN(0xdc7a3fac, 0x37f8a45a), + TOBN(0x3c4261c5, 0x20d81c8f), TOBN(0xfd4b3453, 0xb00eab00), + TOBN(0x76d48f86, 0xd36e3062), TOBN(0x626c5277, 0xa143ff02), + TOBN(0x538174de, 0xaf76f42e), TOBN(0x2267aa86, 0x6407ceac), + TOBN(0xfad76351, 0x72e572d5), TOBN(0xab861af7, 0xba7330eb), + TOBN(0xa0a1c8c7, 0x418d8657), TOBN(0x988821cb, 0x20289a52), + TOBN(0x79732522, 0xcccc18ad), TOBN(0xaadf3f8d, 0xf1a6e027), + TOBN(0xf7382c93, 0x17c2354d), TOBN(0x5ce1680c, 0xd818b689), + TOBN(0x359ebbfc, 0xd9ecbee9), TOBN(0x4330689c, 0x1cae62ac), + TOBN(0xb55ce5b4, 0xc51ac38a), TOBN(0x7921dfea, 0xfe238ee8), + TOBN(0x3972bef8, 0x271d1ca5), TOBN(0x3e423bc7, 0xe8aabd18), + TOBN(0x57b09f3f, 0x44a3e5e3), TOBN(0x5da886ae, 0x7b444d66), + TOBN(0x68206634, 0xa9964375), TOBN(0x356a2fa3, 0x699cd0ff), + TOBN(0xaf0faa24, 0xdba515e9), TOBN(0x536e1f5c, 0xb321d79a), + TOBN(0xd3b9913a, 0x5c04e4ea), TOBN(0xd549dcfe, 0xd6f11513), + TOBN(0xee227bf5, 0x79fd1d94), TOBN(0x9f35afee, 0xb43f2c67), + TOBN(0xd2638d24, 0xf1314f53), TOBN(0x62baf948, 0xcabcd822), + TOBN(0x5542de29, 0x4ef48db0), TOBN(0xb3eb6a04, 0xfc5f6bb2), + TOBN(0x23c110ae, 0x1208e16a), TOBN(0x1a4d15b5, 0xf8363e24), + TOBN(0x30716844, 0x164be00b), TOBN(0xa8e24824, 0xf6f4690d), + TOBN(0x548773a2, 0x90b170cf), TOBN(0xa1bef331, 0x42f191f4), + TOBN(0x70f418d0, 0x9247aa97), TOBN(0xea06028e, 0x48be9147), + TOBN(0xe13122f3, 0xdbfb894e), TOBN(0xbe9b79f6, 0xce274b18), + TOBN(0x85a49de5, 0xca58aadf), TOBN(0x24957758, 0x11487351), + TOBN(0x111def61, 0xbb939099), TOBN(0x1d6a974a, 0x26d13694), + TOBN(0x4474b4ce, 0xd3fc253b), TOBN(0x3a1485e6, 0x4c5db15e), + TOBN(0xe79667b4, 0x147c15b4), TOBN(0xe34f553b, 0x7bc61301), + TOBN(0x032b80f8, 0x17094381), TOBN(0x55d8bafd, 0x723eaa21), + TOBN(0x5a987995, 0xf1c0e74e), TOBN(0x5a9b292e, 0xebba289c), + TOBN(0x413cd4b2, 0xeb4c8251), TOBN(0x98b5d243, 0xd162db0a), + TOBN(0xbb47bf66, 0x68342520), TOBN(0x08d68949, 0xbaa862d1), + TOBN(0x11f349c7, 0xe906abcd), TOBN(0x454ce985, 0xed7bf00e), + TOBN(0xacab5c9e, 0xb55b803b), TOBN(0xb03468ea, 0x31e3c16d), + TOBN(0x5c24213d, 0xd273bf12), TOBN(0x211538eb, 0x71587887), + TOBN(0x198e4a2f, 0x731dea2d), TOBN(0xd5856cf2, 0x74ed7b2a), + TOBN(0x86a632eb, 0x13a664fe), TOBN(0x932cd909, 0xbda41291), + TOBN(0x850e95d4, 0xc0c4ddc0), TOBN(0xc0f422f8, 0x347fc2c9), + TOBN(0xe68cbec4, 0x86076bcb), TOBN(0xf9e7c0c0, 0xcd6cd286), + TOBN(0x65994ddb, 0x0f5f27ca), TOBN(0xe85461fb, 0xa80d59ff), + TOBN(0xff05481a, 0x66601023), TOBN(0xc665427a, 0xfc9ebbfb), + TOBN(0xb0571a69, 0x7587fd52), TOBN(0x935289f8, 0x8d49efce), + TOBN(0x61becc60, 0xea420688), TOBN(0xb22639d9, 0x13a786af), + TOBN(0x1a8e6220, 0x361ecf90), TOBN(0x001f23e0, 0x25506463), + TOBN(0xe4ae9b5d, 0x0a5c2b79), TOBN(0xebc9cdad, 0xd8149db5), + TOBN(0xb33164a1, 0x934aa728), TOBN(0x750eb00e, 0xae9b60f3), + TOBN(0x5a91615b, 0x9b9cfbfd), TOBN(0x97015cbf, 0xef45f7f6), + TOBN(0xb462c4a5, 0xbf5151df), TOBN(0x21adcc41, 0xb07118f2), + TOBN(0xd60c545b, 0x043fa42c), TOBN(0xfc21aa54, 0xe96be1ab), + TOBN(0xe84bc32f, 0x4e51ea80), TOBN(0x3dae45f0, 0x259b5d8d), + TOBN(0xbb73c7eb, 0xc38f1b5e), TOBN(0xe405a74a, 0xe8ae617d), + TOBN(0xbb1ae9c6, 0x9f1c56bd), TOBN(0x8c176b98, 0x49f196a4), + TOBN(0xc448f311, 0x6875092b), TOBN(0xb5afe3de, 0x9f976033), + TOBN(0xa8dafd49, 0x145813e5), TOBN(0x687fc4d9, 0xe2b34226), + TOBN(0xf2dfc92d, 0x4c7ff57f), TOBN(0x004e3fc1, 0x401f1b46), + TOBN(0x5afddab6, 0x1430c9ab), TOBN(0x0bdd41d3, 0x2238e997), + TOBN(0xf0947430, 0x418042ae), TOBN(0x71f9adda, 0xcdddc4cb), + TOBN(0x7090c016, 0xc52dd907), TOBN(0xd9bdf44d, 0x29e2047f), + TOBN(0xe6f1fe80, 0x1b1011a6), TOBN(0xb63accbc, 0xd9acdc78), + TOBN(0xcfc7e235, 0x1272a95b), TOBN(0x0c667717, 0xa6276ac8), + TOBN(0x3c0d3709, 0xe2d7eef7), TOBN(0x5add2b06, 0x9a685b3e), + TOBN(0x363ad32d, 0x14ea5d65), TOBN(0xf8e01f06, 0x8d7dd506), + TOBN(0xc9ea2213, 0x75b4aac6), TOBN(0xed2a2bf9, 0x0d353466), + TOBN(0x439d79b5, 0xe9d3a7c3), TOBN(0x8e0ee5a6, 0x81b7f34b), + TOBN(0xcf3dacf5, 0x1dc4ba75), TOBN(0x1d3d1773, 0xeb3310c7), + TOBN(0xa8e67112, 0x7747ae83), TOBN(0x31f43160, 0x197d6b40), + TOBN(0x0521ccee, 0xcd961400), TOBN(0x67246f11, 0xf6535768), + TOBN(0x702fcc5a, 0xef0c3133), TOBN(0x247cc45d, 0x7e16693b), + TOBN(0xfd484e49, 0xc729b749), TOBN(0x522cef7d, 0xb218320f), + TOBN(0xe56ef405, 0x59ab93b3), TOBN(0x225fba11, 0x9f181071), + TOBN(0x33bd6595, 0x15330ed0), TOBN(0xc4be69d5, 0x1ddb32f7), + TOBN(0x264c7668, 0x0448087c), TOBN(0xac30903f, 0x71432dae), + TOBN(0x3851b266, 0x00f9bf47), TOBN(0x400ed311, 0x6cdd6d03), + TOBN(0x045e79fe, 0xf8fd2424), TOBN(0xfdfd974a, 0xfa6da98b), + TOBN(0x45c9f641, 0x0c1e673a), TOBN(0x76f2e733, 0x5b2c5168), + TOBN(0x1adaebb5, 0x2a601753), TOBN(0xb286514c, 0xc57c2d49), + TOBN(0xd8769670, 0x1e0bfd24), TOBN(0x950c547e, 0x04478922), + TOBN(0xd1d41969, 0xe5d32bfe), TOBN(0x30bc1472, 0x750d6c3e), + TOBN(0x8f3679fe, 0xe0e27f3a), TOBN(0x8f64a7dc, 0xa4a6ee0c), + TOBN(0x2fe59937, 0x633dfb1f), TOBN(0xea82c395, 0x977f2547), + TOBN(0xcbdfdf1a, 0x661ea646), TOBN(0xc7ccc591, 0xb9085451), + TOBN(0x82177962, 0x81761e13), TOBN(0xda57596f, 0x9196885c), + TOBN(0xbc17e849, 0x28ffbd70), TOBN(0x1e6e0a41, 0x2671d36f), + TOBN(0x61ae872c, 0x4152fcf5), TOBN(0x441c87b0, 0x9e77e754), + TOBN(0xd0799dd5, 0xa34dff09), TOBN(0x766b4e44, 0x88a6b171), + TOBN(0xdc06a512, 0x11f1c792), TOBN(0xea02ae93, 0x4be35c3e), + TOBN(0xe5ca4d6d, 0xe90c469e), TOBN(0x4df4368e, 0x56e4ff5c), + TOBN(0x7817acab, 0x4baef62e), TOBN(0x9f5a2202, 0xa85b91e8), + TOBN(0x9666ebe6, 0x6ce57610), TOBN(0x32ad31f3, 0xf73bfe03), + TOBN(0x628330a4, 0x25bcf4d6), TOBN(0xea950593, 0x515056e6), + TOBN(0x59811c89, 0xe1332156), TOBN(0xc89cf1fe, 0x8c11b2d7), + TOBN(0x75b63913, 0x04e60cc0), TOBN(0xce811e8d, 0x4625d375), + TOBN(0x030e43fc, 0x2d26e562), TOBN(0xfbb30b4b, 0x608d36a0), + TOBN(0x634ff82c, 0x48528118), TOBN(0x7c6fe085, 0xcd285911), + TOBN(0x7f2830c0, 0x99358f28), TOBN(0x2e60a95e, 0x665e6c09), + TOBN(0x08407d3d, 0x9b785dbf), TOBN(0x530889ab, 0xa759bce7), + TOBN(0xf228e0e6, 0x52f61239), TOBN(0x2b6d1461, 0x6879be3c), + TOBN(0xe6902c04, 0x51a7bbf7), TOBN(0x30ad99f0, 0x76f24a64), + TOBN(0x66d9317a, 0x98bc6da0), TOBN(0xf4f877f3, 0xcb596ac0), + TOBN(0xb05ff62d, 0x4c44f119), TOBN(0x4555f536, 0xe9b77416), + TOBN(0xc7c0d059, 0x8caed63b), TOBN(0x0cd2b7ce, 0xc358b2a9), + TOBN(0x3f33287b, 0x46945fa3), TOBN(0xf8785b20, 0xd67c8791), + TOBN(0xc54a7a61, 0x9637bd08), TOBN(0x54d4598c, 0x18be79d7), + TOBN(0x889e5acb, 0xc46d7ce1), TOBN(0x9a515bb7, 0x8b085877), + TOBN(0xfac1a03d, 0x0b7a5050), TOBN(0x7d3e738a, 0xf2926035), + TOBN(0x861cc2ce, 0x2a6cb0eb), TOBN(0x6f2e2955, 0x8f7adc79), + TOBN(0x61c4d451, 0x33016376), TOBN(0xd9fd2c80, 0x5ad59090), + TOBN(0xe5a83738, 0xb2b836a1), TOBN(0x855b41a0, 0x7c0d6622), + TOBN(0x186fe317, 0x7cc19af1), TOBN(0x6465c1ff, 0xfdd99acb), + TOBN(0x46e5c23f, 0x6974b99e), TOBN(0x75a7cf8b, 0xa2717cbe), + TOBN(0x4d2ebc3f, 0x062be658), TOBN(0x094b4447, 0x5f209c98), + TOBN(0x4af285ed, 0xb940cb5a), TOBN(0x6706d792, 0x7cc82f10), + TOBN(0xc8c8776c, 0x030526fa), TOBN(0xfa8e6f76, 0xa0da9140), + TOBN(0x77ea9d34, 0x591ee4f0), TOBN(0x5f46e337, 0x40274166), + TOBN(0x1bdf98bb, 0xea671457), TOBN(0xd7c08b46, 0x862a1fe2), + TOBN(0x46cc303c, 0x1c08ad63), TOBN(0x99543440, 0x4c845e7b), + TOBN(0x1b8fbdb5, 0x48f36bf7), TOBN(0x5b82c392, 0x8c8273a7), + TOBN(0x08f712c4, 0x928435d5), TOBN(0x071cf0f1, 0x79330380), + TOBN(0xc74c2d24, 0xa8da054a), TOBN(0xcb0e7201, 0x43c46b5c), + TOBN(0x0ad7337a, 0xc0b7eff3), TOBN(0x8552225e, 0xc5e48b3c), + TOBN(0xe6f78b0c, 0x73f13a5f), TOBN(0x5e70062e, 0x82349cbe), + TOBN(0x6b8d5048, 0xe7073969), TOBN(0x392d2a29, 0xc33cb3d2), + TOBN(0xee4f727c, 0x4ecaa20f), TOBN(0xa068c99e, 0x2ccde707), + TOBN(0xfcd5651f, 0xb87a2913), TOBN(0xea3e3c15, 0x3cc252f0), + TOBN(0x777d92df, 0x3b6cd3e4), TOBN(0x7a414143, 0xc5a732e7), + TOBN(0xa895951a, 0xa71ff493), TOBN(0xfe980c92, 0xbbd37cf6), + TOBN(0x45bd5e64, 0xdecfeeff), TOBN(0x910dc2a9, 0xa44c43e9), + TOBN(0xcb403f26, 0xcca9f54d), TOBN(0x928bbdfb, 0x9303f6db), + TOBN(0x3c37951e, 0xa9eee67c), TOBN(0x3bd61a52, 0xf79961c3), + TOBN(0x09a238e6, 0x395c9a79), TOBN(0x6940ca2d, 0x61eb352d), + TOBN(0x7d1e5c5e, 0xc1875631), TOBN(0x1e19742c, 0x1e1b20d1), + TOBN(0x4633d908, 0x23fc2e6e), TOBN(0xa76e29a9, 0x08959149), + TOBN(0x61069d9c, 0x84ed7da5), TOBN(0x0baa11cf, 0x5dbcad51), + TOBN(0xd01eec64, 0x961849da), TOBN(0x93b75f1f, 0xaf3d8c28), + TOBN(0x57bc4f9f, 0x1ca2ee44), TOBN(0x5a26322d, 0x00e00558), + TOBN(0x1888d658, 0x61a023ef), TOBN(0x1d72aab4, 0xb9e5246e), + TOBN(0xa9a26348, 0xe5563ec0), TOBN(0xa0971963, 0xc3439a43), + TOBN(0x567dd54b, 0xadb9b5b7), TOBN(0x73fac1a1, 0xc45a524b), + TOBN(0x8fe97ef7, 0xfe38e608), TOBN(0x608748d2, 0x3f384f48), + TOBN(0xb0571794, 0xc486094f), TOBN(0x869254a3, 0x8bf3a8d6), + TOBN(0x148a8dd1, 0x310b0e25), TOBN(0x99ab9f3f, 0x9aa3f7d8), + TOBN(0x0927c68a, 0x6706c02e), TOBN(0x22b5e76c, 0x69790e6c), + TOBN(0x6c325260, 0x6c71376c), TOBN(0x53a57690, 0x09ef6657), + TOBN(0x8d63f852, 0xedffcf3a), TOBN(0xb4d2ed04, 0x3c0a6f55), + TOBN(0xdb3aa8de, 0x12519b9e), TOBN(0x5d38e9c4, 0x1e0a569a), + TOBN(0x871528bf, 0x303747e2), TOBN(0xa208e77c, 0xf5b5c18d), + TOBN(0x9d129c88, 0xca6bf923), TOBN(0xbcbf197f, 0xbf02839f), + TOBN(0x9b9bf030, 0x27323194), TOBN(0x3b055a8b, 0x339ca59d), + TOBN(0xb46b2312, 0x0f669520), TOBN(0x19789f1f, 0x497e5f24), + TOBN(0x9c499468, 0xaaf01801), TOBN(0x72ee1190, 0x8b69d59c), + TOBN(0x8bd39595, 0xacf4c079), TOBN(0x3ee11ece, 0x8e0cd048), + TOBN(0xebde86ec, 0x1ed66f18), TOBN(0x225d906b, 0xd61fce43), + TOBN(0x5cab07d6, 0xe8bed74d), TOBN(0x16e4617f, 0x27855ab7), + TOBN(0x6568aadd, 0xb2fbc3dd), TOBN(0xedb5484f, 0x8aeddf5b), + TOBN(0x878f20e8, 0x6dcf2fad), TOBN(0x3516497c, 0x615f5699),} + , + {TOBN(0xef0a3fec, 0xfa181e69), TOBN(0x9ea02f81, 0x30d69a98), + TOBN(0xb2e9cf8e, 0x66eab95d), TOBN(0x520f2beb, 0x24720021), + TOBN(0x621c540a, 0x1df84361), TOBN(0x12037721, 0x71fa6d5d), + TOBN(0x6e3c7b51, 0x0ff5f6ff), TOBN(0x817a069b, 0xabb2bef3), + TOBN(0x83572fb6, 0xb294cda6), TOBN(0x6ce9bf75, 0xb9039f34), + TOBN(0x20e012f0, 0x095cbb21), TOBN(0xa0aecc1b, 0xd063f0da), + TOBN(0x57c21c3a, 0xf02909e5), TOBN(0xc7d59ecf, 0x48ce9cdc), + TOBN(0x2732b844, 0x8ae336f8), TOBN(0x056e3723, 0x3f4f85f4), + TOBN(0x8a10b531, 0x89e800ca), TOBN(0x50fe0c17, 0x145208fd), + TOBN(0x9e43c0d3, 0xb714ba37), TOBN(0x427d200e, 0x34189acc), + TOBN(0x05dee24f, 0xe616e2c0), TOBN(0x9c25f4c8, 0xee1854c1), + TOBN(0x4d3222a5, 0x8f342a73), TOBN(0x0807804f, 0xa027c952), + TOBN(0xc222653a, 0x4f0d56f3), TOBN(0x961e4047, 0xca28b805), + TOBN(0x2c03f8b0, 0x4a73434b), TOBN(0x4c966787, 0xab712a19), + TOBN(0xcc196c42, 0x864fee42), TOBN(0xc1be93da, 0x5b0ece5c), + TOBN(0xa87d9f22, 0xc131c159), TOBN(0x2bb6d593, 0xdce45655), + TOBN(0x22c49ec9, 0xb809b7ce), TOBN(0x8a41486b, 0xe2c72c2c), + TOBN(0x813b9420, 0xfea0bf36), TOBN(0xb3d36ee9, 0xa66dac69), + TOBN(0x6fddc08a, 0x328cc987), TOBN(0x0a3bcd2c, 0x3a326461), + TOBN(0x7103c49d, 0xd810dbba), TOBN(0xf9d81a28, 0x4b78a4c4), + TOBN(0x3de865ad, 0xe4d55941), TOBN(0xdedafa5e, 0x30384087), + TOBN(0x6f414abb, 0x4ef18b9b), TOBN(0x9ee9ea42, 0xfaee5268), + TOBN(0x260faa16, 0x37a55a4a), TOBN(0xeb19a514, 0x015f93b9), + TOBN(0x51d7ebd2, 0x9e9c3598), TOBN(0x523fc56d, 0x1932178e), + TOBN(0x501d070c, 0xb98fe684), TOBN(0xd60fbe9a, 0x124a1458), + TOBN(0xa45761c8, 0x92bc6b3f), TOBN(0xf5384858, 0xfe6f27cb), + TOBN(0x4b0271f7, 0xb59e763b), TOBN(0x3d4606a9, 0x5b5a8e5e), + TOBN(0x1eda5d9b, 0x05a48292), TOBN(0xda7731d0, 0xe6fec446), + TOBN(0xa3e33693, 0x90d45871), TOBN(0xe9764040, 0x06166d8d), + TOBN(0xb5c33682, 0x89a90403), TOBN(0x4bd17983, 0x72f1d637), + TOBN(0xa616679e, 0xd5d2c53a), TOBN(0x5ec4bcd8, 0xfdcf3b87), + TOBN(0xae6d7613, 0xb66a694e), TOBN(0x7460fc76, 0xe3fc27e5), + TOBN(0x70469b82, 0x95caabee), TOBN(0xde024ca5, 0x889501e3), + TOBN(0x6bdadc06, 0x076ed265), TOBN(0x0cb1236b, 0x5a0ef8b2), + TOBN(0x4065ddbf, 0x0972ebf9), TOBN(0xf1dd3875, 0x22aca432), + TOBN(0xa88b97cf, 0x744aff76), TOBN(0xd1359afd, 0xfe8e3d24), + TOBN(0x52a3ba2b, 0x91502cf3), TOBN(0x2c3832a8, 0x084db75d), + TOBN(0x04a12ddd, 0xde30b1c9), TOBN(0x7802eabc, 0xe31fd60c), + TOBN(0x33707327, 0xa37fddab), TOBN(0x65d6f2ab, 0xfaafa973), + TOBN(0x3525c5b8, 0x11e6f91a), TOBN(0x76aeb0c9, 0x5f46530b), + TOBN(0xe8815ff6, 0x2f93a675), TOBN(0xa6ec9684, 0x05f48679), + TOBN(0x6dcbb556, 0x358ae884), TOBN(0x0af61472, 0xe19e3873), + TOBN(0x72334372, 0xa5f696be), TOBN(0xc65e57ea, 0x6f22fb70), + TOBN(0x268da30c, 0x946cea90), TOBN(0x136a8a87, 0x65681b2a), + TOBN(0xad5e81dc, 0x0f9f44d4), TOBN(0xf09a6960, 0x2c46585a), + TOBN(0xd1649164, 0xc447d1b1), TOBN(0x3b4b36c8, 0x879dc8b1), + TOBN(0x20d4177b, 0x3b6b234c), TOBN(0x096a2505, 0x1730d9d0), + TOBN(0x0611b9b8, 0xef80531d), TOBN(0xba904b3b, 0x64bb495d), + TOBN(0x1192d9d4, 0x93a3147a), TOBN(0x9f30a5dc, 0x9a565545), + TOBN(0x90b1f9cb, 0x6ef07212), TOBN(0x29958546, 0x0d87fc13), + TOBN(0xd3323eff, 0xc17db9ba), TOBN(0xcb18548c, 0xcb1644a8), + TOBN(0x18a306d4, 0x4f49ffbc), TOBN(0x28d658f1, 0x4c2e8684), + TOBN(0x44ba60cd, 0xa99f8c71), TOBN(0x67b7abdb, 0x4bf742ff), + TOBN(0x66310f9c, 0x914b3f99), TOBN(0xae430a32, 0xf412c161), + TOBN(0x1e6776d3, 0x88ace52f), TOBN(0x4bc0fa24, 0x52d7067d), + TOBN(0x03c286aa, 0x8f07cd1b), TOBN(0x4cb8f38c, 0xa985b2c1), + TOBN(0x83ccbe80, 0x8c3bff36), TOBN(0x005a0bd2, 0x5263e575), + TOBN(0x460d7dda, 0x259bdcd1), TOBN(0x4a1c5642, 0xfa5cab6b), + TOBN(0x2b7bdbb9, 0x9fe4fc88), TOBN(0x09418e28, 0xcc97bbb5), + TOBN(0xd8274fb4, 0xa12321ae), TOBN(0xb137007d, 0x5c87b64e), + TOBN(0x80531fe1, 0xc63c4962), TOBN(0x50541e89, 0x981fdb25), + TOBN(0xdc1291a1, 0xfd4c2b6b), TOBN(0xc0693a17, 0xa6df4fca), + TOBN(0xb2c4604e, 0x0117f203), TOBN(0x245f1963, 0x0a99b8d0), + TOBN(0xaedc20aa, 0xc6212c44), TOBN(0xb1ed4e56, 0x520f52a8), + TOBN(0xfe48f575, 0xf8547be3), TOBN(0x0a7033cd, 0xa9e45f98), + TOBN(0x4b45d3a9, 0x18c50100), TOBN(0xb2a6cd6a, 0xa61d41da), + TOBN(0x60bbb4f5, 0x57933c6b), TOBN(0xa7538ebd, 0x2b0d7ffc), + TOBN(0x9ea3ab8d, 0x8cd626b6), TOBN(0x8273a484, 0x3601625a), + TOBN(0x88859845, 0x0168e508), TOBN(0x8cbc9bb2, 0x99a94abd), + TOBN(0x713ac792, 0xfab0a671), TOBN(0xa3995b19, 0x6c9ebffc), + TOBN(0xe711668e, 0x1239e152), TOBN(0x56892558, 0xbbb8dff4), + TOBN(0x8bfc7dab, 0xdbf17963), TOBN(0x5b59fe5a, 0xb3de1253), + TOBN(0x7e3320eb, 0x34a9f7ae), TOBN(0xe5e8cf72, 0xd751efe4), + TOBN(0x7ea003bc, 0xd9be2f37), TOBN(0xc0f551a0, 0xb6c08ef7), + TOBN(0x56606268, 0x038f6725), TOBN(0x1dd38e35, 0x6d92d3b6), + TOBN(0x07dfce7c, 0xc3cbd686), TOBN(0x4e549e04, 0x651c5da8), + TOBN(0x4058f93b, 0x08b19340), TOBN(0xc2fae6f4, 0xcac6d89d), + TOBN(0x4bad8a8c, 0x8f159cc7), TOBN(0x0ddba4b3, 0xcb0b601c), + TOBN(0xda4fc7b5, 0x1dd95f8c), TOBN(0x1d163cd7, 0xcea5c255), + TOBN(0x30707d06, 0x274a8c4c), TOBN(0x79d9e008, 0x2802e9ce), + TOBN(0x02a29ebf, 0xe6ddd505), TOBN(0x37064e74, 0xb50bed1a), + TOBN(0x3f6bae65, 0xa7327d57), TOBN(0x3846f5f1, 0xf83920bc), + TOBN(0x87c37491, 0x60df1b9b), TOBN(0x4cfb2895, 0x2d1da29f), + TOBN(0x10a478ca, 0x4ed1743c), TOBN(0x390c6030, 0x3edd47c6), + TOBN(0x8f3e5312, 0x8c0a78de), TOBN(0xccd02bda, 0x1e85df70), + TOBN(0xd6c75c03, 0xa61b6582), TOBN(0x0762921c, 0xfc0eebd1), + TOBN(0xd34d0823, 0xd85010c0), TOBN(0xd73aaacb, 0x0044cf1f), + TOBN(0xfb4159bb, 0xa3b5e78a), TOBN(0x2287c7f7, 0xe5826f3f), + TOBN(0x4aeaf742, 0x580b1a01), TOBN(0xf080415d, 0x60423b79), + TOBN(0xe12622cd, 0xa7dea144), TOBN(0x49ea4996, 0x59d62472), + TOBN(0xb42991ef, 0x571f3913), TOBN(0x0610f214, 0xf5b25a8a), + TOBN(0x47adc585, 0x30b79e8f), TOBN(0xf90e3df6, 0x07a065a2), + TOBN(0x5d0a5deb, 0x43e2e034), TOBN(0x53fb5a34, 0x444024aa), + TOBN(0xa8628c68, 0x6b0c9f7f), TOBN(0x9c69c29c, 0xac563656), + TOBN(0x5a231feb, 0xbace47b6), TOBN(0xbdce0289, 0x9ea5a2ec), + TOBN(0x05da1fac, 0x9463853e), TOBN(0x96812c52, 0x509e78aa), + TOBN(0xd3fb5771, 0x57151692), TOBN(0xeb2721f8, 0xd98e1c44), + TOBN(0xc0506087, 0x32399be1), TOBN(0xda5a5511, 0xd979d8b8), + TOBN(0x737ed55d, 0xc6f56780), TOBN(0xe20d3004, 0x0dc7a7f4), + TOBN(0x02ce7301, 0xf5941a03), TOBN(0x91ef5215, 0xed30f83a), + TOBN(0x28727fc1, 0x4092d85f), TOBN(0x72d223c6, 0x5c49e41a), + TOBN(0xa7cf30a2, 0xba6a4d81), TOBN(0x7c086209, 0xb030d87d), + TOBN(0x04844c7d, 0xfc588b09), TOBN(0x728cd499, 0x5874bbb0), + TOBN(0xcc1281ee, 0xe84c0495), TOBN(0x0769b5ba, 0xec31958f), + TOBN(0x665c228b, 0xf99c2471), TOBN(0xf2d8a11b, 0x191eb110), + TOBN(0x4594f494, 0xd36d7024), TOBN(0x482ded8b, 0xcdcb25a1), + TOBN(0xc958a9d8, 0xdadd4885), TOBN(0x7004477e, 0xf1d2b547), + TOBN(0x0a45f6ef, 0x2a0af550), TOBN(0x4fc739d6, 0x2f8d6351), + TOBN(0x75cdaf27, 0x786f08a9), TOBN(0x8700bb26, 0x42c2737f), + TOBN(0x855a7141, 0x1c4e2670), TOBN(0x810188c1, 0x15076fef), + TOBN(0xc251d0c9, 0xabcd3297), TOBN(0xae4c8967, 0xf48108eb), + TOBN(0xbd146de7, 0x18ceed30), TOBN(0xf9d4f07a, 0xc986bced), + TOBN(0x5ad98ed5, 0x83fa1e08), TOBN(0x7780d33e, 0xbeabd1fb), + TOBN(0xe330513c, 0x903b1196), TOBN(0xba11de9e, 0xa47bc8c4), + TOBN(0x684334da, 0x02c2d064), TOBN(0x7ecf360d, 0xa48de23b), + TOBN(0x57a1b474, 0x0a9089d8), TOBN(0xf28fa439, 0xff36734c), + TOBN(0xf2a482cb, 0xea4570b3), TOBN(0xee65d68b, 0xa5ebcee9), + TOBN(0x988d0036, 0xb9694cd5), TOBN(0x53edd0e9, 0x37885d32), + TOBN(0xe37e3307, 0xbeb9bc6d), TOBN(0xe9abb907, 0x9f5c6768), + TOBN(0x4396ccd5, 0x51f2160f), TOBN(0x2500888c, 0x47336da6), + TOBN(0x383f9ed9, 0x926fce43), TOBN(0x809dd1c7, 0x04da2930), + TOBN(0x30f6f596, 0x8a4cb227), TOBN(0x0d700c7f, 0x73a56b38), + TOBN(0x1825ea33, 0xab64a065), TOBN(0xaab9b735, 0x1338df80), + TOBN(0x1516100d, 0x9b63f57f), TOBN(0x2574395a, 0x27a6a634), + TOBN(0xb5560fb6, 0x700a1acd), TOBN(0xe823fd73, 0xfd999681), + TOBN(0xda915d1f, 0x6cb4e1ba), TOBN(0x0d030118, 0x6ebe00a3), + TOBN(0x744fb0c9, 0x89fca8cd), TOBN(0x970d01db, 0xf9da0e0b), + TOBN(0x0ad8c564, 0x7931d76f), TOBN(0xb15737bf, 0xf659b96a), + TOBN(0xdc9933e8, 0xa8b484e7), TOBN(0xb2fdbdf9, 0x7a26dec7), + TOBN(0x2349e9a4, 0x9f1f0136), TOBN(0x7860368e, 0x70fddddb), + TOBN(0xd93d2c1c, 0xf9ad3e18), TOBN(0x6d6c5f17, 0x689f4e79), + TOBN(0x7a544d91, 0xb24ff1b6), TOBN(0x3e12a5eb, 0xfe16cd8c), + TOBN(0x543574e9, 0xa56b872f), TOBN(0xa1ad550c, 0xfcf68ea2), + TOBN(0x689e37d2, 0x3f560ef7), TOBN(0x8c54b9ca, 0xc9d47a8b), + TOBN(0x46d40a4a, 0x088ac342), TOBN(0xec450c7c, 0x1576c6d0), + TOBN(0xb589e31c, 0x1f9689e9), TOBN(0xdacf2602, 0xb8781718), + TOBN(0xa89237c6, 0xc8cb6b42), TOBN(0x1326fc93, 0xb96ef381), + TOBN(0x55d56c6d, 0xb5f07825), TOBN(0xacba2eea, 0x7449e22d), + TOBN(0x74e0887a, 0x633c3000), TOBN(0xcb6cd172, 0xd7cbcf71), + TOBN(0x309e81de, 0xc36cf1be), TOBN(0x07a18a6d, 0x60ae399b), + TOBN(0xb36c2679, 0x9edce57e), TOBN(0x52b892f4, 0xdf001d41), + TOBN(0xd884ae5d, 0x16a1f2c6), TOBN(0x9b329424, 0xefcc370a), + TOBN(0x3120daf2, 0xbd2e21df), TOBN(0x55298d2d, 0x02470a99), + TOBN(0x0b78af6c, 0xa05db32e), TOBN(0x5c76a331, 0x601f5636), + TOBN(0xaae861ff, 0xf8a4f29c), TOBN(0x70dc9240, 0xd68f8d49), + TOBN(0x960e649f, 0x81b1321c), TOBN(0x3d2c801b, 0x8792e4ce), + TOBN(0xf479f772, 0x42521876), TOBN(0x0bed93bc, 0x416c79b1), + TOBN(0xa67fbc05, 0x263e5bc9), TOBN(0x01e8e630, 0x521db049), + TOBN(0x76f26738, 0xc6f3431e), TOBN(0xe609cb02, 0xe3267541), + TOBN(0xb10cff2d, 0x818c877c), TOBN(0x1f0e75ce, 0x786a13cb), + TOBN(0xf4fdca64, 0x1158544d), TOBN(0x5d777e89, 0x6cb71ed0), + TOBN(0x3c233737, 0xa9aa4755), TOBN(0x7b453192, 0xe527ab40), + TOBN(0xdb59f688, 0x39f05ffe), TOBN(0x8f4f4be0, 0x6d82574e), + TOBN(0xcce3450c, 0xee292d1b), TOBN(0xaa448a12, 0x61ccd086), + TOBN(0xabce91b3, 0xf7914967), TOBN(0x4537f09b, 0x1908a5ed), + TOBN(0xa812421e, 0xf51042e7), TOBN(0xfaf5cebc, 0xec0b3a34), + TOBN(0x730ffd87, 0x4ca6b39a), TOBN(0x70fb72ed, 0x02efd342), + TOBN(0xeb4735f9, 0xd75c8edb), TOBN(0xc11f2157, 0xc278aa51), + TOBN(0xc459f635, 0xbf3bfebf), TOBN(0x3a1ff0b4, 0x6bd9601f), + TOBN(0xc9d12823, 0xc420cb73), TOBN(0x3e9af3e2, 0x3c2915a3), + TOBN(0xe0c82c72, 0xb41c3440), TOBN(0x175239e5, 0xe3039a5f), + TOBN(0xe1084b8a, 0x558795a3), TOBN(0x328d0a1d, 0xd01e5c60), + TOBN(0x0a495f2e, 0xd3788a04), TOBN(0x25d8ff16, 0x66c11a9f), + TOBN(0xf5155f05, 0x9ed692d6), TOBN(0x954fa107, 0x4f425fe4), + TOBN(0xd16aabf2, 0xe98aaa99), TOBN(0x90cd8ba0, 0x96b0f88a), + TOBN(0x957f4782, 0xc154026a), TOBN(0x54ee0734, 0x52af56d2), + TOBN(0xbcf89e54, 0x45b4147a), TOBN(0x3d102f21, 0x9a52816c), + TOBN(0x6808517e, 0x39b62e77), TOBN(0x92e25421, 0x69169ad8), + TOBN(0xd721d871, 0xbb608558), TOBN(0x60e4ebae, 0xf6d4ff9b), + TOBN(0x0ba10819, 0x41f2763e), TOBN(0xca2e45be, 0x51ee3247), + TOBN(0x66d172ec, 0x2bfd7a5f), TOBN(0x528a8f2f, 0x74d0b12d), + TOBN(0xe17f1e38, 0xdabe70dc), TOBN(0x1d5d7316, 0x9f93983c), + TOBN(0x51b2184a, 0xdf423e31), TOBN(0xcb417291, 0xaedb1a10), + TOBN(0x2054ca93, 0x625bcab9), TOBN(0x54396860, 0xa98998f0), + TOBN(0x4e53f6c4, 0xa54ae57e), TOBN(0x0ffeb590, 0xee648e9d), + TOBN(0xfbbdaadc, 0x6afaf6bc), TOBN(0xf88ae796, 0xaa3bfb8a), + TOBN(0x209f1d44, 0xd2359ed9), TOBN(0xac68dd03, 0xf3544ce2), + TOBN(0xf378da47, 0xfd51e569), TOBN(0xe1abd860, 0x2cc80097), + TOBN(0x23ca18d9, 0x343b6e3a), TOBN(0x480797e8, 0xb40a1bae), + TOBN(0xd1f0c717, 0x533f3e67), TOBN(0x44896970, 0x06e6cdfc), + TOBN(0x8ca21055, 0x52a82e8d), TOBN(0xb2caf785, 0x78460cdc), + TOBN(0x4c1b7b62, 0xe9037178), TOBN(0xefc09d2c, 0xdb514b58), + TOBN(0x5f2df9ee, 0x9113be5c), TOBN(0x2fbda78f, 0xb3f9271c), + TOBN(0xe09a81af, 0x8f83fc54), TOBN(0x06b13866, 0x8afb5141), + TOBN(0x38f6480f, 0x43e3865d), TOBN(0x72dd77a8, 0x1ddf47d9), + TOBN(0xf2a8e971, 0x4c205ff7), TOBN(0x46d449d8, 0x9d088ad8), + TOBN(0x926619ea, 0x185d706f), TOBN(0xe47e02eb, 0xc7dd7f62), + TOBN(0xe7f120a7, 0x8cbc2031), TOBN(0xc18bef00, 0x998d4ac9), + TOBN(0x18f37a9c, 0x6bdf22da), TOBN(0xefbc432f, 0x90dc82df), + TOBN(0xc52cef8e, 0x5d703651), TOBN(0x82887ba0, 0xd99881a5), + TOBN(0x7cec9dda, 0xb920ec1d), TOBN(0xd0d7e8c3, 0xec3e8d3b), + TOBN(0x445bc395, 0x4ca88747), TOBN(0xedeaa2e0, 0x9fd53535), + TOBN(0x461b1d93, 0x6cc87475), TOBN(0xd92a52e2, 0x6d2383bd), + TOBN(0xfabccb59, 0xd7903546), TOBN(0x6111a761, 0x3d14b112), + TOBN(0x0ae584fe, 0xb3d5f612), TOBN(0x5ea69b8d, 0x60e828ec), + TOBN(0x6c078985, 0x54087030), TOBN(0x649cab04, 0xac4821fe), + TOBN(0x25ecedcf, 0x8bdce214), TOBN(0xb5622f72, 0x86af7361), + TOBN(0x0e1227aa, 0x7038b9e2), TOBN(0xd0efb273, 0xac20fa77), + TOBN(0x817ff88b, 0x79df975b), TOBN(0x856bf286, 0x1999503e), + TOBN(0xb4d5351f, 0x5038ec46), TOBN(0x740a52c5, 0xfc42af6e), + TOBN(0x2e38bb15, 0x2cbb1a3f), TOBN(0xc3eb99fe, 0x17a83429), + TOBN(0xca4fcbf1, 0xdd66bb74), TOBN(0x880784d6, 0xcde5e8fc), + TOBN(0xddc84c1c, 0xb4e7a0be), TOBN(0x8780510d, 0xbd15a72f), + TOBN(0x44bcf1af, 0x81ec30e1), TOBN(0x141e50a8, 0x0a61073e), + TOBN(0x0d955718, 0x47be87ae), TOBN(0x68a61417, 0xf76a4372), + TOBN(0xf57e7e87, 0xc607c3d3), TOBN(0x043afaf8, 0x5252f332), + TOBN(0xcc14e121, 0x1552a4d2), TOBN(0xb6dee692, 0xbb4d4ab4), + TOBN(0xb6ab74c8, 0xa03816a4), TOBN(0x84001ae4, 0x6f394a29), + TOBN(0x5bed8344, 0xd795fb45), TOBN(0x57326e7d, 0xb79f55a5), + TOBN(0xc9533ce0, 0x4accdffc), TOBN(0x53473caf, 0x3993fa04), + TOBN(0x7906eb93, 0xa13df4c8), TOBN(0xa73e51f6, 0x97cbe46f), + TOBN(0xd1ab3ae1, 0x0ae4ccf8), TOBN(0x25614508, 0x8a5b3dbc), + TOBN(0x61eff962, 0x11a71b27), TOBN(0xdf71412b, 0x6bb7fa39), + TOBN(0xb31ba6b8, 0x2bd7f3ef), TOBN(0xb0b9c415, 0x69180d29), + TOBN(0xeec14552, 0x014cdde5), TOBN(0x702c624b, 0x227b4bbb), + TOBN(0x2b15e8c2, 0xd3e988f3), TOBN(0xee3bcc6d, 0xa4f7fd04), + TOBN(0x9d00822a, 0x42ac6c85), TOBN(0x2db0cea6, 0x1df9f2b7), + TOBN(0xd7cad2ab, 0x42de1e58), TOBN(0x346ed526, 0x2d6fbb61), + TOBN(0xb3962995, 0x1a2faf09), TOBN(0x2fa8a580, 0x7c25612e), + TOBN(0x30ae04da, 0x7cf56490), TOBN(0x75662908, 0x0eea3961), + TOBN(0x3609f5c5, 0x3d080847), TOBN(0xcb081d39, 0x5241d4f6), + TOBN(0xb4fb3810, 0x77961a63), TOBN(0xc20c5984, 0x2abb66fc), + TOBN(0x3d40aa7c, 0xf902f245), TOBN(0x9cb12736, 0x4e536b1e), + TOBN(0x5eda24da, 0x99b3134f), TOBN(0xafbd9c69, 0x5cd011af), + TOBN(0x9a16e30a, 0xc7088c7d), TOBN(0x5ab65710, 0x3207389f), + TOBN(0x1b09547f, 0xe7407a53), TOBN(0x2322f9d7, 0x4fdc6eab), + TOBN(0xc0f2f22d, 0x7430de4d), TOBN(0x19382696, 0xe68ca9a9), + TOBN(0x17f1eff1, 0x918e5868), TOBN(0xe3b5b635, 0x586f4204), + TOBN(0x146ef980, 0x3fbc4341), TOBN(0x359f2c80, 0x5b5eed4e), + TOBN(0x9f35744e, 0x7482e41d), TOBN(0x9a9ac3ec, 0xf3b224c2), + TOBN(0x9161a6fe, 0x91fc50ae), TOBN(0x89ccc66b, 0xc613fa7c), + TOBN(0x89268b14, 0xc732f15a), TOBN(0x7cd6f4e2, 0xb467ed03), + TOBN(0xfbf79869, 0xce56b40e), TOBN(0xf93e094c, 0xc02dde98), + TOBN(0xefe0c3a8, 0xedee2cd7), TOBN(0x90f3ffc0, 0xb268fd42), + TOBN(0x81a7fd56, 0x08241aed), TOBN(0x95ab7ad8, 0x00b1afe8), + TOBN(0x40127056, 0x3e310d52), TOBN(0xd3ffdeb1, 0x09d9fc43), + TOBN(0xc8f85c91, 0xd11a8594), TOBN(0x2e74d258, 0x31cf6db8), + TOBN(0x829c7ca3, 0x02b5dfd0), TOBN(0xe389cfbe, 0x69143c86), + TOBN(0xd01b6405, 0x941768d8), TOBN(0x45103995, 0x03bf825d), + TOBN(0xcc4ee166, 0x56cd17e2), TOBN(0xbea3c283, 0xba037e79), + TOBN(0x4e1ac06e, 0xd9a47520), TOBN(0xfbfe18aa, 0xaf852404), + TOBN(0x5615f8e2, 0x8087648a), TOBN(0x7301e47e, 0xb9d150d9), + TOBN(0x79f9f9dd, 0xb299b977), TOBN(0x76697a7b, 0xa5b78314), + TOBN(0x10d67468, 0x7d7c90e7), TOBN(0x7afffe03, 0x937210b5), + TOBN(0x5aef3e4b, 0x28c22cee), TOBN(0xefb0ecd8, 0x09fd55ae), + TOBN(0x4cea7132, 0x0d2a5d6a), TOBN(0x9cfb5fa1, 0x01db6357), + TOBN(0x395e0b57, 0xf36e1ac5), TOBN(0x008fa9ad, 0x36cafb7d), + TOBN(0x8f6cdf70, 0x5308c4db), TOBN(0x51527a37, 0x95ed2477), + TOBN(0xba0dee30, 0x5bd21311), TOBN(0x6ed41b22, 0x909c90d7), + TOBN(0xc5f6b758, 0x7c8696d3), TOBN(0x0db8eaa8, 0x3ce83a80), + TOBN(0xd297fe37, 0xb24b4b6f), TOBN(0xfe58afe8, 0x522d1f0d), + TOBN(0x97358736, 0x8c98dbd9), TOBN(0x6bc226ca, 0x9454a527), + TOBN(0xa12b384e, 0xce53c2d0), TOBN(0x779d897d, 0x5e4606da), + TOBN(0xa53e47b0, 0x73ec12b0), TOBN(0x462dbbba, 0x5756f1ad), + TOBN(0x69fe09f2, 0xcafe37b6), TOBN(0x273d1ebf, 0xecce2e17), + TOBN(0x8ac1d538, 0x3cf607fd), TOBN(0x8035f7ff, 0x12e10c25),} + , + {TOBN(0x854d34c7, 0x7e6c5520), TOBN(0xc27df9ef, 0xdcb9ea58), + TOBN(0x405f2369, 0xd686666d), TOBN(0x29d1febf, 0x0417aa85), + TOBN(0x9846819e, 0x93470afe), TOBN(0x3e6a9669, 0xe2a27f9e), + TOBN(0x24d008a2, 0xe31e6504), TOBN(0xdba7cecf, 0x9cb7680a), + TOBN(0xecaff541, 0x338d6e43), TOBN(0x56f7dd73, 0x4541d5cc), + TOBN(0xb5d426de, 0x96bc88ca), TOBN(0x48d94f6b, 0x9ed3a2c3), + TOBN(0x6354a3bb, 0x2ef8279c), TOBN(0xd575465b, 0x0b1867f2), + TOBN(0xef99b0ff, 0x95225151), TOBN(0xf3e19d88, 0xf94500d8), + TOBN(0x92a83268, 0xe32dd620), TOBN(0x913ec99f, 0x627849a2), + TOBN(0xedd8fdfa, 0x2c378882), TOBN(0xaf96f33e, 0xee6f8cfe), + TOBN(0xc06737e5, 0xdc3fa8a5), TOBN(0x236bb531, 0xb0b03a1d), + TOBN(0x33e59f29, 0x89f037b0), TOBN(0x13f9b5a7, 0xd9a12a53), + TOBN(0x0d0df6ce, 0x51efb310), TOBN(0xcb5b2eb4, 0x958df5be), + TOBN(0xd6459e29, 0x36158e59), TOBN(0x82aae2b9, 0x1466e336), + TOBN(0xfb658a39, 0x411aa636), TOBN(0x7152ecc5, 0xd4c0a933), + TOBN(0xf10c758a, 0x49f026b7), TOBN(0xf4837f97, 0xcb09311f), + TOBN(0xddfb02c4, 0xc753c45f), TOBN(0x18ca81b6, 0xf9c840fe), + TOBN(0x846fd09a, 0xb0f8a3e6), TOBN(0xb1162add, 0xe7733dbc), + TOBN(0x7070ad20, 0x236e3ab6), TOBN(0xf88cdaf5, 0xb2a56326), + TOBN(0x05fc8719, 0x997cbc7a), TOBN(0x442cd452, 0x4b665272), + TOBN(0x7807f364, 0xb71698f5), TOBN(0x6ba418d2, 0x9f7b605e), + TOBN(0xfd20b00f, 0xa03b2cbb), TOBN(0x883eca37, 0xda54386f), + TOBN(0xff0be43f, 0xf3437f24), TOBN(0xe910b432, 0xa48bb33c), + TOBN(0x4963a128, 0x329df765), TOBN(0xac1dd556, 0xbe2fe6f7), + TOBN(0x557610f9, 0x24a0a3fc), TOBN(0x38e17bf4, 0xe881c3f9), + TOBN(0x6ba84faf, 0xed0dac99), TOBN(0xd4a222c3, 0x59eeb918), + TOBN(0xc79c1dbe, 0x13f542b6), TOBN(0x1fc65e0d, 0xe425d457), + TOBN(0xeffb754f, 0x1debb779), TOBN(0x638d8fd0, 0x9e08af60), + TOBN(0x994f523a, 0x626332d5), TOBN(0x7bc38833, 0x5561bb44), + TOBN(0x005ed4b0, 0x3d845ea2), TOBN(0xd39d3ee1, 0xc2a1f08a), + TOBN(0x6561fdd3, 0xe7676b0d), TOBN(0x620e35ff, 0xfb706017), + TOBN(0x36ce424f, 0xf264f9a8), TOBN(0xc4c3419f, 0xda2681f7), + TOBN(0xfb6afd2f, 0x69beb6e8), TOBN(0x3a50b993, 0x6d700d03), + TOBN(0xc840b2ad, 0x0c83a14f), TOBN(0x573207be, 0x54085bef), + TOBN(0x5af882e3, 0x09fe7e5b), TOBN(0x957678a4, 0x3b40a7e1), + TOBN(0x172d4bdd, 0x543056e2), TOBN(0x9c1b26b4, 0x0df13c0a), + TOBN(0x1c30861c, 0xf405ff06), TOBN(0xebac86bd, 0x486e828b), + TOBN(0xe791a971, 0x636933fc), TOBN(0x50e7c2be, 0x7aeee947), + TOBN(0xc3d4a095, 0xfa90d767), TOBN(0xae60eb7b, 0xe670ab7b), + TOBN(0x17633a64, 0x397b056d), TOBN(0x93a21f33, 0x105012aa), + TOBN(0x663c370b, 0xabb88643), TOBN(0x91df36d7, 0x22e21599), + TOBN(0x183ba835, 0x8b761671), TOBN(0x381eea1d, 0x728f3bf1), + TOBN(0xb9b2f1ba, 0x39966e6c), TOBN(0x7c464a28, 0xe7295492), + TOBN(0x0fd5f70a, 0x09b26b7f), TOBN(0xa9aba1f9, 0xfbe009df), + TOBN(0x857c1f22, 0x369b87ad), TOBN(0x3c00e5d9, 0x32fca556), + TOBN(0x1ad74cab, 0x90b06466), TOBN(0xa7112386, 0x550faaf2), + TOBN(0x7435e198, 0x6d9bd5f5), TOBN(0x2dcc7e38, 0x59c3463f), + TOBN(0xdc7df748, 0xca7bd4b2), TOBN(0x13cd4c08, 0x9dec2f31), + TOBN(0x0d3b5df8, 0xe3237710), TOBN(0x0dadb26e, 0xcbd2f7b0), + TOBN(0x9f5966ab, 0xe4aa082b), TOBN(0x666ec8de, 0x350e966e), + TOBN(0x1bfd1ed5, 0xee524216), TOBN(0xcd93c59b, 0x41dab0b6), + TOBN(0x658a8435, 0xd186d6ba), TOBN(0x1b7d34d2, 0x159d1195), + TOBN(0x5936e460, 0x22caf46b), TOBN(0x6a45dd8f, 0x9a96fe4f), + TOBN(0xf7925434, 0xb98f474e), TOBN(0x41410412, 0x0053ef15), + TOBN(0x71cf8d12, 0x41de97bf), TOBN(0xb8547b61, 0xbd80bef4), + TOBN(0xb47d3970, 0xc4db0037), TOBN(0xf1bcd328, 0xfef20dff), + TOBN(0x31a92e09, 0x10caad67), TOBN(0x1f591960, 0x5531a1e1), + TOBN(0x3bb852e0, 0x5f4fc840), TOBN(0x63e297ca, 0x93a72c6c), + TOBN(0x3c2b0b2e, 0x49abad67), TOBN(0x6ec405fc, 0xed3db0d9), + TOBN(0xdc14a530, 0x7fef1d40), TOBN(0xccd19846, 0x280896fc), + TOBN(0x00f83176, 0x9bb81648), TOBN(0xd69eb485, 0x653120d0), + TOBN(0xd17d75f4, 0x4ccabc62), TOBN(0x34a07f82, 0xb749fcb1), + TOBN(0x2c3af787, 0xbbfb5554), TOBN(0xb06ed4d0, 0x62e283f8), + TOBN(0x5722889f, 0xa19213a0), TOBN(0x162b085e, 0xdcf3c7b4), + TOBN(0xbcaecb31, 0xe0dd3eca), TOBN(0xc6237fbc, 0xe52f13a5), + TOBN(0xcc2b6b03, 0x27bac297), TOBN(0x2ae1cac5, 0xb917f54a), + TOBN(0x474807d4, 0x7845ae4f), TOBN(0xfec7dd92, 0xce5972e0), + TOBN(0xc3bd2541, 0x1d7915bb), TOBN(0x66f85dc4, 0xd94907ca), + TOBN(0xd981b888, 0xbdbcf0ca), TOBN(0xd75f5da6, 0xdf279e9f), + TOBN(0x128bbf24, 0x7054e934), TOBN(0x3c6ff6e5, 0x81db134b), + TOBN(0x795b7cf4, 0x047d26e4), TOBN(0xf370f7b8, 0x5049ec37), + TOBN(0xc6712d4d, 0xced945af), TOBN(0xdf30b5ec, 0x095642bc), + TOBN(0x9b034c62, 0x4896246e), TOBN(0x5652c016, 0xee90bbd1), + TOBN(0xeb38636f, 0x87fedb73), TOBN(0x5e32f847, 0x0135a613), + TOBN(0x0703b312, 0xcf933c83), TOBN(0xd05bb76e, 0x1a7f47e6), + TOBN(0x825e4f0c, 0x949c2415), TOBN(0x569e5622, 0x7250d6f8), + TOBN(0xbbe9eb3a, 0x6568013e), TOBN(0x8dbd203f, 0x22f243fc), + TOBN(0x9dbd7694, 0xb342734a), TOBN(0x8f6d12f8, 0x46afa984), + TOBN(0xb98610a2, 0xc9eade29), TOBN(0xbab4f323, 0x47dd0f18), + TOBN(0x5779737b, 0x671c0d46), TOBN(0x10b6a7c6, 0xd3e0a42a), + TOBN(0xfb19ddf3, 0x3035b41c), TOBN(0xd336343f, 0x99c45895), + TOBN(0x61fe4938, 0x54c857e5), TOBN(0xc4d506be, 0xae4e57d5), + TOBN(0x3cd8c8cb, 0xbbc33f75), TOBN(0x7281f08a, 0x9262c77d), + TOBN(0x083f4ea6, 0xf11a2823), TOBN(0x8895041e, 0x9fba2e33), + TOBN(0xfcdfea49, 0x9c438edf), TOBN(0x7678dcc3, 0x91edba44), + TOBN(0xf07b3b87, 0xe2ba50f0), TOBN(0xc13888ef, 0x43948c1b), + TOBN(0xc2135ad4, 0x1140af42), TOBN(0x8e5104f3, 0x926ed1a7), + TOBN(0xf24430cb, 0x88f6695f), TOBN(0x0ce0637b, 0x6d73c120), + TOBN(0xb2db01e6, 0xfe631e8f), TOBN(0x1c5563d7, 0xd7bdd24b), + TOBN(0x8daea3ba, 0x369ad44f), TOBN(0x000c81b6, 0x8187a9f9), + TOBN(0x5f48a951, 0xaae1fd9a), TOBN(0xe35626c7, 0x8d5aed8a), + TOBN(0x20952763, 0x0498c622), TOBN(0x76d17634, 0x773aa504), + TOBN(0x36d90dda, 0xeb300f7a), TOBN(0x9dcf7dfc, 0xedb5e801), + TOBN(0x645cb268, 0x74d5244c), TOBN(0xa127ee79, 0x348e3aa2), + TOBN(0x488acc53, 0x575f1dbb), TOBN(0x95037e85, 0x80e6161e), + TOBN(0x57e59283, 0x292650d0), TOBN(0xabe67d99, 0x14938216), + TOBN(0x3c7f944b, 0x3f8e1065), TOBN(0xed908cb6, 0x330e8924), + TOBN(0x08ee8fd5, 0x6f530136), TOBN(0x2227b7d5, 0xd7ffc169), + TOBN(0x4f55c893, 0xb5cd6dd5), TOBN(0x82225e11, 0xa62796e8), + TOBN(0x5c6cead1, 0xcb18e12c), TOBN(0x4381ae0c, 0x84f5a51a), + TOBN(0x345913d3, 0x7fafa4c8), TOBN(0x3d918082, 0x0491aac0), + TOBN(0x9347871f, 0x3e69264c), TOBN(0xbea9dd3c, 0xb4f4f0cd), + TOBN(0xbda5d067, 0x3eadd3e7), TOBN(0x0033c1b8, 0x0573bcd8), + TOBN(0x25589379, 0x5da2486c), TOBN(0xcb89ee5b, 0x86abbee7), + TOBN(0x8fe0a8f3, 0x22532e5d), TOBN(0xb6410ff0, 0x727dfc4c), + TOBN(0x619b9d58, 0x226726db), TOBN(0x5ec25669, 0x7a2b2dc7), + TOBN(0xaf4d2e06, 0x4c3beb01), TOBN(0x852123d0, 0x7acea556), + TOBN(0x0e9470fa, 0xf783487a), TOBN(0x75a7ea04, 0x5664b3eb), + TOBN(0x4ad78f35, 0x6798e4ba), TOBN(0x9214e6e5, 0xc7d0e091), + TOBN(0xc420b488, 0xb1290403), TOBN(0x64049e0a, 0xfc295749), + TOBN(0x03ef5af1, 0x3ae9841f), TOBN(0xdbe4ca19, 0xb0b662a6), + TOBN(0x46845c5f, 0xfa453458), TOBN(0xf8dabf19, 0x10b66722), + TOBN(0xb650f0aa, 0xcce2793b), TOBN(0x71db851e, 0xc5ec47c1), + TOBN(0x3eb78f3e, 0x3b234fa9), TOBN(0xb0c60f35, 0xfc0106ce), + TOBN(0x05427121, 0x774eadbd), TOBN(0x25367faf, 0xce323863), + TOBN(0x7541b5c9, 0xcd086976), TOBN(0x4ff069e2, 0xdc507ad1), + TOBN(0x74145256, 0x8776e667), TOBN(0x6e76142c, 0xb23c6bb5), + TOBN(0xdbf30712, 0x1b3a8a87), TOBN(0x60e7363e, 0x98450836), + TOBN(0x5741450e, 0xb7366d80), TOBN(0xe4ee14ca, 0x4837dbdf), + TOBN(0xa765eb9b, 0x69d4316f), TOBN(0x04548dca, 0x8ef43825), + TOBN(0x9c9f4e4c, 0x5ae888eb), TOBN(0x733abb51, 0x56e9ac99), + TOBN(0xdaad3c20, 0xba6ac029), TOBN(0x9b8dd3d3, 0x2ba3e38e), + TOBN(0xa9bb4c92, 0x0bc5d11a), TOBN(0xf20127a7, 0x9c5f88a3), + TOBN(0x4f52b06e, 0x161d3cb8), TOBN(0x26c1ff09, 0x6afaf0a6), + TOBN(0x32670d2f, 0x7189e71f), TOBN(0xc6438748, 0x5ecf91e7), + TOBN(0x15758e57, 0xdb757a21), TOBN(0x427d09f8, 0x290a9ce5), + TOBN(0x846a308f, 0x38384a7a), TOBN(0xaac3acb4, 0xb0732b99), + TOBN(0x9e941009, 0x17845819), TOBN(0x95cba111, 0xa7ce5e03), + TOBN(0x6f3d4f7f, 0xb00009c4), TOBN(0xb8396c27, 0x8ff28b5f), + TOBN(0xb1a9ae43, 0x1c97975d), TOBN(0x9d7ba8af, 0xe5d9fed5), + TOBN(0x338cf09f, 0x34f485b6), TOBN(0xbc0ddacc, 0x64122516), + TOBN(0xa450da12, 0x05d471fe), TOBN(0x4c3a6250, 0x628dd8c9), + TOBN(0x69c7d103, 0xd1295837), TOBN(0xa2893e50, 0x3807eb2f), + TOBN(0xd6e1e1de, 0xbdb41491), TOBN(0xc630745b, 0x5e138235), + TOBN(0xc892109e, 0x48661ae1), TOBN(0x8d17e7eb, 0xea2b2674), + TOBN(0x00ec0f87, 0xc328d6b5), TOBN(0x6d858645, 0xf079ff9e), + TOBN(0x6cdf243e, 0x19115ead), TOBN(0x1ce1393e, 0x4bac4fcf), + TOBN(0x2c960ed0, 0x9c29f25b), TOBN(0x59be4d8e, 0x9d388a05), + TOBN(0x0d46e06c, 0xd0def72b), TOBN(0xb923db5d, 0xe0342748), + TOBN(0xf7d3aacd, 0x936d4a3d), TOBN(0x558519cc, 0x0b0b099e), + TOBN(0x3ea8ebf8, 0x827097ef), TOBN(0x259353db, 0xd054f55d), + TOBN(0x84c89abc, 0x6d2ed089), TOBN(0x5c548b69, 0x8e096a7c), + TOBN(0xd587f616, 0x994b995d), TOBN(0x4d1531f6, 0xa5845601), + TOBN(0x792ab31e, 0x451fd9f0), TOBN(0xc8b57bb2, 0x65adf6ca), + TOBN(0x68440fcb, 0x1cd5ad73), TOBN(0xb9c860e6, 0x6144da4f), + TOBN(0x2ab286aa, 0x8462beb8), TOBN(0xcc6b8fff, 0xef46797f), + TOBN(0xac820da4, 0x20c8a471), TOBN(0x69ae05a1, 0x77ff7faf), + TOBN(0xb9163f39, 0xbfb5da77), TOBN(0xbd03e590, 0x2c73ab7a), + TOBN(0x7e862b5e, 0xb2940d9e), TOBN(0x3c663d86, 0x4b9af564), + TOBN(0xd8309031, 0xbde3033d), TOBN(0x298231b2, 0xd42c5bc6), + TOBN(0x42090d2c, 0x552ad093), TOBN(0xa4799d1c, 0xff854695), + TOBN(0x0a88b5d6, 0xd31f0d00), TOBN(0xf8b40825, 0xa2f26b46), + TOBN(0xec29b1ed, 0xf1bd7218), TOBN(0xd491c53b, 0x4b24c86e), + TOBN(0xd2fe588f, 0x3395ea65), TOBN(0x6f3764f7, 0x4456ef15), + TOBN(0xdb43116d, 0xcdc34800), TOBN(0xcdbcd456, 0xc1e33955), + TOBN(0xefdb5540, 0x74ab286b), TOBN(0x948c7a51, 0xd18c5d7c), + TOBN(0xeb81aa37, 0x7378058e), TOBN(0x41c746a1, 0x04411154), + TOBN(0xa10c73bc, 0xfb828ac7), TOBN(0x6439be91, 0x9d972b29), + TOBN(0x4bf3b4b0, 0x43a2fbad), TOBN(0x39e6dadf, 0x82b5e840), + TOBN(0x4f716408, 0x6397bd4c), TOBN(0x0f7de568, 0x7f1eeccb), + TOBN(0x5865c5a1, 0xd2ffbfc1), TOBN(0xf74211fa, 0x4ccb6451), + TOBN(0x66368a88, 0xc0b32558), TOBN(0x5b539dc2, 0x9ad7812e), + TOBN(0x579483d0, 0x2f3af6f6), TOBN(0x52132078, 0x99934ece), + TOBN(0x50b9650f, 0xdcc9e983), TOBN(0xca989ec9, 0xaee42b8a), + TOBN(0x6a44c829, 0xd6f62f99), TOBN(0x8f06a309, 0x4c2a7c0c), + TOBN(0x4ea2b3a0, 0x98a0cb0a), TOBN(0x5c547b70, 0xbeee8364), + TOBN(0x461d40e1, 0x682afe11), TOBN(0x9e0fc77a, 0x7b41c0a8), + TOBN(0x79e4aefd, 0xe20d5d36), TOBN(0x2916e520, 0x32dd9f63), + TOBN(0xf59e52e8, 0x3f883faf), TOBN(0x396f9639, 0x2b868d35), + TOBN(0xc902a9df, 0x4ca19881), TOBN(0x0fc96822, 0xdb2401a6), + TOBN(0x41237587, 0x66f1c68d), TOBN(0x10fc6de3, 0xfb476c0d), + TOBN(0xf8b6b579, 0x841f5d90), TOBN(0x2ba8446c, 0xfa24f44a), + TOBN(0xa237b920, 0xef4a9975), TOBN(0x60bb6004, 0x2330435f), + TOBN(0xd6f4ab5a, 0xcfb7e7b5), TOBN(0xb2ac5097, 0x83435391), + TOBN(0xf036ee2f, 0xb0d1ea67), TOBN(0xae779a6a, 0x74c56230), + TOBN(0x59bff8c8, 0xab838ae6), TOBN(0xcd83ca99, 0x9b38e6f0), + TOBN(0xbb27bef5, 0xe33deed3), TOBN(0xe6356f6f, 0x001892a8), + TOBN(0xbf3be6cc, 0x7adfbd3e), TOBN(0xaecbc81c, 0x33d1ac9d), + TOBN(0xe4feb909, 0xe6e861dc), TOBN(0x90a247a4, 0x53f5f801), + TOBN(0x01c50acb, 0x27346e57), TOBN(0xce29242e, 0x461acc1b), + TOBN(0x04dd214a, 0x2f998a91), TOBN(0x271ee9b1, 0xd4baf27b), + TOBN(0x7e3027d1, 0xe8c26722), TOBN(0x21d1645c, 0x1820dce5), + TOBN(0x086f242c, 0x7501779c), TOBN(0xf0061407, 0xfa0e8009), + TOBN(0xf23ce477, 0x60187129), TOBN(0x05bbdedb, 0x0fde9bd0), + TOBN(0x682f4832, 0x25d98473), TOBN(0xf207fe85, 0x5c658427), + TOBN(0xb6fdd7ba, 0x4166ffa1), TOBN(0x0c314056, 0x9eed799d), + TOBN(0x0db8048f, 0x4107e28f), TOBN(0x74ed3871, 0x41216840), + TOBN(0x74489f8f, 0x56a3c06e), TOBN(0x1e1c005b, 0x12777134), + TOBN(0xdb332a73, 0xf37ec3c3), TOBN(0xc65259bd, 0xdd59eba0), + TOBN(0x2291709c, 0xdb4d3257), TOBN(0x9a793b25, 0xbd389390), + TOBN(0xf39fe34b, 0xe43756f0), TOBN(0x2f76bdce, 0x9afb56c9), + TOBN(0x9f37867a, 0x61208b27), TOBN(0xea1d4307, 0x089972c3), + TOBN(0x8c595330, 0x8bdf623a), TOBN(0x5f5accda, 0x8441fb7d), + TOBN(0xfafa9418, 0x32ddfd95), TOBN(0x6ad40c5a, 0x0fde9be7), + TOBN(0x43faba89, 0xaeca8709), TOBN(0xc64a7cf1, 0x2c248a9d), + TOBN(0x16620252, 0x72637a76), TOBN(0xaee1c791, 0x22b8d1bb), + TOBN(0xf0f798fd, 0x21a843b2), TOBN(0x56e4ed4d, 0x8d005cb1), + TOBN(0x355f7780, 0x1f0d8abe), TOBN(0x197b04cf, 0x34522326), + TOBN(0x41f9b31f, 0xfd42c13f), TOBN(0x5ef7feb2, 0xb40f933d), + TOBN(0x27326f42, 0x5d60bad4), TOBN(0x027ecdb2, 0x8c92cf89), + TOBN(0x04aae4d1, 0x4e3352fe), TOBN(0x08414d2f, 0x73591b90), + TOBN(0x5ed6124e, 0xb7da7d60), TOBN(0xb985b931, 0x4d13d4ec), + TOBN(0xa592d3ab, 0x96bf36f9), TOBN(0x012dbed5, 0xbbdf51df), + TOBN(0xa57963c0, 0xdf6c177d), TOBN(0x010ec869, 0x87ca29cf), + TOBN(0xba1700f6, 0xbf926dff), TOBN(0x7c9fdbd1, 0xf4bf6bc2), + TOBN(0xdc18dc8f, 0x64da11f5), TOBN(0xa6074b7a, 0xd938ae75), + TOBN(0x14270066, 0xe84f44a4), TOBN(0x99998d38, 0xd27b954e), + TOBN(0xc1be8ab2, 0xb4f38e9a), TOBN(0x8bb55bbf, 0x15c01016), + TOBN(0xf73472b4, 0x0ea2ab30), TOBN(0xd365a340, 0xf73d68dd), + TOBN(0xc01a7168, 0x19c2e1eb), TOBN(0x32f49e37, 0x34061719), + TOBN(0xb73c57f1, 0x01d8b4d6), TOBN(0x03c8423c, 0x26b47700), + TOBN(0x321d0bc8, 0xa4d8826a), TOBN(0x6004213c, 0x4bc0e638), + TOBN(0xf78c64a1, 0xc1c06681), TOBN(0x16e0a16f, 0xef018e50), + TOBN(0x31cbdf91, 0xdb42b2b3), TOBN(0xf8f4ffce, 0xe0d36f58), + TOBN(0xcdcc71cd, 0x4cc5e3e0), TOBN(0xd55c7cfa, 0xa129e3e0), + TOBN(0xccdb6ba0, 0x0fb2cbf1), TOBN(0x6aba0005, 0xc4bce3cb), + TOBN(0x501cdb30, 0xd232cfc4), TOBN(0x9ddcf12e, 0xd58a3cef), + TOBN(0x02d2cf9c, 0x87e09149), TOBN(0xdc5d7ec7, 0x2c976257), + TOBN(0x6447986e, 0x0b50d7dd), TOBN(0x88fdbaf7, 0x807f112a), + TOBN(0x58c9822a, 0xb00ae9f6), TOBN(0x6abfb950, 0x6d3d27e0), + TOBN(0xd0a74487, 0x8a429f4f), TOBN(0x0649712b, 0xdb516609), + TOBN(0xb826ba57, 0xe769b5df), TOBN(0x82335df2, 0x1fc7aaf2), + TOBN(0x2389f067, 0x5c93d995), TOBN(0x59ac367a, 0x68677be6), + TOBN(0xa77985ff, 0x21d9951b), TOBN(0x038956fb, 0x85011cce), + TOBN(0x608e48cb, 0xbb734e37), TOBN(0xc08c0bf2, 0x2be5b26f), + TOBN(0x17bbdd3b, 0xf9b1a0d9), TOBN(0xeac7d898, 0x10483319), + TOBN(0xc95c4baf, 0xbc1a6dea), TOBN(0xfdd0e2bf, 0x172aafdb), + TOBN(0x40373cbc, 0x8235c41a), TOBN(0x14303f21, 0xfb6f41d5), + TOBN(0xba063621, 0x0408f237), TOBN(0xcad3b09a, 0xecd2d1ed), + TOBN(0x4667855a, 0x52abb6a2), TOBN(0xba9157dc, 0xaa8b417b), + TOBN(0xfe7f3507, 0x4f013efb), TOBN(0x1b112c4b, 0xaa38c4a2), + TOBN(0xa1406a60, 0x9ba64345), TOBN(0xe53cba33, 0x6993c80b), + TOBN(0x45466063, 0xded40d23), TOBN(0x3d5f1f4d, 0x54908e25), + TOBN(0x9ebefe62, 0x403c3c31), TOBN(0x274ea0b5, 0x0672a624), + TOBN(0xff818d99, 0x451d1b71), TOBN(0x80e82643, 0x8f79cf79), + TOBN(0xa165df13, 0x73ce37f5), TOBN(0xa744ef4f, 0xfe3a21fd), + TOBN(0x73f1e7f5, 0xcf551396), TOBN(0xc616898e, 0x868c676b), + TOBN(0x671c28c7, 0x8c442c36), TOBN(0xcfe5e558, 0x5e0a317d), + TOBN(0x1242d818, 0x7051f476), TOBN(0x56fad2a6, 0x14f03442), + TOBN(0x262068bc, 0x0a44d0f6), TOBN(0xdfa2cd6e, 0xce6edf4e), + TOBN(0x0f43813a, 0xd15d1517), TOBN(0x61214cb2, 0x377d44f5), + TOBN(0xd399aa29, 0xc639b35f), TOBN(0x42136d71, 0x54c51c19), + TOBN(0x9774711b, 0x08417221), TOBN(0x0a5546b3, 0x52545a57), + TOBN(0x80624c41, 0x1150582d), TOBN(0x9ec5c418, 0xfbc555bc), + TOBN(0x2c87dcad, 0x771849f1), TOBN(0xb0c932c5, 0x01d7bf6f), + TOBN(0x6aa5cd3e, 0x89116eb2), TOBN(0xd378c25a, 0x51ca7bd3), + TOBN(0xc612a0da, 0x9e6e3e31), TOBN(0x0417a54d, 0xb68ad5d0), + TOBN(0x00451e4a, 0x22c6edb8), TOBN(0x9fbfe019, 0xb42827ce), + TOBN(0x2fa92505, 0xba9384a2), TOBN(0x21b8596e, 0x64ad69c1), + TOBN(0x8f4fcc49, 0x983b35a6), TOBN(0xde093760, 0x72754672), + TOBN(0x2f14ccc8, 0xf7bffe6d), TOBN(0x27566bff, 0x5d94263d), + TOBN(0xb5b4e9c6, 0x2df3ec30), TOBN(0x94f1d7d5, 0x3e6ea6ba), + TOBN(0x97b7851a, 0xaaca5e9b), TOBN(0x518aa521, 0x56713b97), + TOBN(0x3357e8c7, 0x150a61f6), TOBN(0x7842e7e2, 0xec2c2b69), + TOBN(0x8dffaf65, 0x6868a548), TOBN(0xd963bd82, 0xe068fc81), + TOBN(0x64da5c8b, 0x65917733), TOBN(0x927090ff, 0x7b247328),} + , + {TOBN(0x214bc9a7, 0xd298c241), TOBN(0xe3b697ba, 0x56807cfd), + TOBN(0xef1c7802, 0x4564eadb), TOBN(0xdde8cdcf, 0xb48149c5), + TOBN(0x946bf0a7, 0x5a4d2604), TOBN(0x27154d7f, 0x6c1538af), + TOBN(0x95cc9230, 0xde5b1fcc), TOBN(0xd88519e9, 0x66864f82), + TOBN(0xb828dd1a, 0x7cb1282c), TOBN(0xa08d7626, 0xbe46973a), + TOBN(0x6baf8d40, 0xe708d6b2), TOBN(0x72571fa1, 0x4daeb3f3), + TOBN(0x85b1732f, 0xf22dfd98), TOBN(0x87ab01a7, 0x0087108d), + TOBN(0xaaaafea8, 0x5988207a), TOBN(0xccc832f8, 0x69f00755), + TOBN(0x964d950e, 0x36ff3bf0), TOBN(0x8ad20f6f, 0xf0b34638), + TOBN(0x4d9177b3, 0xb5d7585f), TOBN(0xcf839760, 0xef3f019f), + TOBN(0x582fc5b3, 0x8288c545), TOBN(0x2f8e4e9b, 0x13116bd1), + TOBN(0xf91e1b2f, 0x332120ef), TOBN(0xcf568724, 0x2a17dd23), + TOBN(0x488f1185, 0xca8d9d1a), TOBN(0xadf2c77d, 0xd987ded2), + TOBN(0x5f3039f0, 0x60c46124), TOBN(0xe5d70b75, 0x71e095f4), + TOBN(0x82d58650, 0x6260e70f), TOBN(0x39d75ea7, 0xf750d105), + TOBN(0x8cf3d0b1, 0x75bac364), TOBN(0xf3a7564d, 0x21d01329), + TOBN(0x182f04cd, 0x2f52d2a7), TOBN(0x4fde149a, 0xe2df565a), + TOBN(0xb80c5eec, 0xa79fb2f7), TOBN(0xab491d7b, 0x22ddc897), + TOBN(0x99d76c18, 0xc6312c7f), TOBN(0xca0d5f3d, 0x6aa41a57), + TOBN(0x71207325, 0xd15363a0), TOBN(0xe82aa265, 0xbeb252c2), + TOBN(0x94ab4700, 0xec3128c2), TOBN(0x6c76d862, 0x8e383f49), + TOBN(0xdc36b150, 0xc03024eb), TOBN(0xfb439477, 0x53daac69), + TOBN(0xfc68764a, 0x8dc79623), TOBN(0x5b86995d, 0xb440fbb2), + TOBN(0xd66879bf, 0xccc5ee0d), TOBN(0x05228942, 0x95aa8bd3), + TOBN(0xb51a40a5, 0x1e6a75c1), TOBN(0x24327c76, 0x0ea7d817), + TOBN(0x06630182, 0x07774597), TOBN(0xd6fdbec3, 0x97fa7164), + TOBN(0x20c99dfb, 0x13c90f48), TOBN(0xd6ac5273, 0x686ef263), + TOBN(0xc6a50bdc, 0xfef64eeb), TOBN(0xcd87b281, 0x86fdfc32), + TOBN(0xb24aa43e, 0x3fcd3efc), TOBN(0xdd26c034, 0xb8088e9a), + TOBN(0xa5ef4dc9, 0xbd3d46ea), TOBN(0xa2f99d58, 0x8a4c6a6f), + TOBN(0xddabd355, 0x2f1da46c), TOBN(0x72c3f8ce, 0x1afacdd1), + TOBN(0xd90c4eee, 0x92d40578), TOBN(0xd28bb41f, 0xca623b94), + TOBN(0x50fc0711, 0x745edc11), TOBN(0x9dd9ad7d, 0x3dc87558), + TOBN(0xce6931fb, 0xb49d1e64), TOBN(0x6c77a0a2, 0xc98bd0f9), + TOBN(0x62b9a629, 0x6baf7cb1), TOBN(0xcf065f91, 0xccf72d22), + TOBN(0x7203cce9, 0x79639071), TOBN(0x09ae4885, 0xf9cb732f), + TOBN(0x5e7c3bec, 0xee8314f3), TOBN(0x1c068aed, 0xdbea298f), + TOBN(0x08d381f1, 0x7c80acec), TOBN(0x03b56be8, 0xe330495b), + TOBN(0xaeffb8f2, 0x9222882d), TOBN(0x95ff38f6, 0xc4af8bf7), + TOBN(0x50e32d35, 0x1fc57d8c), TOBN(0x6635be52, 0x17b444f0), + TOBN(0x04d15276, 0xa5177900), TOBN(0x4e1dbb47, 0xf6858752), + TOBN(0x5b475622, 0xc615796c), TOBN(0xa6fa0387, 0x691867bf), + TOBN(0xed7f5d56, 0x2844c6d0), TOBN(0xc633cf9b, 0x03a2477d), + TOBN(0xf6be5c40, 0x2d3721d6), TOBN(0xaf312eb7, 0xe9fd68e6), + TOBN(0x242792d2, 0xe7417ce1), TOBN(0xff42bc71, 0x970ee7f5), + TOBN(0x1ff4dc6d, 0x5c67a41e), TOBN(0x77709b7b, 0x20882a58), + TOBN(0x3554731d, 0xbe217f2c), TOBN(0x2af2a8cd, 0x5bb72177), + TOBN(0x58eee769, 0x591dd059), TOBN(0xbb2930c9, 0x4bba6477), + TOBN(0x863ee047, 0x7d930cfc), TOBN(0x4c262ad1, 0x396fd1f4), + TOBN(0xf4765bc8, 0x039af7e1), TOBN(0x2519834b, 0x5ba104f6), + TOBN(0x7cd61b4c, 0xd105f961), TOBN(0xa5415da5, 0xd63bca54), + TOBN(0x778280a0, 0x88a1f17c), TOBN(0xc4968949, 0x2329512c), + TOBN(0x174a9126, 0xcecdaa7a), TOBN(0xfc8c7e0e, 0x0b13247b), + TOBN(0x29c110d2, 0x3484c1c4), TOBN(0xf8eb8757, 0x831dfc3b), + TOBN(0x022f0212, 0xc0067452), TOBN(0x3f6f69ee, 0x7b9b926c), + TOBN(0x09032da0, 0xef42daf4), TOBN(0x79f00ade, 0x83f80de4), + TOBN(0x6210db71, 0x81236c97), TOBN(0x74f7685b, 0x3ee0781f), + TOBN(0x4df7da7b, 0xa3e41372), TOBN(0x2aae38b1, 0xb1a1553e), + TOBN(0x1688e222, 0xf6dd9d1b), TOBN(0x57695448, 0x5b8b6487), + TOBN(0x478d2127, 0x4b2edeaa), TOBN(0xb2818fa5, 0x1e85956a), + TOBN(0x1e6addda, 0xf176f2c0), TOBN(0x01ca4604, 0xe2572658), + TOBN(0x0a404ded, 0x85342ffb), TOBN(0x8cf60f96, 0x441838d6), + TOBN(0x9bbc691c, 0xc9071c4a), TOBN(0xfd588744, 0x34442803), + TOBN(0x97101c85, 0x809c0d81), TOBN(0xa7fb754c, 0x8c456f7f), + TOBN(0xc95f3c5c, 0xd51805e1), TOBN(0xab4ccd39, 0xb299dca8), + TOBN(0x3e03d20b, 0x47eaf500), TOBN(0xfa3165c1, 0xd7b80893), + TOBN(0x005e8b54, 0xe160e552), TOBN(0xdc4972ba, 0x9019d11f), + TOBN(0x21a6972e, 0x0c9a4a7a), TOBN(0xa52c258f, 0x37840fd7), + TOBN(0xf8559ff4, 0xc1e99d81), TOBN(0x08e1a7d6, 0xa3c617c0), + TOBN(0xb398fd43, 0x248c6ba7), TOBN(0x6ffedd91, 0xd1283794), + TOBN(0x8a6a59d2, 0xd629d208), TOBN(0xa9d141d5, 0x3490530e), + TOBN(0x42f6fc18, 0x38505989), TOBN(0x09bf250d, 0x479d94ee), + TOBN(0x223ad3b1, 0xb3822790), TOBN(0x6c5926c0, 0x93b8971c), + TOBN(0x609efc7e, 0x75f7fa62), TOBN(0x45d66a6d, 0x1ec2d989), + TOBN(0x4422d663, 0x987d2792), TOBN(0x4a73caad, 0x3eb31d2b), + TOBN(0xf06c2ac1, 0xa32cb9e6), TOBN(0xd9445c5f, 0x91aeba84), + TOBN(0x6af7a1d5, 0xaf71013f), TOBN(0xe68216e5, 0x0bedc946), + TOBN(0xf4cba30b, 0xd27370a0), TOBN(0x7981afbf, 0x870421cc), + TOBN(0x02496a67, 0x9449f0e1), TOBN(0x86cfc4be, 0x0a47edae), + TOBN(0x3073c936, 0xb1feca22), TOBN(0xf5694612, 0x03f8f8fb), + TOBN(0xd063b723, 0x901515ea), TOBN(0x4c6c77a5, 0x749cf038), + TOBN(0x6361e360, 0xab9e5059), TOBN(0x596cf171, 0xa76a37c0), + TOBN(0x800f53fa, 0x6530ae7a), TOBN(0x0f5e631e, 0x0792a7a6), + TOBN(0x5cc29c24, 0xefdb81c9), TOBN(0xa269e868, 0x3f9c40ba), + TOBN(0xec14f9e1, 0x2cb7191e), TOBN(0x78ea1bd8, 0xe5b08ea6), + TOBN(0x3c65aa9b, 0x46332bb9), TOBN(0x84cc22b3, 0xbf80ce25), + TOBN(0x0098e9e9, 0xd49d5bf1), TOBN(0xcd4ec1c6, 0x19087da4), + TOBN(0x3c9d07c5, 0xaef6e357), TOBN(0x839a0268, 0x9f8f64b8), + TOBN(0xc5e9eb62, 0xc6d8607f), TOBN(0x759689f5, 0x6aa995e4), + TOBN(0x70464669, 0xbbb48317), TOBN(0x921474bf, 0xe402417d), + TOBN(0xcabe135b, 0x2a354c8c), TOBN(0xd51e52d2, 0x812fa4b5), + TOBN(0xec741096, 0x53311fe8), TOBN(0x4f774535, 0xb864514b), + TOBN(0xbcadd671, 0x5bde48f8), TOBN(0xc9703873, 0x2189bc7d), + TOBN(0x5d45299e, 0xc709ee8a), TOBN(0xd1287ee2, 0x845aaff8), + TOBN(0x7d1f8874, 0xdb1dbf1f), TOBN(0xea46588b, 0x990c88d6), + TOBN(0x60ba649a, 0x84368313), TOBN(0xd5fdcbce, 0x60d543ae), + TOBN(0x90b46d43, 0x810d5ab0), TOBN(0x6739d8f9, 0x04d7e5cc), + TOBN(0x021c1a58, 0x0d337c33), TOBN(0x00a61162, 0x68e67c40), + TOBN(0x95ef413b, 0x379f0a1f), TOBN(0xfe126605, 0xe9e2ab95), + TOBN(0x67578b85, 0x2f5f199c), TOBN(0xf5c00329, 0x2cb84913), + TOBN(0xf7956430, 0x37577dd8), TOBN(0x83b82af4, 0x29c5fe88), + TOBN(0x9c1bea26, 0xcdbdc132), TOBN(0x589fa086, 0x9c04339e), + TOBN(0x033e9538, 0xb13799df), TOBN(0x85fa8b21, 0xd295d034), + TOBN(0xdf17f73f, 0xbd9ddcca), TOBN(0xf32bd122, 0xddb66334), + TOBN(0x55ef88a7, 0x858b044c), TOBN(0x1f0d69c2, 0x5aa9e397), + TOBN(0x55fd9cc3, 0x40d85559), TOBN(0xc774df72, 0x7785ddb2), + TOBN(0x5dcce9f6, 0xd3bd2e1c), TOBN(0xeb30da20, 0xa85dfed0), + TOBN(0x5ed7f5bb, 0xd3ed09c4), TOBN(0x7d42a35c, 0x82a9c1bd), + TOBN(0xcf3de995, 0x9890272d), TOBN(0x75f3432a, 0x3e713a10), + TOBN(0x5e13479f, 0xe28227b8), TOBN(0xb8561ea9, 0xfefacdc8), + TOBN(0xa6a297a0, 0x8332aafd), TOBN(0x9b0d8bb5, 0x73809b62), + TOBN(0xd2fa1cfd, 0x0c63036f), TOBN(0x7a16eb55, 0xbd64bda8), + TOBN(0x3f5cf5f6, 0x78e62ddc), TOBN(0x2267c454, 0x07fd752b), + TOBN(0x5e361b6b, 0x5e437bbe), TOBN(0x95c59501, 0x8354e075), + TOBN(0xec725f85, 0xf2b254d9), TOBN(0x844b617d, 0x2cb52b4e), + TOBN(0xed8554f5, 0xcf425fb5), TOBN(0xab67703e, 0x2af9f312), + TOBN(0x4cc34ec1, 0x3cf48283), TOBN(0xb09daa25, 0x9c8a705e), + TOBN(0xd1e9d0d0, 0x5b7d4f84), TOBN(0x4df6ef64, 0xdb38929d), + TOBN(0xe16b0763, 0xaa21ba46), TOBN(0xc6b1d178, 0xa293f8fb), + TOBN(0x0ff5b602, 0xd520aabf), TOBN(0x94d671bd, 0xc339397a), + TOBN(0x7c7d98cf, 0x4f5792fa), TOBN(0x7c5e0d67, 0x11215261), + TOBN(0x9b19a631, 0xa7c5a6d4), TOBN(0xc8511a62, 0x7a45274d), + TOBN(0x0c16621c, 0xa5a60d99), TOBN(0xf7fbab88, 0xcf5e48cb), + TOBN(0xab1e6ca2, 0xf7ddee08), TOBN(0x83bd08ce, 0xe7867f3c), + TOBN(0xf7e48e8a, 0x2ac13e27), TOBN(0x4494f6df, 0x4eb1a9f5), + TOBN(0xedbf84eb, 0x981f0a62), TOBN(0x49badc32, 0x536438f0), + TOBN(0x50bea541, 0x004f7571), TOBN(0xbac67d10, 0xdf1c94ee), + TOBN(0x253d73a1, 0xb727bc31), TOBN(0xb3d01cf2, 0x30686e28), + TOBN(0x51b77b1b, 0x55fd0b8b), TOBN(0xa099d183, 0xfeec3173), + TOBN(0x202b1fb7, 0x670e72b7), TOBN(0xadc88b33, 0xa8e1635f), + TOBN(0x34e8216a, 0xf989d905), TOBN(0xc2e68d20, 0x29b58d01), + TOBN(0x11f81c92, 0x6fe55a93), TOBN(0x15f1462a, 0x8f296f40), + TOBN(0x1915d375, 0xea3d62f2), TOBN(0xa17765a3, 0x01c8977d), + TOBN(0x7559710a, 0xe47b26f6), TOBN(0xe0bd29c8, 0x535077a5), + TOBN(0x615f976d, 0x08d84858), TOBN(0x370dfe85, 0x69ced5c1), + TOBN(0xbbc7503c, 0xa734fa56), TOBN(0xfbb9f1ec, 0x91ac4574), + TOBN(0x95d7ec53, 0x060dd7ef), TOBN(0xeef2dacd, 0x6e657979), + TOBN(0x54511af3, 0xe2a08235), TOBN(0x1e324aa4, 0x1f4aea3d), + TOBN(0x550e7e71, 0xe6e67671), TOBN(0xbccd5190, 0xbf52faf7), + TOBN(0xf880d316, 0x223cc62a), TOBN(0x0d402c7e, 0x2b32eb5d), + TOBN(0xa40bc039, 0x306a5a3b), TOBN(0x4e0a41fd, 0x96783a1b), + TOBN(0xa1e8d39a, 0x0253cdd4), TOBN(0x6480be26, 0xc7388638), + TOBN(0xee365e1d, 0x2285f382), TOBN(0x188d8d8f, 0xec0b5c36), + TOBN(0x34ef1a48, 0x1f0f4d82), TOBN(0x1a8f43e1, 0xa487d29a), + TOBN(0x8168226d, 0x77aefb3a), TOBN(0xf69a751e, 0x1e72c253), + TOBN(0x8e04359a, 0xe9594df1), TOBN(0x475ffd7d, 0xd14c0467), + TOBN(0xb5a2c2b1, 0x3844e95c), TOBN(0x85caf647, 0xdd12ef94), + TOBN(0x1ecd2a9f, 0xf1063d00), TOBN(0x1dd2e229, 0x23843311), + TOBN(0x38f0e09d, 0x73d17244), TOBN(0x3ede7746, 0x8fc653f1), + TOBN(0xae4459f5, 0xdc20e21c), TOBN(0x00db2ffa, 0x6a8599ea), + TOBN(0x11682c39, 0x30cfd905), TOBN(0x4934d074, 0xa5c112a6), + TOBN(0xbdf063c5, 0x568bfe95), TOBN(0x779a440a, 0x016c441a), + TOBN(0x0c23f218, 0x97d6fbdc), TOBN(0xd3a5cd87, 0xe0776aac), + TOBN(0xcee37f72, 0xd712e8db), TOBN(0xfb28c70d, 0x26f74e8d), + TOBN(0xffe0c728, 0xb61301a0), TOBN(0xa6282168, 0xd3724354), + TOBN(0x7ff4cb00, 0x768ffedc), TOBN(0xc51b3088, 0x03b02de9), + TOBN(0xa5a8147c, 0x3902dda5), TOBN(0x35d2f706, 0xfe6973b4), + TOBN(0x5ac2efcf, 0xc257457e), TOBN(0x933f48d4, 0x8700611b), + TOBN(0xc365af88, 0x4912beb2), TOBN(0x7f5a4de6, 0x162edf94), + TOBN(0xc646ba7c, 0x0c32f34b), TOBN(0x632c6af3, 0xb2091074), + TOBN(0x58d4f2e3, 0x753e43a9), TOBN(0x70e1d217, 0x24d4e23f), + TOBN(0xb24bf729, 0xafede6a6), TOBN(0x7f4a94d8, 0x710c8b60), + TOBN(0xaad90a96, 0x8d4faa6a), TOBN(0xd9ed0b32, 0xb066b690), + TOBN(0x52fcd37b, 0x78b6dbfd), TOBN(0x0b64615e, 0x8bd2b431), + TOBN(0x228e2048, 0xcfb9fad5), TOBN(0xbeaa386d, 0x240b76bd), + TOBN(0x2d6681c8, 0x90dad7bc), TOBN(0x3e553fc3, 0x06d38f5e), + TOBN(0xf27cdb9b, 0x9d5f9750), TOBN(0x3e85c52a, 0xd28c5b0e), + TOBN(0x190795af, 0x5247c39b), TOBN(0x547831eb, 0xbddd6828), + TOBN(0xf327a227, 0x4a82f424), TOBN(0x36919c78, 0x7e47f89d), + TOBN(0xe4783919, 0x43c7392c), TOBN(0xf101b9aa, 0x2316fefe), + TOBN(0xbcdc9e9c, 0x1c5009d2), TOBN(0xfb55ea13, 0x9cd18345), + TOBN(0xf5b5e231, 0xa3ce77c7), TOBN(0xde6b4527, 0xd2f2cb3d), + TOBN(0x10f6a333, 0x9bb26f5f), TOBN(0x1e85db8e, 0x044d85b6), + TOBN(0xc3697a08, 0x94197e54), TOBN(0x65e18cc0, 0xa7cb4ea8), + TOBN(0xa38c4f50, 0xa471fe6e), TOBN(0xf031747a, 0x2f13439c), + TOBN(0x53c4a6ba, 0xc007318b), TOBN(0xa8da3ee5, 0x1deccb3d), + TOBN(0x0555b31c, 0x558216b1), TOBN(0x90c7810c, 0x2f79e6c2), + TOBN(0x9b669f4d, 0xfe8eed3c), TOBN(0x70398ec8, 0xe0fac126), + TOBN(0xa96a449e, 0xf701b235), TOBN(0x0ceecdb3, 0xeb94f395), + TOBN(0x285fc368, 0xd0cb7431), TOBN(0x0d37bb52, 0x16a18c64), + TOBN(0x05110d38, 0xb880d2dd), TOBN(0xa60f177b, 0x65930d57), + TOBN(0x7da34a67, 0xf36235f5), TOBN(0x47f5e17c, 0x183816b9), + TOBN(0xc7664b57, 0xdb394af4), TOBN(0x39ba215d, 0x7036f789), + TOBN(0x46d2ca0e, 0x2f27b472), TOBN(0xc42647ee, 0xf73a84b7), + TOBN(0x44bc7545, 0x64488f1d), TOBN(0xaa922708, 0xf4cf85d5), + TOBN(0x721a01d5, 0x53e4df63), TOBN(0x649c0c51, 0x5db46ced), + TOBN(0x6bf0d64e, 0x3cffcb6c), TOBN(0xe3bf93fe, 0x50f71d96), + TOBN(0x75044558, 0xbcc194a0), TOBN(0x16ae3372, 0x6afdc554), + TOBN(0xbfc01adf, 0x5ca48f3f), TOBN(0x64352f06, 0xe22a9b84), + TOBN(0xcee54da1, 0xc1099e4a), TOBN(0xbbda54e8, 0xfa1b89c0), + TOBN(0x166a3df5, 0x6f6e55fb), TOBN(0x1ca44a24, 0x20176f88), + TOBN(0x936afd88, 0xdfb7b5ff), TOBN(0xe34c2437, 0x8611d4a0), + TOBN(0x7effbb75, 0x86142103), TOBN(0x6704ba1b, 0x1f34fc4d), + TOBN(0x7c2a468f, 0x10c1b122), TOBN(0x36b3a610, 0x8c6aace9), + TOBN(0xabfcc0a7, 0x75a0d050), TOBN(0x066f9197, 0x3ce33e32), + TOBN(0xce905ef4, 0x29fe09be), TOBN(0x89ee25ba, 0xa8376351), + TOBN(0x2a3ede22, 0xfd29dc76), TOBN(0x7fd32ed9, 0x36f17260), + TOBN(0x0cadcf68, 0x284b4126), TOBN(0x63422f08, 0xa7951fc8), + TOBN(0x562b24f4, 0x0807e199), TOBN(0xfe9ce5d1, 0x22ad4490), + TOBN(0xc2f51b10, 0x0db2b1b4), TOBN(0xeb3613ff, 0xe4541d0d), + TOBN(0xbd2c4a05, 0x2680813b), TOBN(0x527aa55d, 0x561b08d6), + TOBN(0xa9f8a40e, 0xa7205558), TOBN(0xe3eea56f, 0x243d0bec), + TOBN(0x7b853817, 0xa0ff58b3), TOBN(0xb67d3f65, 0x1a69e627), + TOBN(0x0b76bbb9, 0xa869b5d6), TOBN(0xa3afeb82, 0x546723ed), + TOBN(0x5f24416d, 0x3e554892), TOBN(0x8413b53d, 0x430e2a45), + TOBN(0x99c56aee, 0x9032a2a0), TOBN(0x09432bf6, 0xeec367b1), + TOBN(0x552850c6, 0xdaf0ecc1), TOBN(0x49ebce55, 0x5bc92048), + TOBN(0xdfb66ba6, 0x54811307), TOBN(0x1b84f797, 0x6f298597), + TOBN(0x79590481, 0x8d1d7a0d), TOBN(0xd9fabe03, 0x3a6fa556), + TOBN(0xa40f9c59, 0xba9e5d35), TOBN(0xcb1771c1, 0xf6247577), + TOBN(0x542a47ca, 0xe9a6312b), TOBN(0xa34b3560, 0x552dd8c5), + TOBN(0xfdf94de0, 0x0d794716), TOBN(0xd46124a9, 0x9c623094), + TOBN(0x56b7435d, 0x68afe8b4), TOBN(0x27f20540, 0x6c0d8ea1), + TOBN(0x12b77e14, 0x73186898), TOBN(0xdbc3dd46, 0x7479490f), + TOBN(0x951a9842, 0xc03b0c05), TOBN(0x8b1b3bb3, 0x7921bc96), + TOBN(0xa573b346, 0x2b202e0a), TOBN(0x77e4665d, 0x47254d56), + TOBN(0x08b70dfc, 0xd23e3984), TOBN(0xab86e8bc, 0xebd14236), + TOBN(0xaa3e07f8, 0x57114ba7), TOBN(0x5ac71689, 0xab0ef4f2), + TOBN(0x88fca384, 0x0139d9af), TOBN(0x72733f88, 0x76644af0), + TOBN(0xf122f72a, 0x65d74f4a), TOBN(0x13931577, 0xa5626c7a), + TOBN(0xd5b5d9eb, 0x70f8d5a4), TOBN(0x375adde7, 0xd7bbb228), + TOBN(0x31e88b86, 0x0c1c0b32), TOBN(0xd1f568c4, 0x173edbaa), + TOBN(0x1592fc83, 0x5459df02), TOBN(0x2beac0fb, 0x0fcd9a7e), + TOBN(0xb0a6fdb8, 0x1b473b0a), TOBN(0xe3224c6f, 0x0fe8fc48), + TOBN(0x680bd00e, 0xe87edf5b), TOBN(0x30385f02, 0x20e77cf5), + TOBN(0xe9ab98c0, 0x4d42d1b2), TOBN(0x72d191d2, 0xd3816d77), + TOBN(0x1564daca, 0x0917d9e5), TOBN(0x394eab59, 0x1f8fed7f), + TOBN(0xa209aa8d, 0x7fbb3896), TOBN(0x5564f3b9, 0xbe6ac98e), + TOBN(0xead21d05, 0xd73654ef), TOBN(0x68d1a9c4, 0x13d78d74), + TOBN(0x61e01708, 0x6d4973a0), TOBN(0x83da3500, 0x46e6d32a), + TOBN(0x6a3dfca4, 0x68ae0118), TOBN(0xa1b9a4c9, 0xd02da069), + TOBN(0x0b2ff9c7, 0xebab8302), TOBN(0x98af07c3, 0x944ba436), + TOBN(0x85997326, 0x995f0f9f), TOBN(0x467fade0, 0x71b58bc6), + TOBN(0x47e4495a, 0xbd625a2b), TOBN(0xfdd2d01d, 0x33c3b8cd), + TOBN(0x2c38ae28, 0xc693f9fa), TOBN(0x48622329, 0x348f7999), + TOBN(0x97bf738e, 0x2161f583), TOBN(0x15ee2fa7, 0x565e8cc9), + TOBN(0xa1a5c845, 0x5777e189), TOBN(0xcc10bee0, 0x456f2829), + TOBN(0x8ad95c56, 0xda762bd5), TOBN(0x152e2214, 0xe9d91da8), + TOBN(0x975b0e72, 0x7cb23c74), TOBN(0xfd5d7670, 0xa90c66df), + TOBN(0xb5b5b8ad, 0x225ffc53), TOBN(0xab6dff73, 0xfaded2ae), + TOBN(0xebd56781, 0x6f4cbe9d), TOBN(0x0ed8b249, 0x6a574bd7), + TOBN(0x41c246fe, 0x81a881fa), TOBN(0x91564805, 0xc3db9c70), + TOBN(0xd7c12b08, 0x5b862809), TOBN(0x1facd1f1, 0x55858d7b), + TOBN(0x7693747c, 0xaf09e92a), TOBN(0x3b69dcba, 0x189a425f), + TOBN(0x0be28e9f, 0x967365ef), TOBN(0x57300eb2, 0xe801f5c9), + TOBN(0x93b8ac6a, 0xd583352f), TOBN(0xa2cf1f89, 0xcd05b2b7), + TOBN(0x7c0c9b74, 0x4dcc40cc), TOBN(0xfee38c45, 0xada523fb), + TOBN(0xb49a4dec, 0x1099cc4d), TOBN(0x325c377f, 0x69f069c6), + TOBN(0xe12458ce, 0x476cc9ff), TOBN(0x580e0b6c, 0xc6d4cb63), + TOBN(0xd561c8b7, 0x9072289b), TOBN(0x0377f264, 0xa619e6da), + TOBN(0x26685362, 0x88e591a5), TOBN(0xa453a7bd, 0x7523ca2b), + TOBN(0x8a9536d2, 0xc1df4533), TOBN(0xc8e50f2f, 0xbe972f79), + TOBN(0xd433e50f, 0x6d3549cf), TOBN(0x6f33696f, 0xfacd665e), + TOBN(0x695bfdac, 0xce11fcb4), TOBN(0x810ee252, 0xaf7c9860), + TOBN(0x65450fe1, 0x7159bb2c), TOBN(0xf7dfbebe, 0x758b357b), + TOBN(0x2b057e74, 0xd69fea72), TOBN(0xd485717a, 0x92731745),} + , + {TOBN(0x896c42e8, 0xee36860c), TOBN(0xdaf04dfd, 0x4113c22d), + TOBN(0x1adbb7b7, 0x44104213), TOBN(0xe5fd5fa1, 0x1fd394ea), + TOBN(0x68235d94, 0x1a4e0551), TOBN(0x6772cfbe, 0x18d10151), + TOBN(0x276071e3, 0x09984523), TOBN(0xe4e879de, 0x5a56ba98), + TOBN(0xaaafafb0, 0x285b9491), TOBN(0x01a0be88, 0x1e4c705e), + TOBN(0xff1d4f5d, 0x2ad9caab), TOBN(0x6e349a4a, 0xc37a233f), + TOBN(0xcf1c1246, 0x4a1c6a16), TOBN(0xd99e6b66, 0x29383260), + TOBN(0xea3d4366, 0x5f6d5471), TOBN(0x36974d04, 0xff8cc89b), + TOBN(0xc26c49a1, 0xcfe89d80), TOBN(0xb42c026d, 0xda9c8371), + TOBN(0xca6c013a, 0xdad066d2), TOBN(0xfb8f7228, 0x56a4f3ee), + TOBN(0x08b579ec, 0xd850935b), TOBN(0x34c1a74c, 0xd631e1b3), + TOBN(0xcb5fe596, 0xac198534), TOBN(0x39ff21f6, 0xe1f24f25), + TOBN(0x27f29e14, 0x8f929057), TOBN(0x7a64ae06, 0xc0c853df), + TOBN(0x256cd183, 0x58e9c5ce), TOBN(0x9d9cce82, 0xded092a5), + TOBN(0xcc6e5979, 0x6e93b7c7), TOBN(0xe1e47092, 0x31bb9e27), + TOBN(0xb70b3083, 0xaa9e29a0), TOBN(0xbf181a75, 0x3785e644), + TOBN(0xf53f2c65, 0x8ead09f7), TOBN(0x1335e1d5, 0x9780d14d), + TOBN(0x69cc20e0, 0xcd1b66bc), TOBN(0x9b670a37, 0xbbe0bfc8), + TOBN(0xce53dc81, 0x28efbeed), TOBN(0x0c74e77c, 0x8326a6e5), + TOBN(0x3604e0d2, 0xb88e9a63), TOBN(0xbab38fca, 0x13dc2248), + TOBN(0x8ed6e8c8, 0x5c0a3f1e), TOBN(0xbcad2492, 0x7c87c37f), + TOBN(0xfdfb62bb, 0x9ee3b78d), TOBN(0xeba8e477, 0xcbceba46), + TOBN(0x37d38cb0, 0xeeaede4b), TOBN(0x0bc498e8, 0x7976deb6), + TOBN(0xb2944c04, 0x6b6147fb), TOBN(0x8b123f35, 0xf71f9609), + TOBN(0xa155dcc7, 0xde79dc24), TOBN(0xf1168a32, 0x558f69cd), + TOBN(0xbac21595, 0x0d1850df), TOBN(0x15c8295b, 0xb204c848), + TOBN(0xf661aa36, 0x7d8184ff), TOBN(0xc396228e, 0x30447bdb), + TOBN(0x11cd5143, 0xbde4a59e), TOBN(0xe3a26e3b, 0x6beab5e6), + TOBN(0xd3b3a13f, 0x1402b9d0), TOBN(0x573441c3, 0x2c7bc863), + TOBN(0x4b301ec4, 0x578c3e6e), TOBN(0xc26fc9c4, 0x0adaf57e), + TOBN(0x96e71bfd, 0x7493cea3), TOBN(0xd05d4b3f, 0x1af81456), + TOBN(0xdaca2a8a, 0x6a8c608f), TOBN(0x53ef07f6, 0x0725b276), + TOBN(0x07a5fbd2, 0x7824fc56), TOBN(0x34675218, 0x13289077), + TOBN(0x5bf69fd5, 0xe0c48349), TOBN(0xa613ddd3, 0xb6aa7875), + TOBN(0x7f78c19c, 0x5450d866), TOBN(0x46f4409c, 0x8f84a481), + TOBN(0x9f1d1928, 0x90fce239), TOBN(0x016c4168, 0xb2ce44b9), + TOBN(0xbae023f0, 0xc7435978), TOBN(0xb152c888, 0x20e30e19), + TOBN(0x9c241645, 0xe3fa6faf), TOBN(0x735d95c1, 0x84823e60), + TOBN(0x03197573, 0x03955317), TOBN(0x0b4b02a9, 0xf03b4995), + TOBN(0x076bf559, 0x70274600), TOBN(0x32c5cc53, 0xaaf57508), + TOBN(0xe8af6d1f, 0x60624129), TOBN(0xb7bc5d64, 0x9a5e2b5e), + TOBN(0x3814b048, 0x5f082d72), TOBN(0x76f267f2, 0xce19677a), + TOBN(0x626c630f, 0xb36eed93), TOBN(0x55230cd7, 0x3bf56803), + TOBN(0x78837949, 0xce2736a0), TOBN(0x0d792d60, 0xaa6c55f1), + TOBN(0x0318dbfd, 0xd5c7c5d2), TOBN(0xb38f8da7, 0x072b342d), + TOBN(0x3569bddc, 0x7b8de38a), TOBN(0xf25b5887, 0xa1c94842), + TOBN(0xb2d5b284, 0x2946ad60), TOBN(0x854f29ad, 0xe9d1707e), + TOBN(0xaa5159dc, 0x2c6a4509), TOBN(0x899f94c0, 0x57189837), + TOBN(0xcf6adc51, 0xf4a55b03), TOBN(0x261762de, 0x35e3b2d5), + TOBN(0x4cc43012, 0x04827b51), TOBN(0xcd22a113, 0xc6021442), + TOBN(0xce2fd61a, 0x247c9569), TOBN(0x59a50973, 0xd152beca), + TOBN(0x6c835a11, 0x63a716d4), TOBN(0xc26455ed, 0x187dedcf), + TOBN(0x27f536e0, 0x49ce89e7), TOBN(0x18908539, 0xcc890cb5), + TOBN(0x308909ab, 0xd83c2aa1), TOBN(0xecd3142b, 0x1ab73bd3), + TOBN(0x6a85bf59, 0xb3f5ab84), TOBN(0x3c320a68, 0xf2bea4c6), + TOBN(0xad8dc538, 0x6da4541f), TOBN(0xeaf34eb0, 0xb7c41186), + TOBN(0x1c780129, 0x977c97c4), TOBN(0x5ff9beeb, 0xc57eb9fa), + TOBN(0xa24d0524, 0xc822c478), TOBN(0xfd8eec2a, 0x461cd415), + TOBN(0xfbde194e, 0xf027458c), TOBN(0xb4ff5319, 0x1d1be115), + TOBN(0x63f874d9, 0x4866d6f4), TOBN(0x35c75015, 0xb21ad0c9), + TOBN(0xa6b5c9d6, 0x46ac49d2), TOBN(0x42c77c0b, 0x83137aa9), + TOBN(0x24d000fc, 0x68225a38), TOBN(0x0f63cfc8, 0x2fe1e907), + TOBN(0x22d1b01b, 0xc6441f95), TOBN(0x7d38f719, 0xec8e448f), + TOBN(0x9b33fa5f, 0x787fb1ba), TOBN(0x94dcfda1, 0x190158df), + TOBN(0xc47cb339, 0x5f6d4a09), TOBN(0x6b4f355c, 0xee52b826), + TOBN(0x3d100f5d, 0xf51b930a), TOBN(0xf4512fac, 0x9f668f69), + TOBN(0x546781d5, 0x206c4c74), TOBN(0xd021d4d4, 0xcb4d2e48), + TOBN(0x494a54c2, 0xca085c2d), TOBN(0xf1dbaca4, 0x520850a8), + TOBN(0x63c79326, 0x490a1aca), TOBN(0xcb64dd9c, 0x41526b02), + TOBN(0xbb772591, 0xa2979258), TOBN(0x3f582970, 0x48d97846), + TOBN(0xd66b70d1, 0x7c213ba7), TOBN(0xc28febb5, 0xe8a0ced4), + TOBN(0x6b911831, 0xc10338c1), TOBN(0x0d54e389, 0xbf0126f3), + TOBN(0x7048d460, 0x4af206ee), TOBN(0x786c88f6, 0x77e97cb9), + TOBN(0xd4375ae1, 0xac64802e), TOBN(0x469bcfe1, 0xd53ec11c), + TOBN(0xfc9b340d, 0x47062230), TOBN(0xe743bb57, 0xc5b4a3ac), + TOBN(0xfe00b4aa, 0x59ef45ac), TOBN(0x29a4ef23, 0x59edf188), + TOBN(0x40242efe, 0xb483689b), TOBN(0x2575d3f6, 0x513ac262), + TOBN(0xf30037c8, 0x0ca6db72), TOBN(0xc9fcce82, 0x98864be2), + TOBN(0x84a112ff, 0x0149362d), TOBN(0x95e57582, 0x1c4ae971), + TOBN(0x1fa4b1a8, 0x945cf86c), TOBN(0x4525a734, 0x0b024a2f), + TOBN(0xe76c8b62, 0x8f338360), TOBN(0x483ff593, 0x28edf32b), + TOBN(0x67e8e90a, 0x298b1aec), TOBN(0x9caab338, 0x736d9a21), + TOBN(0x5c09d2fd, 0x66892709), TOBN(0x2496b4dc, 0xb55a1d41), + TOBN(0x93f5fb1a, 0xe24a4394), TOBN(0x08c75049, 0x6fa8f6c1), + TOBN(0xcaead1c2, 0xc905d85f), TOBN(0xe9d7f790, 0x0733ae57), + TOBN(0x24c9a65c, 0xf07cdd94), TOBN(0x7389359c, 0xa4b55931), + TOBN(0xf58709b7, 0x367e45f7), TOBN(0x1f203067, 0xcb7e7adc), + TOBN(0x82444bff, 0xc7b72818), TOBN(0x07303b35, 0xbaac8033), + TOBN(0x1e1ee4e4, 0xd13b7ea1), TOBN(0xe6489b24, 0xe0e74180), + TOBN(0xa5f2c610, 0x7e70ef70), TOBN(0xa1655412, 0xbdd10894), + TOBN(0x555ebefb, 0x7af4194e), TOBN(0x533c1c3c, 0x8e89bd9c), + TOBN(0x735b9b57, 0x89895856), TOBN(0x15fb3cd2, 0x567f5c15), + TOBN(0x057fed45, 0x526f09fd), TOBN(0xe8a4f10c, 0x8128240a), + TOBN(0x9332efc4, 0xff2bfd8d), TOBN(0x214e77a0, 0xbd35aa31), + TOBN(0x32896d73, 0x14faa40e), TOBN(0x767867ec, 0x01e5f186), + TOBN(0xc9adf8f1, 0x17a1813e), TOBN(0xcb6cda78, 0x54741795), + TOBN(0xb7521b6d, 0x349d51aa), TOBN(0xf56b5a9e, 0xe3c7b8e9), + TOBN(0xc6f1e5c9, 0x32a096df), TOBN(0x083667c4, 0xa3635024), + TOBN(0x365ea135, 0x18087f2f), TOBN(0xf1b8eaac, 0xd136e45d), + TOBN(0xc8a0e484, 0x73aec989), TOBN(0xd75a324b, 0x142c9259), + TOBN(0xb7b4d001, 0x01dae185), TOBN(0x45434e0b, 0x9b7a94bc), + TOBN(0xf54339af, 0xfbd8cb0b), TOBN(0xdcc4569e, 0xe98ef49e), + TOBN(0x7789318a, 0x09a51299), TOBN(0x81b4d206, 0xb2b025d8), + TOBN(0xf64aa418, 0xfae85792), TOBN(0x3e50258f, 0xacd7baf7), + TOBN(0xdce84cdb, 0x2996864b), TOBN(0xa2e67089, 0x1f485fa4), + TOBN(0xb28b2bb6, 0x534c6a5a), TOBN(0x31a7ec6b, 0xc94b9d39), + TOBN(0x1d217766, 0xd6bc20da), TOBN(0x4acdb5ec, 0x86761190), + TOBN(0x68726328, 0x73701063), TOBN(0x4d24ee7c, 0x2128c29b), + TOBN(0xc072ebd3, 0xa19fd868), TOBN(0x612e481c, 0xdb8ddd3b), + TOBN(0xb4e1d754, 0x1a64d852), TOBN(0x00ef95ac, 0xc4c6c4ab), + TOBN(0x1536d2ed, 0xaa0a6c46), TOBN(0x61294086, 0x43774790), + TOBN(0x54af25e8, 0x343fda10), TOBN(0x9ff9d98d, 0xfd25d6f2), + TOBN(0x0746af7c, 0x468b8835), TOBN(0x977a31cb, 0x730ecea7), + TOBN(0xa5096b80, 0xc2cf4a81), TOBN(0xaa986833, 0x6458c37a), + TOBN(0x6af29bf3, 0xa6bd9d34), TOBN(0x6a62fe9b, 0x33c5d854), + TOBN(0x50e6c304, 0xb7133b5e), TOBN(0x04b60159, 0x7d6e6848), + TOBN(0x4cd296df, 0x5579bea4), TOBN(0x10e35ac8, 0x5ceedaf1), + TOBN(0x04c4c5fd, 0xe3bcc5b1), TOBN(0x95f9ee8a, 0x89412cf9), + TOBN(0x2c9459ee, 0x82b6eb0f), TOBN(0x2e845765, 0x95c2aadd), + TOBN(0x774a84ae, 0xd327fcfe), TOBN(0xd8c93722, 0x0368d476), + TOBN(0x0dbd5748, 0xf83e8a3b), TOBN(0xa579aa96, 0x8d2495f3), + TOBN(0x535996a0, 0xae496e9b), TOBN(0x07afbfe9, 0xb7f9bcc2), + TOBN(0x3ac1dc6d, 0x5b7bd293), TOBN(0x3b592cff, 0x7022323d), + TOBN(0xba0deb98, 0x9c0a3e76), TOBN(0x18e78e9f, 0x4b197acb), + TOBN(0x211cde10, 0x296c36ef), TOBN(0x7ee89672, 0x82c4da77), + TOBN(0xb617d270, 0xa57836da), TOBN(0xf0cd9c31, 0x9cb7560b), + TOBN(0x01fdcbf7, 0xe455fe90), TOBN(0x3fb53cbb, 0x7e7334f3), + TOBN(0x781e2ea4, 0x4e7de4ec), TOBN(0x8adab3ad, 0x0b384fd0), + TOBN(0x129eee2f, 0x53d64829), TOBN(0x7a471e17, 0xa261492b), + TOBN(0xe4f9adb9, 0xe4cb4a2c), TOBN(0x3d359f6f, 0x97ba2c2d), + TOBN(0x346c6786, 0x0aacd697), TOBN(0x92b444c3, 0x75c2f8a8), + TOBN(0xc79fa117, 0xd85df44e), TOBN(0x56782372, 0x398ddf31), + TOBN(0x60e690f2, 0xbbbab3b8), TOBN(0x4851f8ae, 0x8b04816b), + TOBN(0xc72046ab, 0x9c92e4d2), TOBN(0x518c74a1, 0x7cf3136b), + TOBN(0xff4eb50a, 0xf9877d4c), TOBN(0x14578d90, 0xa919cabb), + TOBN(0x8218f8c4, 0xac5eb2b6), TOBN(0xa3ccc547, 0x542016e4), + TOBN(0x025bf48e, 0x327f8349), TOBN(0xf3e97346, 0xf43cb641), + TOBN(0xdc2bafdf, 0x500f1085), TOBN(0x57167876, 0x2f063055), + TOBN(0x5bd914b9, 0x411925a6), TOBN(0x7c078d48, 0xa1123de5), + TOBN(0xee6bf835, 0x182b165d), TOBN(0xb11b5e5b, 0xba519727), + TOBN(0xe33ea76c, 0x1eea7b85), TOBN(0x2352b461, 0x92d4f85e), + TOBN(0xf101d334, 0xafe115bb), TOBN(0xfabc1294, 0x889175a3), + TOBN(0x7f6bcdc0, 0x5233f925), TOBN(0xe0a802db, 0xe77fec55), + TOBN(0xbdb47b75, 0x8069b659), TOBN(0x1c5e12de, 0xf98fbd74), + TOBN(0x869c58c6, 0x4b8457ee), TOBN(0xa5360f69, 0x4f7ea9f7), + TOBN(0xe576c09f, 0xf460b38f), TOBN(0x6b70d548, 0x22b7fb36), + TOBN(0x3fd237f1, 0x3bfae315), TOBN(0x33797852, 0xcbdff369), + TOBN(0x97df25f5, 0x25b516f9), TOBN(0x46f388f2, 0xba38ad2d), + TOBN(0x656c4658, 0x89d8ddbb), TOBN(0x8830b26e, 0x70f38ee8), + TOBN(0x4320fd5c, 0xde1212b0), TOBN(0xc34f30cf, 0xe4a2edb2), + TOBN(0xabb131a3, 0x56ab64b8), TOBN(0x7f77f0cc, 0xd99c5d26), + TOBN(0x66856a37, 0xbf981d94), TOBN(0x19e76d09, 0x738bd76e), + TOBN(0xe76c8ac3, 0x96238f39), TOBN(0xc0a482be, 0xa830b366), + TOBN(0xb7b8eaff, 0x0b4eb499), TOBN(0x8ecd83bc, 0x4bfb4865), + TOBN(0x971b2cb7, 0xa2f3776f), TOBN(0xb42176a4, 0xf4b88adf), + TOBN(0xb9617df5, 0xbe1fa446), TOBN(0x8b32d508, 0xcd031bd2), + TOBN(0x1c6bd47d, 0x53b618c0), TOBN(0xc424f46c, 0x6a227923), + TOBN(0x7303ffde, 0xdd92d964), TOBN(0xe9712878, 0x71b5abf2), + TOBN(0x8f48a632, 0xf815561d), TOBN(0x85f48ff5, 0xd3c055d1), + TOBN(0x222a1427, 0x7525684f), TOBN(0xd0d841a0, 0x67360cc3), + TOBN(0x4245a926, 0x0b9267c6), TOBN(0xc78913f1, 0xcf07f863), + TOBN(0xaa844c8e, 0x4d0d9e24), TOBN(0xa42ad522, 0x3d5f9017), + TOBN(0xbd371749, 0xa2c989d5), TOBN(0x928292df, 0xe1f5e78e), + TOBN(0x493b383e, 0x0a1ea6da), TOBN(0x5136fd8d, 0x13aee529), + TOBN(0x860c44b1, 0xf2c34a99), TOBN(0x3b00aca4, 0xbf5855ac), + TOBN(0xabf6aaa0, 0xfaaf37be), TOBN(0x65f43682, 0x2a53ec08), + TOBN(0x1d9a5801, 0xa11b12e1), TOBN(0x78a7ab2c, 0xe20ed475), + TOBN(0x0de1067e, 0x9a41e0d5), TOBN(0x30473f5f, 0x305023ea), + TOBN(0xdd3ae09d, 0x169c7d97), TOBN(0x5cd5baa4, 0xcfaef9cd), + TOBN(0x5cd7440b, 0x65a44803), TOBN(0xdc13966a, 0x47f364de), + TOBN(0x077b2be8, 0x2b8357c1), TOBN(0x0cb1b4c5, 0xe9d57c2a), + TOBN(0x7a4ceb32, 0x05ff363e), TOBN(0xf310fa4d, 0xca35a9ef), + TOBN(0xdbb7b352, 0xf97f68c6), TOBN(0x0c773b50, 0x0b02cf58), + TOBN(0xea2e4821, 0x3c1f96d9), TOBN(0xffb357b0, 0xeee01815), + TOBN(0xb9c924cd, 0xe0f28039), TOBN(0x0b36c95a, 0x46a3fbe4), + TOBN(0x1faaaea4, 0x5e46db6c), TOBN(0xcae575c3, 0x1928aaff), + TOBN(0x7f671302, 0xa70dab86), TOBN(0xfcbd12a9, 0x71c58cfc), + TOBN(0xcbef9acf, 0xbee0cb92), TOBN(0x573da0b9, 0xf8c1b583), + TOBN(0x4752fcfe, 0x0d41d550), TOBN(0xe7eec0e3, 0x2155cffe), + TOBN(0x0fc39fcb, 0x545ae248), TOBN(0x522cb8d1, 0x8065f44e), + TOBN(0x263c962a, 0x70cbb96c), TOBN(0xe034362a, 0xbcd124a9), + TOBN(0xf120db28, 0x3c2ae58d), TOBN(0xb9a38d49, 0xfef6d507), + TOBN(0xb1fd2a82, 0x1ff140fd), TOBN(0xbd162f30, 0x20aee7e0), + TOBN(0x4e17a5d4, 0xcb251949), TOBN(0x2aebcb83, 0x4f7e1c3d), + TOBN(0x608eb25f, 0x937b0527), TOBN(0xf42e1e47, 0xeb7d9997), + TOBN(0xeba699c4, 0xb8a53a29), TOBN(0x1f921c71, 0xe091b536), + TOBN(0xcce29e7b, 0x5b26bbd5), TOBN(0x7a8ef5ed, 0x3b61a680), + TOBN(0xe5ef8043, 0xba1f1c7e), TOBN(0x16ea8217, 0x18158dda), + TOBN(0x01778a2b, 0x599ff0f9), TOBN(0x68a923d7, 0x8104fc6b), + TOBN(0x5bfa44df, 0xda694ff3), TOBN(0x4f7199db, 0xf7667f12), + TOBN(0xc06d8ff6, 0xe46f2a79), TOBN(0x08b5dead, 0xe9f8131d), + TOBN(0x02519a59, 0xabb4ce7c), TOBN(0xc4f710bc, 0xb42aec3e), + TOBN(0x3d77b057, 0x78bde41a), TOBN(0x6474bf80, 0xb4186b5a), + TOBN(0x048b3f67, 0x88c65741), TOBN(0xc64519de, 0x03c7c154), + TOBN(0xdf073846, 0x0edfcc4f), TOBN(0x319aa737, 0x48f1aa6b), + TOBN(0x8b9f8a02, 0xca909f77), TOBN(0x90258139, 0x7580bfef), + TOBN(0xd8bfd3ca, 0xc0c22719), TOBN(0xc60209e4, 0xc9ca151e), + TOBN(0x7a744ab5, 0xd9a1a69c), TOBN(0x6de5048b, 0x14937f8f), + TOBN(0x171938d8, 0xe115ac04), TOBN(0x7df70940, 0x1c6b16d2), + TOBN(0xa6aeb663, 0x7f8e94e7), TOBN(0xc130388e, 0x2a2cf094), + TOBN(0x1850be84, 0x77f54e6e), TOBN(0x9f258a72, 0x65d60fe5), + TOBN(0xff7ff0c0, 0x6c9146d6), TOBN(0x039aaf90, 0xe63a830b), + TOBN(0x38f27a73, 0x9460342f), TOBN(0x4703148c, 0x3f795f8a), + TOBN(0x1bb5467b, 0x9681a97e), TOBN(0x00931ba5, 0xecaeb594), + TOBN(0xcdb6719d, 0x786f337c), TOBN(0xd9c01cd2, 0xe704397d), + TOBN(0x0f4a3f20, 0x555c2fef), TOBN(0x00452509, 0x7c0af223), + TOBN(0x54a58047, 0x84db8e76), TOBN(0x3bacf1aa, 0x93c8aa06), + TOBN(0x11ca957c, 0xf7919422), TOBN(0x50641053, 0x78cdaa40), + TOBN(0x7a303874, 0x9f7144ae), TOBN(0x170c963f, 0x43d4acfd), + TOBN(0x5e148149, 0x58ddd3ef), TOBN(0xa7bde582, 0x9e72dba8), + TOBN(0x0769da8b, 0x6fa68750), TOBN(0xfa64e532, 0x572e0249), + TOBN(0xfcaadf9d, 0x2619ad31), TOBN(0x87882daa, 0xa7b349cd), + TOBN(0x9f6eb731, 0x6c67a775), TOBN(0xcb10471a, 0xefc5d0b1), + TOBN(0xb433750c, 0xe1b806b2), TOBN(0x19c5714d, 0x57b1ae7e), + TOBN(0xc0dc8b7b, 0xed03fd3f), TOBN(0xdd03344f, 0x31bc194e), + TOBN(0xa66c52a7, 0x8c6320b5), TOBN(0x8bc82ce3, 0xd0b6fd93), + TOBN(0xf8e13501, 0xb35f1341), TOBN(0xe53156dd, 0x25a43e42), + TOBN(0xd3adf27e, 0x4daeb85c), TOBN(0xb81d8379, 0xbbeddeb5), + TOBN(0x1b0b546e, 0x2e435867), TOBN(0x9020eb94, 0xeba5dd60), + TOBN(0x37d91161, 0x8210cb9d), TOBN(0x4c596b31, 0x5c91f1cf), + TOBN(0xb228a90f, 0x0e0b040d), TOBN(0xbaf02d82, 0x45ff897f), + TOBN(0x2aac79e6, 0x00fa6122), TOBN(0x24828817, 0x8e36f557), + TOBN(0xb9521d31, 0x113ec356), TOBN(0x9e48861e, 0x15eff1f8), + TOBN(0x2aa1d412, 0xe0d41715), TOBN(0x71f86203, 0x53f131b8), + TOBN(0xf60da8da, 0x3fd19408), TOBN(0x4aa716dc, 0x278d9d99), + TOBN(0x394531f7, 0xa8c51c90), TOBN(0xb560b0e8, 0xf59db51c), + TOBN(0xa28fc992, 0xfa34bdad), TOBN(0xf024fa14, 0x9cd4f8bd), + TOBN(0x5cf530f7, 0x23a9d0d3), TOBN(0x615ca193, 0xe28c9b56), + TOBN(0x6d2a483d, 0x6f73c51e), TOBN(0xa4cb2412, 0xea0dc2dd), + TOBN(0x50663c41, 0x1eb917ff), TOBN(0x3d3a74cf, 0xeade299e), + TOBN(0x29b3990f, 0x4a7a9202), TOBN(0xa9bccf59, 0xa7b15c3d), + TOBN(0x66a3ccdc, 0xa5df9208), TOBN(0x48027c14, 0x43f2f929), + TOBN(0xd385377c, 0x40b557f0), TOBN(0xe001c366, 0xcd684660), + TOBN(0x1b18ed6b, 0xe2183a27), TOBN(0x879738d8, 0x63210329), + TOBN(0xa687c74b, 0xbda94882), TOBN(0xd1bbcc48, 0xa684b299), + TOBN(0xaf6f1112, 0x863b3724), TOBN(0x6943d1b4, 0x2c8ce9f8), + TOBN(0xe044a3bb, 0x098cafb4), TOBN(0x27ed2310, 0x60d48caf), + TOBN(0x542b5675, 0x3a31b84d), TOBN(0xcbf3dd50, 0xfcddbed7), + TOBN(0x25031f16, 0x41b1d830), TOBN(0xa7ec851d, 0xcb0c1e27), + TOBN(0xac1c8fe0, 0xb5ae75db), TOBN(0xb24c7557, 0x08c52120), + TOBN(0x57f811dc, 0x1d4636c3), TOBN(0xf8436526, 0x681a9939), + TOBN(0x1f6bc6d9, 0x9c81adb3), TOBN(0x840f8ac3, 0x5b7d80d4), + TOBN(0x731a9811, 0xf4387f1a), TOBN(0x7c501cd3, 0xb5156880), + TOBN(0xa5ca4a07, 0xdfe68867), TOBN(0xf123d8f0, 0x5fcea120), + TOBN(0x1fbb0e71, 0xd607039e), TOBN(0x2b70e215, 0xcd3a4546), + TOBN(0x32d2f01d, 0x53324091), TOBN(0xb796ff08, 0x180ab19b), + TOBN(0x32d87a86, 0x3c57c4aa), TOBN(0x2aed9caf, 0xb7c49a27), + TOBN(0x9fb35eac, 0x31630d98), TOBN(0x338e8cdf, 0x5c3e20a3), + TOBN(0x80f16182, 0x66cde8db), TOBN(0x4e159980, 0x2d72fd36), + TOBN(0xd7b8f13b, 0x9b6e5072), TOBN(0xf5213907, 0x3b7b5dc1), + TOBN(0x4d431f1d, 0x8ce4396e), TOBN(0x37a1a680, 0xa7ed2142), + TOBN(0xbf375696, 0xd01aaf6b), TOBN(0xaa1c0c54, 0xe63aab66), + TOBN(0x3014368b, 0x4ed80940), TOBN(0x67e6d056, 0x7a6fcedd), + TOBN(0x7c208c49, 0xca97579f), TOBN(0xfe3d7a81, 0xa23597f6), + TOBN(0x5e203202, 0x7e096ae2), TOBN(0xb1f3e1e7, 0x24b39366), + TOBN(0x26da26f3, 0x2fdcdffc), TOBN(0x79422f1d, 0x6097be83),} + , + {TOBN(0x263a2cfb, 0x9db3b381), TOBN(0x9c3a2dee, 0xd4df0a4b), + TOBN(0x728d06e9, 0x7d04e61f), TOBN(0x8b1adfbc, 0x42449325), + TOBN(0x6ec1d939, 0x7e053a1b), TOBN(0xee2be5c7, 0x66daf707), + TOBN(0x80ba1e14, 0x810ac7ab), TOBN(0xdd2ae778, 0xf530f174), + TOBN(0x0435d97a, 0x205b9d8b), TOBN(0x6eb8f064, 0x056756d4), + TOBN(0xd5e88a8b, 0xb6f8210e), TOBN(0x070ef12d, 0xec9fd9ea), + TOBN(0x4d849505, 0x3bcc876a), TOBN(0x12a75338, 0xa7404ce3), + TOBN(0xd22b49e1, 0xb8a1db5e), TOBN(0xec1f2051, 0x14bfa5ad), + TOBN(0xadbaeb79, 0xb6828f36), TOBN(0x9d7a0258, 0x01bd5b9e), + TOBN(0xeda01e0d, 0x1e844b0c), TOBN(0x4b625175, 0x887edfc9), + TOBN(0x14109fdd, 0x9669b621), TOBN(0x88a2ca56, 0xf6f87b98), + TOBN(0xfe2eb788, 0x170df6bc), TOBN(0x0cea06f4, 0xffa473f9), + TOBN(0x43ed81b5, 0xc4e83d33), TOBN(0xd9f35879, 0x5efd488b), + TOBN(0x164a620f, 0x9deb4d0f), TOBN(0xc6927bdb, 0xac6a7394), + TOBN(0x45c28df7, 0x9f9e0f03), TOBN(0x2868661e, 0xfcd7e1a9), + TOBN(0x7cf4e8d0, 0xffa348f1), TOBN(0x6bd4c284, 0x398538e0), + TOBN(0x2618a091, 0x289a8619), TOBN(0xef796e60, 0x6671b173), + TOBN(0x664e46e5, 0x9090c632), TOBN(0xa38062d4, 0x1e66f8fb), + TOBN(0x6c744a20, 0x0573274e), TOBN(0xd07b67e4, 0xa9271394), + TOBN(0x391223b2, 0x6bdc0e20), TOBN(0xbe2d93f1, 0xeb0a05a7), + TOBN(0xf23e2e53, 0x3f36d141), TOBN(0xe84bb3d4, 0x4dfca442), + TOBN(0xb804a48d, 0x6b7c023a), TOBN(0x1e16a8fa, 0x76431c3b), + TOBN(0x1b5452ad, 0xddd472e0), TOBN(0x7d405ee7, 0x0d1ee127), + TOBN(0x50fc6f1d, 0xffa27599), TOBN(0x351ac53c, 0xbf391b35), + TOBN(0x7efa14b8, 0x4444896b), TOBN(0x64974d2f, 0xf94027fb), + TOBN(0xefdcd0e8, 0xde84487d), TOBN(0x8c45b260, 0x2b48989b), + TOBN(0xa8fcbbc2, 0xd8463487), TOBN(0xd1b2b3f7, 0x3fbc476c), + TOBN(0x21d005b7, 0xc8f443c0), TOBN(0x518f2e67, 0x40c0139c), + TOBN(0x56036e8c, 0x06d75fc1), TOBN(0x2dcf7bb7, 0x3249a89f), + TOBN(0x81dd1d3d, 0xe245e7dd), TOBN(0xf578dc4b, 0xebd6e2a7), + TOBN(0x4c028903, 0xdf2ce7a0), TOBN(0xaee36288, 0x9c39afac), + TOBN(0xdc847c31, 0x146404ab), TOBN(0x6304c0d8, 0xa4e97818), + TOBN(0xae51dca2, 0xa91f6791), TOBN(0x2abe4190, 0x9baa9efc), + TOBN(0xd9d2e2f4, 0x559c7ac1), TOBN(0xe82f4b51, 0xfc9f773a), + TOBN(0xa7713027, 0x4073e81c), TOBN(0xc0276fac, 0xfbb596fc), + TOBN(0x1d819fc9, 0xa684f70c), TOBN(0x29b47fdd, 0xc9f7b1e0), + TOBN(0x358de103, 0x459b1940), TOBN(0xec881c59, 0x5b013e93), + TOBN(0x51574c93, 0x49532ad3), TOBN(0x2db1d445, 0xb37b46de), + TOBN(0xc6445b87, 0xdf239fd8), TOBN(0xc718af75, 0x151d24ee), + TOBN(0xaea1c4a4, 0xf43c6259), TOBN(0x40c0e5d7, 0x70be02f7), + TOBN(0x6a4590f4, 0x721b33f2), TOBN(0x2124f1fb, 0xfedf04ea), + TOBN(0xf8e53cde, 0x9745efe7), TOBN(0xe7e10432, 0x65f046d9), + TOBN(0xc3fca28e, 0xe4d0c7e6), TOBN(0x847e339a, 0x87253b1b), + TOBN(0x9b595348, 0x3743e643), TOBN(0xcb6a0a0b, 0x4fd12fc5), + TOBN(0xfb6836c3, 0x27d02dcc), TOBN(0x5ad00982, 0x7a68bcc2), + TOBN(0x1b24b44c, 0x005e912d), TOBN(0xcc83d20f, 0x811fdcfe), + TOBN(0x36527ec1, 0x666fba0c), TOBN(0x69948197, 0x14754635), + TOBN(0xfcdcb1a8, 0x556da9c2), TOBN(0xa5934267, 0x81a732b2), + TOBN(0xec1214ed, 0xa714181d), TOBN(0x609ac13b, 0x6067b341), + TOBN(0xff4b4c97, 0xa545df1f), TOBN(0xa1240501, 0x34d2076b), + TOBN(0x6efa0c23, 0x1409ca97), TOBN(0x254cc1a8, 0x20638c43), + TOBN(0xd4e363af, 0xdcfb46cd), TOBN(0x62c2adc3, 0x03942a27), + TOBN(0xc67b9df0, 0x56e46483), TOBN(0xa55abb20, 0x63736356), + TOBN(0xab93c098, 0xc551bc52), TOBN(0x382b49f9, 0xb15fe64b), + TOBN(0x9ec221ad, 0x4dff8d47), TOBN(0x79caf615, 0x437df4d6), + TOBN(0x5f13dc64, 0xbb456509), TOBN(0xe4c589d9, 0x191f0714), + TOBN(0x27b6a8ab, 0x3fd40e09), TOBN(0xe455842e, 0x77313ea9), + TOBN(0x8b51d1e2, 0x1f55988b), TOBN(0x5716dd73, 0x062bbbfc), + TOBN(0x633c11e5, 0x4e8bf3de), TOBN(0x9a0e77b6, 0x1b85be3b), + TOBN(0x56510729, 0x0911cca6), TOBN(0x27e76495, 0xefa6590f), + TOBN(0xe4ac8b33, 0x070d3aab), TOBN(0x2643672b, 0x9a2cd5e5), + TOBN(0x52eff79b, 0x1cfc9173), TOBN(0x665ca49b, 0x90a7c13f), + TOBN(0x5a8dda59, 0xb3efb998), TOBN(0x8a5b922d, 0x052f1341), + TOBN(0xae9ebbab, 0x3cf9a530), TOBN(0x35986e7b, 0xf56da4d7), + TOBN(0x3a636b5c, 0xff3513cc), TOBN(0xbb0cf8ba, 0x3198f7dd), + TOBN(0xb8d40522, 0x41f16f86), TOBN(0x760575d8, 0xde13a7bf), + TOBN(0x36f74e16, 0x9f7aa181), TOBN(0x163a3ecf, 0xf509ed1c), + TOBN(0x6aead61f, 0x3c40a491), TOBN(0x158c95fc, 0xdfe8fcaa), + TOBN(0xa3991b6e, 0x13cda46f), TOBN(0x79482415, 0x342faed0), + TOBN(0xf3ba5bde, 0x666b5970), TOBN(0x1d52e6bc, 0xb26ab6dd), + TOBN(0x768ba1e7, 0x8608dd3d), TOBN(0x4930db2a, 0xea076586), + TOBN(0xd9575714, 0xe7dc1afa), TOBN(0x1fc7bf7d, 0xf7c58817), + TOBN(0x6b47accd, 0xd9eee96c), TOBN(0x0ca277fb, 0xe58cec37), + TOBN(0x113fe413, 0xe702c42a), TOBN(0xdd1764ee, 0xc47cbe51), + TOBN(0x041e7cde, 0x7b3ed739), TOBN(0x50cb7459, 0x5ce9e1c0), + TOBN(0x35568513, 0x2925b212), TOBN(0x7cff95c4, 0x001b081c), + TOBN(0x63ee4cbd, 0x8088b454), TOBN(0xdb7f32f7, 0x9a9e0c8a), + TOBN(0xb377d418, 0x6b2447cb), TOBN(0xe3e982aa, 0xd370219b), + TOBN(0x06ccc1e4, 0xc2a2a593), TOBN(0x72c36865, 0x0773f24f), + TOBN(0xa13b4da7, 0x95859423), TOBN(0x8bbf1d33, 0x75040c8f), + TOBN(0x726f0973, 0xda50c991), TOBN(0x48afcd5b, 0x822d6ee2), + TOBN(0xe5fc718b, 0x20fd7771), TOBN(0xb9e8e77d, 0xfd0807a1), + TOBN(0x7f5e0f44, 0x99a7703d), TOBN(0x6972930e, 0x618e36f3), + TOBN(0x2b7c77b8, 0x23807bbe), TOBN(0xe5b82405, 0xcb27ff50), + TOBN(0xba8b8be3, 0xbd379062), TOBN(0xd64b7a1d, 0x2dce4a92), + TOBN(0x040a73c5, 0xb2952e37), TOBN(0x0a9e252e, 0xd438aeca), + TOBN(0xdd43956b, 0xc39d3bcb), TOBN(0x1a31ca00, 0xb32b2d63), + TOBN(0xd67133b8, 0x5c417a18), TOBN(0xd08e4790, 0x2ef442c8), + TOBN(0x98cb1ae9, 0x255c0980), TOBN(0x4bd86381, 0x2b4a739f), + TOBN(0x5a5c31e1, 0x1e4a45a1), TOBN(0x1e5d55fe, 0x9cb0db2f), + TOBN(0x74661b06, 0x8ff5cc29), TOBN(0x026b389f, 0x0eb8a4f4), + TOBN(0x536b21a4, 0x58848c24), TOBN(0x2e5bf8ec, 0x81dc72b0), + TOBN(0x03c187d0, 0xad886aac), TOBN(0x5c16878a, 0xb771b645), + TOBN(0xb07dfc6f, 0xc74045ab), TOBN(0x2c6360bf, 0x7800caed), + TOBN(0x24295bb5, 0xb9c972a3), TOBN(0xc9e6f88e, 0x7c9a6dba), + TOBN(0x90ffbf24, 0x92a79aa6), TOBN(0xde29d50a, 0x41c26ac2), + TOBN(0x9f0af483, 0xd309cbe6), TOBN(0x5b020d8a, 0xe0bced4f), + TOBN(0x606e986d, 0xb38023e3), TOBN(0xad8f2c9d, 0x1abc6933), + TOBN(0x19292e1d, 0xe7400e93), TOBN(0xfe3e18a9, 0x52be5e4d), + TOBN(0xe8e9771d, 0x2e0680bf), TOBN(0x8c5bec98, 0xc54db063), + TOBN(0x2af9662a, 0x74a55d1f), TOBN(0xe3fbf28f, 0x046f66d8), + TOBN(0xa3a72ab4, 0xd4dc4794), TOBN(0x09779f45, 0x5c7c2dd8), + TOBN(0xd893bdaf, 0xc3d19d8d), TOBN(0xd5a75094, 0x57d6a6df), + TOBN(0x8cf8fef9, 0x952e6255), TOBN(0x3da67cfb, 0xda9a8aff), + TOBN(0x4c23f62a, 0x2c160dcd), TOBN(0x34e6c5e3, 0x8f90eaef), + TOBN(0x35865519, 0xa9a65d5a), TOBN(0x07c48aae, 0x8fd38a3d), + TOBN(0xb7e7aeda, 0x50068527), TOBN(0x2c09ef23, 0x1c90936a), + TOBN(0x31ecfeb6, 0xe879324c), TOBN(0xa0871f6b, 0xfb0ec938), + TOBN(0xb1f0fb68, 0xd84d835d), TOBN(0xc90caf39, 0x861dc1e6), + TOBN(0x12e5b046, 0x7594f8d7), TOBN(0x26897ae2, 0x65012b92), + TOBN(0xbcf68a08, 0xa4d6755d), TOBN(0x403ee41c, 0x0991fbda), + TOBN(0x733e343e, 0x3bbf17e8), TOBN(0xd2c7980d, 0x679b3d65), + TOBN(0x33056232, 0xd2e11305), TOBN(0x966be492, 0xf3c07a6f), + TOBN(0x6a8878ff, 0xbb15509d), TOBN(0xff221101, 0x0a9b59a4), + TOBN(0x6c9f564a, 0xabe30129), TOBN(0xc6f2c940, 0x336e64cf), + TOBN(0x0fe75262, 0x8b0c8022), TOBN(0xbe0267e9, 0x6ae8db87), + TOBN(0x22e192f1, 0x93bc042b), TOBN(0xf085b534, 0xb237c458), + TOBN(0xa0d192bd, 0x832c4168), TOBN(0x7a76e9e3, 0xbdf6271d), + TOBN(0x52a882fa, 0xb88911b5), TOBN(0xc85345e4, 0xb4db0eb5), + TOBN(0xa3be02a6, 0x81a7c3ff), TOBN(0x51889c8c, 0xf0ec0469), + TOBN(0x9d031369, 0xa5e829e5), TOBN(0xcbb4c6fc, 0x1607aa41), + TOBN(0x75ac59a6, 0x241d84c1), TOBN(0xc043f2bf, 0x8829e0ee), + TOBN(0x82a38f75, 0x8ea5e185), TOBN(0x8bda40b9, 0xd87cbd9f), + TOBN(0x9e65e75e, 0x2d8fc601), TOBN(0x3d515f74, 0xa35690b3), + TOBN(0x534acf4f, 0xda79e5ac), TOBN(0x68b83b3a, 0x8630215f), + TOBN(0x5c748b2e, 0xd085756e), TOBN(0xb0317258, 0xe5d37cb2), + TOBN(0x6735841a, 0xc5ccc2c4), TOBN(0x7d7dc96b, 0x3d9d5069), + TOBN(0xa147e410, 0xfd1754bd), TOBN(0x65296e94, 0xd399ddd5), + TOBN(0xf6b5b2d0, 0xbc8fa5bc), TOBN(0x8a5ead67, 0x500c277b), + TOBN(0x214625e6, 0xdfa08a5d), TOBN(0x51fdfedc, 0x959cf047), + TOBN(0x6bc9430b, 0x289fca32), TOBN(0xe36ff0cf, 0x9d9bdc3f), + TOBN(0x2fe187cb, 0x58ea0ede), TOBN(0xed66af20, 0x5a900b3f), + TOBN(0x00e0968b, 0x5fa9f4d6), TOBN(0x2d4066ce, 0x37a362e7), + TOBN(0xa99a9748, 0xbd07e772), TOBN(0x710989c0, 0x06a4f1d0), + TOBN(0xd5dedf35, 0xce40cbd8), TOBN(0xab55c5f0, 0x1743293d), + TOBN(0x766f1144, 0x8aa24e2c), TOBN(0x94d874f8, 0x605fbcb4), + TOBN(0xa365f0e8, 0xa518001b), TOBN(0xee605eb6, 0x9d04ef0f), + TOBN(0x5a3915cd, 0xba8d4d25), TOBN(0x44c0e1b8, 0xb5113472), + TOBN(0xcbb024e8, 0x8b6740dc), TOBN(0x89087a53, 0xee1d4f0c), + TOBN(0xa88fa05c, 0x1fc4e372), TOBN(0x8bf395cb, 0xaf8b3af2), + TOBN(0x1e71c9a1, 0xdeb8568b), TOBN(0xa35daea0, 0x80fb3d32), + TOBN(0xe8b6f266, 0x2cf8fb81), TOBN(0x6d51afe8, 0x9490696a), + TOBN(0x81beac6e, 0x51803a19), TOBN(0xe3d24b7f, 0x86219080), + TOBN(0x727cfd9d, 0xdf6f463c), TOBN(0x8c6865ca, 0x72284ee8), + TOBN(0x32c88b7d, 0xb743f4ef), TOBN(0x3793909b, 0xe7d11dce), + TOBN(0xd398f922, 0x2ff2ebe8), TOBN(0x2c70ca44, 0xe5e49796), + TOBN(0xdf4d9929, 0xcb1131b1), TOBN(0x7826f298, 0x25888e79), + TOBN(0x4d3a112c, 0xf1d8740a), TOBN(0x00384cb6, 0x270afa8b), + TOBN(0xcb64125b, 0x3ab48095), TOBN(0x3451c256, 0x62d05106), + TOBN(0xd73d577d, 0xa4955845), TOBN(0x39570c16, 0xbf9f4433), + TOBN(0xd7dfaad3, 0xadecf263), TOBN(0xf1c3d8d1, 0xdc76e102), + TOBN(0x5e774a58, 0x54c6a836), TOBN(0xdad4b672, 0x3e92d47b), + TOBN(0xbe7e990f, 0xf0d796a0), TOBN(0x5fc62478, 0xdf0e8b02), + TOBN(0x8aae8bf4, 0x030c00ad), TOBN(0x3d2db93b, 0x9004ba0f), + TOBN(0xe48c8a79, 0xd85d5ddc), TOBN(0xe907caa7, 0x6bb07f34), + TOBN(0x58db343a, 0xa39eaed5), TOBN(0x0ea6e007, 0xadaf5724), + TOBN(0xe00df169, 0xd23233f3), TOBN(0x3e322796, 0x77cb637f), + TOBN(0x1f897c0e, 0x1da0cf6c), TOBN(0xa651f5d8, 0x31d6bbdd), + TOBN(0xdd61af19, 0x1a230c76), TOBN(0xbd527272, 0xcdaa5e4a), + TOBN(0xca753636, 0xd0abcd7e), TOBN(0x78bdd37c, 0x370bd8dc), + TOBN(0xc23916c2, 0x17cd93fe), TOBN(0x65b97a4d, 0xdadce6e2), + TOBN(0xe04ed4eb, 0x174e42f8), TOBN(0x1491ccaa, 0xbb21480a), + TOBN(0x145a8280, 0x23196332), TOBN(0x3c3862d7, 0x587b479a), + TOBN(0x9f4a88a3, 0x01dcd0ed), TOBN(0x4da2b7ef, 0x3ea12f1f), + TOBN(0xf8e7ae33, 0xb126e48e), TOBN(0x404a0b32, 0xf494e237), + TOBN(0x9beac474, 0xc55acadb), TOBN(0x4ee5cf3b, 0xcbec9fd9), + TOBN(0x336b33b9, 0x7df3c8c3), TOBN(0xbd905fe3, 0xb76808fd), + TOBN(0x8f436981, 0xaa45c16a), TOBN(0x255c5bfa, 0x3dd27b62), + TOBN(0x71965cbf, 0xc3dd9b4d), TOBN(0xce23edbf, 0xfc068a87), + TOBN(0xb78d4725, 0x745b029b), TOBN(0x74610713, 0xcefdd9bd), + TOBN(0x7116f75f, 0x1266bf52), TOBN(0x02046722, 0x18e49bb6), + TOBN(0xdf43df9f, 0x3d6f19e3), TOBN(0xef1bc7d0, 0xe685cb2f), + TOBN(0xcddb27c1, 0x7078c432), TOBN(0xe1961b9c, 0xb77fedb7), + TOBN(0x1edc2f5c, 0xc2290570), TOBN(0x2c3fefca, 0x19cbd886), + TOBN(0xcf880a36, 0xc2af389a), TOBN(0x96c610fd, 0xbda71cea), + TOBN(0xf03977a9, 0x32aa8463), TOBN(0x8eb7763f, 0x8586d90a), + TOBN(0x3f342454, 0x2a296e77), TOBN(0xc8718683, 0x42837a35), + TOBN(0x7dc71090, 0x6a09c731), TOBN(0x54778ffb, 0x51b816db), + TOBN(0x6b33bfec, 0xaf06defd), TOBN(0xfe3c105f, 0x8592b70b), + TOBN(0xf937fda4, 0x61da6114), TOBN(0x3c13e651, 0x4c266ad7), + TOBN(0xe363a829, 0x855938e8), TOBN(0x2eeb5d9e, 0x9de54b72), + TOBN(0xbeb93b0e, 0x20ccfab9), TOBN(0x3dffbb5f, 0x25e61a25), + TOBN(0x7f655e43, 0x1acc093d), TOBN(0x0cb6cc3d, 0x3964ce61), + TOBN(0x6ab283a1, 0xe5e9b460), TOBN(0x55d787c5, 0xa1c7e72d), + TOBN(0x4d2efd47, 0xdeadbf02), TOBN(0x11e80219, 0xac459068), + TOBN(0x810c7626, 0x71f311f0), TOBN(0xfa17ef8d, 0x4ab6ef53), + TOBN(0xaf47fd25, 0x93e43bff), TOBN(0x5cb5ff3f, 0x0be40632), + TOBN(0x54687106, 0x8ee61da3), TOBN(0x7764196e, 0xb08afd0f), + TOBN(0x831ab3ed, 0xf0290a8f), TOBN(0xcae81966, 0xcb47c387), + TOBN(0xaad7dece, 0x184efb4f), TOBN(0xdcfc53b3, 0x4749110e), + TOBN(0x6698f23c, 0x4cb632f9), TOBN(0xc42a1ad6, 0xb91f8067), + TOBN(0xb116a81d, 0x6284180a), TOBN(0xebedf5f8, 0xe901326f), + TOBN(0xf2274c9f, 0x97e3e044), TOBN(0x42018520, 0x11d09fc9), + TOBN(0x56a65f17, 0xd18e6e23), TOBN(0x2ea61e2a, 0x352b683c), + TOBN(0x27d291bc, 0x575eaa94), TOBN(0x9e7bc721, 0xb8ff522d), + TOBN(0x5f7268bf, 0xa7f04d6f), TOBN(0x5868c73f, 0xaba41748), + TOBN(0x9f85c2db, 0x7be0eead), TOBN(0x511e7842, 0xff719135), + TOBN(0x5a06b1e9, 0xc5ea90d7), TOBN(0x0c19e283, 0x26fab631), + TOBN(0x8af8f0cf, 0xe9206c55), TOBN(0x89389cb4, 0x3553c06a), + TOBN(0x39dbed97, 0xf65f8004), TOBN(0x0621b037, 0xc508991d), + TOBN(0x1c52e635, 0x96e78cc4), TOBN(0x5385c8b2, 0x0c06b4a8), + TOBN(0xd84ddfdb, 0xb0e87d03), TOBN(0xc49dfb66, 0x934bafad), + TOBN(0x7071e170, 0x59f70772), TOBN(0x3a073a84, 0x3a1db56b), + TOBN(0x03494903, 0x3b8af190), TOBN(0x7d882de3, 0xd32920f0), + TOBN(0x91633f0a, 0xb2cf8940), TOBN(0x72b0b178, 0x6f948f51), + TOBN(0x2d28dc30, 0x782653c8), TOBN(0x88829849, 0xdb903a05), + TOBN(0xb8095d0c, 0x6a19d2bb), TOBN(0x4b9e7f0c, 0x86f782cb), + TOBN(0x7af73988, 0x2d907064), TOBN(0xd12be0fe, 0x8b32643c), + TOBN(0x358ed23d, 0x0e165dc3), TOBN(0x3d47ce62, 0x4e2378ce), + TOBN(0x7e2bb0b9, 0xfeb8a087), TOBN(0x3246e8ae, 0xe29e10b9), + TOBN(0x459f4ec7, 0x03ce2b4d), TOBN(0xe9b4ca1b, 0xbbc077cf), + TOBN(0x2613b4f2, 0x0e9940c1), TOBN(0xfc598bb9, 0x047d1eb1), + TOBN(0x9744c62b, 0x45036099), TOBN(0xa9dee742, 0x167c65d8), + TOBN(0x0c511525, 0xdabe1943), TOBN(0xda110554, 0x93c6c624), + TOBN(0xae00a52c, 0x651a3be2), TOBN(0xcda5111d, 0x884449a6), + TOBN(0x063c06f4, 0xff33bed1), TOBN(0x73baaf9a, 0x0d3d76b4), + TOBN(0x52fb0c9d, 0x7fc63668), TOBN(0x6886c9dd, 0x0c039cde), + TOBN(0x602bd599, 0x55b22351), TOBN(0xb00cab02, 0x360c7c13), + TOBN(0x8cb616bc, 0x81b69442), TOBN(0x41486700, 0xb55c3cee), + TOBN(0x71093281, 0xf49ba278), TOBN(0xad956d9c, 0x64a50710), + TOBN(0x9561f28b, 0x638a7e81), TOBN(0x54155cdf, 0x5980ddc3), + TOBN(0xb2db4a96, 0xd26f247a), TOBN(0x9d774e4e, 0x4787d100), + TOBN(0x1a9e6e2e, 0x078637d2), TOBN(0x1c363e2d, 0x5e0ae06a), + TOBN(0x7493483e, 0xe9cfa354), TOBN(0x76843cb3, 0x7f74b98d), + TOBN(0xbaca6591, 0xd4b66947), TOBN(0xb452ce98, 0x04460a8c), + TOBN(0x6830d246, 0x43768f55), TOBN(0xf4197ed8, 0x7dff12df), + TOBN(0x6521b472, 0x400dd0f7), TOBN(0x59f5ca8f, 0x4b1e7093), + TOBN(0x6feff11b, 0x080338ae), TOBN(0x0ada31f6, 0xa29ca3c6), + TOBN(0x24794eb6, 0x94a2c215), TOBN(0xd83a43ab, 0x05a57ab4), + TOBN(0x264a543a, 0x2a6f89fe), TOBN(0x2c2a3868, 0xdd5ec7c2), + TOBN(0xd3373940, 0x8439d9b2), TOBN(0x715ea672, 0x0acd1f11), + TOBN(0x42c1d235, 0xe7e6cc19), TOBN(0x81ce6e96, 0xb990585c), + TOBN(0x04e5dfe0, 0xd809c7bd), TOBN(0xd7b2580c, 0x8f1050ab), + TOBN(0x6d91ad78, 0xd8a4176f), TOBN(0x0af556ee, 0x4e2e897c), + TOBN(0x162a8b73, 0x921de0ac), TOBN(0x52ac9c22, 0x7ea78400), + TOBN(0xee2a4eea, 0xefce2174), TOBN(0xbe61844e, 0x6d637f79), + TOBN(0x0491f1bc, 0x789a283b), TOBN(0x72d3ac3d, 0x880836f4), + TOBN(0xaa1c5ea3, 0x88e5402d), TOBN(0x1b192421, 0xd5cc473d), + TOBN(0x5c0b9998, 0x9dc84cac), TOBN(0xb0a8482d, 0x9c6e75b8), + TOBN(0x639961d0, 0x3a191ce2), TOBN(0xda3bc865, 0x6d837930), + TOBN(0xca990653, 0x056e6f8f), TOBN(0x84861c41, 0x64d133a7), + TOBN(0x8b403276, 0x746abe40), TOBN(0xb7b4d51a, 0xebf8e303), + TOBN(0x05b43211, 0x220a255d), TOBN(0xc997152c, 0x02419e6e), + TOBN(0x76ff47b6, 0x630c2fea), TOBN(0x50518677, 0x281fdade), + TOBN(0x3283b8ba, 0xcf902b0b), TOBN(0x8d4b4eb5, 0x37db303b), + TOBN(0xcc89f42d, 0x755011bc), TOBN(0xb43d74bb, 0xdd09d19b), + TOBN(0x65746bc9, 0x8adba350), TOBN(0x364eaf8c, 0xb51c1927), + TOBN(0x13c76596, 0x10ad72ec), TOBN(0x30045121, 0xf8d40c20), + TOBN(0x6d2d99b7, 0xea7b979b), TOBN(0xcd78cd74, 0xe6fb3bcd), + TOBN(0x11e45a9e, 0x86cffbfe), TOBN(0x78a61cf4, 0x637024f6), + TOBN(0xd06bc872, 0x3d502295), TOBN(0xf1376854, 0x458cb288), + TOBN(0xb9db26a1, 0x342f8586), TOBN(0xf33effcf, 0x4beee09e), + TOBN(0xd7e0c4cd, 0xb30cfb3a), TOBN(0x6d09b8c1, 0x6c9db4c8), + TOBN(0x40ba1a42, 0x07c8d9df), TOBN(0x6fd495f7, 0x1c52c66d), + TOBN(0xfb0e169f, 0x275264da), TOBN(0x80c2b746, 0xe57d8362), + TOBN(0xedd987f7, 0x49ad7222), TOBN(0xfdc229af, 0x4398ec7b),} + , + {TOBN(0xb0d1ed84, 0x52666a58), TOBN(0x4bcb6e00, 0xe6a9c3c2), + TOBN(0x3c57411c, 0x26906408), TOBN(0xcfc20755, 0x13556400), + TOBN(0xa08b1c50, 0x5294dba3), TOBN(0xa30ba286, 0x8b7dd31e), + TOBN(0xd70ba90e, 0x991eca74), TOBN(0x094e142c, 0xe762c2b9), + TOBN(0xb81d783e, 0x979f3925), TOBN(0x1efd130a, 0xaf4c89a7), + TOBN(0x525c2144, 0xfd1bf7fa), TOBN(0x4b296904, 0x1b265a9e), + TOBN(0xed8e9634, 0xb9db65b6), TOBN(0x35c82e32, 0x03599d8a), + TOBN(0xdaa7a54f, 0x403563f3), TOBN(0x9df088ad, 0x022c38ab), + TOBN(0xe5cfb066, 0xbb3fd30a), TOBN(0x429169da, 0xeff0354e), + TOBN(0x809cf852, 0x3524e36c), TOBN(0x136f4fb3, 0x0155be1d), + TOBN(0x4826af01, 0x1fbba712), TOBN(0x6ef0f0b4, 0x506ba1a1), + TOBN(0xd9928b31, 0x77aea73e), TOBN(0xe2bf6af2, 0x5eaa244e), + TOBN(0x8d084f12, 0x4237b64b), TOBN(0x688ebe99, 0xe3ecfd07), + TOBN(0x57b8a70c, 0xf6845dd8), TOBN(0x808fc59c, 0x5da4a325), + TOBN(0xa9032b2b, 0xa3585862), TOBN(0xb66825d5, 0xedf29386), + TOBN(0xb5a5a8db, 0x431ec29b), TOBN(0xbb143a98, 0x3a1e8dc8), + TOBN(0x35ee94ce, 0x12ae381b), TOBN(0x3a7f176c, 0x86ccda90), + TOBN(0xc63a657e, 0x4606eaca), TOBN(0x9ae5a380, 0x43cd04df), + TOBN(0x9bec8d15, 0xed251b46), TOBN(0x1f5d6d30, 0xcaca5e64), + TOBN(0x347b3b35, 0x9ff20f07), TOBN(0x4d65f034, 0xf7e4b286), + TOBN(0x9e93ba24, 0xf111661e), TOBN(0xedced484, 0xb105eb04), + TOBN(0x96dc9ba1, 0xf424b578), TOBN(0xbf8f66b7, 0xe83e9069), + TOBN(0x872d4df4, 0xd7ed8216), TOBN(0xbf07f377, 0x8e2cbecf), + TOBN(0x4281d899, 0x98e73754), TOBN(0xfec85fbb, 0x8aab8708), + TOBN(0x9a3c0dee, 0xa5ba5b0b), TOBN(0xe6a116ce, 0x42d05299), + TOBN(0xae9775fe, 0xe9b02d42), TOBN(0x72b05200, 0xa1545cb6), + TOBN(0xbc506f7d, 0x31a3b4ea), TOBN(0xe5893078, 0x8bbd9b32), + TOBN(0xc8bc5f37, 0xe4b12a97), TOBN(0x6b000c06, 0x4a73b671), + TOBN(0x13b5bf22, 0x765fa7d0), TOBN(0x59805bf0, 0x1d6a5370), + TOBN(0x67a5e29d, 0x4280db98), TOBN(0x4f53916f, 0x776b1ce3), + TOBN(0x714ff61f, 0x33ddf626), TOBN(0x4206238e, 0xa085d103), + TOBN(0x1c50d4b7, 0xe5809ee3), TOBN(0x999f450d, 0x85f8eb1d), + TOBN(0x658a6051, 0xe4c79e9b), TOBN(0x1394cb73, 0xc66a9fea), + TOBN(0x27f31ed5, 0xc6be7b23), TOBN(0xf4c88f36, 0x5aa6f8fe), + TOBN(0x0fb0721f, 0x4aaa499e), TOBN(0x68b3a7d5, 0xe3fb2a6b), + TOBN(0xa788097d, 0x3a92851d), TOBN(0x060e7f8a, 0xe96f4913), + TOBN(0x82eebe73, 0x1a3a93bc), TOBN(0x42bbf465, 0xa21adc1a), + TOBN(0xc10b6fa4, 0xef030efd), TOBN(0x247aa4c7, 0x87b097bb), + TOBN(0x8b8dc632, 0xf60c77da), TOBN(0x6ffbc26a, 0xc223523e), + TOBN(0xa4f6ff11, 0x344579cf), TOBN(0x5825653c, 0x980250f6), + TOBN(0xb2dd097e, 0xbc1aa2b9), TOBN(0x07889393, 0x37a0333a), + TOBN(0x1cf55e71, 0x37a0db38), TOBN(0x2648487f, 0x792c1613), + TOBN(0xdad01336, 0x3fcef261), TOBN(0x6239c81d, 0x0eabf129), + TOBN(0x8ee761de, 0x9d276be2), TOBN(0x406a7a34, 0x1eda6ad3), + TOBN(0x4bf367ba, 0x4a493b31), TOBN(0x54f20a52, 0x9bf7f026), + TOBN(0xb696e062, 0x9795914b), TOBN(0xcddab96d, 0x8bf236ac), + TOBN(0x4ff2c70a, 0xed25ea13), TOBN(0xfa1d09eb, 0x81cbbbe7), + TOBN(0x88fc8c87, 0x468544c5), TOBN(0x847a670d, 0x696b3317), + TOBN(0xf133421e, 0x64bcb626), TOBN(0xaea638c8, 0x26dee0b5), + TOBN(0xd6e7680b, 0xb310346c), TOBN(0xe06f4097, 0xd5d4ced3), + TOBN(0x09961452, 0x7512a30b), TOBN(0xf3d867fd, 0xe589a59a), + TOBN(0x2e73254f, 0x52d0c180), TOBN(0x9063d8a3, 0x333c74ac), + TOBN(0xeda6c595, 0xd314e7bc), TOBN(0x2ee7464b, 0x467899ed), + TOBN(0x1cef423c, 0x0a1ed5d3), TOBN(0x217e76ea, 0x69cc7613), + TOBN(0x27ccce1f, 0xe7cda917), TOBN(0x12d8016b, 0x8a893f16), + TOBN(0xbcd6de84, 0x9fc74f6b), TOBN(0xfa5817e2, 0xf3144e61), + TOBN(0x1f354164, 0x0821ee4c), TOBN(0x1583eab4, 0x0bc61992), + TOBN(0x7490caf6, 0x1d72879f), TOBN(0x998ad9f3, 0xf76ae7b2), + TOBN(0x1e181950, 0xa41157f7), TOBN(0xa9d7e1e6, 0xe8da3a7e), + TOBN(0x963784eb, 0x8426b95f), TOBN(0x0ee4ed6e, 0x542e2a10), + TOBN(0xb79d4cc5, 0xac751e7b), TOBN(0x93f96472, 0xfd4211bd), + TOBN(0x8c72d3d2, 0xc8de4fc6), TOBN(0x7b69cbf5, 0xdf44f064), + TOBN(0x3da90ca2, 0xf4bf94e1), TOBN(0x1a5325f8, 0xf12894e2), + TOBN(0x0a437f6c, 0x7917d60b), TOBN(0x9be70486, 0x96c9cb5d), + TOBN(0xb4d880bf, 0xe1dc5c05), TOBN(0xd738adda, 0xeebeeb57), + TOBN(0x6f0119d3, 0xdf0fe6a3), TOBN(0x5c686e55, 0x66eaaf5a), + TOBN(0x9cb10b50, 0xdfd0b7ec), TOBN(0xbdd0264b, 0x6a497c21), + TOBN(0xfc093514, 0x8c546c96), TOBN(0x58a947fa, 0x79dbf42a), + TOBN(0xc0b48d4e, 0x49ccd6d7), TOBN(0xff8fb02c, 0x88bd5580), + TOBN(0xc75235e9, 0x07d473b2), TOBN(0x4fab1ac5, 0xa2188af3), + TOBN(0x030fa3bc, 0x97576ec0), TOBN(0xe8c946e8, 0x0b7e7d2f), + TOBN(0x40a5c9cc, 0x70305600), TOBN(0x6d8260a9, 0xc8b013b4), + TOBN(0x0368304f, 0x70bba85c), TOBN(0xad090da1, 0xa4a0d311), + TOBN(0x7170e870, 0x2415eec1), TOBN(0xbfba35fe, 0x8461ea47), + TOBN(0x6279019a, 0xc1e91938), TOBN(0xa47638f3, 0x1afc415f), + TOBN(0x36c65cbb, 0xbcba0e0f), TOBN(0x02160efb, 0x034e2c48), + TOBN(0xe6c51073, 0x615cd9e4), TOBN(0x498ec047, 0xf1243c06), + TOBN(0x3e5a8809, 0xb17b3d8c), TOBN(0x5cd99e61, 0x0cc565f1), + TOBN(0x81e312df, 0x7851dafe), TOBN(0xf156f5ba, 0xa79061e2), + TOBN(0x80d62b71, 0x880c590e), TOBN(0xbec9746f, 0x0a39faa1), + TOBN(0x1d98a9c1, 0xc8ed1f7a), TOBN(0x09e43bb5, 0xa81d5ff2), + TOBN(0xd5f00f68, 0x0da0794a), TOBN(0x412050d9, 0x661aa836), + TOBN(0xa89f7c4e, 0x90747e40), TOBN(0x6dc05ebb, 0xb62a3686), + TOBN(0xdf4de847, 0x308e3353), TOBN(0x53868fbb, 0x9fb53bb9), + TOBN(0x2b09d2c3, 0xcfdcf7dd), TOBN(0x41a9fce3, 0x723fcab4), + TOBN(0x73d905f7, 0x07f57ca3), TOBN(0x080f9fb1, 0xac8e1555), + TOBN(0x7c088e84, 0x9ba7a531), TOBN(0x07d35586, 0xed9a147f), + TOBN(0x602846ab, 0xaf48c336), TOBN(0x7320fd32, 0x0ccf0e79), + TOBN(0xaa780798, 0xb18bd1ff), TOBN(0x52c2e300, 0xafdd2905), + TOBN(0xf27ea3d6, 0x434267cd), TOBN(0x8b96d16d, 0x15605b5f), + TOBN(0x7bb31049, 0x4b45706b), TOBN(0xe7f58b8e, 0x743d25f8), + TOBN(0xe9b5e45b, 0x87f30076), TOBN(0xd19448d6, 0x5d053d5a), + TOBN(0x1ecc8cb9, 0xd3210a04), TOBN(0x6bc7d463, 0xdafb5269), + TOBN(0x3e59b10a, 0x67c3489f), TOBN(0x1769788c, 0x65641e1b), + TOBN(0x8a53b82d, 0xbd6cb838), TOBN(0x7066d6e6, 0x236d5f22), + TOBN(0x03aa1c61, 0x6908536e), TOBN(0xc971da0d, 0x66ae9809), + TOBN(0x01b3a86b, 0xc49a2fac), TOBN(0x3b8420c0, 0x3092e77a), + TOBN(0x02057300, 0x7d6fb556), TOBN(0x6941b2a1, 0xbff40a87), + TOBN(0x140b6308, 0x0658ff2a), TOBN(0x87804363, 0x3424ab36), + TOBN(0x0253bd51, 0x5751e299), TOBN(0xc75bcd76, 0x449c3e3a), + TOBN(0x92eb4090, 0x7f8f875d), TOBN(0x9c9d754e, 0x56c26bbf), + TOBN(0x158cea61, 0x8110bbe7), TOBN(0x62a6b802, 0x745f91ea), + TOBN(0xa79c41aa, 0xc6e7394b), TOBN(0x445b6a83, 0xad57ef10), + TOBN(0x0c5277eb, 0x6ea6f40c), TOBN(0x319fe96b, 0x88633365), + TOBN(0x0b0fc61f, 0x385f63cb), TOBN(0x41250c84, 0x22bdd127), + TOBN(0x67d153f1, 0x09e942c2), TOBN(0x60920d08, 0xc021ad5d), + TOBN(0x229f5746, 0x724d81a5), TOBN(0xb7ffb892, 0x5bba3299), + TOBN(0x518c51a1, 0xde413032), TOBN(0x2a9bfe77, 0x3c2fd94c), + TOBN(0xcbcde239, 0x3191f4fd), TOBN(0x43093e16, 0xd3d6ada1), + TOBN(0x184579f3, 0x58769606), TOBN(0x2c94a8b3, 0xd236625c), + TOBN(0x6922b9c0, 0x5c437d8e), TOBN(0x3d4ae423, 0xd8d9f3c8), + TOBN(0xf72c31c1, 0x2e7090a2), TOBN(0x4ac3f5f3, 0xd76a55bd), + TOBN(0x342508fc, 0x6b6af991), TOBN(0x0d527100, 0x1b5cebbd), + TOBN(0xb84740d0, 0xdd440dd7), TOBN(0x748ef841, 0x780162fd), + TOBN(0xa8dbfe0e, 0xdfc6fafb), TOBN(0xeadfdf05, 0xf7300f27), + TOBN(0x7d06555f, 0xfeba4ec9), TOBN(0x12c56f83, 0x9e25fa97), + TOBN(0x77f84203, 0xd39b8c34), TOBN(0xed8b1be6, 0x3125eddb), + TOBN(0x5bbf2441, 0xf6e39dc5), TOBN(0xb00f6ee6, 0x6a5d678a), + TOBN(0xba456ecf, 0x57d0ea99), TOBN(0xdcae0f58, 0x17e06c43), + TOBN(0x01643de4, 0x0f5b4baa), TOBN(0x2c324341, 0xd161b9be), + TOBN(0x80177f55, 0xe126d468), TOBN(0xed325f1f, 0x76748e09), + TOBN(0x6116004a, 0xcfa9bdc2), TOBN(0x2d8607e6, 0x3a9fb468), + TOBN(0x0e573e27, 0x6009d660), TOBN(0x3a525d2e, 0x8d10c5a1), + TOBN(0xd26cb45c, 0x3b9009a0), TOBN(0xb6b0cdc0, 0xde9d7448), + TOBN(0x949c9976, 0xe1337c26), TOBN(0x6faadebd, 0xd73d68e5), + TOBN(0x9e158614, 0xf1b768d9), TOBN(0x22dfa557, 0x9cc4f069), + TOBN(0xccd6da17, 0xbe93c6d6), TOBN(0x24866c61, 0xa504f5b9), + TOBN(0x2121353c, 0x8d694da1), TOBN(0x1c6ca580, 0x0140b8c6), + TOBN(0xc245ad8c, 0xe964021e), TOBN(0xb83bffba, 0x032b82b3), + TOBN(0xfaa220c6, 0x47ef9898), TOBN(0x7e8d3ac6, 0x982c948a), + TOBN(0x1faa2091, 0xbc2d124a), TOBN(0xbd54c3dd, 0x05b15ff4), + TOBN(0x386bf3ab, 0xc87c6fb7), TOBN(0xfb2b0563, 0xfdeb6f66), + TOBN(0x4e77c557, 0x5b45afb4), TOBN(0xe9ded649, 0xefb8912d), + TOBN(0x7ec9bbf5, 0x42f6e557), TOBN(0x2570dfff, 0x62671f00), + TOBN(0x2b3bfb78, 0x88e084bd), TOBN(0xa024b238, 0xf37fe5b4), + TOBN(0x44e7dc04, 0x95649aee), TOBN(0x498ca255, 0x5e7ec1d8), + TOBN(0x3bc766ea, 0xaaa07e86), TOBN(0x0db6facb, 0xf3608586), + TOBN(0xbadd2549, 0xbdc259c8), TOBN(0x95af3c6e, 0x041c649f), + TOBN(0xb36a928c, 0x02e30afb), TOBN(0x9b5356ad, 0x008a88b8), + TOBN(0x4b67a5f1, 0xcf1d9e9d), TOBN(0xc6542e47, 0xa5d8d8ce), + TOBN(0x73061fe8, 0x7adfb6cc), TOBN(0xcc826fd3, 0x98678141), + TOBN(0x00e758b1, 0x3c80515a), TOBN(0x6afe3247, 0x41485083), + TOBN(0x0fcb08b9, 0xb6ae8a75), TOBN(0xb8cf388d, 0x4acf51e1), + TOBN(0x344a5560, 0x6961b9d6), TOBN(0x1a6778b8, 0x6a97fd0c), + TOBN(0xd840fdc1, 0xecc4c7e3), TOBN(0xde9fe47d, 0x16db68cc), + TOBN(0xe95f89de, 0xa3e216aa), TOBN(0x84f1a6a4, 0x9594a8be), + TOBN(0x7ddc7d72, 0x5a7b162b), TOBN(0xc5cfda19, 0xadc817a3), + TOBN(0x80a5d350, 0x78b58d46), TOBN(0x93365b13, 0x82978f19), + TOBN(0x2e44d225, 0x26a1fc90), TOBN(0x0d6d10d2, 0x4d70705d), + TOBN(0xd94b6b10, 0xd70c45f4), TOBN(0x0f201022, 0xb216c079), + TOBN(0xcec966c5, 0x658fde41), TOBN(0xa8d2bc7d, 0x7e27601d), + TOBN(0xbfcce3e1, 0xff230be7), TOBN(0x3394ff6b, 0x0033ffb5), + TOBN(0xd890c509, 0x8132c9af), TOBN(0xaac4b0eb, 0x361e7868), + TOBN(0x5194ded3, 0xe82d15aa), TOBN(0x4550bd2e, 0x23ae6b7d), + TOBN(0x3fda318e, 0xea5399d4), TOBN(0xd989bffa, 0x91638b80), + TOBN(0x5ea124d0, 0xa14aa12d), TOBN(0x1fb1b899, 0x3667b944), + TOBN(0x95ec7969, 0x44c44d6a), TOBN(0x91df144a, 0x57e86137), + TOBN(0x915fd620, 0x73adac44), TOBN(0x8f01732d, 0x59a83801), + TOBN(0xec579d25, 0x3aa0a633), TOBN(0x06de5e7c, 0xc9d6d59c), + TOBN(0xc132f958, 0xb1ef8010), TOBN(0x29476f96, 0xe65c1a02), + TOBN(0x336a77c0, 0xd34c3565), TOBN(0xef1105b2, 0x1b9f1e9e), + TOBN(0x63e6d08b, 0xf9e08002), TOBN(0x9aff2f21, 0xc613809e), + TOBN(0xb5754f85, 0x3a80e75d), TOBN(0xde71853e, 0x6bbda681), + TOBN(0x86f041df, 0x8197fd7a), TOBN(0x8b332e08, 0x127817fa), + TOBN(0x05d99be8, 0xb9c20cda), TOBN(0x89f7aad5, 0xd5cd0c98), + TOBN(0x7ef936fe, 0x5bb94183), TOBN(0x92ca0753, 0xb05cd7f2), + TOBN(0x9d65db11, 0x74a1e035), TOBN(0x02628cc8, 0x13eaea92), + TOBN(0xf2d9e242, 0x49e4fbf2), TOBN(0x94fdfd9b, 0xe384f8b7), + TOBN(0x65f56054, 0x63428c6b), TOBN(0x2f7205b2, 0x90b409a5), + TOBN(0xf778bb78, 0xff45ae11), TOBN(0xa13045be, 0xc5ee53b2), + TOBN(0xe00a14ff, 0x03ef77fe), TOBN(0x689cd59f, 0xffef8bef), + TOBN(0x3578f0ed, 0x1e9ade22), TOBN(0xe99f3ec0, 0x6268b6a8), + TOBN(0xa2057d91, 0xea1b3c3e), TOBN(0x2d1a7053, 0xb8823a4a), + TOBN(0xabbb336a, 0x2cca451e), TOBN(0xcd2466e3, 0x2218bb5d), + TOBN(0x3ac1f42f, 0xc8cb762d), TOBN(0x7e312aae, 0x7690211f), + TOBN(0xebb9bd73, 0x45d07450), TOBN(0x207c4b82, 0x46c2213f), + TOBN(0x99d425c1, 0x375913ec), TOBN(0x94e45e96, 0x67908220), + TOBN(0xc08f3087, 0xcd67dbf6), TOBN(0xa5670fbe, 0xc0887056), + TOBN(0x6717b64a, 0x66f5b8fc), TOBN(0xd5a56aea, 0x786fec28), + TOBN(0xa8c3f55f, 0xc0ff4952), TOBN(0xa77fefae, 0x457ac49b), + TOBN(0x29882d7c, 0x98379d44), TOBN(0xd000bdfb, 0x509edc8a), + TOBN(0xc6f95979, 0xe66fe464), TOBN(0x504a6115, 0xfa61bde0), + TOBN(0x56b3b871, 0xeffea31a), TOBN(0x2d3de26d, 0xf0c21a54), + TOBN(0x21dbff31, 0x834753bf), TOBN(0xe67ecf49, 0x69269d86), + TOBN(0x7a176952, 0x151fe690), TOBN(0x03515804, 0x7f2adb5f), + TOBN(0xee794b15, 0xd1b62a8d), TOBN(0xf004ceec, 0xaae454e6), + TOBN(0x0897ea7c, 0xf0386fac), TOBN(0x3b62ff12, 0xd1fca751), + TOBN(0x154181df, 0x1b7a04ec), TOBN(0x2008e04a, 0xfb5847ec), + TOBN(0xd147148e, 0x41dbd772), TOBN(0x2b419f73, 0x22942654), + TOBN(0x669f30d3, 0xe9c544f7), TOBN(0x52a2c223, 0xc8540149), + TOBN(0x5da9ee14, 0x634dfb02), TOBN(0x5f074ff0, 0xf47869f3), + TOBN(0x74ee878d, 0xa3933acc), TOBN(0xe6510651, 0x4fe35ed1), + TOBN(0xb3eb9482, 0xf1012e7a), TOBN(0x51013cc0, 0xa8a566ae), + TOBN(0xdd5e9243, 0x47c00d3b), TOBN(0x7fde089d, 0x946bb0e5), + TOBN(0x030754fe, 0xc731b4b3), TOBN(0x12a136a4, 0x99fda062), + TOBN(0x7c1064b8, 0x5a1a35bc), TOBN(0xbf1f5763, 0x446c84ef), + TOBN(0xed29a56d, 0xa16d4b34), TOBN(0x7fba9d09, 0xdca21c4f), + TOBN(0x66d7ac00, 0x6d8de486), TOBN(0x60061987, 0x73a2a5e1), + TOBN(0x8b400f86, 0x9da28ff0), TOBN(0x3133f708, 0x43c4599c), + TOBN(0x9911c9b8, 0xee28cb0d), TOBN(0xcd7e2874, 0x8e0af61d), + TOBN(0x5a85f0f2, 0x72ed91fc), TOBN(0x85214f31, 0x9cd4a373), + TOBN(0x881fe5be, 0x1925253c), TOBN(0xd8dc98e0, 0x91e8bc76), + TOBN(0x7120affe, 0x585cc3a2), TOBN(0x724952ed, 0x735bf97a), + TOBN(0x5581e7dc, 0x3eb34581), TOBN(0x5cbff4f2, 0xe52ee57d), + TOBN(0x8d320a0e, 0x87d8cc7b), TOBN(0x9beaa7f3, 0xf1d280d0), + TOBN(0x7a0b9571, 0x9beec704), TOBN(0x9126332e, 0x5b7f0057), + TOBN(0x01fbc1b4, 0x8ed3bd6d), TOBN(0x35bb2c12, 0xd945eb24), + TOBN(0x6404694e, 0x9a8ae255), TOBN(0xb6092eec, 0x8d6abfb3), + TOBN(0x4d76143f, 0xcc058865), TOBN(0x7b0a5af2, 0x6e249922), + TOBN(0x8aef9440, 0x6a50d353), TOBN(0xe11e4bcc, 0x64f0e07a), + TOBN(0x4472993a, 0xa14a90fa), TOBN(0x7706e20c, 0xba0c51d4), + TOBN(0xf403292f, 0x1532672d), TOBN(0x52573bfa, 0x21829382), + TOBN(0x6a7bb6a9, 0x3b5bdb83), TOBN(0x08da65c0, 0xa4a72318), + TOBN(0xc58d22aa, 0x63eb065f), TOBN(0x1717596c, 0x1b15d685), + TOBN(0x112df0d0, 0xb266d88b), TOBN(0xf688ae97, 0x5941945a), + TOBN(0x487386e3, 0x7c292cac), TOBN(0x42f3b50d, 0x57d6985c), + TOBN(0x6da4f998, 0x6a90fc34), TOBN(0xc8f257d3, 0x65ca8a8d), + TOBN(0xc2feabca, 0x6951f762), TOBN(0xe1bc81d0, 0x74c323ac), + TOBN(0x1bc68f67, 0x251a2a12), TOBN(0x10d86587, 0xbe8a70dc), + TOBN(0xd648af7f, 0xf0f84d2e), TOBN(0xf0aa9ebc, 0x6a43ac92), + TOBN(0x69e3be04, 0x27596893), TOBN(0xb6bb02a6, 0x45bf452b), + TOBN(0x0875c11a, 0xf4c698c8), TOBN(0x6652b5c7, 0xbece3794), + TOBN(0x7b3755fd, 0x4f5c0499), TOBN(0x6ea16558, 0xb5532b38), + TOBN(0xd1c69889, 0xa2e96ef7), TOBN(0x9c773c3a, 0x61ed8f48), + TOBN(0x2b653a40, 0x9b323abc), TOBN(0xe26605e1, 0xf0e1d791), + TOBN(0x45d41064, 0x4a87157a), TOBN(0x8f9a78b7, 0xcbbce616), + TOBN(0xcf1e44aa, 0xc407eddd), TOBN(0x81ddd1d8, 0xa35b964f), + TOBN(0x473e339e, 0xfd083999), TOBN(0x6c94bdde, 0x8e796802), + TOBN(0x5a304ada, 0x8545d185), TOBN(0x82ae44ea, 0x738bb8cb), + TOBN(0x628a35e3, 0xdf87e10e), TOBN(0xd3624f3d, 0xa15b9fe3), + TOBN(0xcc44209b, 0x14be4254), TOBN(0x7d0efcbc, 0xbdbc2ea5), + TOBN(0x1f603362, 0x04c37bbe), TOBN(0x21f363f5, 0x56a5852c), + TOBN(0xa1503d1c, 0xa8501550), TOBN(0x2251e0e1, 0xd8ab10bb), + TOBN(0xde129c96, 0x6961c51c), TOBN(0x1f7246a4, 0x81910f68), + TOBN(0x2eb744ee, 0x5f2591f2), TOBN(0x3c47d33f, 0x5e627157), + TOBN(0x4d6d62c9, 0x22f3bd68), TOBN(0x6120a64b, 0xcb8df856), + TOBN(0x3a9ac6c0, 0x7b5d07df), TOBN(0xa92b9558, 0x7ef39783), + TOBN(0xe128a134, 0xab3a9b4f), TOBN(0x41c18807, 0xb1252f05), + TOBN(0xfc7ed089, 0x80ba9b1c), TOBN(0xac8dc6de, 0xc532a9dd), + TOBN(0xbf829cef, 0x55246809), TOBN(0x101b784f, 0x5b4ee80f), + TOBN(0xc09945bb, 0xb6f11603), TOBN(0x57b09dbe, 0x41d2801e), + TOBN(0xfba5202f, 0xa97534a8), TOBN(0x7fd8ae5f, 0xc17b9614), + TOBN(0xa50ba666, 0x78308435), TOBN(0x9572f77c, 0xd3868c4d), + TOBN(0x0cef7bfd, 0x2dd7aab0), TOBN(0xe7958e08, 0x2c7c79ff), + TOBN(0x81262e42, 0x25346689), TOBN(0x716da290, 0xb07c7004), + TOBN(0x35f911ea, 0xb7950ee3), TOBN(0x6fd72969, 0x261d21b5), + TOBN(0x52389803, 0x08b640d3), TOBN(0x5b0026ee, 0x887f12a1), + TOBN(0x20e21660, 0x742e9311), TOBN(0x0ef6d541, 0x5ff77ff7), + TOBN(0x969127f0, 0xf9c41135), TOBN(0xf21d60c9, 0x68a64993), + TOBN(0x656e5d0c, 0xe541875c), TOBN(0xf1e0f84e, 0xa1d3c233), + TOBN(0x9bcca359, 0x06002d60), TOBN(0xbe2da60c, 0x06191552), + TOBN(0x5da8bbae, 0x61181ec3), TOBN(0x9f04b823, 0x65806f19), + TOBN(0xf1604a7d, 0xd4b79bb8), TOBN(0xaee806fb, 0x52c878c8), + TOBN(0x34144f11, 0x8d47b8e8), TOBN(0x72edf52b, 0x949f9054), + TOBN(0xebfca84e, 0x2127015a), TOBN(0x9051d0c0, 0x9cb7cef3), + TOBN(0x86e8fe58, 0x296deec8), TOBN(0x33b28188, 0x41010d74),} + , + {TOBN(0x01079383, 0x171b445f), TOBN(0x9bcf21e3, 0x8131ad4c), + TOBN(0x8cdfe205, 0xc93987e8), TOBN(0xe63f4152, 0xc92e8c8f), + TOBN(0x729462a9, 0x30add43d), TOBN(0x62ebb143, 0xc980f05a), + TOBN(0x4f3954e5, 0x3b06e968), TOBN(0xfe1d75ad, 0x242cf6b1), + TOBN(0x5f95c6c7, 0xaf8685c8), TOBN(0xd4c1c8ce, 0x2f8f01aa), + TOBN(0xc44bbe32, 0x2574692a), TOBN(0xb8003478, 0xd4a4a068), + TOBN(0x7c8fc6e5, 0x2eca3cdb), TOBN(0xea1db16b, 0xec04d399), + TOBN(0xb05bc82e, 0x8f2bc5cf), TOBN(0x763d517f, 0xf44793d2), + TOBN(0x4451c1b8, 0x08bd98d0), TOBN(0x644b1cd4, 0x6575f240), + TOBN(0x6907eb33, 0x7375d270), TOBN(0x56c8bebd, 0xfa2286bd), + TOBN(0xc713d2ac, 0xc4632b46), TOBN(0x17da427a, 0xafd60242), + TOBN(0x313065b7, 0xc95c7546), TOBN(0xf8239898, 0xbf17a3de), + TOBN(0xf3b7963f, 0x4c830320), TOBN(0x842c7aa0, 0x903203e3), + TOBN(0xaf22ca0a, 0xe7327afb), TOBN(0x38e13092, 0x967609b6), + TOBN(0x73b8fb62, 0x757558f1), TOBN(0x3cc3e831, 0xf7eca8c1), + TOBN(0xe4174474, 0xf6331627), TOBN(0xa77989ca, 0xc3c40234), + TOBN(0xe5fd17a1, 0x44a081e0), TOBN(0xd797fb7d, 0xb70e296a), + TOBN(0x2b472b30, 0x481f719c), TOBN(0x0e632a98, 0xfe6f8c52), + TOBN(0x89ccd116, 0xc5f0c284), TOBN(0xf51088af, 0x2d987c62), + TOBN(0x2a2bccda, 0x4c2de6cf), TOBN(0x810f9efe, 0xf679f0f9), + TOBN(0xb0f394b9, 0x7ffe4b3e), TOBN(0x0b691d21, 0xe5fa5d21), + TOBN(0xb0bd7747, 0x9dfbbc75), TOBN(0xd2830fda, 0xfaf78b00), + TOBN(0xf78c249c, 0x52434f57), TOBN(0x4b1f7545, 0x98096dab), + TOBN(0x73bf6f94, 0x8ff8c0b3), TOBN(0x34aef03d, 0x454e134c), + TOBN(0xf8d151f4, 0xb7ac7ec5), TOBN(0xd6ceb95a, 0xe50da7d5), + TOBN(0xa1b492b0, 0xdc3a0eb8), TOBN(0x75157b69, 0xb3dd2863), + TOBN(0xe2c4c74e, 0xc5413d62), TOBN(0xbe329ff7, 0xbc5fc4c7), + TOBN(0x835a2aea, 0x60fa9dda), TOBN(0xf117f5ad, 0x7445cb87), + TOBN(0xae8317f4, 0xb0166f7a), TOBN(0xfbd3e3f7, 0xceec74e6), + TOBN(0xfdb516ac, 0xe0874bfd), TOBN(0x3d846019, 0xc681f3a3), + TOBN(0x0b12ee5c, 0x7c1620b0), TOBN(0xba68b4dd, 0x2b63c501), + TOBN(0xac03cd32, 0x6668c51e), TOBN(0x2a6279f7, 0x4e0bcb5b), + TOBN(0x17bd69b0, 0x6ae85c10), TOBN(0x72946979, 0x1dfdd3a6), + TOBN(0xd9a03268, 0x2c078bec), TOBN(0x41c6a658, 0xbfd68a52), + TOBN(0xcdea1024, 0x0e023900), TOBN(0xbaeec121, 0xb10d144d), + TOBN(0x5a600e74, 0x058ab8dc), TOBN(0x1333af21, 0xbb89ccdd), + TOBN(0xdf25eae0, 0x3aaba1f1), TOBN(0x2cada16e, 0x3b7144cf), + TOBN(0x657ee27d, 0x71ab98bc), TOBN(0x99088b4c, 0x7a6fc96e), + TOBN(0x05d5c0a0, 0x3549dbd4), TOBN(0x42cbdf8f, 0xf158c3ac), + TOBN(0x3fb6b3b0, 0x87edd685), TOBN(0x22071cf6, 0x86f064d0), + TOBN(0xd2d6721f, 0xff2811e5), TOBN(0xdb81b703, 0xfe7fae8c), + TOBN(0x3cfb74ef, 0xd3f1f7bb), TOBN(0x0cdbcd76, 0x16cdeb5d), + TOBN(0x4f39642a, 0x566a808c), TOBN(0x02b74454, 0x340064d6), + TOBN(0xfabbadca, 0x0528fa6f), TOBN(0xe4c3074c, 0xd3fc0bb6), + TOBN(0xb32cb8b0, 0xb796d219), TOBN(0xc3e95f4f, 0x34741dd9), + TOBN(0x87212125, 0x68edf6f5), TOBN(0x7a03aee4, 0xa2b9cb8e), + TOBN(0x0cd3c376, 0xf53a89aa), TOBN(0x0d8af9b1, 0x948a28dc), + TOBN(0xcf86a3f4, 0x902ab04f), TOBN(0x8aacb62a, 0x7f42002d), + TOBN(0x106985eb, 0xf62ffd52), TOBN(0xe670b54e, 0x5797bf10), + TOBN(0x4b405209, 0xc5e30aef), TOBN(0x12c97a20, 0x4365b5e9), + TOBN(0x104646ce, 0x1fe32093), TOBN(0x13cb4ff6, 0x3907a8c9), + TOBN(0x8b9f30d1, 0xd46e726b), TOBN(0xe1985e21, 0xaba0f499), + TOBN(0xc573dea9, 0x10a230cd), TOBN(0x24f46a93, 0xcd30f947), + TOBN(0xf2623fcf, 0xabe2010a), TOBN(0x3f278cb2, 0x73f00e4f), + TOBN(0xed55c67d, 0x50b920eb), TOBN(0xf1cb9a2d, 0x8e760571), + TOBN(0x7c50d109, 0x0895b709), TOBN(0x4207cf07, 0x190d4369), + TOBN(0x3b027e81, 0xc4127fe1), TOBN(0xa9f8b9ad, 0x3ae9c566), + TOBN(0x5ab10851, 0xacbfbba5), TOBN(0xa747d648, 0x569556f5), + TOBN(0xcc172b5c, 0x2ba97bf7), TOBN(0x15e0f77d, 0xbcfa3324), + TOBN(0xa345b797, 0x7686279d), TOBN(0x5a723480, 0xe38003d3), + TOBN(0xfd8e139f, 0x8f5fcda8), TOBN(0xf3e558c4, 0xbdee5bfd), + TOBN(0xd76cbaf4, 0xe33f9f77), TOBN(0x3a4c97a4, 0x71771969), + TOBN(0xda27e84b, 0xf6dce6a7), TOBN(0xff373d96, 0x13e6c2d1), + TOBN(0xf115193c, 0xd759a6e9), TOBN(0x3f9b7025, 0x63d2262c), + TOBN(0xd9764a31, 0x317cd062), TOBN(0x30779d8e, 0x199f8332), + TOBN(0xd8074106, 0x16b11b0b), TOBN(0x7917ab9f, 0x78aeaed8), + TOBN(0xb67a9cbe, 0x28fb1d8e), TOBN(0x2e313563, 0x136eda33), + TOBN(0x010b7069, 0xa371a86c), TOBN(0x44d90fa2, 0x6744e6b7), + TOBN(0x68190867, 0xd6b3e243), TOBN(0x9fe6cd9d, 0x59048c48), + TOBN(0xb900b028, 0x95731538), TOBN(0xa012062f, 0x32cae04f), + TOBN(0x8107c8bc, 0x9399d082), TOBN(0x47e8c54a, 0x41df12e2), + TOBN(0x14ba5117, 0xb6ef3f73), TOBN(0x22260bea, 0x81362f0b), + TOBN(0x90ea261e, 0x1a18cc20), TOBN(0x2192999f, 0x2321d636), + TOBN(0xef64d314, 0xe311b6a0), TOBN(0xd7401e4c, 0x3b54a1f5), + TOBN(0x19019983, 0x6fbca2ba), TOBN(0x46ad3293, 0x8fbffc4b), + TOBN(0xa142d3f6, 0x3786bf40), TOBN(0xeb5cbc26, 0xb67039fc), + TOBN(0x9cb0ae6c, 0x252bd479), TOBN(0x05e0f88a, 0x12b5848f), + TOBN(0x78f6d2b2, 0xa5c97663), TOBN(0x6f6e149b, 0xc162225c), + TOBN(0xe602235c, 0xde601a89), TOBN(0xd17bbe98, 0xf373be1f), + TOBN(0xcaf49a5b, 0xa8471827), TOBN(0x7e1a0a85, 0x18aaa116), + TOBN(0x6c833196, 0x270580c3), TOBN(0x1e233839, 0xf1c98a14), + TOBN(0x67b2f7b4, 0xae34e0a5), TOBN(0x47ac8745, 0xd8ce7289), + TOBN(0x2b74779a, 0x100dd467), TOBN(0x274a4337, 0x4ee50d09), + TOBN(0x603dcf13, 0x83608bc9), TOBN(0xcd9da6c3, 0xc89e8388), + TOBN(0x2660199f, 0x355116ac), TOBN(0xcc38bb59, 0xb6d18eed), + TOBN(0x3075f31f, 0x2f4bc071), TOBN(0x9774457f, 0x265dc57e), + TOBN(0x06a6a9c8, 0xc6db88bb), TOBN(0x6429d07f, 0x4ec98e04), + TOBN(0x8d05e57b, 0x05ecaa8b), TOBN(0x20f140b1, 0x7872ea7b), + TOBN(0xdf8c0f09, 0xca494693), TOBN(0x48d3a020, 0xf252e909), + TOBN(0x4c5c29af, 0x57b14b12), TOBN(0x7e6fa37d, 0xbf47ad1c), + TOBN(0x66e7b506, 0x49a0c938), TOBN(0xb72c0d48, 0x6be5f41f), + TOBN(0x6a6242b8, 0xb2359412), TOBN(0xcd35c774, 0x8e859480), + TOBN(0x12536fea, 0x87baa627), TOBN(0x58c1fec1, 0xf72aa680), + TOBN(0x6c29b637, 0x601e5dc9), TOBN(0x9e3c3c1c, 0xde9e01b9), + TOBN(0xefc8127b, 0x2bcfe0b0), TOBN(0x35107102, 0x2a12f50d), + TOBN(0x6ccd6cb1, 0x4879b397), TOBN(0xf792f804, 0xf8a82f21), + TOBN(0x509d4804, 0xa9b46402), TOBN(0xedddf85d, 0xc10f0850), + TOBN(0x928410dc, 0x4b6208aa), TOBN(0xf6229c46, 0x391012dc), + TOBN(0xc5a7c41e, 0x7727b9b6), TOBN(0x289e4e4b, 0xaa444842), + TOBN(0x049ba1d9, 0xe9a947ea), TOBN(0x44f9e47f, 0x83c8debc), + TOBN(0xfa77a1fe, 0x611f8b8e), TOBN(0xfd2e416a, 0xf518f427), + TOBN(0xc5fffa70, 0x114ebac3), TOBN(0xfe57c4e9, 0x5d89697b), + TOBN(0xfdd053ac, 0xb1aaf613), TOBN(0x31df210f, 0xea585a45), + TOBN(0x318cc10e, 0x24985034), TOBN(0x1a38efd1, 0x5f1d6130), + TOBN(0xbf86f237, 0x0b1e9e21), TOBN(0xb258514d, 0x1dbe88aa), + TOBN(0x1e38a588, 0x90c1baf9), TOBN(0x2936a01e, 0xbdb9b692), + TOBN(0xd576de98, 0x6dd5b20c), TOBN(0xb586bf71, 0x70f98ecf), + TOBN(0xcccf0f12, 0xc42d2fd7), TOBN(0x8717e61c, 0xfb35bd7b), + TOBN(0x8b1e5722, 0x35e6fc06), TOBN(0x3477728f, 0x0b3e13d5), + TOBN(0x150c294d, 0xaa8a7372), TOBN(0xc0291d43, 0x3bfa528a), + TOBN(0xc6c8bc67, 0xcec5a196), TOBN(0xdeeb31e4, 0x5c2e8a7c), + TOBN(0xba93e244, 0xfb6e1c51), TOBN(0xb9f8b71b, 0x2e28e156), + TOBN(0xce65a287, 0x968a2ab9), TOBN(0xe3c5ce69, 0x46bbcb1f), + TOBN(0xf8c835b9, 0xe7ae3f30), TOBN(0x16bbee26, 0xff72b82b), + TOBN(0x665e2017, 0xfd42cd22), TOBN(0x1e139970, 0xf8b1d2a0), + TOBN(0x125cda29, 0x79204932), TOBN(0x7aee94a5, 0x49c3bee5), + TOBN(0x68c70160, 0x89821a66), TOBN(0xf7c37678, 0x8f981669), + TOBN(0xd90829fc, 0x48cc3645), TOBN(0x346af049, 0xd70addfc), + TOBN(0x2057b232, 0x370bf29c), TOBN(0xf90c73ce, 0x42e650ee), + TOBN(0xe03386ea, 0xa126ab90), TOBN(0x0e266e7e, 0x975a087b), + TOBN(0x80578eb9, 0x0fca65d9), TOBN(0x7e2989ea, 0x16af45b8), + TOBN(0x7438212d, 0xcac75a4e), TOBN(0x38c7ca39, 0x4fef36b8), + TOBN(0x8650c494, 0xd402676a), TOBN(0x26ab5a66, 0xf72c7c48), + TOBN(0x4e6cb426, 0xce3a464e), TOBN(0xf8f99896, 0x2b72f841), + TOBN(0x8c318491, 0x1a335cc8), TOBN(0x563459ba, 0x6a5913e4), + TOBN(0x1b920d61, 0xc7b32919), TOBN(0x805ab8b6, 0xa02425ad), + TOBN(0x2ac512da, 0x8d006086), TOBN(0x6ca4846a, 0xbcf5c0fd), + TOBN(0xafea51d8, 0xac2138d7), TOBN(0xcb647545, 0x344cd443), + TOBN(0x0429ee8f, 0xbd7d9040), TOBN(0xee66a2de, 0x819b9c96), + TOBN(0x54f9ec25, 0xdea7d744), TOBN(0x2ffea642, 0x671721bb), + TOBN(0x4f19dbd1, 0x114344ea), TOBN(0x04304536, 0xfd0dbc8b), + TOBN(0x014b50aa, 0x29ec7f91), TOBN(0xb5fc22fe, 0xbb06014d), + TOBN(0x60d963a9, 0x1ee682e0), TOBN(0xdf48abc0, 0xfe85c727), + TOBN(0x0cadba13, 0x2e707c2d), TOBN(0xde608d3a, 0xa645aeff), + TOBN(0x05f1c28b, 0xedafd883), TOBN(0x3c362ede, 0xbd94de1f), + TOBN(0x8dd0629d, 0x13593e41), TOBN(0x0a5e736f, 0x766d6eaf), + TOBN(0xbfa92311, 0xf68cf9d1), TOBN(0xa4f9ef87, 0xc1797556), + TOBN(0x10d75a1f, 0x5601c209), TOBN(0x651c374c, 0x09b07361), + TOBN(0x49950b58, 0x88b5cead), TOBN(0x0ef00058, 0x6fa9dbaa), + TOBN(0xf51ddc26, 0x4e15f33a), TOBN(0x1f8b5ca6, 0x2ef46140), + TOBN(0x343ac0a3, 0xee9523f0), TOBN(0xbb75eab2, 0x975ea978), + TOBN(0x1bccf332, 0x107387f4), TOBN(0x790f9259, 0x9ab0062e), + TOBN(0xf1a363ad, 0x1e4f6a5f), TOBN(0x06e08b84, 0x62519a50), + TOBN(0x60915187, 0x7265f1ee), TOBN(0x6a80ca34, 0x93ae985e), + TOBN(0x81b29768, 0xaaba4864), TOBN(0xb13cabf2, 0x8d52a7d6), + TOBN(0xb5c36348, 0x8ead03f1), TOBN(0xc932ad95, 0x81c7c1c0), + TOBN(0x5452708e, 0xcae1e27b), TOBN(0x9dac4269, 0x1b0df648), + TOBN(0x233e3f0c, 0xdfcdb8bc), TOBN(0xe6ceccdf, 0xec540174), + TOBN(0xbd0d845e, 0x95081181), TOBN(0xcc8a7920, 0x699355d5), + TOBN(0x111c0f6d, 0xc3b375a8), TOBN(0xfd95bc6b, 0xfd51e0dc), + TOBN(0x4a106a26, 0x6888523a), TOBN(0x4d142bd6, 0xcb01a06d), + TOBN(0x79bfd289, 0xadb9b397), TOBN(0x0bdbfb94, 0xe9863914), + TOBN(0x29d8a229, 0x1660f6a6), TOBN(0x7f6abcd6, 0x551c042d), + TOBN(0x13039deb, 0x0ac3ffe8), TOBN(0xa01be628, 0xec8523fb), + TOBN(0x6ea34103, 0x0ca1c328), TOBN(0xc74114bd, 0xb903928e), + TOBN(0x8aa4ff4e, 0x9e9144b0), TOBN(0x7064091f, 0x7f9a4b17), + TOBN(0xa3f4f521, 0xe447f2c4), TOBN(0x81b8da7a, 0x604291f0), + TOBN(0xd680bc46, 0x7d5926de), TOBN(0x84f21fd5, 0x34a1202f), + TOBN(0x1d1e3181, 0x4e9df3d8), TOBN(0x1ca4861a, 0x39ab8d34), + TOBN(0x809ddeec, 0x5b19aa4a), TOBN(0x59f72f7e, 0x4d329366), + TOBN(0xa2f93f41, 0x386d5087), TOBN(0x40bf739c, 0xdd67d64f), + TOBN(0xb4494205, 0x66702158), TOBN(0xc33c65be, 0x73b1e178), + TOBN(0xcdcd657c, 0x38ca6153), TOBN(0x97f4519a, 0xdc791976), + TOBN(0xcc7c7f29, 0xcd6e1f39), TOBN(0x38de9cfb, 0x7e3c3932), + TOBN(0xe448eba3, 0x7b793f85), TOBN(0xe9f8dbf9, 0xf067e914), + TOBN(0xc0390266, 0xf114ae87), TOBN(0x39ed75a7, 0xcd6a8e2a), + TOBN(0xadb14848, 0x7ffba390), TOBN(0x67f8cb8b, 0x6af9bc09), + TOBN(0x322c3848, 0x9c7476db), TOBN(0xa320fecf, 0x52a538d6), + TOBN(0xe0493002, 0xb2aced2b), TOBN(0xdfba1809, 0x616bd430), + TOBN(0x531c4644, 0xc331be70), TOBN(0xbc04d32e, 0x90d2e450), + TOBN(0x1805a0d1, 0x0f9f142d), TOBN(0x2c44a0c5, 0x47ee5a23), + TOBN(0x31875a43, 0x3989b4e3), TOBN(0x6b1949fd, 0x0c063481), + TOBN(0x2dfb9e08, 0xbe0f4492), TOBN(0x3ff0da03, 0xe9d5e517), + TOBN(0x03dbe9a1, 0xf79466a8), TOBN(0x0b87bcd0, 0x15ea9932), + TOBN(0xeb64fc83, 0xab1f58ab), TOBN(0x6d9598da, 0x817edc8a), + TOBN(0x699cff66, 0x1d3b67e5), TOBN(0x645c0f29, 0x92635853), + TOBN(0x253cdd82, 0xeabaf21c), TOBN(0x82b9602a, 0x2241659e), + TOBN(0x2cae07ec, 0x2d9f7091), TOBN(0xbe4c720c, 0x8b48cd9b), + TOBN(0x6ce5bc03, 0x6f08d6c9), TOBN(0x36e8a997, 0xaf10bf40), + TOBN(0x83422d21, 0x3e10ff12), TOBN(0x7b26d3eb, 0xbcc12494), + TOBN(0xb240d2d0, 0xc9469ad6), TOBN(0xc4a11b4d, 0x30afa05b), + TOBN(0x4b604ace, 0xdd6ba286), TOBN(0x18486600, 0x3ee2864c), + TOBN(0x5869d6ba, 0x8d9ce5be), TOBN(0x0d8f68c5, 0xff4bfb0d), + TOBN(0xb69f210b, 0x5700cf73), TOBN(0x61f6653a, 0x6d37c135), + TOBN(0xff3d432b, 0x5aff5a48), TOBN(0x0d81c4b9, 0x72ba3a69), + TOBN(0xee879ae9, 0xfa1899ef), TOBN(0xbac7e2a0, 0x2d6acafd), + TOBN(0xd6d93f6c, 0x1c664399), TOBN(0x4c288de1, 0x5bcb135d), + TOBN(0x83031dab, 0x9dab7cbf), TOBN(0xfe23feb0, 0x3abbf5f0), + TOBN(0x9f1b2466, 0xcdedca85), TOBN(0x140bb710, 0x1a09538c), + TOBN(0xac8ae851, 0x5e11115d), TOBN(0x0d63ff67, 0x6f03f59e), + TOBN(0x755e5551, 0x7d234afb), TOBN(0x61c2db4e, 0x7e208fc1), + TOBN(0xaa9859ce, 0xf28a4b5d), TOBN(0xbdd6d4fc, 0x34af030f), + TOBN(0xd1c4a26d, 0x3be01cb1), TOBN(0x9ba14ffc, 0x243aa07c), + TOBN(0xf95cd3a9, 0xb2503502), TOBN(0xe379bc06, 0x7d2a93ab), + TOBN(0x3efc18e9, 0xd4ca8d68), TOBN(0x083558ec, 0x80bb412a), + TOBN(0xd903b940, 0x9645a968), TOBN(0xa499f0b6, 0x9ba6054f), + TOBN(0x208b573c, 0xb8349abe), TOBN(0x3baab3e5, 0x30b4fc1c), + TOBN(0x87e978ba, 0xcb524990), TOBN(0x3524194e, 0xccdf0e80), + TOBN(0x62711725, 0x7d4bcc42), TOBN(0xe90a3d9b, 0xb90109ba), + TOBN(0x3b1bdd57, 0x1323e1e0), TOBN(0xb78e9bd5, 0x5eae1599), + TOBN(0x0794b746, 0x9e03d278), TOBN(0x80178605, 0xd70e6297), + TOBN(0x171792f8, 0x99c97855), TOBN(0x11b393ee, 0xf5a86b5c), + TOBN(0x48ef6582, 0xd8884f27), TOBN(0xbd44737a, 0xbf19ba5f), + TOBN(0x8698de4c, 0xa42062c6), TOBN(0x8975eb80, 0x61ce9c54), + TOBN(0xd50e57c7, 0xd7fe71f3), TOBN(0x15342190, 0xbc97ce38), + TOBN(0x51bda2de, 0x4df07b63), TOBN(0xba12aeae, 0x200eb87d), + TOBN(0xabe135d2, 0xa9b4f8f6), TOBN(0x04619d65, 0xfad6d99c), + TOBN(0x4a6683a7, 0x7994937c), TOBN(0x7a778c8b, 0x6f94f09a), + TOBN(0x8c508623, 0x20a71b89), TOBN(0x241a2aed, 0x1c229165), + TOBN(0x352be595, 0xaaf83a99), TOBN(0x9fbfee7f, 0x1562bac8), + TOBN(0xeaf658b9, 0x5c4017e3), TOBN(0x1dc7f9e0, 0x15120b86), + TOBN(0xd84f13dd, 0x4c034d6f), TOBN(0x283dd737, 0xeaea3038), + TOBN(0x197f2609, 0xcd85d6a2), TOBN(0x6ebbc345, 0xfae60177), + TOBN(0xb80f031b, 0x4e12fede), TOBN(0xde55d0c2, 0x07a2186b), + TOBN(0x1fb3e37f, 0x24dcdd5a), TOBN(0x8d602da5, 0x7ed191fb), + TOBN(0x108fb056, 0x76023e0d), TOBN(0x70178c71, 0x459c20c0), + TOBN(0xfad5a386, 0x3fe54cf0), TOBN(0xa4a3ec4f, 0x02bbb475), + TOBN(0x1aa5ec20, 0x919d94d7), TOBN(0x5d3b63b5, 0xa81e4ab3), + TOBN(0x7fa733d8, 0x5ad3d2af), TOBN(0xfbc586dd, 0xd1ac7a37), + TOBN(0x282925de, 0x40779614), TOBN(0xfe0ffffb, 0xe74a242a), + TOBN(0x3f39e67f, 0x906151e5), TOBN(0xcea27f5f, 0x55e10649), + TOBN(0xdca1d4e1, 0xc17cf7b7), TOBN(0x0c326d12, 0x2fe2362d), + TOBN(0x05f7ac33, 0x7dd35df3), TOBN(0x0c3b7639, 0xc396dbdf), + TOBN(0x0912f5ac, 0x03b7db1c), TOBN(0x9dea4b70, 0x5c9ed4a9), + TOBN(0x475e6e53, 0xaae3f639), TOBN(0xfaba0e7c, 0xfc278bac), + TOBN(0x16f9e221, 0x9490375f), TOBN(0xaebf9746, 0xa5a7ed0a), + TOBN(0x45f9af3f, 0xf41ad5d6), TOBN(0x03c4623c, 0xb2e99224), + TOBN(0x82c5bb5c, 0xb3cf56aa), TOBN(0x64311819, 0x34567ed3), + TOBN(0xec57f211, 0x8be489ac), TOBN(0x2821895d, 0xb9a1104b), + TOBN(0x610dc875, 0x6064e007), TOBN(0x8e526f3f, 0x5b20d0fe), + TOBN(0x6e71ca77, 0x5b645aee), TOBN(0x3d1dcb9f, 0x800e10ff), + TOBN(0x36b51162, 0x189cf6de), TOBN(0x2c5a3e30, 0x6bb17353), + TOBN(0xc186cd3e, 0x2a6c6fbf), TOBN(0xa74516fa, 0x4bf97906), + TOBN(0x5b4b8f4b, 0x279d6901), TOBN(0x0c4e57b4, 0x2b573743), + TOBN(0x75fdb229, 0xb6e386b6), TOBN(0xb46793fd, 0x99deac27), + TOBN(0xeeec47ea, 0xcf712629), TOBN(0xe965f3c4, 0xcbc3b2dd), + TOBN(0x8dd1fb83, 0x425c6559), TOBN(0x7fc00ee6, 0x0af06fda), + TOBN(0xe98c9225, 0x33d956df), TOBN(0x0f1ef335, 0x4fbdc8a2), + TOBN(0x2abb5145, 0xb79b8ea2), TOBN(0x40fd2945, 0xbdbff288), + TOBN(0x6a814ac4, 0xd7185db7), TOBN(0xc4329d6f, 0xc084609a), + TOBN(0xc9ba7b52, 0xed1be45d), TOBN(0x891dd20d, 0xe4cd2c74), + TOBN(0x5a4d4a7f, 0x824139b1), TOBN(0x66c17716, 0xb873c710), + TOBN(0x5e5bc141, 0x2843c4e0), TOBN(0xd5ac4817, 0xb97eb5bf), + TOBN(0xc0f8af54, 0x450c95c7), TOBN(0xc91b3fa0, 0x318406c5), + TOBN(0x360c340a, 0xab9d97f8), TOBN(0xfb57bd07, 0x90a2d611), + TOBN(0x4339ae3c, 0xa6a6f7e5), TOBN(0x9c1fcd2a, 0x2feb8a10), + TOBN(0x972bcca9, 0xc7ea7432), TOBN(0x1b0b924c, 0x308076f6), + TOBN(0x80b2814a, 0x2a5b4ca5), TOBN(0x2f78f55b, 0x61ef3b29), + TOBN(0xf838744a, 0xc18a414f), TOBN(0xc611eaae, 0x903d0a86), + TOBN(0x94dabc16, 0x2a453f55), TOBN(0xe6f2e3da, 0x14efb279), + TOBN(0x5b7a6017, 0x9320dc3c), TOBN(0x692e382f, 0x8df6b5a4), + TOBN(0x3f5e15e0, 0x2d40fa90), TOBN(0xc87883ae, 0x643dd318), + TOBN(0x511053e4, 0x53544774), TOBN(0x834d0ecc, 0x3adba2bc), + TOBN(0x4215d7f7, 0xbae371f5), TOBN(0xfcfd57bf, 0x6c8663bc), + TOBN(0xded2383d, 0xd6901b1d), TOBN(0x3b49fbb4, 0xb5587dc3), + TOBN(0xfd44a08d, 0x07625f62), TOBN(0x3ee4d65b, 0x9de9b762),} + , + {TOBN(0x64e5137d, 0x0d63d1fa), TOBN(0x658fc052, 0x02a9d89f), + TOBN(0x48894874, 0x50436309), TOBN(0xe9ae30f8, 0xd598da61), + TOBN(0x2ed710d1, 0x818baf91), TOBN(0xe27e9e06, 0x8b6a0c20), + TOBN(0x1e28dcfb, 0x1c1a6b44), TOBN(0x883acb64, 0xd6ac57dc), + TOBN(0x8735728d, 0xc2c6ff70), TOBN(0x79d6122f, 0xc5dc2235), + TOBN(0x23f5d003, 0x19e277f9), TOBN(0x7ee84e25, 0xdded8cc7), + TOBN(0x91a8afb0, 0x63cd880a), TOBN(0x3f3ea7c6, 0x3574af60), + TOBN(0x0cfcdc84, 0x02de7f42), TOBN(0x62d0792f, 0xb31aa152), + TOBN(0x8e1b4e43, 0x8a5807ce), TOBN(0xad283893, 0xe4109a7e), + TOBN(0xc30cc9cb, 0xafd59dda), TOBN(0xf65f36c6, 0x3d8d8093), + TOBN(0xdf31469e, 0xa60d32b2), TOBN(0xee93df4b, 0x3e8191c8), + TOBN(0x9c1017c5, 0x355bdeb5), TOBN(0xd2623185, 0x8616aa28), + TOBN(0xb02c83f9, 0xdec31a21), TOBN(0x988c8b23, 0x6ad9d573), + TOBN(0x53e983ae, 0xa57be365), TOBN(0xe968734d, 0x646f834e), + TOBN(0x9137ea8f, 0x5da6309b), TOBN(0x10f3a624, 0xc1f1ce16), + TOBN(0x782a9ea2, 0xca440921), TOBN(0xdf94739e, 0x5b46f1b5), + TOBN(0x9f9be006, 0xcce85c9b), TOBN(0x360e70d6, 0xa4c7c2d3), + TOBN(0x2cd5beea, 0xaefa1e60), TOBN(0x64cf63c0, 0x8c3d2b6d), + TOBN(0xfb107fa3, 0xe1cf6f90), TOBN(0xb7e937c6, 0xd5e044e6), + TOBN(0x74e8ca78, 0xce34db9f), TOBN(0x4f8b36c1, 0x3e210bd0), + TOBN(0x1df165a4, 0x34a35ea8), TOBN(0x3418e0f7, 0x4d4412f6), + TOBN(0x5af1f8af, 0x518836c3), TOBN(0x42ceef4d, 0x130e1965), + TOBN(0x5560ca0b, 0x543a1957), TOBN(0xc33761e5, 0x886cb123), + TOBN(0x66624b1f, 0xfe98ed30), TOBN(0xf772f4bf, 0x1090997d), + TOBN(0xf4e540bb, 0x4885d410), TOBN(0x7287f810, 0x9ba5f8d7), + TOBN(0x22d0d865, 0xde98dfb1), TOBN(0x49ff51a1, 0xbcfbb8a3), + TOBN(0xb6b6fa53, 0x6bc3012e), TOBN(0x3d31fd72, 0x170d541d), + TOBN(0x8018724f, 0x4b0f4966), TOBN(0x79e7399f, 0x87dbde07), + TOBN(0x56f8410e, 0xf4f8b16a), TOBN(0x97241afe, 0xc47b266a), + TOBN(0x0a406b8e, 0x6d9c87c1), TOBN(0x803f3e02, 0xcd42ab1b), + TOBN(0x7f0309a8, 0x04dbec69), TOBN(0xa83b85f7, 0x3bbad05f), + TOBN(0xc6097273, 0xad8e197f), TOBN(0xc097440e, 0x5067adc1), + TOBN(0x730eafb6, 0x3524ff16), TOBN(0xd7f9b51e, 0x823fc6ce), + TOBN(0x27bd0d32, 0x443e4ac0), TOBN(0x40c59ad9, 0x4d66f217), + TOBN(0x6c33136f, 0x17c387a4), TOBN(0x5043b8d5, 0xeb86804d), + TOBN(0x74970312, 0x675a73c9), TOBN(0x838fdb31, 0xf16669b6), + TOBN(0xc507b6dd, 0x418e7ddd), TOBN(0x39888d93, 0x472f19d6), + TOBN(0x7eae26be, 0x0c27eb4d), TOBN(0x17b53ed3, 0xfbabb884), + TOBN(0xfc27021b, 0x2b01ae4f), TOBN(0x88462e87, 0xcf488682), + TOBN(0xbee096ec, 0x215e2d87), TOBN(0xeb2fea9a, 0xd242e29b), + TOBN(0x5d985b5f, 0xb821fc28), TOBN(0x89d2e197, 0xdc1e2ad2), + TOBN(0x55b566b8, 0x9030ba62), TOBN(0xe3fd41b5, 0x4f41b1c6), + TOBN(0xb738ac2e, 0xb9a96d61), TOBN(0x7f8567ca, 0x369443f4), + TOBN(0x8698622d, 0xf803a440), TOBN(0x2b586236, 0x8fe2f4dc), + TOBN(0xbbcc00c7, 0x56b95bce), TOBN(0x5ec03906, 0x616da680), + TOBN(0x79162ee6, 0x72214252), TOBN(0x43132b63, 0x86a892d2), + TOBN(0x4bdd3ff2, 0x2f3263bf), TOBN(0xd5b3733c, 0x9cd0a142), + TOBN(0x592eaa82, 0x44415ccb), TOBN(0x663e8924, 0x8d5474ea), + TOBN(0x8058a25e, 0x5236344e), TOBN(0x82e8df9d, 0xbda76ee6), + TOBN(0xdcf6efd8, 0x11cc3d22), TOBN(0x00089cda, 0x3b4ab529), + TOBN(0x91d3a071, 0xbd38a3db), TOBN(0x4ea97fc0, 0xef72b925), + TOBN(0x0c9fc15b, 0xea3edf75), TOBN(0x5a6297cd, 0xa4348ed3), + TOBN(0x0d38ab35, 0xce7c42d4), TOBN(0x9fd493ef, 0x82feab10), + TOBN(0x46056b6d, 0x82111b45), TOBN(0xda11dae1, 0x73efc5c3), + TOBN(0xdc740278, 0x5545a7fb), TOBN(0xbdb2601c, 0x40d507e6), + TOBN(0x121dfeeb, 0x7066fa58), TOBN(0x214369a8, 0x39ae8c2a), + TOBN(0x195709cb, 0x06e0956c), TOBN(0x4c9d254f, 0x010cd34b), + TOBN(0xf51e13f7, 0x0471a532), TOBN(0xe19d6791, 0x1e73054d), + TOBN(0xf702a628, 0xdb5c7be3), TOBN(0xc7141218, 0xb24dde05), + TOBN(0xdc18233c, 0xf29b2e2e), TOBN(0x3a6bd1e8, 0x85342dba), + TOBN(0x3f747fa0, 0xb311898c), TOBN(0xe2a272e4, 0xcd0eac65), + TOBN(0x4bba5851, 0xf914d0bc), TOBN(0x7a1a9660, 0xc4a43ee3), + TOBN(0xe5a367ce, 0xa1c8cde9), TOBN(0x9d958ba9, 0x7271abe3), + TOBN(0xf3ff7eb6, 0x3d1615cd), TOBN(0xa2280dce, 0xf5ae20b0), + TOBN(0x56dba5c1, 0xcf640147), TOBN(0xea5a2e3d, 0x5e83d118), + TOBN(0x04cd6b6d, 0xda24c511), TOBN(0x1c0f4671, 0xe854d214), + TOBN(0x91a6b7a9, 0x69565381), TOBN(0xdc966240, 0xdecf1f5b), + TOBN(0x1b22d21c, 0xfcf5d009), TOBN(0x2a05f641, 0x9021dbd5), + TOBN(0x8c0ed566, 0xd4312483), TOBN(0x5179a95d, 0x643e216f), + TOBN(0xcc185fec, 0x17044493), TOBN(0xb3063339, 0x54991a21), + TOBN(0xd801ecdb, 0x0081a726), TOBN(0x0149b0c6, 0x4fa89bbb), + TOBN(0xafe9065a, 0x4391b6b9), TOBN(0xedc92786, 0xd633f3a3), + TOBN(0xe408c24a, 0xae6a8e13), TOBN(0x85833fde, 0x9f3897ab), + TOBN(0x43800e7e, 0xd81a0715), TOBN(0xde08e346, 0xb44ffc5f), + TOBN(0x7094184c, 0xcdeff2e0), TOBN(0x49f9387b, 0x165eaed1), + TOBN(0x635d6129, 0x777c468a), TOBN(0x8c0dcfd1, 0x538c2dd8), + TOBN(0xd6d9d9e3, 0x7a6a308b), TOBN(0x62375830, 0x4c2767d3), + TOBN(0x874a8bc6, 0xf38cbeb6), TOBN(0xd94d3f1a, 0xccb6fd9e), + TOBN(0x92a9735b, 0xba21f248), TOBN(0x272ad0e5, 0x6cd1efb0), + TOBN(0x7437b69c, 0x05b03284), TOBN(0xe7f04702, 0x6948c225), + TOBN(0x8a56c04a, 0xcba2ecec), TOBN(0x0c181270, 0xe3a73e41), + TOBN(0x6cb34e9d, 0x03e93725), TOBN(0xf77c8713, 0x496521a9), + TOBN(0x94569183, 0xfa7f9f90), TOBN(0xf2e7aa4c, 0x8c9707ad), + TOBN(0xced2c9ba, 0x26c1c9a3), TOBN(0x9109fe96, 0x40197507), + TOBN(0x9ae868a9, 0xe9adfe1c), TOBN(0x3984403d, 0x314e39bb), + TOBN(0xb5875720, 0xf2fe378f), TOBN(0x33f901e0, 0xba44a628), + TOBN(0xea1125fe, 0x3652438c), TOBN(0xae9ec4e6, 0x9dd1f20b), + TOBN(0x1e740d9e, 0xbebf7fbd), TOBN(0x6dbd3ddc, 0x42dbe79c), + TOBN(0x62082aec, 0xedd36776), TOBN(0xf612c478, 0xe9859039), + TOBN(0xa493b201, 0x032f7065), TOBN(0xebd4d8f2, 0x4ff9b211), + TOBN(0x3f23a0aa, 0xaac4cb32), TOBN(0xea3aadb7, 0x15ed4005), + TOBN(0xacf17ea4, 0xafa27e63), TOBN(0x56125c1a, 0xc11fd66c), + TOBN(0x266344a4, 0x3794f8dc), TOBN(0xdcca923a, 0x483c5c36), + TOBN(0x2d6b6bbf, 0x3f9d10a0), TOBN(0xb320c5ca, 0x81d9bdf3), + TOBN(0x620e28ff, 0x47b50a95), TOBN(0x933e3b01, 0xcef03371), + TOBN(0xf081bf85, 0x99100153), TOBN(0x183be9a0, 0xc3a8c8d6), + TOBN(0x4e3ddc5a, 0xd6bbe24d), TOBN(0xc6c74630, 0x53843795), + TOBN(0x78193dd7, 0x65ec2d4c), TOBN(0xb8df26cc, 0xcd3c89b2), + TOBN(0x98dbe399, 0x5a483f8d), TOBN(0x72d8a957, 0x7dd3313a), + TOBN(0x65087294, 0xab0bd375), TOBN(0xfcd89248, 0x7c259d16), + TOBN(0x8a9443d7, 0x7613aa81), TOBN(0x80100800, 0x85fe6584), + TOBN(0x70fc4dbc, 0x7fb10288), TOBN(0xf58280d3, 0xe86beee8), + TOBN(0x14fdd82f, 0x7c978c38), TOBN(0xdf1204c1, 0x0de44d7b), + TOBN(0xa08a1c84, 0x4160252f), TOBN(0x591554ca, 0xc17646a5), + TOBN(0x214a37d6, 0xa05bd525), TOBN(0x48d5f09b, 0x07957b3c), + TOBN(0x0247cdcb, 0xd7109bc9), TOBN(0x40f9e4bb, 0x30599ce7), + TOBN(0xc325fa03, 0xf46ad2ec), TOBN(0x00f766cf, 0xc3e3f9ee), + TOBN(0xab556668, 0xd43a4577), TOBN(0x68d30a61, 0x3ee03b93), + TOBN(0x7ddc81ea, 0x77b46a08), TOBN(0xcf5a6477, 0xc7480699), + TOBN(0x43a8cb34, 0x6633f683), TOBN(0x1b867e6b, 0x92363c60), + TOBN(0x43921114, 0x1f60558e), TOBN(0xcdbcdd63, 0x2f41450e), + TOBN(0x7fc04601, 0xcc630e8b), TOBN(0xea7c66d5, 0x97038b43), + TOBN(0x7259b8a5, 0x04e99fd8), TOBN(0x98a8dd12, 0x4785549a), + TOBN(0x0e459a7c, 0x840552e1), TOBN(0xcdfcf4d0, 0x4bb0909e), + TOBN(0x34a86db2, 0x53758da7), TOBN(0xe643bb83, 0xeac997e1), + TOBN(0x96400bd7, 0x530c5b7e), TOBN(0x9f97af87, 0xb41c8b52), + TOBN(0x34fc8820, 0xfbeee3f9), TOBN(0x93e53490, 0x49091afd), + TOBN(0x764b9be5, 0x9a31f35c), TOBN(0x71f37864, 0x57e3d924), + TOBN(0x02fb34e0, 0x943aa75e), TOBN(0xa18c9c58, 0xab8ff6e4), + TOBN(0x080f31b1, 0x33cf0d19), TOBN(0x5c9682db, 0x083518a7), + TOBN(0x873d4ca6, 0xb709c3de), TOBN(0x64a84262, 0x3575b8f0), + TOBN(0x6275da1f, 0x020154bb), TOBN(0x97678caa, 0xd17cf1ab), + TOBN(0x8779795f, 0x951a95c3), TOBN(0xdd35b163, 0x50fccc08), + TOBN(0x32709627, 0x33d8f031), TOBN(0x3c5ab10a, 0x498dd85c), + TOBN(0xb6c185c3, 0x41dca566), TOBN(0x7de7feda, 0xd8622aa3), + TOBN(0x99e84d92, 0x901b6dfb), TOBN(0x30a02b0e, 0x7c4ad288), + TOBN(0xc7c81daa, 0x2fd3cf36), TOBN(0xd1319547, 0xdf89e59f), + TOBN(0xb2be8184, 0xcd496733), TOBN(0xd5f449eb, 0x93d3412b), + TOBN(0x7ea41b1b, 0x25fe531d), TOBN(0xf9797432, 0x6a1d5646), + TOBN(0x86067f72, 0x2bde501a), TOBN(0xf91481c0, 0x0c85e89c), + TOBN(0xca8ee465, 0xf8b05bc6), TOBN(0x1844e1cf, 0x02e83cda), + TOBN(0xca82114a, 0xb4dbe33b), TOBN(0x0f9f8769, 0x4eabfde2), + TOBN(0x4936b1c0, 0x38b27fe2), TOBN(0x63b6359b, 0xaba402df), + TOBN(0x40c0ea2f, 0x656bdbab), TOBN(0x9c992a89, 0x6580c39c), + TOBN(0x600e8f15, 0x2a60aed1), TOBN(0xeb089ca4, 0xe0bf49df), + TOBN(0x9c233d7d, 0x2d42d99a), TOBN(0x648d3f95, 0x4c6bc2fa), + TOBN(0xdcc383a8, 0xe1add3f3), TOBN(0xf42c0c6a, 0x4f64a348), + TOBN(0x2abd176f, 0x0030dbdb), TOBN(0x4de501a3, 0x7d6c215e), + TOBN(0x4a107c1f, 0x4b9a64bc), TOBN(0xa77f0ad3, 0x2496cd59), + TOBN(0xfb78ac62, 0x7688dffb), TOBN(0x7025a2ca, 0x67937d8e), + TOBN(0xfde8b2d1, 0xd1a8f4e7), TOBN(0xf5b3da47, 0x7354927c), + TOBN(0xe48606a3, 0xd9205735), TOBN(0xac477cc6, 0xe177b917), + TOBN(0xfb1f73d2, 0xa883239a), TOBN(0xe12572f6, 0xcc8b8357), + TOBN(0x9d355e9c, 0xfb1f4f86), TOBN(0x89b795f8, 0xd9f3ec6e), + TOBN(0x27be56f1, 0xb54398dc), TOBN(0x1890efd7, 0x3fedeed5), + TOBN(0x62f77f1f, 0x9c6d0140), TOBN(0x7ef0e314, 0x596f0ee4), + TOBN(0x50ca6631, 0xcc61dab3), TOBN(0x4a39801d, 0xf4866e4f), + TOBN(0x66c8d032, 0xae363b39), TOBN(0x22c591e5, 0x2ead66aa), + TOBN(0x954ba308, 0xde02a53e), TOBN(0x2a6c060f, 0xd389f357), + TOBN(0xe6cfcde8, 0xfbf40b66), TOBN(0x8e02fc56, 0xc6340ce1), + TOBN(0xe4957795, 0x73adb4ba), TOBN(0x7b86122c, 0xa7b03805), + TOBN(0x63f83512, 0x0c8e6fa6), TOBN(0x83660ea0, 0x057d7804), + TOBN(0xbad79105, 0x21ba473c), TOBN(0xb6c50bee, 0xded5389d), + TOBN(0xee2caf4d, 0xaa7c9bc0), TOBN(0xd97b8de4, 0x8c4e98a7), + TOBN(0xa9f63e70, 0xab3bbddb), TOBN(0x3898aabf, 0x2597815a), + TOBN(0x7659af89, 0xac15b3d9), TOBN(0xedf7725b, 0x703ce784), + TOBN(0x25470fab, 0xe085116b), TOBN(0x04a43375, 0x87285310), + TOBN(0x4e39187e, 0xe2bfd52f), TOBN(0x36166b44, 0x7d9ebc74), + TOBN(0x92ad433c, 0xfd4b322c), TOBN(0x726aa817, 0xba79ab51), + TOBN(0xf96eacd8, 0xc1db15eb), TOBN(0xfaf71e91, 0x0476be63), + TOBN(0xdd69a640, 0x641fad98), TOBN(0xb7995918, 0x29622559), + TOBN(0x03c6daa5, 0xde4199dc), TOBN(0x92cadc97, 0xad545eb4), + TOBN(0x1028238b, 0x256534e4), TOBN(0x73e80ce6, 0x8595409a), + TOBN(0x690d4c66, 0xd05dc59b), TOBN(0xc95f7b8f, 0x981dee80), + TOBN(0xf4337014, 0xd856ac25), TOBN(0x441bd9dd, 0xac524dca), + TOBN(0x640b3d85, 0x5f0499f5), TOBN(0x39cf84a9, 0xd5fda182), + TOBN(0x04e7b055, 0xb2aa95a0), TOBN(0x29e33f0a, 0x0ddf1860), + TOBN(0x082e74b5, 0x423f6b43), TOBN(0x217edeb9, 0x0aaa2b0f), + TOBN(0x58b83f35, 0x83cbea55), TOBN(0xc485ee4d, 0xbc185d70), + TOBN(0x833ff03b, 0x1e5f6992), TOBN(0xb5b9b9cc, 0xcf0c0dd5), + TOBN(0x7caaee8e, 0x4e9e8a50), TOBN(0x462e907b, 0x6269dafd), + TOBN(0x6ed5cee9, 0xfbe791c6), TOBN(0x68ca3259, 0xed430790), + TOBN(0x2b72bdf2, 0x13b5ba88), TOBN(0x60294c8a, 0x35ef0ac4), + TOBN(0x9c3230ed, 0x19b99b08), TOBN(0x560fff17, 0x6c2589aa), + TOBN(0x552b8487, 0xd6770374), TOBN(0xa373202d, 0x9a56f685), + TOBN(0xd3e7f907, 0x45f175d9), TOBN(0x3c2f315f, 0xd080d810), + TOBN(0x1130e9dd, 0x7b9520e8), TOBN(0xc078f9e2, 0x0af037b5), + TOBN(0x38cd2ec7, 0x1e9c104c), TOBN(0x0f684368, 0xc472fe92), + TOBN(0xd3f1b5ed, 0x6247e7ef), TOBN(0xb32d33a9, 0x396dfe21), + TOBN(0x46f59cf4, 0x4a9aa2c2), TOBN(0x69cd5168, 0xff0f7e41), + TOBN(0x3f59da0f, 0x4b3234da), TOBN(0xcf0b0235, 0xb4579ebe), + TOBN(0x6d1cbb25, 0x6d2476c7), TOBN(0x4f0837e6, 0x9dc30f08), + TOBN(0x9a4075bb, 0x906f6e98), TOBN(0x253bb434, 0xc761e7d1), + TOBN(0xde2e645f, 0x6e73af10), TOBN(0xb89a4060, 0x0c5f131c), + TOBN(0xd12840c5, 0xb8cc037f), TOBN(0x3d093a5b, 0x7405bb47), + TOBN(0x6202c253, 0x206348b8), TOBN(0xbf5d57fc, 0xc55a3ca7), + TOBN(0x89f6c90c, 0x8c3bef48), TOBN(0x23ac7623, 0x5a0a960a), + TOBN(0xdfbd3d6b, 0x552b42ab), TOBN(0x3ef22458, 0x132061f6), + TOBN(0xd74e9bda, 0xc97e6516), TOBN(0x88779360, 0xc230f49e), + TOBN(0xa6ec1de3, 0x1e74ea49), TOBN(0x581dcee5, 0x3fb645a2), + TOBN(0xbaef2391, 0x8f483f14), TOBN(0x6d2dddfc, 0xd137d13b), + TOBN(0x54cde50e, 0xd2743a42), TOBN(0x89a34fc5, 0xe4d97e67), + TOBN(0x13f1f5b3, 0x12e08ce5), TOBN(0xa80540b8, 0xa7f0b2ca), + TOBN(0x854bcf77, 0x01982805), TOBN(0xb8653ffd, 0x233bea04), + TOBN(0x8e7b8787, 0x02b0b4c9), TOBN(0x2675261f, 0x9acb170a), + TOBN(0x061a9d90, 0x930c14e5), TOBN(0xb59b30e0, 0xdef0abea), + TOBN(0x1dc19ea6, 0x0200ec7d), TOBN(0xb6f4a3f9, 0x0bce132b), + TOBN(0xb8d5de90, 0xf13e27e0), TOBN(0xbaee5ef0, 0x1fade16f), + TOBN(0x6f406aaa, 0xe4c6cf38), TOBN(0xab4cfe06, 0xd1369815), + TOBN(0x0dcffe87, 0xefd550c6), TOBN(0x9d4f59c7, 0x75ff7d39), + TOBN(0xb02553b1, 0x51deb6ad), TOBN(0x812399a4, 0xb1877749), + TOBN(0xce90f71f, 0xca6006e1), TOBN(0xc32363a6, 0xb02b6e77), + TOBN(0x02284fbe, 0xdc36c64d), TOBN(0x86c81e31, 0xa7e1ae61), + TOBN(0x2576c7e5, 0xb909d94a), TOBN(0x8b6f7d02, 0x818b2bb0), + TOBN(0xeca3ed07, 0x56faa38a), TOBN(0xa3790e6c, 0x9305bb54), + TOBN(0xd784eeda, 0x7bc73061), TOBN(0xbd56d369, 0x6dd50614), + TOBN(0xd6575949, 0x229a8aa9), TOBN(0xdcca8f47, 0x4595ec28), + TOBN(0x814305c1, 0x06ab4fe6), TOBN(0xc8c39768, 0x24f43f16), + TOBN(0xe2a45f36, 0x523f2b36), TOBN(0x995c6493, 0x920d93bb), + TOBN(0xf8afdab7, 0x90f1632b), TOBN(0x79ebbecd, 0x1c295954), + TOBN(0xc7bb3ddb, 0x79592f48), TOBN(0x67216a7b, 0x5f88e998), + TOBN(0xd91f098b, 0xbc01193e), TOBN(0xf7d928a5, 0xb1db83fc), + TOBN(0x55e38417, 0xe991f600), TOBN(0x2a91113e, 0x2981a934), + TOBN(0xcbc9d648, 0x06b13bde), TOBN(0xb011b6ac, 0x0755ff44), + TOBN(0x6f4cb518, 0x045ec613), TOBN(0x522d2d31, 0xc2f5930a), + TOBN(0x5acae1af, 0x382e65de), TOBN(0x57643067, 0x27bc966f), + TOBN(0x5e12705d, 0x1c7193f0), TOBN(0xf0f32f47, 0x3be8858e), + TOBN(0x785c3d7d, 0x96c6dfc7), TOBN(0xd75b4a20, 0xbf31795d), + TOBN(0x91acf17b, 0x342659d4), TOBN(0xe596ea34, 0x44f0378f), + TOBN(0x4515708f, 0xce52129d), TOBN(0x17387e1e, 0x79f2f585), + TOBN(0x72cfd2e9, 0x49dee168), TOBN(0x1ae05223, 0x3e2af239), + TOBN(0x009e75be, 0x1d94066a), TOBN(0x6cca31c7, 0x38abf413), + TOBN(0xb50bd61d, 0x9bc49908), TOBN(0x4a9b4a8c, 0xf5e2bc1e), + TOBN(0xeb6cc5f7, 0x946f83ac), TOBN(0x27da93fc, 0xebffab28), + TOBN(0xea314c96, 0x4821c8c5), TOBN(0x8de49ded, 0xa83c15f4), + TOBN(0x7a64cf20, 0x7af33004), TOBN(0x45f1bfeb, 0xc9627e10), + TOBN(0x878b0626, 0x54b9df60), TOBN(0x5e4fdc3c, 0xa95c0b33), + TOBN(0xe54a37ca, 0xc2035d8e), TOBN(0x9087cda9, 0x80f20b8c), + TOBN(0x36f61c23, 0x8319ade4), TOBN(0x766f287a, 0xde8cfdf8), + TOBN(0x48821948, 0x346f3705), TOBN(0x49a7b853, 0x16e4f4a2), + TOBN(0xb9b3f8a7, 0x5cedadfd), TOBN(0x8f562815, 0x8db2a815), + TOBN(0xc0b7d554, 0x01f68f95), TOBN(0x12971e27, 0x688a208e), + TOBN(0xc9f8b696, 0xd0ff34fc), TOBN(0x20824de2, 0x1222718c), + TOBN(0x7213cf9f, 0x0c95284d), TOBN(0xe2ad741b, 0xdc158240), + TOBN(0x0ee3a6df, 0x54043ccf), TOBN(0x16ff479b, 0xd84412b3), + TOBN(0xf6c74ee0, 0xdfc98af0), TOBN(0xa78a169f, 0x52fcd2fb), + TOBN(0xd8ae8746, 0x99c930e9), TOBN(0x1d33e858, 0x49e117a5), + TOBN(0x7581fcb4, 0x6624759f), TOBN(0xde50644f, 0x5bedc01d), + TOBN(0xbeec5d00, 0xcaf3155e), TOBN(0x672d66ac, 0xbc73e75f), + TOBN(0x86b9d8c6, 0x270b01db), TOBN(0xd249ef83, 0x50f55b79), + TOBN(0x6131d6d4, 0x73978fe3), TOBN(0xcc4e4542, 0x754b00a1), + TOBN(0x4e05df05, 0x57dfcfe9), TOBN(0x94b29cdd, 0x51ef6bf0), + TOBN(0xe4530cff, 0x9bc7edf2), TOBN(0x8ac236fd, 0xd3da65f3), + TOBN(0x0faf7d5f, 0xc8eb0b48), TOBN(0x4d2de14c, 0x660eb039), + TOBN(0xc006bba7, 0x60430e54), TOBN(0x10a2d0d6, 0xda3289ab), + TOBN(0x9c037a5d, 0xd7979c59), TOBN(0x04d1f3d3, 0xa116d944), + TOBN(0x9ff22473, 0x8a0983cd), TOBN(0x28e25b38, 0xc883cabb), + TOBN(0xe968dba5, 0x47a58995), TOBN(0x2c80b505, 0x774eebdf), + TOBN(0xee763b71, 0x4a953beb), TOBN(0x502e223f, 0x1642e7f6), + TOBN(0x6fe4b641, 0x61d5e722), TOBN(0x9d37c5b0, 0xdbef5316), + TOBN(0x0115ed70, 0xf8330bc7), TOBN(0x139850e6, 0x75a72789), + TOBN(0x27d7faec, 0xffceccc2), TOBN(0x3016a860, 0x4fd9f7f6), + TOBN(0xc492ec64, 0x4cd8f64c), TOBN(0x58a2d790, 0x279d7b51), + TOBN(0x0ced1fc5, 0x1fc75256), TOBN(0x3e658aed, 0x8f433017), + TOBN(0x0b61942e, 0x05da59eb), TOBN(0xba3d60a3, 0x0ddc3722), + TOBN(0x7c311cd1, 0x742e7f87), TOBN(0x6473ffee, 0xf6b01b6e),} + , + {TOBN(0x8303604f, 0x692ac542), TOBN(0xf079ffe1, 0x227b91d3), + TOBN(0x19f63e63, 0x15aaf9bd), TOBN(0xf99ee565, 0xf1f344fb), + TOBN(0x8a1d661f, 0xd6219199), TOBN(0x8c883bc6, 0xd48ce41c), + TOBN(0x1065118f, 0x3c74d904), TOBN(0x713889ee, 0x0faf8b1b), + TOBN(0x972b3f8f, 0x81a1b3be), TOBN(0x4f3ce145, 0xce2764a0), + TOBN(0xe2d0f1cc, 0x28c4f5f7), TOBN(0xdeee0c0d, 0xc7f3985b), + TOBN(0x7df4adc0, 0xd39e25c3), TOBN(0x40619820, 0xc467a080), + TOBN(0x440ebc93, 0x61cf5a58), TOBN(0x527729a6, 0x422ad600), + TOBN(0xca6c0937, 0xb1b76ba6), TOBN(0x1a2eab85, 0x4d2026dc), + TOBN(0xb1715e15, 0x19d9ae0a), TOBN(0xf1ad9199, 0xbac4a026), + TOBN(0x35b3dfb8, 0x07ea7b0e), TOBN(0xedf5496f, 0x3ed9eb89), + TOBN(0x8932e5ff, 0x2d6d08ab), TOBN(0xf314874e, 0x25bd2731), + TOBN(0xefb26a75, 0x3f73f449), TOBN(0x1d1c94f8, 0x8d44fc79), + TOBN(0x49f0fbc5, 0x3bc0dc4d), TOBN(0xb747ea0b, 0x3698a0d0), + TOBN(0x5218c3fe, 0x228d291e), TOBN(0x35b804b5, 0x43c129d6), + TOBN(0xfac859b8, 0xd1acc516), TOBN(0x6c10697d, 0x95d6e668), + TOBN(0xc38e438f, 0x0876fd4e), TOBN(0x45f0c307, 0x83d2f383), + TOBN(0x203cc2ec, 0xb10934cb), TOBN(0x6a8f2439, 0x2c9d46ee), + TOBN(0xf16b431b, 0x65ccde7b), TOBN(0x41e2cd18, 0x27e76a6f), + TOBN(0xb9c8cf8f, 0x4e3484d7), TOBN(0x64426efd, 0x8315244a), + TOBN(0x1c0a8e44, 0xfc94dea3), TOBN(0x34c8cdbf, 0xdad6a0b0), + TOBN(0x919c3840, 0x04113cef), TOBN(0xfd32fba4, 0x15490ffa), + TOBN(0x58d190f6, 0x795dcfb7), TOBN(0xfef01b03, 0x83588baf), + TOBN(0x9e6d1d63, 0xca1fc1c0), TOBN(0x53173f96, 0xf0a41ac9), + TOBN(0x2b1d402a, 0xba16f73b), TOBN(0x2fb31014, 0x8cf9b9fc), + TOBN(0x2d51e60e, 0x446ef7bf), TOBN(0xc731021b, 0xb91e1745), + TOBN(0x9d3b4724, 0x4fee99d4), TOBN(0x4bca48b6, 0xfac5c1ea), + TOBN(0x70f5f514, 0xbbea9af7), TOBN(0x751f55a5, 0x974c283a), + TOBN(0x6e30251a, 0xcb452fdb), TOBN(0x31ee6965, 0x50f30650), + TOBN(0xb0b3e508, 0x933548d9), TOBN(0xb8949a4f, 0xf4b0ef5b), + TOBN(0x208b8326, 0x3c88f3bd), TOBN(0xab147c30, 0xdb1d9989), + TOBN(0xed6515fd, 0x44d4df03), TOBN(0x17a12f75, 0xe72eb0c5), + TOBN(0x3b59796d, 0x36cf69db), TOBN(0x1219eee9, 0x56670c18), + TOBN(0xfe3341f7, 0x7a070d8e), TOBN(0x9b70130b, 0xa327f90c), + TOBN(0x36a32462, 0x0ae18e0e), TOBN(0x2021a623, 0x46c0a638), + TOBN(0x251b5817, 0xc62eb0d4), TOBN(0x87bfbcdf, 0x4c762293), + TOBN(0xf78ab505, 0xcdd61d64), TOBN(0x8c7a53fc, 0xc8c18857), + TOBN(0xa653ce6f, 0x16147515), TOBN(0x9c923aa5, 0xea7d52d5), + TOBN(0xc24709cb, 0x5c18871f), TOBN(0x7d53bec8, 0x73b3cc74), + TOBN(0x59264aff, 0xfdd1d4c4), TOBN(0x5555917e, 0x240da582), + TOBN(0xcae8bbda, 0x548f5a0e), TOBN(0x1910eaba, 0x3bbfbbe1), + TOBN(0xae579685, 0x7677afc3), TOBN(0x49ea61f1, 0x73ff0b5c), + TOBN(0x78655478, 0x4f7c3922), TOBN(0x95d337cd, 0x20c68eef), + TOBN(0x68f1e1e5, 0xdf779ab9), TOBN(0x14b491b0, 0xb5cf69a8), + TOBN(0x7a6cbbe0, 0x28e3fe89), TOBN(0xe7e1fee4, 0xc5aac0eb), + TOBN(0x7f47eda5, 0x697e5140), TOBN(0x4f450137, 0xb454921f), + TOBN(0xdb625f84, 0x95cd8185), TOBN(0x74be0ba1, 0xcdb2e583), + TOBN(0xaee4fd7c, 0xdd5e6de4), TOBN(0x4251437d, 0xe8101739), + TOBN(0x686d72a0, 0xac620366), TOBN(0x4be3fb9c, 0xb6d59344), + TOBN(0x6e8b44e7, 0xa1eb75b9), TOBN(0x84e39da3, 0x91a5c10c), + TOBN(0x37cc1490, 0xb38f0409), TOBN(0x02951943, 0x2c2ade82), + TOBN(0x9b688783, 0x1190a2d8), TOBN(0x25627d14, 0x231182ba), + TOBN(0x6eb550aa, 0x658a6d87), TOBN(0x1405aaa7, 0xcf9c7325), + TOBN(0xd147142e, 0x5c8748c9), TOBN(0x7f637e4f, 0x53ede0e0), + TOBN(0xf8ca2776, 0x14ffad2c), TOBN(0xe58fb1bd, 0xbafb6791), + TOBN(0x17158c23, 0xbf8f93fc), TOBN(0x7f15b373, 0x0a4a4655), + TOBN(0x39d4add2, 0xd842ca72), TOBN(0xa71e4391, 0x3ed96305), + TOBN(0x5bb09cbe, 0x6700be14), TOBN(0x68d69d54, 0xd8befcf6), + TOBN(0xa45f5367, 0x37183bcf), TOBN(0x7152b7bb, 0x3370dff7), + TOBN(0xcf887baa, 0xbf12525b), TOBN(0xe7ac7bdd, 0xd6d1e3cd), + TOBN(0x25914f78, 0x81fdad90), TOBN(0xcf638f56, 0x0d2cf6ab), + TOBN(0xb90bc03f, 0xcc054de5), TOBN(0x932811a7, 0x18b06350), + TOBN(0x2f00b330, 0x9bbd11ff), TOBN(0x76108a6f, 0xb4044974), + TOBN(0x801bb9e0, 0xa851d266), TOBN(0x0dd099be, 0xbf8990c1), + TOBN(0x58c5aaaa, 0xabe32986), TOBN(0x0fe9dd2a, 0x50d59c27), + TOBN(0x84951ff4, 0x8d307305), TOBN(0x6c23f829, 0x86529b78), + TOBN(0x50bb2218, 0x0b136a79), TOBN(0x7e2174de, 0x77a20996), + TOBN(0x6f00a4b9, 0xc0bb4da6), TOBN(0x89a25a17, 0xefdde8da), + TOBN(0xf728a27e, 0xc11ee01d), TOBN(0xf900553a, 0xe5f10dfb), + TOBN(0x189a83c8, 0x02ec893c), TOBN(0x3ca5bdc1, 0x23f66d77), + TOBN(0x98781537, 0x97eada9f), TOBN(0x59c50ab3, 0x10256230), + TOBN(0x346042d9, 0x323c69b3), TOBN(0x1b715a6d, 0x2c460449), + TOBN(0xa41dd476, 0x6ae06e0b), TOBN(0xcdd7888e, 0x9d42e25f), + TOBN(0x0f395f74, 0x56b25a20), TOBN(0xeadfe0ae, 0x8700e27e), + TOBN(0xb09d52a9, 0x69950093), TOBN(0x3525d9cb, 0x327f8d40), + TOBN(0xb8235a94, 0x67df886a), TOBN(0x77e4b0dd, 0x035faec2), + TOBN(0x115eb20a, 0x517d7061), TOBN(0x77fe3433, 0x6c2df683), + TOBN(0x6870ddc7, 0xcdc6fc67), TOBN(0xb1610588, 0x0b87de83), + TOBN(0x343584ca, 0xd9c4ddbe), TOBN(0xb3164f1c, 0x3d754be2), + TOBN(0x0731ed3a, 0xc1e6c894), TOBN(0x26327dec, 0x4f6b904c), + TOBN(0x9d49c6de, 0x97b5cd32), TOBN(0x40835dae, 0xb5eceecd), + TOBN(0xc66350ed, 0xd9ded7fe), TOBN(0x8aeebb5c, 0x7a678804), + TOBN(0x51d42fb7, 0x5b8ee9ec), TOBN(0xd7a17bdd, 0x8e3ca118), + TOBN(0x40d7511a, 0x2ef4400e), TOBN(0xc48990ac, 0x875a66f4), + TOBN(0x8de07d2a, 0x2199e347), TOBN(0xbee75556, 0x2a39e051), + TOBN(0x56918786, 0x916e51dc), TOBN(0xeb191313, 0x4a2d89ec), + TOBN(0x6679610d, 0x37d341ed), TOBN(0x434fbb41, 0x56d51c2b), + TOBN(0xe54b7ee7, 0xd7492dba), TOBN(0xaa33a79a, 0x59021493), + TOBN(0x49fc5054, 0xe4bd6d3d), TOBN(0x09540f04, 0x5ab551d0), + TOBN(0x8acc9085, 0x4942d3a6), TOBN(0x231af02f, 0x2d28323b), + TOBN(0x93458cac, 0x0992c163), TOBN(0x1fef8e71, 0x888e3bb4), + TOBN(0x27578da5, 0xbe8c268c), TOBN(0xcc8be792, 0xe805ec00), + TOBN(0x29267bae, 0xc61c3855), TOBN(0xebff429d, 0x58c1fd3b), + TOBN(0x22d886c0, 0x8c0b93b8), TOBN(0xca5e00b2, 0x2ddb8953), + TOBN(0xcf330117, 0xc3fed8b7), TOBN(0xd49ac6fa, 0x819c01f6), + TOBN(0x6ddaa6bd, 0x3c0fbd54), TOBN(0x91743068, 0x8049a2cf), + TOBN(0xd67f981e, 0xaff2ef81), TOBN(0xc3654d35, 0x2818ae80), + TOBN(0x81d05044, 0x1b2aa892), TOBN(0x2db067bf, 0x3d099328), + TOBN(0xe7c79e86, 0x703dcc97), TOBN(0xe66f9b37, 0xe133e215), + TOBN(0xcdf119a6, 0xe39a7a5c), TOBN(0x47c60de3, 0x876f1b61), + TOBN(0x6e405939, 0xd860f1b2), TOBN(0x3e9a1dbc, 0xf5ed4d4a), + TOBN(0x3f23619e, 0xc9b6bcbd), TOBN(0x5ee790cf, 0x734e4497), + TOBN(0xf0a834b1, 0x5bdaf9bb), TOBN(0x02cedda7, 0x4ca295f0), + TOBN(0x4619aa2b, 0xcb8e378c), TOBN(0xe5613244, 0xcc987ea4), + TOBN(0x0bc022cc, 0x76b23a50), TOBN(0x4a2793ad, 0x0a6c21ce), + TOBN(0x38328780, 0x89cac3f5), TOBN(0x29176f1b, 0xcba26d56), + TOBN(0x06296187, 0x4f6f59eb), TOBN(0x86e9bca9, 0x8bdc658e), + TOBN(0x2ca9c4d3, 0x57e30402), TOBN(0x5438b216, 0x516a09bb), + TOBN(0x0a6a063c, 0x7672765a), TOBN(0x37a3ce64, 0x0547b9bf), + TOBN(0x42c099c8, 0x98b1a633), TOBN(0xb5ab800d, 0x05ee6961), + TOBN(0xf1963f59, 0x11a5acd6), TOBN(0xbaee6157, 0x46201063), + TOBN(0x36d9a649, 0xa596210a), TOBN(0xaed04363, 0x1ba7138c), + TOBN(0xcf817d1c, 0xa4a82b76), TOBN(0x5586960e, 0xf3806be9), + TOBN(0x7ab67c89, 0x09dc6bb5), TOBN(0x52ace7a0, 0x114fe7eb), + TOBN(0xcd987618, 0xcbbc9b70), TOBN(0x4f06fd5a, 0x604ca5e1), + TOBN(0x90af14ca, 0x6dbde133), TOBN(0x1afe4322, 0x948a3264), + TOBN(0xa70d2ca6, 0xc44b2c6c), TOBN(0xab726799, 0x0ef87dfe), + TOBN(0x310f64dc, 0x2e696377), TOBN(0x49b42e68, 0x4c8126a0), + TOBN(0x0ea444c3, 0xcea0b176), TOBN(0x53a8ddf7, 0xcb269182), + TOBN(0xf3e674eb, 0xbbba9dcb), TOBN(0x0d2878a8, 0xd8669d33), + TOBN(0x04b935d5, 0xd019b6a3), TOBN(0xbb5cf88e, 0x406f1e46), + TOBN(0xa1912d16, 0x5b57c111), TOBN(0x9803fc21, 0x19ebfd78), + TOBN(0x4f231c9e, 0xc07764a9), TOBN(0xd93286ee, 0xb75bd055), + TOBN(0x83a9457d, 0x8ee6c9de), TOBN(0x04695915, 0x6087ec90), + TOBN(0x14c6dd8a, 0x58d6cd46), TOBN(0x9cb633b5, 0x8e6634d2), + TOBN(0xc1305047, 0xf81bc328), TOBN(0x12ede0e2, 0x26a177e5), + TOBN(0x332cca62, 0x065a6f4f), TOBN(0xc3a47ecd, 0x67be487b), + TOBN(0x741eb187, 0x0f47ed1c), TOBN(0x99e66e58, 0xe7598b14), + TOBN(0x6f0544ca, 0x63d0ff12), TOBN(0xe5efc784, 0xb610a05f), + TOBN(0xf72917b1, 0x7cad7b47), TOBN(0x3ff6ea20, 0xf2cac0c0), + TOBN(0xcc23791b, 0xf21db8b7), TOBN(0x7dac70b1, 0xd7d93565), + TOBN(0x682cda1d, 0x694bdaad), TOBN(0xeb88bb8c, 0x1023516d), + TOBN(0xc4c634b4, 0xdfdbeb1b), TOBN(0x22f5ca72, 0xb4ee4dea), + TOBN(0x1045a368, 0xe6524821), TOBN(0xed9e8a3f, 0x052b18b2), + TOBN(0x9b7f2cb1, 0xb961f49a), TOBN(0x7fee2ec1, 0x7b009670), + TOBN(0x350d8754, 0x22507a6d), TOBN(0x561bd711, 0x4db55f1d), + TOBN(0x4c189ccc, 0x320bbcaf), TOBN(0x568434cf, 0xdf1de48c), + TOBN(0x6af1b00e, 0x0fa8f128), TOBN(0xf0ba9d02, 0x8907583c), + TOBN(0x735a4004, 0x32ff9f60), TOBN(0x3dd8e4b6, 0xc25dcf33), + TOBN(0xf2230f16, 0x42c74cef), TOBN(0xd8117623, 0x013fa8ad), + TOBN(0x36822876, 0xf51fe76e), TOBN(0x8a6811cc, 0x11d62589), + TOBN(0xc3fc7e65, 0x46225718), TOBN(0xb7df2c9f, 0xc82fdbcd), + TOBN(0x3b1d4e52, 0xdd7b205b), TOBN(0xb6959478, 0x47a2e414), + TOBN(0x05e4d793, 0xefa91148), TOBN(0xb47ed446, 0xfd2e9675), + TOBN(0x1a7098b9, 0x04c9d9bf), TOBN(0x661e2881, 0x1b793048), + TOBN(0xb1a16966, 0xb01ee461), TOBN(0xbc521308, 0x2954746f), + TOBN(0xc909a0fc, 0x2477de50), TOBN(0xd80bb41c, 0x7dbd51ef), + TOBN(0xa85be7ec, 0x53294905), TOBN(0x6d465b18, 0x83958f97), + TOBN(0x16f6f330, 0xfb6840fd), TOBN(0xfaaeb214, 0x3401e6c8), + TOBN(0xaf83d30f, 0xccb5b4f8), TOBN(0x22885739, 0x266dec4b), + TOBN(0x51b4367c, 0x7bc467df), TOBN(0x926562e3, 0xd842d27a), + TOBN(0xdfcb6614, 0x0fea14a6), TOBN(0xeb394dae, 0xf2734cd9), + TOBN(0x3eeae5d2, 0x11c0be98), TOBN(0xb1e6ed11, 0x814e8165), + TOBN(0x191086bc, 0xe52bce1c), TOBN(0x14b74cc6, 0xa75a04da), + TOBN(0x63cf1186, 0x8c060985), TOBN(0x071047de, 0x2dbd7f7c), + TOBN(0x4e433b8b, 0xce0942ca), TOBN(0xecbac447, 0xd8fec61d), + TOBN(0x8f0ed0e2, 0xebf3232f), TOBN(0xfff80f9e, 0xc52a2edd), + TOBN(0xad9ab433, 0x75b55fdb), TOBN(0x73ca7820, 0xe42e0c11), + TOBN(0x6dace0a0, 0xe6251b46), TOBN(0x89bc6b5c, 0x4c0d932d), + TOBN(0x3438cd77, 0x095da19a), TOBN(0x2f24a939, 0x8d48bdfb), + TOBN(0x99b47e46, 0x766561b7), TOBN(0x736600e6, 0x0ed0322a), + TOBN(0x06a47cb1, 0x638e1865), TOBN(0x927c1c2d, 0xcb136000), + TOBN(0x29542337, 0x0cc5df69), TOBN(0x99b37c02, 0x09d649a9), + TOBN(0xc5f0043c, 0x6aefdb27), TOBN(0x6cdd9987, 0x1be95c27), + TOBN(0x69850931, 0x390420d2), TOBN(0x299c40ac, 0x0983efa4), + TOBN(0x3a05e778, 0xaf39aead), TOBN(0x84274408, 0x43a45193), + TOBN(0x6bcd0fb9, 0x91a711a0), TOBN(0x461592c8, 0x9f52ab17), + TOBN(0xb49302b4, 0xda3c6ed6), TOBN(0xc51fddc7, 0x330d7067), + TOBN(0x94babeb6, 0xda50d531), TOBN(0x521b840d, 0xa6a7b9da), + TOBN(0x5305151e, 0x404bdc89), TOBN(0x1bcde201, 0xd0d07449), + TOBN(0xf427a78b, 0x3b76a59a), TOBN(0xf84841ce, 0x07791a1b), + TOBN(0xebd314be, 0xbf91ed1c), TOBN(0x8e61d34c, 0xbf172943), + TOBN(0x1d5dc451, 0x5541b892), TOBN(0xb186ee41, 0xfc9d9e54), + TOBN(0x9d9f345e, 0xd5bf610d), TOBN(0x3e7ba65d, 0xf6acca9f), + TOBN(0x9dda787a, 0xa8369486), TOBN(0x09f9dab7, 0x8eb5ba53), + TOBN(0x5afb2033, 0xd6481bc3), TOBN(0x76f4ce30, 0xafa62104), + TOBN(0xa8fa00cf, 0xf4f066b5), TOBN(0x89ab5143, 0x461dafc2), + TOBN(0x44339ed7, 0xa3389998), TOBN(0x2ff862f1, 0xbc214903), + TOBN(0x2c88f985, 0xb05556e3), TOBN(0xcd96058e, 0x3467081e), + TOBN(0x7d6a4176, 0xedc637ea), TOBN(0xe1743d09, 0x36a5acdc), + TOBN(0x66fd72e2, 0x7eb37726), TOBN(0xf7fa264e, 0x1481a037), + TOBN(0x9fbd3bde, 0x45f4aa79), TOBN(0xed1e0147, 0x767c3e22), + TOBN(0x7621f979, 0x82e7abe2), TOBN(0x19eedc72, 0x45f633f8), + TOBN(0xe69b155e, 0x6137bf3a), TOBN(0xa0ad13ce, 0x414ee94e), + TOBN(0x93e3d524, 0x1c0e651a), TOBN(0xab1a6e2a, 0x02ce227e), + TOBN(0xe7af1797, 0x4ab27eca), TOBN(0x245446de, 0xbd444f39), + TOBN(0x59e22a21, 0x56c07613), TOBN(0x43deafce, 0xf4275498), + TOBN(0x10834ccb, 0x67fd0946), TOBN(0xa75841e5, 0x47406edf), + TOBN(0xebd6a677, 0x7b0ac93d), TOBN(0xa6e37b0d, 0x78f5e0d7), + TOBN(0x2516c096, 0x76f5492b), TOBN(0x1e4bf888, 0x9ac05f3a), + TOBN(0xcdb42ce0, 0x4df0ba2b), TOBN(0x935d5cfd, 0x5062341b), + TOBN(0x8a303333, 0x82acac20), TOBN(0x429438c4, 0x5198b00e), + TOBN(0x1d083bc9, 0x049d33fa), TOBN(0x58b82dda, 0x946f67ff), + TOBN(0xac3e2db8, 0x67a1d6a3), TOBN(0x62e6bead, 0x1798aac8), + TOBN(0xfc85980f, 0xde46c58c), TOBN(0xa7f69379, 0x69c8d7be), + TOBN(0x23557927, 0x837b35ec), TOBN(0x06a933d8, 0xe0790c0c), + TOBN(0x827c0e9b, 0x077ff55d), TOBN(0x53977798, 0xbb26e680), + TOBN(0x59530874, 0x1d9cb54f), TOBN(0xcca3f449, 0x4aac53ef), + TOBN(0x11dc5c87, 0xa07eda0f), TOBN(0xc138bccf, 0xfd6400c8), + TOBN(0x549680d3, 0x13e5da72), TOBN(0xc93eed82, 0x4540617e), + TOBN(0xfd3db157, 0x4d0b75c0), TOBN(0x9716eb42, 0x6386075b), + TOBN(0x0639605c, 0x817b2c16), TOBN(0x09915109, 0xf1e4f201), + TOBN(0x35c9a928, 0x5cca6c3b), TOBN(0xb25f7d1a, 0x3505c900), + TOBN(0xeb9f7d20, 0x630480c4), TOBN(0xc3c7b8c6, 0x2a1a501c), + TOBN(0x3f99183c, 0x5a1f8e24), TOBN(0xfdb118fa, 0x9dd255f0), + TOBN(0xb9b18b90, 0xc27f62a6), TOBN(0xe8f732f7, 0x396ec191), + TOBN(0x524a2d91, 0x0be786ab), TOBN(0x5d32adef, 0x0ac5a0f5), + TOBN(0x9b53d4d6, 0x9725f694), TOBN(0x032a76c6, 0x0510ba89), + TOBN(0x840391a3, 0xebeb1544), TOBN(0x44b7b88c, 0x3ed73ac3), + TOBN(0xd24bae7a, 0x256cb8b3), TOBN(0x7ceb151a, 0xe394cb12), + TOBN(0xbd6b66d0, 0x5bc1e6a8), TOBN(0xec70cecb, 0x090f07bf), + TOBN(0x270644ed, 0x7d937589), TOBN(0xee9e1a3d, 0x5f1dccfe), + TOBN(0xb0d40a84, 0x745b98d2), TOBN(0xda429a21, 0x2556ed40), + TOBN(0xf676eced, 0x85148cb9), TOBN(0x5a22d40c, 0xded18936), + TOBN(0x3bc4b9e5, 0x70e8a4ce), TOBN(0xbfd1445b, 0x9eae0379), + TOBN(0xf23f2c0c, 0x1a0bd47e), TOBN(0xa9c0bb31, 0xe1845531), + TOBN(0x9ddc4d60, 0x0a4c3f6b), TOBN(0xbdfaad79, 0x2c15ef44), + TOBN(0xce55a236, 0x7f484acc), TOBN(0x08653ca7, 0x055b1f15), + TOBN(0x2efa8724, 0x538873a3), TOBN(0x09299e5d, 0xace1c7e7), + TOBN(0x07afab66, 0xade332ba), TOBN(0x9be1fdf6, 0x92dd71b7), + TOBN(0xa49b5d59, 0x5758b11c), TOBN(0x0b852893, 0xc8654f40), + TOBN(0xb63ef6f4, 0x52379447), TOBN(0xd4957d29, 0x105e690c), + TOBN(0x7d484363, 0x646559b0), TOBN(0xf4a8273c, 0x49788a8e), + TOBN(0xee406cb8, 0x34ce54a9), TOBN(0x1e1c260f, 0xf86fda9b), + TOBN(0xe150e228, 0xcf6a4a81), TOBN(0x1fa3b6a3, 0x1b488772), + TOBN(0x1e6ff110, 0xc5a9c15b), TOBN(0xc6133b91, 0x8ad6aa47), + TOBN(0x8ac5d55c, 0x9dffa978), TOBN(0xba1d1c1d, 0x5f3965f2), + TOBN(0xf969f4e0, 0x7732b52f), TOBN(0xfceecdb5, 0xa5172a07), + TOBN(0xb0120a5f, 0x10f2b8f5), TOBN(0xc83a6cdf, 0x5c4c2f63), + TOBN(0x4d47a491, 0xf8f9c213), TOBN(0xd9e1cce5, 0xd3f1bbd5), + TOBN(0x0d91bc7c, 0xaba7e372), TOBN(0xfcdc74c8, 0xdfd1a2db), + TOBN(0x05efa800, 0x374618e5), TOBN(0x11216969, 0x15a7925e), + TOBN(0xd4c89823, 0xf6021c5d), TOBN(0x880d5e84, 0xeff14423), + TOBN(0x6523bc5a, 0x6dcd1396), TOBN(0xd1acfdfc, 0x113c978b), + TOBN(0xb0c164e8, 0xbbb66840), TOBN(0xf7f4301e, 0x72b58459), + TOBN(0xc29ad4a6, 0xa638e8ec), TOBN(0xf5ab8961, 0x46b78699), + TOBN(0x9dbd7974, 0x0e954750), TOBN(0x0121de88, 0x64f9d2c6), + TOBN(0x2e597b42, 0xd985232e), TOBN(0x55b6c3c5, 0x53451777), + TOBN(0xbb53e547, 0x519cb9fb), TOBN(0xf134019f, 0x8428600d), + TOBN(0x5a473176, 0xe081791a), TOBN(0x2f3e2263, 0x35fb0c08), + TOBN(0xb28c3017, 0x73d273b0), TOBN(0xccd21076, 0x7721ef9a), + TOBN(0x054cc292, 0xb650dc39), TOBN(0x662246de, 0x6188045e), + TOBN(0x904b52fa, 0x6b83c0d1), TOBN(0xa72df267, 0x97e9cd46), + TOBN(0x886b43cd, 0x899725e4), TOBN(0x2b651688, 0xd849ff22), + TOBN(0x60479b79, 0x02f34533), TOBN(0x5e354c14, 0x0c77c148), + TOBN(0xb4bb7581, 0xa8537c78), TOBN(0x188043d7, 0xefe1495f), + TOBN(0x9ba12f42, 0x8c1d5026), TOBN(0x2e0c8a26, 0x93d4aaab), + TOBN(0xbdba7b8b, 0xaa57c450), TOBN(0x140c9ad6, 0x9bbdafef), + TOBN(0x2067aa42, 0x25ac0f18), TOBN(0xf7b1295b, 0x04d1fbf3), + TOBN(0x14829111, 0xa4b04824), TOBN(0x2ce3f192, 0x33bd5e91), + TOBN(0x9c7a1d55, 0x8f2e1b72), TOBN(0xfe932286, 0x302aa243), + TOBN(0x497ca7b4, 0xd4be9554), TOBN(0xb8e821b8, 0xe0547a6e), + TOBN(0xfb2838be, 0x67e573e0), TOBN(0x05891db9, 0x4084c44b), + TOBN(0x91311373, 0x96c1c2c5), TOBN(0x6aebfa3f, 0xd958444b), + TOBN(0xac9cdce9, 0xe56e55c1), TOBN(0x7148ced3, 0x2caa46d0), + TOBN(0x2e10c7ef, 0xb61fe8eb), TOBN(0x9fd835da, 0xff97cf4d),} + , + {TOBN(0xa36da109, 0x081e9387), TOBN(0xfb9780d7, 0x8c935828), + TOBN(0xd5940332, 0xe540b015), TOBN(0xc9d7b51b, 0xe0f466fa), + TOBN(0xfaadcd41, 0xd6d9f671), TOBN(0xba6c1e28, 0xb1a2ac17), + TOBN(0x066a7833, 0xed201e5f), TOBN(0x19d99719, 0xf90f462b), + TOBN(0xf431f462, 0x060b5f61), TOBN(0xa56f46b4, 0x7bd057c2), + TOBN(0x348dca6c, 0x47e1bf65), TOBN(0x9a38783e, 0x41bcf1ff), + TOBN(0x7a5d33a9, 0xda710718), TOBN(0x5a779987, 0x2e0aeaf6), + TOBN(0xca87314d, 0x2d29d187), TOBN(0xfa0edc3e, 0xc687d733), + TOBN(0x9df33621, 0x6a31e09b), TOBN(0xde89e44d, 0xc1350e35), + TOBN(0x29214871, 0x4ca0cf52), TOBN(0xdf379672, 0x0b88a538), + TOBN(0xc92a510a, 0x2591d61b), TOBN(0x79aa87d7, 0x585b447b), + TOBN(0xf67db604, 0xe5287f77), TOBN(0x1697c8bf, 0x5efe7a80), + TOBN(0x1c894849, 0xcb198ac7), TOBN(0xa884a93d, 0x0f264665), + TOBN(0x2da964ef, 0x9b200678), TOBN(0x3c351b87, 0x009834e6), + TOBN(0xafb2ef9f, 0xe2c4b44b), TOBN(0x580f6c47, 0x3326790c), + TOBN(0xb8480521, 0x0b02264a), TOBN(0x8ba6f9e2, 0x42a194e2), + TOBN(0xfc87975f, 0x8fb54738), TOBN(0x35160788, 0x27c3ead3), + TOBN(0x834116d2, 0xb74a085a), TOBN(0x53c99a73, 0xa62fe996), + TOBN(0x87585be0, 0x5b81c51b), TOBN(0x925bafa8, 0xbe0852b7), + TOBN(0x76a4fafd, 0xa84d19a7), TOBN(0x39a45982, 0x585206d4), + TOBN(0x499b6ab6, 0x5eb03c0e), TOBN(0xf19b7954, 0x72bc3fde), + TOBN(0xa86b5b9c, 0x6e3a80d2), TOBN(0xe4377508, 0x6d42819f), + TOBN(0xc1663650, 0xbb3ee8a3), TOBN(0x75eb14fc, 0xb132075f), + TOBN(0xa8ccc906, 0x7ad834f6), TOBN(0xea6a2474, 0xe6e92ffd), + TOBN(0x9d72fd95, 0x0f8d6758), TOBN(0xcb84e101, 0x408c07dd), + TOBN(0xb9114bfd, 0xa5e23221), TOBN(0x358b5fe2, 0xe94e742c), + TOBN(0x1c0577ec, 0x95f40e75), TOBN(0xf0155451, 0x3d73f3d6), + TOBN(0x9d55cd67, 0xbd1b9b66), TOBN(0x63e86e78, 0xaf8d63c7), + TOBN(0x39d934ab, 0xd3c095f1), TOBN(0x04b261be, 0xe4b76d71), + TOBN(0x1d2e6970, 0xe73e6984), TOBN(0x879fb23b, 0x5e5fcb11), + TOBN(0x11506c72, 0xdfd75490), TOBN(0x3a97d085, 0x61bcf1c1), + TOBN(0x43201d82, 0xbf5e7007), TOBN(0x7f0ac52f, 0x798232a7), + TOBN(0x2715cbc4, 0x6eb564d4), TOBN(0x8d6c752c, 0x9e570e29), + TOBN(0xf80247c8, 0x9ef5fd5d), TOBN(0xc3c66b46, 0xd53eb514), + TOBN(0x9666b401, 0x0f87de56), TOBN(0xce62c06f, 0xc6c603b5), + TOBN(0xae7b4c60, 0x7e4fc942), TOBN(0x38ac0b77, 0x663a9c19), + TOBN(0xcb4d20ee, 0x4b049136), TOBN(0x8b63bf12, 0x356a4613), + TOBN(0x1221aef6, 0x70e08128), TOBN(0xe62d8c51, 0x4acb6b16), + TOBN(0x71f64a67, 0x379e7896), TOBN(0xb25237a2, 0xcafd7fa5), + TOBN(0xf077bd98, 0x3841ba6a), TOBN(0xc4ac0244, 0x3cd16e7e), + TOBN(0x548ba869, 0x21fea4ca), TOBN(0xd36d0817, 0xf3dfdac1), + TOBN(0x09d8d71f, 0xf4685faf), TOBN(0x8eff66be, 0xc52c459a), + TOBN(0x182faee7, 0x0b57235e), TOBN(0xee3c39b1, 0x0106712b), + TOBN(0x5107331f, 0xc0fcdcb0), TOBN(0x669fb9dc, 0xa51054ba), + TOBN(0xb25101fb, 0x319d7682), TOBN(0xb0293129, 0x0a982fee), + TOBN(0x51c1c9b9, 0x0261b344), TOBN(0x0e008c5b, 0xbfd371fa), + TOBN(0xd866dd1c, 0x0278ca33), TOBN(0x666f76a6, 0xe5aa53b1), + TOBN(0xe5cfb779, 0x6013a2cf), TOBN(0x1d3a1aad, 0xa3521836), + TOBN(0xcedd2531, 0x73faa485), TOBN(0xc8ee6c4f, 0xc0a76878), + TOBN(0xddbccfc9, 0x2a11667d), TOBN(0x1a418ea9, 0x1c2f695a), + TOBN(0xdb11bd92, 0x51f73971), TOBN(0x3e4b3c82, 0xda2ed89f), + TOBN(0x9a44f3f4, 0xe73e0319), TOBN(0xd1e3de0f, 0x303431af), + TOBN(0x3c5604ff, 0x50f75f9c), TOBN(0x1d8eddf3, 0x7e752b22), + TOBN(0x0ef074dd, 0x3c9a1118), TOBN(0xd0ffc172, 0xccb86d7b), + TOBN(0xabd1ece3, 0x037d90f2), TOBN(0xe3f307d6, 0x6055856c), + TOBN(0x422f9328, 0x7e4c6daf), TOBN(0x902aac66, 0x334879a0), + TOBN(0xb6a1e7bf, 0x94cdfade), TOBN(0x6c97e1ed, 0x7fc6d634), + TOBN(0x662ad24d, 0xa2fb63f8), TOBN(0xf81be1b9, 0xa5928405), + TOBN(0x86d765e4, 0xd14b4206), TOBN(0xbecc2e0e, 0x8fa0db65), + TOBN(0xa28838e0, 0xb17fc76c), TOBN(0xe49a602a, 0xe37cf24e), + TOBN(0x76b4131a, 0x567193ec), TOBN(0xaf3c305a, 0xe5f6e70b), + TOBN(0x9587bd39, 0x031eebdd), TOBN(0x5709def8, 0x71bbe831), + TOBN(0x57059983, 0x0eb2b669), TOBN(0x4d80ce1b, 0x875b7029), + TOBN(0x838a7da8, 0x0364ac16), TOBN(0x2f431d23, 0xbe1c83ab), + TOBN(0xe56812a6, 0xf9294dd3), TOBN(0xb448d01f, 0x9b4b0d77), + TOBN(0xf3ae6061, 0x04e8305c), TOBN(0x2bead645, 0x94d8c63e), + TOBN(0x0a85434d, 0x84fd8b07), TOBN(0x537b983f, 0xf7a9dee5), + TOBN(0xedcc5f18, 0xef55bd85), TOBN(0x2041af62, 0x21c6cf8b), + TOBN(0x8e52874c, 0xb940c71e), TOBN(0x211935a9, 0xdb5f4b3a), + TOBN(0x94350492, 0x301b1dc3), TOBN(0x33d2646d, 0x29958620), + TOBN(0x16b0d64b, 0xef911404), TOBN(0x9d1f25ea, 0x9a3c5ef4), + TOBN(0x20f200eb, 0x4a352c78), TOBN(0x43929f2c, 0x4bd0b428), + TOBN(0xa5656667, 0xc7196e29), TOBN(0x7992c2f0, 0x9391be48), + TOBN(0xaaa97cbd, 0x9ee0cd6e), TOBN(0x51b0310c, 0x3dc8c9bf), + TOBN(0x237f8acf, 0xdd9f22cb), TOBN(0xbb1d81a1, 0xb585d584), + TOBN(0x8d5d85f5, 0x8c416388), TOBN(0x0d6e5a5a, 0x42fe474f), + TOBN(0xe7812766, 0x38235d4e), TOBN(0x1c62bd67, 0x496e3298), + TOBN(0x8378660c, 0x3f175bc8), TOBN(0x4d04e189, 0x17afdd4d), + TOBN(0x32a81601, 0x85a8068c), TOBN(0xdb58e4e1, 0x92b29a85), + TOBN(0xe8a65b86, 0xc70d8a3b), TOBN(0x5f0e6f4e, 0x98a0403b), + TOBN(0x08129684, 0x69ed2370), TOBN(0x34dc30bd, 0x0871ee26), + TOBN(0x3a5ce948, 0x7c9c5b05), TOBN(0x7d487b80, 0x43a90c87), + TOBN(0x4089ba37, 0xdd0e7179), TOBN(0x45f80191, 0xb4041811), + TOBN(0x1c3e1058, 0x98747ba5), TOBN(0x98c4e13a, 0x6e1ae592), + TOBN(0xd44636e6, 0xe82c9f9e), TOBN(0x711db87c, 0xc33a1043), + TOBN(0x6f431263, 0xaa8aec05), TOBN(0x43ff120d, 0x2744a4aa), + TOBN(0xd3bd892f, 0xae77779b), TOBN(0xf0fe0cc9, 0x8cdc9f82), + TOBN(0xca5f7fe6, 0xf1c5b1bc), TOBN(0xcc63a682, 0x44929a72), + TOBN(0xc7eaba0c, 0x09dbe19a), TOBN(0x2f3585ad, 0x6b5c73c2), + TOBN(0x8ab8924b, 0x0ae50c30), TOBN(0x17fcd27a, 0x638b30ba), + TOBN(0xaf414d34, 0x10b3d5a5), TOBN(0x09c107d2, 0x2a9accf1), + TOBN(0x15dac49f, 0x946a6242), TOBN(0xaec3df2a, 0xd707d642), + TOBN(0x2c2492b7, 0x3f894ae0), TOBN(0xf59df3e5, 0xb75f18ce), + TOBN(0x7cb740d2, 0x8f53cad0), TOBN(0x3eb585fb, 0xc4f01294), + TOBN(0x17da0c86, 0x32c7f717), TOBN(0xeb8c795b, 0xaf943f4c), + TOBN(0x4ee23fb5, 0xf67c51d2), TOBN(0xef187575, 0x68889949), + TOBN(0xa6b4bdb2, 0x0389168b), TOBN(0xc4ecd258, 0xea577d03), + TOBN(0x3a63782b, 0x55743082), TOBN(0x6f678f4c, 0xc72f08cd), + TOBN(0x553511cf, 0x65e58dd8), TOBN(0xd53b4e3e, 0xd402c0cd), + TOBN(0x37de3e29, 0xa037c14c), TOBN(0x86b6c516, 0xc05712aa), + TOBN(0x2834da3e, 0xb38dff6f), TOBN(0xbe012c52, 0xea636be8), + TOBN(0x292d238c, 0x61dd37f8), TOBN(0x0e54523f, 0x8f8142db), + TOBN(0xe31eb436, 0x036a05d8), TOBN(0x83e3cdff, 0x1e93c0ff), + TOBN(0x3fd2fe0f, 0x50821ddf), TOBN(0xc8e19b0d, 0xff9eb33b), + TOBN(0xc8cc943f, 0xb569a5fe), TOBN(0xad0090d4, 0xd4342d75), + TOBN(0x82090b4b, 0xcaeca000), TOBN(0xca39687f, 0x1bd410eb), + TOBN(0xe7bb0df7, 0x65959d77), TOBN(0x39d78218, 0x9c964999), + TOBN(0xd87f62e8, 0xb2415451), TOBN(0xe5efb774, 0xbed76108), + TOBN(0x3ea011a4, 0xe822f0d0), TOBN(0xbc647ad1, 0x5a8704f8), + TOBN(0xbb315b35, 0x50c6820f), TOBN(0x863dec3d, 0xb7e76bec), + TOBN(0x01ff5d3a, 0xf017bfc7), TOBN(0x20054439, 0x976b8229), + TOBN(0x067fca37, 0x0bbd0d3b), TOBN(0xf63dde64, 0x7f5e3d0f), + TOBN(0x22dbefb3, 0x2a4c94e9), TOBN(0xafbff0fe, 0x96f8278a), + TOBN(0x80aea0b1, 0x3503793d), TOBN(0xb2238029, 0x5f06cd29), + TOBN(0x65703e57, 0x8ec3feca), TOBN(0x06c38314, 0x393e7053), + TOBN(0xa0b751eb, 0x7c6734c4), TOBN(0xd2e8a435, 0xc59f0f1e), + TOBN(0x147d9052, 0x5e9ca895), TOBN(0x2f4dd31e, 0x972072df), + TOBN(0xa16fda8e, 0xe6c6755c), TOBN(0xc66826ff, 0xcf196558), + TOBN(0x1f1a76a3, 0x0cf43895), TOBN(0xa9d604e0, 0x83c3097b), + TOBN(0xe1908309, 0x66390e0e), TOBN(0xa50bf753, 0xb3c85eff), + TOBN(0x0696bdde, 0xf6a70251), TOBN(0x548b801b, 0x3c6ab16a), + TOBN(0x37fcf704, 0xa4d08762), TOBN(0x090b3def, 0xdff76c4e), + TOBN(0x87e8cb89, 0x69cb9158), TOBN(0x44a90744, 0x995ece43), + TOBN(0xf85395f4, 0x0ad9fbf5), TOBN(0x49b0f6c5, 0x4fb0c82d), + TOBN(0x75d9bc15, 0xadf7cccf), TOBN(0x81a3e5d6, 0xdfa1e1b0), + TOBN(0x8c39e444, 0x249bc17e), TOBN(0xf37dccb2, 0x8ea7fd43), + TOBN(0xda654873, 0x907fba12), TOBN(0x35daa6da, 0x4a372904), + TOBN(0x0564cfc6, 0x6283a6c5), TOBN(0xd09fa4f6, 0x4a9395bf), + TOBN(0x688e9ec9, 0xaeb19a36), TOBN(0xd913f1ce, 0xc7bfbfb4), + TOBN(0x797b9a3c, 0x61c2faa6), TOBN(0x2f979bec, 0x6a0a9c12), + TOBN(0xb5969d0f, 0x359679ec), TOBN(0xebcf523d, 0x079b0460), + TOBN(0xfd6b0008, 0x10fab870), TOBN(0x3f2edcda, 0x9373a39c), + TOBN(0x0d64f9a7, 0x6f568431), TOBN(0xf848c27c, 0x02f8898c), + TOBN(0xf418ade1, 0x260b5bd5), TOBN(0xc1f3e323, 0x6973dee8), + TOBN(0x46e9319c, 0x26c185dd), TOBN(0x6d85b7d8, 0x546f0ac4), + TOBN(0x427965f2, 0x247f9d57), TOBN(0xb519b636, 0xb0035f48), + TOBN(0x6b6163a9, 0xab87d59c), TOBN(0xff9f58c3, 0x39caaa11), + TOBN(0x4ac39cde, 0x3177387b), TOBN(0x5f6557c2, 0x873e77f9), + TOBN(0x67504006, 0x36a83041), TOBN(0x9b1c96ca, 0x75ef196c), + TOBN(0xf34283de, 0xb08c7940), TOBN(0x7ea09644, 0x1128c316), + TOBN(0xb510b3b5, 0x6aa39dff), TOBN(0x59b43da2, 0x9f8e4d8c), + TOBN(0xa8ce31fd, 0x9e4c4b9f), TOBN(0x0e20be26, 0xc1303c01), + TOBN(0x18187182, 0xe8ee47c9), TOBN(0xd9687cdb, 0x7db98101), + TOBN(0x7a520e4d, 0xa1e14ff6), TOBN(0x429808ba, 0x8836d572), + TOBN(0xa37ca60d, 0x4944b663), TOBN(0xf901f7a9, 0xa3f91ae5), + TOBN(0xe4e3e76e, 0x9e36e3b1), TOBN(0x9aa219cf, 0x29d93250), + TOBN(0x347fe275, 0x056a2512), TOBN(0xa4d643d9, 0xde65d95c), + TOBN(0x9669d396, 0x699fc3ed), TOBN(0xb598dee2, 0xcf8c6bbe), + TOBN(0x682ac1e5, 0xdda9e5c6), TOBN(0x4e0d3c72, 0xcaa9fc95), + TOBN(0x17faaade, 0x772bea44), TOBN(0x5ef8428c, 0xab0009c8), + TOBN(0xcc4ce47a, 0x460ff016), TOBN(0xda6d12bf, 0x725281cb), + TOBN(0x44c67848, 0x0223aad2), TOBN(0x6e342afa, 0x36256e28), + TOBN(0x1400bb0b, 0x93a37c04), TOBN(0x62b1bc9b, 0xdd10bd96), + TOBN(0x7251adeb, 0x0dac46b7), TOBN(0x7d33b92e, 0x7be4ef51), + TOBN(0x28b2a94b, 0xe61fa29a), TOBN(0x4b2be13f, 0x06422233), + TOBN(0x36d6d062, 0x330d8d37), TOBN(0x5ef80e1e, 0xb28ca005), + TOBN(0x174d4699, 0x6d16768e), TOBN(0x9fc4ff6a, 0x628bf217), + TOBN(0x77705a94, 0x154e490d), TOBN(0x9d96dd28, 0x8d2d997a), + TOBN(0x77e2d9d8, 0xce5d72c4), TOBN(0x9d06c5a4, 0xc11c714f), + TOBN(0x02aa5136, 0x79e4a03e), TOBN(0x1386b3c2, 0x030ff28b), + TOBN(0xfe82e8a6, 0xfb283f61), TOBN(0x7df203e5, 0xf3abc3fb), + TOBN(0xeec7c351, 0x3a4d3622), TOBN(0xf7d17dbf, 0xdf762761), + TOBN(0xc3956e44, 0x522055f0), TOBN(0xde3012db, 0x8fa748db), + TOBN(0xca9fcb63, 0xbf1dcc14), TOBN(0xa56d9dcf, 0xbe4e2f3a), + TOBN(0xb86186b6, 0x8bcec9c2), TOBN(0x7cf24df9, 0x680b9f06), + TOBN(0xc46b45ea, 0xc0d29281), TOBN(0xfff42bc5, 0x07b10e12), + TOBN(0x12263c40, 0x4d289427), TOBN(0x3d5f1899, 0xb4848ec4), + TOBN(0x11f97010, 0xd040800c), TOBN(0xb4c5f529, 0x300feb20), + TOBN(0xcc543f8f, 0xde94fdcb), TOBN(0xe96af739, 0xc7c2f05e), + TOBN(0xaa5e0036, 0x882692e1), TOBN(0x09c75b68, 0x950d4ae9), + TOBN(0x62f63df2, 0xb5932a7a), TOBN(0x2658252e, 0xde0979ad), + TOBN(0x2a19343f, 0xb5e69631), TOBN(0x718c7501, 0x525b666b), + TOBN(0x26a42d69, 0xea40dc3a), TOBN(0xdc84ad22, 0xaecc018f), + TOBN(0x25c36c7b, 0x3270f04a), TOBN(0x46ba6d47, 0x50fa72ed), + TOBN(0x6c37d1c5, 0x93e58a8e), TOBN(0xa2394731, 0x120c088c), + TOBN(0xc3be4263, 0xcb6e86da), TOBN(0x2c417d36, 0x7126d038), + TOBN(0x5b70f9c5, 0x8b6f8efa), TOBN(0x671a2faa, 0x37718536), + TOBN(0xd3ced3c6, 0xb539c92b), TOBN(0xe56f1bd9, 0xa31203c2), + TOBN(0x8b096ec4, 0x9ff3c8eb), TOBN(0x2deae432, 0x43491cea), + TOBN(0x2465c6eb, 0x17943794), TOBN(0x5d267e66, 0x20586843), + TOBN(0x9d3d116d, 0xb07159d0), TOBN(0xae07a67f, 0xc1896210), + TOBN(0x8fc84d87, 0xbb961579), TOBN(0x30009e49, 0x1c1f8dd6), + TOBN(0x8a8caf22, 0xe3132819), TOBN(0xcffa197c, 0xf23ab4ff), + TOBN(0x58103a44, 0x205dd687), TOBN(0x57b796c3, 0x0ded67a2), + TOBN(0x0b9c3a6c, 0xa1779ad7), TOBN(0xa33cfe2e, 0x357c09c5), + TOBN(0x2ea29315, 0x3db4a57e), TOBN(0x91959695, 0x8ebeb52e), + TOBN(0x118db9a6, 0xe546c879), TOBN(0x8e996df4, 0x6295c8d6), + TOBN(0xdd990484, 0x55ec806b), TOBN(0x24f291ca, 0x165c1035), + TOBN(0xcca523bb, 0x440e2229), TOBN(0x324673a2, 0x73ef4d04), + TOBN(0xaf3adf34, 0x3e11ec39), TOBN(0x6136d7f1, 0xdc5968d3), + TOBN(0x7a7b2899, 0xb053a927), TOBN(0x3eaa2661, 0xae067ecd), + TOBN(0x8549b9c8, 0x02779cd9), TOBN(0x061d7940, 0xc53385ea), + TOBN(0x3e0ba883, 0xf06d18bd), TOBN(0x4ba6de53, 0xb2700843), + TOBN(0xb966b668, 0x591a9e4d), TOBN(0x93f67567, 0x7f4fa0ed), + TOBN(0x5a02711b, 0x4347237b), TOBN(0xbc041e2f, 0xe794608e), + TOBN(0x55af10f5, 0x70f73d8c), TOBN(0xd2d4d4f7, 0xbb7564f7), + TOBN(0xd7d27a89, 0xb3e93ce7), TOBN(0xf7b5a875, 0x5d3a2c1b), + TOBN(0xb29e68a0, 0x255b218a), TOBN(0xb533837e, 0x8af76754), + TOBN(0xd1b05a73, 0x579fab2e), TOBN(0xb41055a1, 0xecd74385), + TOBN(0xb2369274, 0x445e9115), TOBN(0x2972a7c4, 0xf520274e), + TOBN(0x6c08334e, 0xf678e68a), TOBN(0x4e4160f0, 0x99b057ed), + TOBN(0x3cfe11b8, 0x52ccb69a), TOBN(0x2fd1823a, 0x21c8f772), + TOBN(0xdf7f072f, 0x3298f055), TOBN(0x8c0566f9, 0xfec74a6e), + TOBN(0xe549e019, 0x5bb4d041), TOBN(0x7c3930ba, 0x9208d850), + TOBN(0xe07141fc, 0xaaa2902b), TOBN(0x539ad799, 0xe4f69ad3), + TOBN(0xa6453f94, 0x813f9ffd), TOBN(0xc58d3c48, 0x375bc2f7), + TOBN(0xb3326fad, 0x5dc64e96), TOBN(0x3aafcaa9, 0xb240e354), + TOBN(0x1d1b0903, 0xaca1e7a9), TOBN(0x4ceb9767, 0x1211b8a0), + TOBN(0xeca83e49, 0xe32a858e), TOBN(0x4c32892e, 0xae907bad), + TOBN(0xd5b42ab6, 0x2eb9b494), TOBN(0x7fde3ee2, 0x1eabae1b), + TOBN(0x13b5ab09, 0xcaf54957), TOBN(0xbfb028be, 0xe5f5d5d5), + TOBN(0x928a0650, 0x2003e2c0), TOBN(0x90793aac, 0x67476843), + TOBN(0x5e942e79, 0xc81710a0), TOBN(0x557e4a36, 0x27ccadd4), + TOBN(0x72a2bc56, 0x4bcf6d0c), TOBN(0x09ee5f43, 0x26d7b80c), + TOBN(0x6b70dbe9, 0xd4292f19), TOBN(0x56f74c26, 0x63f16b18), + TOBN(0xc23db0f7, 0x35fbb42a), TOBN(0xb606bdf6, 0x6ae10040), + TOBN(0x1eb15d4d, 0x044573ac), TOBN(0x7dc3cf86, 0x556b0ba4), + TOBN(0x97af9a33, 0xc60df6f7), TOBN(0x0b1ef85c, 0xa716ce8c), + TOBN(0x2922f884, 0xc96958be), TOBN(0x7c32fa94, 0x35690963), + TOBN(0x2d7f667c, 0xeaa00061), TOBN(0xeaaf7c17, 0x3547365c), + TOBN(0x1eb4de46, 0x87032d58), TOBN(0xc54f3d83, 0x5e2c79e0), + TOBN(0x07818df4, 0x5d04ef23), TOBN(0x55faa9c8, 0x673d41b4), + TOBN(0xced64f6f, 0x89b95355), TOBN(0x4860d2ea, 0xb7415c84), + TOBN(0x5fdb9bd2, 0x050ebad3), TOBN(0xdb53e0cc, 0x6685a5bf), + TOBN(0xb830c031, 0x9feb6593), TOBN(0xdd87f310, 0x6accff17), + TOBN(0x2303ebab, 0x9f555c10), TOBN(0x94603695, 0x287e7065), + TOBN(0xf88311c3, 0x2e83358c), TOBN(0x508dd9b4, 0xeefb0178), + TOBN(0x7ca23706, 0x2dba8652), TOBN(0x62aac5a3, 0x0047abe5), + TOBN(0x9a61d2a0, 0x8b1ea7b3), TOBN(0xd495ab63, 0xae8b1485), + TOBN(0x38740f84, 0x87052f99), TOBN(0x178ebe5b, 0xb2974eea), + TOBN(0x030bbcca, 0x5b36d17f), TOBN(0xb5e4cce3, 0xaaf86eea), + TOBN(0xb51a0220, 0x68f8e9e0), TOBN(0xa4348796, 0x09eb3e75), + TOBN(0xbe592309, 0xeef1a752), TOBN(0x5d7162d7, 0x6f2aa1ed), + TOBN(0xaebfb5ed, 0x0f007dd2), TOBN(0x255e14b2, 0xc89edd22), + TOBN(0xba85e072, 0x0303b697), TOBN(0xc5d17e25, 0xf05720ff), + TOBN(0x02b58d6e, 0x5128ebb6), TOBN(0x2c80242d, 0xd754e113), + TOBN(0x919fca5f, 0xabfae1ca), TOBN(0x937afaac, 0x1a21459b), + TOBN(0x9e0ca91c, 0x1f66a4d2), TOBN(0x194cc7f3, 0x23ec1331), + TOBN(0xad25143a, 0x8aa11690), TOBN(0xbe40ad8d, 0x09b59e08), + TOBN(0x37d60d9b, 0xe750860a), TOBN(0x6c53b008, 0xc6bf434c), + TOBN(0xb572415d, 0x1356eb80), TOBN(0xb8bf9da3, 0x9578ded8), + TOBN(0x22658e36, 0x5e8fb38b), TOBN(0x9b70ce22, 0x5af8cb22), + TOBN(0x7c00018a, 0x829a8180), TOBN(0x84329f93, 0xb81ed295), + TOBN(0x7c343ea2, 0x5f3cea83), TOBN(0x38f8655f, 0x67586536), + TOBN(0xa661a0d0, 0x1d3ec517), TOBN(0x98744652, 0x512321ae), + TOBN(0x084ca591, 0xeca92598), TOBN(0xa9bb9dc9, 0x1dcb3feb), + TOBN(0x14c54355, 0x78b4c240), TOBN(0x5ed62a3b, 0x610cafdc), + TOBN(0x07512f37, 0x1b38846b), TOBN(0x571bb70a, 0xb0e38161), + TOBN(0xb556b95b, 0x2da705d2), TOBN(0x3ef8ada6, 0xb1a08f98), + TOBN(0x85302ca7, 0xddecfbe5), TOBN(0x0e530573, 0x943105cd), + TOBN(0x60554d55, 0x21a9255d), TOBN(0x63a32fa1, 0xf2f3802a), + TOBN(0x35c8c5b0, 0xcd477875), TOBN(0x97f458ea, 0x6ad42da1), + TOBN(0x832d7080, 0xeb6b242d), TOBN(0xd30bd023, 0x3b71e246), + TOBN(0x7027991b, 0xbe31139d), TOBN(0x68797e91, 0x462e4e53), + TOBN(0x423fe20a, 0x6b4e185a), TOBN(0x82f2c67e, 0x42d9b707), + TOBN(0x25c81768, 0x4cf7811b), TOBN(0xbd53005e, 0x045bb95d),} + , + {TOBN(0xe5f649be, 0x9d8e68fd), TOBN(0xdb0f0533, 0x1b044320), + TOBN(0xf6fde9b3, 0xe0c33398), TOBN(0x92f4209b, 0x66c8cfae), + TOBN(0xe9d1afcc, 0x1a739d4b), TOBN(0x09aea75f, 0xa28ab8de), + TOBN(0x14375fb5, 0xeac6f1d0), TOBN(0x6420b560, 0x708f7aa5), + TOBN(0x9eae499c, 0x6254dc41), TOBN(0x7e293924, 0x7a837e7e), + TOBN(0x74aec08c, 0x090524a7), TOBN(0xf82b9219, 0x8d6f55f2), + TOBN(0x493c962e, 0x1402cec5), TOBN(0x9f17ca17, 0xfa2f30e7), + TOBN(0xbcd783e8, 0xe9b879cb), TOBN(0xea3d8c14, 0x5a6f145f), + TOBN(0xdede15e7, 0x5e0dee6e), TOBN(0x74f24872, 0xdc628aa2), + TOBN(0xd3e9c4fe, 0x7861bb93), TOBN(0x56d4822a, 0x6187b2e0), + TOBN(0xb66417cf, 0xc59826f9), TOBN(0xca260969, 0x2408169e), + TOBN(0xedf69d06, 0xc79ef885), TOBN(0x00031f8a, 0xdc7d138f), + TOBN(0x103c46e6, 0x0ebcf726), TOBN(0x4482b831, 0x6231470e), + TOBN(0x6f6dfaca, 0x487c2109), TOBN(0x2e0ace97, 0x62e666ef), + TOBN(0x3246a9d3, 0x1f8d1f42), TOBN(0x1b1e83f1, 0x574944d2), + TOBN(0x13dfa63a, 0xa57f334b), TOBN(0x0cf8daed, 0x9f025d81), + TOBN(0x30d78ea8, 0x00ee11c1), TOBN(0xeb053cd4, 0xb5e3dd75), + TOBN(0x9b65b13e, 0xd58c43c5), TOBN(0xc3ad49bd, 0xbd151663), + TOBN(0x99fd8e41, 0xb6427990), TOBN(0x12cf15bd, 0x707eae1e), + TOBN(0x29ad4f1b, 0x1aabb71e), TOBN(0x5143e74d, 0x07545d0e), + TOBN(0x30266336, 0xc88bdee1), TOBN(0x25f29306, 0x5876767c), + TOBN(0x9c078571, 0xc6731996), TOBN(0xc88690b2, 0xed552951), + TOBN(0x274f2c2d, 0x852705b4), TOBN(0xb0bf8d44, 0x4e09552d), + TOBN(0x7628beeb, 0x986575d1), TOBN(0x407be238, 0x7f864651), + TOBN(0x0e5e3049, 0xa639fc6b), TOBN(0xe75c35d9, 0x86003625), + TOBN(0x0cf35bd8, 0x5dcc1646), TOBN(0x8bcaced2, 0x6c26273a), + TOBN(0xe22ecf1d, 0xb5536742), TOBN(0x013dd897, 0x1a9e068b), + TOBN(0x17f411cb, 0x8a7909c5), TOBN(0x5757ac98, 0x861dd506), + TOBN(0x85de1f0d, 0x1e935abb), TOBN(0xdefd10b4, 0x154de37a), + TOBN(0xb8d9e392, 0x369cebb5), TOBN(0x54d5ef9b, 0x761324be), + TOBN(0x4d6341ba, 0x74f17e26), TOBN(0xc0a0e3c8, 0x78c1dde4), + TOBN(0xa6d77581, 0x87d918fd), TOBN(0x66876015, 0x02ca3a13), + TOBN(0xc7313e9c, 0xf36658f0), TOBN(0xc433ef1c, 0x71f8057e), + TOBN(0x85326246, 0x1b6a835a), TOBN(0xc8f05398, 0x7c86394c), + TOBN(0xff398cdf, 0xe983c4a1), TOBN(0xbf5e8162, 0x03b7b931), + TOBN(0x93193c46, 0xb7b9045b), TOBN(0x1e4ebf5d, 0xa4a6e46b), + TOBN(0xf9942a60, 0x43a24fe7), TOBN(0x29c1191e, 0xffb3492b), + TOBN(0x9f662449, 0x902fde05), TOBN(0xc792a7ac, 0x6713c32d), + TOBN(0x2fd88ad8, 0xb737982c), TOBN(0x7e3a0319, 0xa21e60e3), + TOBN(0x09b0de44, 0x7383591a), TOBN(0x6df141ee, 0x8310a456), + TOBN(0xaec1a039, 0xe6d6f471), TOBN(0x14b2ba0f, 0x1198d12e), + TOBN(0xebc1a160, 0x3aeee5ac), TOBN(0x401f4836, 0xe0b964ce), + TOBN(0x2ee43796, 0x4fd03f66), TOBN(0x3fdb4e49, 0xdd8f3f12), + TOBN(0x6ef267f6, 0x29380f18), TOBN(0x3e8e9670, 0x8da64d16), + TOBN(0xbc19180c, 0x207674f1), TOBN(0x112e09a7, 0x33ae8fdb), + TOBN(0x99667554, 0x6aaeb71e), TOBN(0x79432af1, 0xe101b1c7), + TOBN(0xd5eb558f, 0xde2ddec6), TOBN(0x81392d1f, 0x5357753f), + TOBN(0xa7a76b97, 0x3ae1158a), TOBN(0x416fbbff, 0x4a899991), + TOBN(0x9e65fdfd, 0x0d4a9dcf), TOBN(0x7bc29e48, 0x944ddf12), + TOBN(0xbc1a92d9, 0x3c856866), TOBN(0x273c6905, 0x6e98dfe2), + TOBN(0x69fce418, 0xcdfaa6b8), TOBN(0x606bd823, 0x5061c69f), + TOBN(0x42d495a0, 0x6af75e27), TOBN(0x8ed3d505, 0x6d873a1f), + TOBN(0xaf552841, 0x6ab25b6a), TOBN(0xc6c0ffc7, 0x2b1a4523), + TOBN(0xab18827b, 0x21c99e03), TOBN(0x060e8648, 0x9034691b), + TOBN(0x5207f90f, 0x93c7f398), TOBN(0x9f4a96cb, 0x82f8d10b), + TOBN(0xdd71cd79, 0x3ad0f9e3), TOBN(0x84f435d2, 0xfc3a54f5), + TOBN(0x4b03c55b, 0x8e33787f), TOBN(0xef42f975, 0xa6384673), + TOBN(0xff7304f7, 0x5051b9f0), TOBN(0x18aca1dc, 0x741c87c2), + TOBN(0x56f120a7, 0x2d4bfe80), TOBN(0xfd823b3d, 0x053e732c), + TOBN(0x11bccfe4, 0x7537ca16), TOBN(0xdf6c9c74, 0x1b5a996b), + TOBN(0xee7332c7, 0x904fc3fa), TOBN(0x14a23f45, 0xc7e3636a), + TOBN(0xc38659c3, 0xf091d9aa), TOBN(0x4a995e5d, 0xb12d8540), + TOBN(0x20a53bec, 0xf3a5598a), TOBN(0x56534b17, 0xb1eaa995), + TOBN(0x9ed3dca4, 0xbf04e03c), TOBN(0x716c563a, 0xd8d56268), + TOBN(0x27ba77a4, 0x1d6178e7), TOBN(0xe4c80c40, 0x68a1ff8e), + TOBN(0x75011099, 0x0a13f63d), TOBN(0x7bf33521, 0xa61d46f3), + TOBN(0x0aff218e, 0x10b365bb), TOBN(0x81021804, 0x0fd7ea75), + TOBN(0x05a3fd8a, 0xa4b3a925), TOBN(0xb829e75f, 0x9b3db4e6), + TOBN(0x6bdc75a5, 0x4d53e5fb), TOBN(0x04a5dc02, 0xd52717e3), + TOBN(0x86af502f, 0xe9a42ec2), TOBN(0x8867e8fb, 0x2630e382), + TOBN(0xbf845c6e, 0xbec9889b), TOBN(0x54f491f2, 0xcb47c98d), + TOBN(0xa3091fba, 0x790c2a12), TOBN(0xd7f6fd78, 0xc20f708b), + TOBN(0xa569ac30, 0xacde5e17), TOBN(0xd0f996d0, 0x6852b4d7), + TOBN(0xe51d4bb5, 0x4609ae54), TOBN(0x3fa37d17, 0x0daed061), + TOBN(0x62a88684, 0x34b8fb41), TOBN(0x99a2acbd, 0x9efb64f1), + TOBN(0xb75c1a5e, 0x6448e1f2), TOBN(0xfa99951a, 0x42b5a069), + TOBN(0x6d956e89, 0x2f3b26e7), TOBN(0xf4709860, 0xda875247), + TOBN(0x3ad15179, 0x2482dda3), TOBN(0xd64110e3, 0x017d82f0), + TOBN(0x14928d2c, 0xfad414e4), TOBN(0x2b155f58, 0x2ed02b24), + TOBN(0x481a141b, 0xcb821bf1), TOBN(0x12e3c770, 0x4f81f5da), + TOBN(0xe49c5de5, 0x9fff8381), TOBN(0x11053232, 0x5bbec894), + TOBN(0xa0d051cc, 0x454d88c4), TOBN(0x4f6db89c, 0x1f8e531b), + TOBN(0x34fe3fd6, 0xca563a44), TOBN(0x7f5c2215, 0x58da8ab9), + TOBN(0x8445016d, 0x9474f0a1), TOBN(0x17d34d61, 0xcb7d8a0a), + TOBN(0x8e9d3910, 0x1c474019), TOBN(0xcaff2629, 0xd52ceefb), + TOBN(0xf9cf3e32, 0xc1622c2b), TOBN(0xd4b95e3c, 0xe9071a05), + TOBN(0xfbbca61f, 0x1594438c), TOBN(0x1eb6e6a6, 0x04aadedf), + TOBN(0x853027f4, 0x68e14940), TOBN(0x221d322a, 0xdfabda9c), + TOBN(0xed8ea9f6, 0xb7cb179a), TOBN(0xdc7b764d, 0xb7934dcc), + TOBN(0xfcb13940, 0x5e09180d), TOBN(0x6629a6bf, 0xb47dc2dd), + TOBN(0xbfc55e4e, 0x9f5a915e), TOBN(0xb1db9d37, 0x6204441e), + TOBN(0xf82d68cf, 0x930c5f53), TOBN(0x17d3a142, 0xcbb605b1), + TOBN(0xdd5944ea, 0x308780f2), TOBN(0xdc8de761, 0x3845f5e4), + TOBN(0x6beaba7d, 0x7624d7a3), TOBN(0x1e709afd, 0x304df11e), + TOBN(0x95364376, 0x02170456), TOBN(0xbf204b3a, 0xc8f94b64), + TOBN(0x4e53af7c, 0x5680ca68), TOBN(0x0526074a, 0xe0c67574), + TOBN(0x95d8cef8, 0xecd92af6), TOBN(0xe6b9fa7a, 0x6cd1745a), + TOBN(0x3d546d3d, 0xa325c3e4), TOBN(0x1f57691d, 0x9ae93aae), + TOBN(0xe891f3fe, 0x9d2e1a33), TOBN(0xd430093f, 0xac063d35), + TOBN(0xeda59b12, 0x5513a327), TOBN(0xdc2134f3, 0x5536f18f), + TOBN(0xaa51fe2c, 0x5c210286), TOBN(0x3f68aaee, 0x1cab658c), + TOBN(0x5a23a00b, 0xf9357292), TOBN(0x9a626f39, 0x7efdabed), + TOBN(0xfe2b3bf3, 0x199d78e3), TOBN(0xb7a2af77, 0x71bbc345), + TOBN(0x3d19827a, 0x1e59802c), TOBN(0x823bbc15, 0xb487a51c), + TOBN(0x856139f2, 0x99d0a422), TOBN(0x9ac3df65, 0xf456c6fb), + TOBN(0xaddf65c6, 0x701f8bd6), TOBN(0x149f321e, 0x3758df87), + TOBN(0xb1ecf714, 0x721b7eba), TOBN(0xe17df098, 0x31a3312a), + TOBN(0xdb2fd6ec, 0xd5c4d581), TOBN(0xfd02996f, 0x8fcea1b3), + TOBN(0xe29fa63e, 0x7882f14f), TOBN(0xc9f6dc35, 0x07c6cadc), + TOBN(0x46f22d6f, 0xb882bed0), TOBN(0x1a45755b, 0xd118e52c), + TOBN(0x9f2c7c27, 0x7c4608cf), TOBN(0x7ccbdf32, 0x568012c2), + TOBN(0xfcb0aedd, 0x61729b0e), TOBN(0x7ca2ca9e, 0xf7d75dbf), + TOBN(0xf58fecb1, 0x6f640f62), TOBN(0xe274b92b, 0x39f51946), + TOBN(0x7f4dfc04, 0x6288af44), TOBN(0x0a91f32a, 0xeac329e5), + TOBN(0x43ad274b, 0xd6aaba31), TOBN(0x719a1640, 0x0f6884f9), + TOBN(0x685d29f6, 0xdaf91e20), TOBN(0x5ec1cc33, 0x27e49d52), + TOBN(0x38f4de96, 0x3b54a059), TOBN(0x0e0015e5, 0xefbcfdb3), + TOBN(0x177d23d9, 0x4dbb8da6), TOBN(0x98724aa2, 0x97a617ad), + TOBN(0x30f0885b, 0xfdb6558e), TOBN(0xf9f7a28a, 0xc7899a96), + TOBN(0xd2ae8ac8, 0x872dc112), TOBN(0xfa0642ca, 0x73c3c459), + TOBN(0x15296981, 0xe7dfc8d6), TOBN(0x67cd4450, 0x1fb5b94a), + TOBN(0x0ec71cf1, 0x0eddfd37), TOBN(0xc7e5eeb3, 0x9a8eddc7), + TOBN(0x02ac8e3d, 0x81d95028), TOBN(0x0088f172, 0x70b0e35d), + TOBN(0xec041fab, 0xe1881fe3), TOBN(0x62cf71b8, 0xd99e7faa), + TOBN(0x5043dea7, 0xe0f222c2), TOBN(0x309d42ac, 0x72e65142), + TOBN(0x94fe9ddd, 0x9216cd30), TOBN(0xd6539c7d, 0x0f87feec), + TOBN(0x03c5a57c, 0x432ac7d7), TOBN(0x72692cf0, 0x327fda10), + TOBN(0xec28c85f, 0x280698de), TOBN(0x2331fb46, 0x7ec283b1), + TOBN(0xd34bfa32, 0x2867e633), TOBN(0x78709a82, 0x0a9cc815), + TOBN(0xb7fe6964, 0x875e2fa5), TOBN(0x25cc064f, 0x9e98bfb5), + TOBN(0x9eb0151c, 0x493a65c5), TOBN(0x5fb5d941, 0x53182464), + TOBN(0x69e6f130, 0xf04618e2), TOBN(0xa8ecec22, 0xf89c8ab6), + TOBN(0xcd6ac88b, 0xb96209bd), TOBN(0x65fa8cdb, 0xb3e1c9e0), + TOBN(0xa47d22f5, 0x4a8d8eac), TOBN(0x83895cdf, 0x8d33f963), + TOBN(0xa8adca59, 0xb56cd3d1), TOBN(0x10c8350b, 0xdaf38232), + TOBN(0x2b161fb3, 0xa5080a9f), TOBN(0xbe7f5c64, 0x3af65b3a), + TOBN(0x2c754039, 0x97403a11), TOBN(0x94626cf7, 0x121b96af), + TOBN(0x431de7c4, 0x6a983ec2), TOBN(0x3780dd3a, 0x52cc3df7), + TOBN(0xe28a0e46, 0x2baf8e3b), TOBN(0xabe68aad, 0x51d299ae), + TOBN(0x603eb8f9, 0x647a2408), TOBN(0x14c61ed6, 0x5c750981), + TOBN(0x88b34414, 0xc53352e7), TOBN(0x5a34889c, 0x1337d46e), + TOBN(0x612c1560, 0xf95f2bc8), TOBN(0x8a3f8441, 0xd4807a3a), + TOBN(0x680d9e97, 0x5224da68), TOBN(0x60cd6e88, 0xc3eb00e9), + TOBN(0x3875a98e, 0x9a6bc375), TOBN(0xdc80f924, 0x4fd554c2), + TOBN(0x6c4b3415, 0x6ac77407), TOBN(0xa1e5ea8f, 0x25420681), + TOBN(0x541bfa14, 0x4607a458), TOBN(0x5dbc7e7a, 0x96d7fbf9), + TOBN(0x646a851b, 0x31590a47), TOBN(0x039e85ba, 0x15ee6df8), + TOBN(0xd19fa231, 0xd7b43fc0), TOBN(0x84bc8be8, 0x299a0e04), + TOBN(0x2b9d2936, 0xf20df03a), TOBN(0x24054382, 0x8608d472), + TOBN(0x76b6ba04, 0x9149202a), TOBN(0xb21c3831, 0x3670e7b7), + TOBN(0xddd93059, 0xd6fdee10), TOBN(0x9da47ad3, 0x78488e71), + TOBN(0x99cc1dfd, 0xa0fcfb25), TOBN(0x42abde10, 0x64696954), + TOBN(0x14cc15fc, 0x17eab9fe), TOBN(0xd6e863e4, 0xd3e70972), + TOBN(0x29a7765c, 0x6432112c), TOBN(0x88660001, 0x5b0774d8), + TOBN(0x3729175a, 0x2c088eae), TOBN(0x13afbcae, 0x8230b8d4), + TOBN(0x44768151, 0x915f4379), TOBN(0xf086431a, 0xd8d22812), + TOBN(0x37461955, 0xc298b974), TOBN(0x905fb5f0, 0xf8711e04), + TOBN(0x787abf3a, 0xfe969d18), TOBN(0x392167c2, 0x6f6a494e), + TOBN(0xfc7a0d2d, 0x28c511da), TOBN(0xf127c7dc, 0xb66a262d), + TOBN(0xf9c4bb95, 0xfd63fdf0), TOBN(0x90016589, 0x3913ef46), + TOBN(0x74d2a73c, 0x11aa600d), TOBN(0x2f5379bd, 0x9fb5ab52), + TOBN(0xe49e53a4, 0x7fb70068), TOBN(0x68dd39e5, 0x404aa9a7), + TOBN(0xb9b0cf57, 0x2ecaa9c3), TOBN(0xba0e103b, 0xe824826b), + TOBN(0x60c2198b, 0x4631a3c4), TOBN(0xc5ff84ab, 0xfa8966a2), + TOBN(0x2d6ebe22, 0xac95aff8), TOBN(0x1c9bb6db, 0xb5a46d09), + TOBN(0x419062da, 0x53ee4f8d), TOBN(0x7b9042d0, 0xbb97efef), + TOBN(0x0f87f080, 0x830cf6bd), TOBN(0x4861d19a, 0x6ec8a6c6), + TOBN(0xd3a0daa1, 0x202f01aa), TOBN(0xb0111674, 0xf25afbd5), + TOBN(0x6d00d6cf, 0x1afb20d9), TOBN(0x13695000, 0x40671bc5), + TOBN(0x913ab0dc, 0x2485ea9b), TOBN(0x1f2bed06, 0x9eef61ac), + TOBN(0x850c8217, 0x6d799e20), TOBN(0x93415f37, 0x3271c2de), + TOBN(0x5afb06e9, 0x6c4f5910), TOBN(0x688a52df, 0xc4e9e421), + TOBN(0x30495ba3, 0xe2a9a6db), TOBN(0x4601303d, 0x58f9268b), + TOBN(0xbe3b0dad, 0x7eb0f04f), TOBN(0x4ea47250, 0x4456936d), + TOBN(0x8caf8798, 0xd33fd3e7), TOBN(0x1ccd8a89, 0xeb433708), + TOBN(0x9effe3e8, 0x87fd50ad), TOBN(0xbe240a56, 0x6b29c4df), + TOBN(0xec4ffd98, 0xca0e7ebd), TOBN(0xf586783a, 0xe748616e), + TOBN(0xa5b00d8f, 0xc77baa99), TOBN(0x0acada29, 0xb4f34c9c), + TOBN(0x36dad67d, 0x0fe723ac), TOBN(0x1d8e53a5, 0x39c36c1e), + TOBN(0xe4dd342d, 0x1f4bea41), TOBN(0x64fd5e35, 0xebc9e4e0), + TOBN(0x96f01f90, 0x57908805), TOBN(0xb5b9ea3d, 0x5ed480dd), + TOBN(0x366c5dc2, 0x3efd2dd0), TOBN(0xed2fe305, 0x6e9dfa27), + TOBN(0x4575e892, 0x6e9197e2), TOBN(0x11719c09, 0xab502a5d), + TOBN(0x264c7bec, 0xe81f213f), TOBN(0x741b9241, 0x55f5c457), + TOBN(0x78ac7b68, 0x49a5f4f4), TOBN(0xf91d70a2, 0x9fc45b7d), + TOBN(0x39b05544, 0xb0f5f355), TOBN(0x11f06bce, 0xeef930d9), + TOBN(0xdb84d25d, 0x038d05e1), TOBN(0x04838ee5, 0xbacc1d51), + TOBN(0x9da3ce86, 0x9e8ee00b), TOBN(0xc3412057, 0xc36eda1f), + TOBN(0xae80b913, 0x64d9c2f4), TOBN(0x7468bac3, 0xa010a8ff), + TOBN(0xdfd20037, 0x37359d41), TOBN(0x1a0f5ab8, 0x15efeacc), + TOBN(0x7c25ad2f, 0x659d0ce0), TOBN(0x4011bcbb, 0x6785cff1), + TOBN(0x128b9912, 0x7e2192c7), TOBN(0xa549d8e1, 0x13ccb0e8), + TOBN(0x805588d8, 0xc85438b1), TOBN(0x5680332d, 0xbc25cb27), + TOBN(0xdcd1bc96, 0x1a4bfdf4), TOBN(0x779ff428, 0x706f6566), + TOBN(0x8bbee998, 0xf059987a), TOBN(0xf6ce8cf2, 0xcc686de7), + TOBN(0xf8ad3c4a, 0x953cfdb2), TOBN(0xd1d426d9, 0x2205da36), + TOBN(0xb3c0f13f, 0xc781a241), TOBN(0x3e89360e, 0xd75362a8), + TOBN(0xccd05863, 0xc8a91184), TOBN(0x9bd0c9b7, 0xefa8a7f4), + TOBN(0x97ee4d53, 0x8a912a4b), TOBN(0xde5e15f8, 0xbcf518fd), + TOBN(0x6a055bf8, 0xc467e1e0), TOBN(0x10be4b4b, 0x1587e256), + TOBN(0xd90c14f2, 0x668621c9), TOBN(0xd5518f51, 0xab9c92c1), + TOBN(0x8e6a0100, 0xd6d47b3c), TOBN(0xcbe980dd, 0x66716175), + TOBN(0x500d3f10, 0xddd83683), TOBN(0x3b6cb35d, 0x99cac73c), + TOBN(0x53730c8b, 0x6083d550), TOBN(0xcf159767, 0xdf0a1987), + TOBN(0x84bfcf53, 0x43ad73b3), TOBN(0x1b528c20, 0x4f035a94), + TOBN(0x4294edf7, 0x33eeac69), TOBN(0xb6283e83, 0x817f3240), + TOBN(0xc3fdc959, 0x0a5f25b1), TOBN(0xefaf8aa5, 0x5844ee22), + TOBN(0xde269ba5, 0xdbdde4de), TOBN(0xe3347160, 0xc56133bf), + TOBN(0xc1184219, 0x8d9ea9f8), TOBN(0x090de5db, 0xf3fc1ab5), + TOBN(0x404c37b1, 0x0bf22cda), TOBN(0x7de20ec8, 0xf5618894), + TOBN(0x754c588e, 0xecdaecab), TOBN(0x6ca4b0ed, 0x88342743), + TOBN(0x76f08bdd, 0xf4a938ec), TOBN(0xd182de89, 0x91493ccb), + TOBN(0xd652c53e, 0xc8a4186a), TOBN(0xb3e878db, 0x946d8e33), + TOBN(0x088453c0, 0x5f37663c), TOBN(0x5cd9daaa, 0xb407748b), + TOBN(0xa1f5197f, 0x586d5e72), TOBN(0x47500be8, 0xc443ca59), + TOBN(0x78ef35b2, 0xe2652424), TOBN(0x09c5d26f, 0x6dd7767d), + TOBN(0x7175a79a, 0xa74d3f7b), TOBN(0x0428fd8d, 0xcf5ea459), + TOBN(0x511cb97c, 0xa5d1746d), TOBN(0x36363939, 0xe71d1278), + TOBN(0xcf2df955, 0x10350bf4), TOBN(0xb3817439, 0x60aae782), + TOBN(0xa748c0e4, 0x3e688809), TOBN(0x98021fbf, 0xd7a5a006), + TOBN(0x9076a70c, 0x0e367a98), TOBN(0xbea1bc15, 0x0f62b7c2), + TOBN(0x2645a68c, 0x30fe0343), TOBN(0xacaffa78, 0x699dc14f), + TOBN(0xf4469964, 0x457bf9c4), TOBN(0x0db6407b, 0x0d2ead83), + TOBN(0x68d56cad, 0xb2c6f3eb), TOBN(0x3b512e73, 0xf376356c), + TOBN(0xe43b0e1f, 0xfce10408), TOBN(0x89ddc003, 0x5a5e257d), + TOBN(0xb0ae0d12, 0x0362e5b3), TOBN(0x07f983c7, 0xb0519161), + TOBN(0xc2e94d15, 0x5d5231e7), TOBN(0xcff22aed, 0x0b4f9513), + TOBN(0xb02588dd, 0x6ad0b0b5), TOBN(0xb967d1ac, 0x11d0dcd5), + TOBN(0x8dac6bc6, 0xcf777b6c), TOBN(0x0062bdbd, 0x4c6d1959), + TOBN(0x53da71b5, 0x0ef5cc85), TOBN(0x07012c7d, 0x4006f14f), + TOBN(0x4617f962, 0xac47800d), TOBN(0x53365f2b, 0xc102ed75), + TOBN(0xb422efcb, 0x4ab8c9d3), TOBN(0x195cb26b, 0x34af31c9), + TOBN(0x3a926e29, 0x05f2c4ce), TOBN(0xbd2bdecb, 0x9856966c), + TOBN(0x5d16ab3a, 0x85527015), TOBN(0x9f81609e, 0x4486c231), + TOBN(0xd8b96b2c, 0xda350002), TOBN(0xbd054690, 0xfa1b7d36), + TOBN(0xdc90ebf5, 0xe71d79bc), TOBN(0xf241b6f9, 0x08964e4e), + TOBN(0x7c838643, 0x2fe3cd4c), TOBN(0xe0f33acb, 0xb4bc633c), + TOBN(0xb4a9ecec, 0x3d139f1f), TOBN(0x05ce69cd, 0xdc4a1f49), + TOBN(0xa19d1b16, 0xf5f98aaf), TOBN(0x45bb71d6, 0x6f23e0ef), + TOBN(0x33789fcd, 0x46cdfdd3), TOBN(0x9b8e2978, 0xcee040ca), + TOBN(0x9c69b246, 0xae0a6828), TOBN(0xba533d24, 0x7078d5aa), + TOBN(0x7a2e42c0, 0x7bb4fbdb), TOBN(0xcfb4879a, 0x7035385c), + TOBN(0x8c3dd30b, 0x3281705b), TOBN(0x7e361c6c, 0x404fe081), + TOBN(0x7b21649c, 0x3f604edf), TOBN(0x5dbf6a3f, 0xe52ffe47), + TOBN(0xc41b7c23, 0x4b54d9bf), TOBN(0x1374e681, 0x3511c3d9), + TOBN(0x1863bf16, 0xc1b2b758), TOBN(0x90e78507, 0x1e9e6a96), + TOBN(0xab4bf98d, 0x5d86f174), TOBN(0xd74e0bd3, 0x85e96fe4), + TOBN(0x8afde39f, 0xcac5d344), TOBN(0x90946dbc, 0xbd91b847), + TOBN(0xf5b42358, 0xfe1a838c), TOBN(0x05aae6c5, 0x620ac9d8), + TOBN(0x8e193bd8, 0xa1ce5a0b), TOBN(0x8f710571, 0x4dabfd72), + TOBN(0x8d8fdd48, 0x182caaac), TOBN(0x8c4aeefa, 0x040745cf), + TOBN(0x73c6c30a, 0xf3b93e6d), TOBN(0x991241f3, 0x16f42011), + TOBN(0xa0158eea, 0xe457a477), TOBN(0xd19857db, 0xee6ddc05), + TOBN(0xb3265224, 0x18c41671), TOBN(0x3ffdfc7e, 0x3c2c0d58), + TOBN(0x3a3a5254, 0x26ee7cda), TOBN(0x341b0869, 0xdf02c3a8), + TOBN(0xa023bf42, 0x723bbfc8), TOBN(0x3d15002a, 0x14452691),} + , + {TOBN(0x5ef7324c, 0x85edfa30), TOBN(0x25976554, 0x87d4f3da), + TOBN(0x352f5bc0, 0xdcb50c86), TOBN(0x8f6927b0, 0x4832a96c), + TOBN(0xd08ee1ba, 0x55f2f94c), TOBN(0x6a996f99, 0x344b45fa), + TOBN(0xe133cb8d, 0xa8aa455d), TOBN(0x5d0721ec, 0x758dc1f7), + TOBN(0x6ba7a920, 0x79e5fb67), TOBN(0xe1331feb, 0x70aa725e), + TOBN(0x5080ccf5, 0x7df5d837), TOBN(0xe4cae01d, 0x7ff72e21), + TOBN(0xd9243ee6, 0x0412a77d), TOBN(0x06ff7cac, 0xdf449025), + TOBN(0xbe75f7cd, 0x23ef5a31), TOBN(0xbc957822, 0x0ddef7a8), + TOBN(0x8cf7230c, 0xb0ce1c55), TOBN(0x5b534d05, 0x0bbfb607), + TOBN(0xee1ef113, 0x0e16363b), TOBN(0x27e0aa7a, 0xb4999e82), + TOBN(0xce1dac2d, 0x79362c41), TOBN(0x67920c90, 0x91bb6cb0), + TOBN(0x1e648d63, 0x2223df24), TOBN(0x0f7d9eef, 0xe32e8f28), + TOBN(0x6943f39a, 0xfa833834), TOBN(0x22951722, 0xa6328562), + TOBN(0x81d63dd5, 0x4170fc10), TOBN(0x9f5fa58f, 0xaecc2e6d), + TOBN(0xb66c8725, 0xe77d9a3b), TOBN(0x11235cea, 0x6384ebe0), + TOBN(0x06a8c118, 0x5845e24a), TOBN(0x0137b286, 0xebd093b1), + TOBN(0xc589e1ce, 0x44ace150), TOBN(0xe0f8d3d9, 0x4381e97c), + TOBN(0x59e99b11, 0x62c5a4b8), TOBN(0x90d262f7, 0xfd0ec9f9), + TOBN(0xfbc854c9, 0x283e13c9), TOBN(0x2d04fde7, 0xaedc7085), + TOBN(0x057d7765, 0x47dcbecb), TOBN(0x8dbdf591, 0x9a76fa5f), + TOBN(0xd0150695, 0x0de1e578), TOBN(0x2e1463e7, 0xe9f72bc6), + TOBN(0xffa68441, 0x1b39eca5), TOBN(0x673c8530, 0x7c037f2f), + TOBN(0xd0d6a600, 0x747f91da), TOBN(0xb08d43e1, 0xc9cb78e9), + TOBN(0x0fc0c644, 0x27b5cef5), TOBN(0x5c1d160a, 0xa60a2fd6), + TOBN(0xf98cae53, 0x28c8e13b), TOBN(0x375f10c4, 0xb2eddcd1), + TOBN(0xd4eb8b7f, 0x5cce06ad), TOBN(0xb4669f45, 0x80a2e1ef), + TOBN(0xd593f9d0, 0x5bbd8699), TOBN(0x5528a4c9, 0xe7976d13), + TOBN(0x3923e095, 0x1c7e28d3), TOBN(0xb9293790, 0x3f6bb577), + TOBN(0xdb567d6a, 0xc42bd6d2), TOBN(0x6df86468, 0xbb1f96ae), + TOBN(0x0efe5b1a, 0x4843b28e), TOBN(0x961bbb05, 0x6379b240), + TOBN(0xb6caf5f0, 0x70a6a26b), TOBN(0x70686c0d, 0x328e6e39), + TOBN(0x80da06cf, 0x895fc8d3), TOBN(0x804d8810, 0xb363fdc9), + TOBN(0xbe22877b, 0x207f1670), TOBN(0x9b0dd188, 0x4e615291), + TOBN(0x625ae8dc, 0x97a3c2bf), TOBN(0x08584ef7, 0x439b86e8), + TOBN(0xde7190a5, 0xdcd898ff), TOBN(0x26286c40, 0x2058ee3d), + TOBN(0x3db0b217, 0x5f87b1c1), TOBN(0xcc334771, 0x102a6db5), + TOBN(0xd99de954, 0x2f770fb1), TOBN(0x97c1c620, 0x4cd7535e), + TOBN(0xd3b6c448, 0x3f09cefc), TOBN(0xd725af15, 0x5a63b4f8), + TOBN(0x0c95d24f, 0xc01e20ec), TOBN(0xdfd37494, 0x9ae7121f), + TOBN(0x7d6ddb72, 0xec77b7ec), TOBN(0xfe079d3b, 0x0353a4ae), + TOBN(0x3066e70a, 0x2e6ac8d2), TOBN(0x9c6b5a43, 0x106e5c05), + TOBN(0x52d3c6f5, 0xede59b8c), TOBN(0x30d6a5c3, 0xfccec9ae), + TOBN(0xedec7c22, 0x4fc0a9ef), TOBN(0x190ff083, 0x95c16ced), + TOBN(0xbe12ec8f, 0x94de0fde), TOBN(0x0d131ab8, 0x852d3433), + TOBN(0x42ace07e, 0x85701291), TOBN(0x94793ed9, 0x194061a8), + TOBN(0x30e83ed6, 0xd7f4a485), TOBN(0x9eec7269, 0xf9eeff4d), + TOBN(0x90acba59, 0x0c9d8005), TOBN(0x5feca458, 0x1e79b9d1), + TOBN(0x8fbe5427, 0x1d506a1e), TOBN(0xa32b2c8e, 0x2439cfa7), + TOBN(0x1671c173, 0x73dd0b4e), TOBN(0x37a28214, 0x44a054c6), + TOBN(0x81760a1b, 0x4e8b53f1), TOBN(0xa6c04224, 0xf9f93b9e), + TOBN(0x18784b34, 0xcf671e3c), TOBN(0x81bbecd2, 0xcda9b994), + TOBN(0x38831979, 0xb2ab3848), TOBN(0xef54feb7, 0xf2e03c2d), + TOBN(0xcf197ca7, 0xfb8088fa), TOBN(0x01427247, 0x4ddc96c5), + TOBN(0xa2d2550a, 0x30777176), TOBN(0x53469898, 0x4d0cf71d), + TOBN(0x6ce937b8, 0x3a2aaac6), TOBN(0xe9f91dc3, 0x5af38d9b), + TOBN(0x2598ad83, 0xc8bf2899), TOBN(0x8e706ac9, 0xb5536c16), + TOBN(0x40dc7495, 0xf688dc98), TOBN(0x26490cd7, 0x124c4afc), + TOBN(0xe651ec84, 0x1f18775c), TOBN(0x393ea6c3, 0xb4fdaf4a), + TOBN(0x1e1f3343, 0x7f338e0d), TOBN(0x39fb832b, 0x6053e7b5), + TOBN(0x46e702da, 0x619e14d5), TOBN(0x859cacd1, 0xcdeef6e0), + TOBN(0x63b99ce7, 0x4462007d), TOBN(0xb8ab48a5, 0x4cb5f5b7), + TOBN(0x9ec673d2, 0xf55edde7), TOBN(0xd1567f74, 0x8cfaefda), + TOBN(0x46381b6b, 0x0887bcec), TOBN(0x694497ce, 0xe178f3c2), + TOBN(0x5e6525e3, 0x1e6266cb), TOBN(0x5931de26, 0x697d6413), + TOBN(0x87f8df7c, 0x0e58d493), TOBN(0xb1ae5ed0, 0x58b73f12), + TOBN(0xc368f784, 0xdea0c34d), TOBN(0x9bd0a120, 0x859a91a0), + TOBN(0xb00d88b7, 0xcc863c68), TOBN(0x3a1cc11e, 0x3d1f4d65), + TOBN(0xea38e0e7, 0x0aa85593), TOBN(0x37f13e98, 0x7dc4aee8), + TOBN(0x10d38667, 0xbc947bad), TOBN(0x738e07ce, 0x2a36ee2e), + TOBN(0xc93470cd, 0xc577fcac), TOBN(0xdee1b616, 0x2782470d), + TOBN(0x36a25e67, 0x2e793d12), TOBN(0xd6aa6cae, 0xe0f186da), + TOBN(0x474d0fd9, 0x80e07af7), TOBN(0xf7cdc47d, 0xba8a5cd4), + TOBN(0x28af6d9d, 0xab15247f), TOBN(0x7c789c10, 0x493a537f), + TOBN(0x7ac9b110, 0x23a334e7), TOBN(0x0236ac09, 0x12c9c277), + TOBN(0xa7e5bd25, 0x1d7a5144), TOBN(0x098b9c2a, 0xf13ec4ec), + TOBN(0x3639daca, 0xd3f0abca), TOBN(0x642da81a, 0xa23960f9), + TOBN(0x7d2e5c05, 0x4f7269b1), TOBN(0xfcf30777, 0xe287c385), + TOBN(0x10edc84f, 0xf2a46f21), TOBN(0x35441757, 0x4f43fa36), + TOBN(0xf1327899, 0xfd703431), TOBN(0xa438d7a6, 0x16dd587a), + TOBN(0x65c34c57, 0xe9c8352d), TOBN(0xa728edab, 0x5cc5a24e), + TOBN(0xaed78abc, 0x42531689), TOBN(0x0a51a0e8, 0x010963ef), + TOBN(0x5776fa0a, 0xd717d9b3), TOBN(0xf356c239, 0x7dd3428b), + TOBN(0x29903fff, 0x8d3a3dac), TOBN(0x409597fa, 0x3d94491f), + TOBN(0x4cd7a5ff, 0xbf4a56a4), TOBN(0xe5096474, 0x8adab462), + TOBN(0xa97b5126, 0x5c3427b0), TOBN(0x6401405c, 0xd282c9bd), + TOBN(0x3629f8d7, 0x222c5c45), TOBN(0xb1c02c16, 0xe8d50aed), + TOBN(0xbea2ed75, 0xd9635bc9), TOBN(0x226790c7, 0x6e24552f), + TOBN(0x3c33f2a3, 0x65f1d066), TOBN(0x2a43463e, 0x6dfccc2e), + TOBN(0x8cc3453a, 0xdb483761), TOBN(0xe7cc6085, 0x65d5672b), + TOBN(0x277ed6cb, 0xde3efc87), TOBN(0x19f2f368, 0x69234eaf), + TOBN(0x9aaf4317, 0x5c0b800b), TOBN(0x1f1e7c89, 0x8b6da6e2), + TOBN(0x6cfb4715, 0xb94ec75e), TOBN(0xd590dd5f, 0x453118c2), + TOBN(0x14e49da1, 0x1f17a34c), TOBN(0x5420ab39, 0x235a1456), + TOBN(0xb7637241, 0x2f50363b), TOBN(0x7b15d623, 0xc3fabb6e), + TOBN(0xa0ef40b1, 0xe274e49c), TOBN(0x5cf50744, 0x96b1860a), + TOBN(0xd6583fbf, 0x66afe5a4), TOBN(0x44240510, 0xf47e3e9a), + TOBN(0x99254343, 0x11b2d595), TOBN(0xf1367499, 0xeec8df57), + TOBN(0x3cb12c61, 0x3e73dd05), TOBN(0xd248c033, 0x7dac102a), + TOBN(0xcf154f13, 0xa77739f5), TOBN(0xbf4288cb, 0x23d2af42), + TOBN(0xaa64c9b6, 0x32e4a1cf), TOBN(0xee8c07a8, 0xc8a208f3), + TOBN(0xe10d4999, 0x6fe8393f), TOBN(0x0f809a3f, 0xe91f3a32), + TOBN(0x61096d1c, 0x802f63c8), TOBN(0x289e1462, 0x57750d3d), + TOBN(0xed06167e, 0x9889feea), TOBN(0xd5c9c0e2, 0xe0993909), + TOBN(0x46fca0d8, 0x56508ac6), TOBN(0x91826047, 0x4f1b8e83), + TOBN(0x4f2c877a, 0x9a4a2751), TOBN(0x71bd0072, 0xcae6fead), + TOBN(0x38df8dcc, 0x06aa1941), TOBN(0x5a074b4c, 0x63beeaa8), + TOBN(0xd6d65934, 0xc1cec8ed), TOBN(0xa6ecb49e, 0xaabc03bd), + TOBN(0xaade91c2, 0xde8a8415), TOBN(0xcfb0efdf, 0x691136e0), + TOBN(0x11af45ee, 0x23ab3495), TOBN(0xa132df88, 0x0b77463d), + TOBN(0x8923c15c, 0x815d06f4), TOBN(0xc3ceb3f5, 0x0d61a436), + TOBN(0xaf52291d, 0xe88fb1da), TOBN(0xea057974, 0x1da12179), + TOBN(0xb0d7218c, 0xd2fef720), TOBN(0x6c0899c9, 0x8e1d8845), + TOBN(0x98157504, 0x752ddad7), TOBN(0xd60bd74f, 0xa1a68a97), + TOBN(0x7047a3a9, 0xf658fb99), TOBN(0x1f5d86d6, 0x5f8511e4), + TOBN(0xb8a4bc42, 0x4b5a6d88), TOBN(0x69eb2c33, 0x1abefa7d), + TOBN(0x95bf39e8, 0x13c9c510), TOBN(0xf571960a, 0xd48aab43), + TOBN(0x7e8cfbcf, 0x704e23c6), TOBN(0xc71b7d22, 0x28aaa65b), + TOBN(0xa041b2bd, 0x245e3c83), TOBN(0x69b98834, 0xd21854ff), + TOBN(0x89d227a3, 0x963bfeec), TOBN(0x99947aaa, 0xde7da7cb), + TOBN(0x1d9ee9db, 0xee68a9b1), TOBN(0x0a08f003, 0x698ec368), + TOBN(0xe9ea4094, 0x78ef2487), TOBN(0xc8d2d415, 0x02cfec26), + TOBN(0xc52f9a6e, 0xb7dcf328), TOBN(0x0ed489e3, 0x85b6a937), + TOBN(0x9b94986b, 0xbef3366e), TOBN(0x0de59c70, 0xedddddb8), + TOBN(0xffdb748c, 0xeadddbe2), TOBN(0x9b9784bb, 0x8266ea40), + TOBN(0x142b5502, 0x1a93507a), TOBN(0xb4cd1187, 0x8d3c06cf), + TOBN(0xdf70e76a, 0x91ec3f40), TOBN(0x484e81ad, 0x4e7553c2), + TOBN(0x830f87b5, 0x272e9d6e), TOBN(0xea1c93e5, 0xc6ff514a), + TOBN(0x67cc2adc, 0xc4192a8e), TOBN(0xc77e27e2, 0x42f4535a), + TOBN(0x9cdbab36, 0xd2b713c5), TOBN(0x86274ea0, 0xcf7b0cd3), + TOBN(0x784680f3, 0x09af826b), TOBN(0xbfcc837a, 0x0c72dea3), + TOBN(0xa8bdfe9d, 0xd6529b73), TOBN(0x708aa228, 0x63a88002), + TOBN(0x6c7a9a54, 0xc91d45b9), TOBN(0xdf1a38bb, 0xfd004f56), + TOBN(0x2e8c9a26, 0xb8bad853), TOBN(0x2d52cea3, 0x3723eae7), + TOBN(0x054d6d81, 0x56ca2830), TOBN(0xa3317d14, 0x9a8dc411), + TOBN(0xa08662fe, 0xfd4ddeda), TOBN(0xed2a153a, 0xb55d792b), + TOBN(0x7035c16a, 0xbfc6e944), TOBN(0xb6bc5834, 0x00171cf3), + TOBN(0xe27152b3, 0x83d102b6), TOBN(0xfe695a47, 0x0646b848), + TOBN(0xa5bb09d8, 0x916e6d37), TOBN(0xb4269d64, 0x0d17015e), + TOBN(0x8d8156a1, 0x0a1d2285), TOBN(0xfeef6c51, 0x46d26d72), + TOBN(0x9dac57c8, 0x4c5434a7), TOBN(0x0282e5be, 0x59d39e31), + TOBN(0xedfff181, 0x721c486d), TOBN(0x301baf10, 0xbc58824e), + TOBN(0x8136a6aa, 0x00570031), TOBN(0x55aaf78c, 0x1cddde68), + TOBN(0x26829371, 0x59c63952), TOBN(0x3a3bd274, 0x8bc25baf), + TOBN(0xecdf8657, 0xb7e52dc3), TOBN(0x2dd8c087, 0xfd78e6c8), + TOBN(0x20553274, 0xf5531461), TOBN(0x8b4a1281, 0x5d95499b), + TOBN(0xe2c8763a, 0x1a80f9d2), TOBN(0xd1dbe32b, 0x4ddec758), + TOBN(0xaf12210d, 0x30c34169), TOBN(0xba74a953, 0x78baa533), + TOBN(0x3d133c6e, 0xa438f254), TOBN(0xa431531a, 0x201bef5b), + TOBN(0x15295e22, 0xf669d7ec), TOBN(0xca374f64, 0x357fb515), + TOBN(0x8a8406ff, 0xeaa3fdb3), TOBN(0x106ae448, 0xdf3f2da8), + TOBN(0x8f9b0a90, 0x33c8e9a1), TOBN(0x234645e2, 0x71ad5885), + TOBN(0x3d083224, 0x1c0aed14), TOBN(0xf10a7d3e, 0x7a942d46), + TOBN(0x7c11deee, 0x40d5c9be), TOBN(0xb2bae7ff, 0xba84ed98), + TOBN(0x93e97139, 0xaad58ddd), TOBN(0x3d872796, 0x3f6d1fa3), + TOBN(0x483aca81, 0x8569ff13), TOBN(0x8b89a5fb, 0x9a600f72), + TOBN(0x4cbc27c3, 0xc06f2b86), TOBN(0x22130713, 0x63ad9c0b), + TOBN(0xb5358b1e, 0x48ac2840), TOBN(0x18311294, 0xecba9477), + TOBN(0xda58f990, 0xa6946b43), TOBN(0x3098baf9, 0x9ab41819), + TOBN(0x66c4c158, 0x4198da52), TOBN(0xab4fc17c, 0x146bfd1b), + TOBN(0x2f0a4c3c, 0xbf36a908), TOBN(0x2ae9e34b, 0x58cf7838), + TOBN(0xf411529e, 0x3fa11b1f), TOBN(0x21e43677, 0x974af2b4), + TOBN(0x7c20958e, 0xc230793b), TOBN(0x710ea885, 0x16e840f3), + TOBN(0xfc0b21fc, 0xc5dc67cf), TOBN(0x08d51647, 0x88405718), + TOBN(0xd955c21f, 0xcfe49eb7), TOBN(0x9722a5d5, 0x56dd4a1f), + TOBN(0xc9ef50e2, 0xc861baa5), TOBN(0xc0c21a5d, 0x9505ac3e), + TOBN(0xaf6b9a33, 0x8b7c063f), TOBN(0xc6370339, 0x2f4779c1), + TOBN(0x22df99c7, 0x638167c3), TOBN(0xfe6ffe76, 0x795db30c), + TOBN(0x2b822d33, 0xa4854989), TOBN(0xfef031dd, 0x30563aa5), + TOBN(0x16b09f82, 0xd57c667f), TOBN(0xc70312ce, 0xcc0b76f1), + TOBN(0xbf04a9e6, 0xc9118aec), TOBN(0x82fcb419, 0x3409d133), + TOBN(0x1a8ab385, 0xab45d44d), TOBN(0xfba07222, 0x617b83a3), + TOBN(0xb05f50dd, 0x58e81b52), TOBN(0x1d8db553, 0x21ce5aff), + TOBN(0x3097b8d4, 0xe344a873), TOBN(0x7d8d116d, 0xfe36d53e), + TOBN(0x6db22f58, 0x7875e750), TOBN(0x2dc5e373, 0x43e144ea), + TOBN(0xc05f32e6, 0xe799eb95), TOBN(0xe9e5f4df, 0x6899e6ec), + TOBN(0xbdc3bd68, 0x1fab23d5), TOBN(0xb72b8ab7, 0x73af60e6), + TOBN(0x8db27ae0, 0x2cecc84a), TOBN(0x600016d8, 0x7bdb871c), + TOBN(0x42a44b13, 0xd7c46f58), TOBN(0xb8919727, 0xc3a77d39), + TOBN(0xcfc6bbbd, 0xdafd6088), TOBN(0x1a740146, 0x6bd20d39), + TOBN(0x8c747abd, 0x98c41072), TOBN(0x4c91e765, 0xbdf68ea1), + TOBN(0x7c95e5ca, 0x08819a78), TOBN(0xcf48b729, 0xc9587921), + TOBN(0x091c7c5f, 0xdebbcc7d), TOBN(0x6f287404, 0xf0e05149), + TOBN(0xf83b5ac2, 0x26cd44ec), TOBN(0x88ae32a6, 0xcfea250e), + TOBN(0x6ac5047a, 0x1d06ebc5), TOBN(0xc7e550b4, 0xd434f781), + TOBN(0x61ab1cf2, 0x5c727bd2), TOBN(0x2e4badb1, 0x1cf915b0), + TOBN(0x1b4dadec, 0xf69d3920), TOBN(0xe61b1ca6, 0xf14c1dfe), + TOBN(0x90b479cc, 0xbd6bd51f), TOBN(0x8024e401, 0x8045ec30), + TOBN(0xcab29ca3, 0x25ef0e62), TOBN(0x4f2e9416, 0x49e4ebc0), + TOBN(0x45eb40ec, 0x0ccced58), TOBN(0x25cd4b9c, 0x0da44f98), + TOBN(0x43e06458, 0x871812c6), TOBN(0x99f80d55, 0x16cef651), + TOBN(0x571340c9, 0xce6dc153), TOBN(0x138d5117, 0xd8665521), + TOBN(0xacdb45bc, 0x4e07014d), TOBN(0x2f34bb38, 0x84b60b91), + TOBN(0xf44a4fd2, 0x2ae8921e), TOBN(0xb039288e, 0x892ba1e2), + TOBN(0x9da50174, 0xb1c180b2), TOBN(0x6b70ab66, 0x1693dc87), + TOBN(0x7e9babc9, 0xe7057481), TOBN(0x4581ddef, 0x9c80dc41), + TOBN(0x0c890da9, 0x51294682), TOBN(0x0b5629d3, 0x3f4736e5), + TOBN(0x2340c79e, 0xb06f5b41), TOBN(0xa42e84ce, 0x4e243469), + TOBN(0xf9a20135, 0x045a71a9), TOBN(0xefbfb415, 0xd27b6fb6), + TOBN(0x25ebea23, 0x9d33cd6f), TOBN(0x9caedb88, 0xaa6c0af8), + TOBN(0x53dc7e9a, 0xd9ce6f96), TOBN(0x3897f9fd, 0x51e0b15a), + TOBN(0xf51cb1f8, 0x8e5d788e), TOBN(0x1aec7ba8, 0xe1d490ee), + TOBN(0x265991e0, 0xcc58cb3c), TOBN(0x9f306e8c, 0x9fc3ad31), + TOBN(0x5fed006e, 0x5040a0ac), TOBN(0xca9d5043, 0xfb476f2e), + TOBN(0xa19c06e8, 0xbeea7a23), TOBN(0xd2865801, 0x0edabb63), + TOBN(0xdb92293f, 0x6967469a), TOBN(0x2894d839, 0x8d8a8ed8), + TOBN(0x87c9e406, 0xbbc77122), TOBN(0x8671c6f1, 0x2ea3a26a), + TOBN(0xe42df8d6, 0xd7de9853), TOBN(0x2e3ce346, 0xb1f2bcc7), + TOBN(0xda601dfc, 0x899d50cf), TOBN(0xbfc913de, 0xfb1b598f), + TOBN(0x81c4909f, 0xe61f7908), TOBN(0x192e304f, 0x9bbc7b29), + TOBN(0xc3ed8738, 0xc104b338), TOBN(0xedbe9e47, 0x783f5d61), + TOBN(0x0c06e9be, 0x2db30660), TOBN(0xda3e613f, 0xc0eb7d8e), + TOBN(0xd8fa3e97, 0x322e096e), TOBN(0xfebd91e8, 0xd336e247), + TOBN(0x8f13ccc4, 0xdf655a49), TOBN(0xa9e00dfc, 0x5eb20210), + TOBN(0x84631d0f, 0xc656b6ea), TOBN(0x93a058cd, 0xd8c0d947), + TOBN(0x6846904a, 0x67bd3448), TOBN(0x4a3d4e1a, 0xf394fd5c), + TOBN(0xc102c1a5, 0xdb225f52), TOBN(0xe3455bba, 0xfc4f5e9a), + TOBN(0x6b36985b, 0x4b9ad1ce), TOBN(0xa9818536, 0x5bb7f793), + TOBN(0x6c25e1d0, 0x48b1a416), TOBN(0x1381dd53, 0x3c81bee7), + TOBN(0xd2a30d61, 0x7a4a7620), TOBN(0xc8412926, 0x39b8944c), + TOBN(0x3c1c6fbe, 0x7a97c33a), TOBN(0x941e541d, 0x938664e7), + TOBN(0x417499e8, 0x4a34f239), TOBN(0x15fdb83c, 0xb90402d5), + TOBN(0xb75f46bf, 0x433aa832), TOBN(0xb61e15af, 0x63215db1), + TOBN(0xaabe59d4, 0xa127f89a), TOBN(0x5d541e0c, 0x07e816da), + TOBN(0xaaba0659, 0xa618b692), TOBN(0x55327733, 0x17266026), + TOBN(0xaf53a0fc, 0x95f57552), TOBN(0x32947650, 0x6cacb0c9), + TOBN(0x253ff58d, 0xc821be01), TOBN(0xb0309531, 0xa06f1146), + TOBN(0x59bbbdf5, 0x05c2e54d), TOBN(0x158f27ad, 0x26e8dd22), + TOBN(0xcc5b7ffb, 0x397e1e53), TOBN(0xae03f65b, 0x7fc1e50d), + TOBN(0xa9784ebd, 0x9c95f0f9), TOBN(0x5ed9deb2, 0x24640771), + TOBN(0x31244af7, 0x035561c4), TOBN(0x87332f3a, 0x7ee857de), + TOBN(0x09e16e9e, 0x2b9e0d88), TOBN(0x52d910f4, 0x56a06049), + TOBN(0x507ed477, 0xa9592f48), TOBN(0x85cb917b, 0x2365d678), + TOBN(0xf8511c93, 0x4c8998d1), TOBN(0x2186a3f1, 0x730ea58f), + TOBN(0x50189626, 0xb2029db0), TOBN(0x9137a6d9, 0x02ceb75a), + TOBN(0x2fe17f37, 0x748bc82c), TOBN(0x87c2e931, 0x80469f8c), + TOBN(0x850f71cd, 0xbf891aa2), TOBN(0x0ca1b89b, 0x75ec3d8d), + TOBN(0x516c43aa, 0x5e1cd3cd), TOBN(0x89397808, 0x9a887c28), + TOBN(0x0059c699, 0xddea1f9f), TOBN(0x7737d6fa, 0x8e6868f7), + TOBN(0x6d93746a, 0x60f1524b), TOBN(0x36985e55, 0xba052aa7), + TOBN(0x41b1d322, 0xed923ea5), TOBN(0x3429759f, 0x25852a11), + TOBN(0xbeca6ec3, 0x092e9f41), TOBN(0x3a238c66, 0x62256bbd), + TOBN(0xd82958ea, 0x70ad487d), TOBN(0x4ac8aaf9, 0x65610d93), + TOBN(0x3fa101b1, 0x5e4ccab0), TOBN(0x9bf430f2, 0x9de14bfb), + TOBN(0xa10f5cc6, 0x6531899d), TOBN(0x590005fb, 0xea8ce17d), + TOBN(0xc437912f, 0x24544cb6), TOBN(0x9987b71a, 0xd79ac2e3), + TOBN(0x13e3d9dd, 0xc058a212), TOBN(0x00075aac, 0xd2de9606), + TOBN(0x80ab508b, 0x6cac8369), TOBN(0x87842be7, 0xf54f6c89), + TOBN(0xa7ad663d, 0x6bc532a4), TOBN(0x67813de7, 0x78a91bc8), + TOBN(0x5dcb61ce, 0xc3427239), TOBN(0x5f3c7cf0, 0xc56934d9), + TOBN(0xc079e0fb, 0xe3191591), TOBN(0xe40896bd, 0xb01aada7), + TOBN(0x8d466791, 0x0492d25f), TOBN(0x8aeb30c9, 0xe7408276), + TOBN(0xe9437495, 0x9287aacc), TOBN(0x23d4708d, 0x79fe03d4), + TOBN(0x8cda9cf2, 0xd0c05199), TOBN(0x502fbc22, 0xfae78454), + TOBN(0xc0bda9df, 0xf572a182), TOBN(0x5f9b71b8, 0x6158b372), + TOBN(0xe0f33a59, 0x2b82dd07), TOBN(0x76302735, 0x9523032e), + TOBN(0x7fe1a721, 0xc4505a32), TOBN(0x7b6e3e82, 0xf796409f),} + , + {TOBN(0xe3417bc0, 0x35d0b34a), TOBN(0x440b386b, 0x8327c0a7), + TOBN(0x8fb7262d, 0xac0362d1), TOBN(0x2c41114c, 0xe0cdf943), + TOBN(0x2ba5cef1, 0xad95a0b1), TOBN(0xc09b37a8, 0x67d54362), + TOBN(0x26d6cdd2, 0x01e486c9), TOBN(0x20477abf, 0x42ff9297), + TOBN(0xa004dcb3, 0x292a9287), TOBN(0xddc15cf6, 0x77b092c7), + TOBN(0x083a8464, 0x806c0605), TOBN(0x4a68df70, 0x3db997b0), + TOBN(0x9c134e45, 0x05bf7dd0), TOBN(0xa4e63d39, 0x8ccf7f8c), + TOBN(0xa6e6517f, 0x41b5f8af), TOBN(0xaa8b9342, 0xad7bc1cc), + TOBN(0x126f35b5, 0x1e706ad9), TOBN(0xb99cebb4, 0xc3a9ebdf), + TOBN(0xa75389af, 0xbf608d90), TOBN(0x76113c4f, 0xc6c89858), + TOBN(0x80de8eb0, 0x97e2b5aa), TOBN(0x7e1022cc, 0x63b91304), + TOBN(0x3bdab605, 0x6ccc066c), TOBN(0x33cbb144, 0xb2edf900), + TOBN(0xc4176471, 0x7af715d2), TOBN(0xe2f7f594, 0xd0134a96), + TOBN(0x2c1873ef, 0xa41ec956), TOBN(0xe4e7b4f6, 0x77821304), + TOBN(0xe5c8ff97, 0x88d5374a), TOBN(0x2b915e63, 0x80823d5b), + TOBN(0xea6bc755, 0xb2ee8fe2), TOBN(0x6657624c, 0xe7112651), + TOBN(0x157af101, 0xdace5aca), TOBN(0xc4fdbcf2, 0x11a6a267), + TOBN(0xdaddf340, 0xc49c8609), TOBN(0x97e49f52, 0xe9604a65), + TOBN(0x9be8e790, 0x937e2ad5), TOBN(0x846e2508, 0x326e17f1), + TOBN(0x3f38007a, 0x0bbbc0dc), TOBN(0xcf03603f, 0xb11e16d6), + TOBN(0xd6f800e0, 0x7442f1d5), TOBN(0x475607d1, 0x66e0e3ab), + TOBN(0x82807f16, 0xb7c64047), TOBN(0x8858e1e3, 0xa749883d), + TOBN(0x5859120b, 0x8231ee10), TOBN(0x1b80e7eb, 0x638a1ece), + TOBN(0xcb72525a, 0xc6aa73a4), TOBN(0xa7cdea3d, 0x844423ac), + TOBN(0x5ed0c007, 0xf8ae7c38), TOBN(0x6db07a5c, 0x3d740192), + TOBN(0xbe5e9c2a, 0x5fe36db3), TOBN(0xd5b9d57a, 0x76e95046), + TOBN(0x54ac32e7, 0x8eba20f2), TOBN(0xef11ca8f, 0x71b9a352), + TOBN(0x305e373e, 0xff98a658), TOBN(0xffe5a100, 0x823eb667), + TOBN(0x57477b11, 0xe51732d2), TOBN(0xdfd6eb28, 0x2538fc0e), + TOBN(0x5c43b0cc, 0x3b39eec5), TOBN(0x6af12778, 0xcb36cc57), + TOBN(0x70b0852d, 0x06c425ae), TOBN(0x6df92f8c, 0x5c221b9b), + TOBN(0x6c8d4f9e, 0xce826d9c), TOBN(0xf59aba7b, 0xb49359c3), + TOBN(0x5c8ed8d5, 0xda64309d), TOBN(0x61a6de56, 0x91b30704), + TOBN(0xd6b52f6a, 0x2f9b5808), TOBN(0x0eee4194, 0x98c958a7), + TOBN(0xcddd9aab, 0x771e4caa), TOBN(0x83965dfd, 0x78bc21be), + TOBN(0x02affce3, 0xb3b504f5), TOBN(0x30847a21, 0x561c8291), + TOBN(0xd2eb2cf1, 0x52bfda05), TOBN(0xe0e4c4e9, 0x6197b98c), + TOBN(0x1d35076c, 0xf8a1726f), TOBN(0x6c06085b, 0x2db11e3d), + TOBN(0x15c0c4d7, 0x4463ba14), TOBN(0x9d292f83, 0x0030238c), + TOBN(0x1311ee8b, 0x3727536d), TOBN(0xfeea86ef, 0xbeaedc1e), + TOBN(0xb9d18cd3, 0x66131e2e), TOBN(0xf31d974f, 0x80fe2682), + TOBN(0xb6e49e0f, 0xe4160289), TOBN(0x7c48ec0b, 0x08e92799), + TOBN(0x818111d8, 0xd1989aa7), TOBN(0xb34fa0aa, 0xebf926f9), + TOBN(0xdb5fe2f5, 0xa245474a), TOBN(0xf80a6ebb, 0x3c7ca756), + TOBN(0xa7f96054, 0xafa05dd8), TOBN(0x26dfcf21, 0xfcaf119e), + TOBN(0xe20ef2e3, 0x0564bb59), TOBN(0xef4dca50, 0x61cb02b8), + TOBN(0xcda7838a, 0x65d30672), TOBN(0x8b08d534, 0xfd657e86), + TOBN(0x4c5b4395, 0x46d595c8), TOBN(0x39b58725, 0x425cb836), + TOBN(0x8ea61059, 0x3de9abe3), TOBN(0x40434881, 0x9cdc03be), + TOBN(0x9b261245, 0xcfedce8c), TOBN(0x78c318b4, 0xcf5234a1), + TOBN(0x510bcf16, 0xfde24c99), TOBN(0x2a77cb75, 0xa2c2ff5d), + TOBN(0x9c895c2b, 0x27960fb4), TOBN(0xd30ce975, 0xb0eda42b), + TOBN(0xfda85393, 0x1a62cc26), TOBN(0x23c69b96, 0x50c0e052), + TOBN(0xa227df15, 0xbfc633f3), TOBN(0x2ac78848, 0x1bae7d48), + TOBN(0x487878f9, 0x187d073d), TOBN(0x6c2be919, 0x967f807d), + TOBN(0x765861d8, 0x336e6d8f), TOBN(0x88b8974c, 0xce528a43), + TOBN(0x09521177, 0xff57d051), TOBN(0x2ff38037, 0xfb6a1961), + TOBN(0xfc0aba74, 0xa3d76ad4), TOBN(0x7c764803, 0x25a7ec17), + TOBN(0x7532d75f, 0x48879bc8), TOBN(0xea7eacc0, 0x58ce6bc1), + TOBN(0xc82176b4, 0x8e896c16), TOBN(0x9a30e0b2, 0x2c750fed), + TOBN(0xc37e2c2e, 0x421d3aa4), TOBN(0xf926407c, 0xe84fa840), + TOBN(0x18abc03d, 0x1454e41c), TOBN(0x26605ecd, 0x3f7af644), + TOBN(0x242341a6, 0xd6a5eabf), TOBN(0x1edb84f4, 0x216b668e), + TOBN(0xd836edb8, 0x04010102), TOBN(0x5b337ce7, 0x945e1d8c), + TOBN(0xd2075c77, 0xc055dc14), TOBN(0x2a0ffa25, 0x81d89cdf), + TOBN(0x8ce815ea, 0x6ffdcbaf), TOBN(0xa3428878, 0xfb648867), + TOBN(0x277699cf, 0x884655fb), TOBN(0xfa5b5bd6, 0x364d3e41), + TOBN(0x01f680c6, 0x441e1cb7), TOBN(0x3fd61e66, 0xb70a7d67), + TOBN(0x666ba2dc, 0xcc78cf66), TOBN(0xb3018174, 0x6fdbff77), + TOBN(0x8d4dd0db, 0x168d4668), TOBN(0x259455d0, 0x1dab3a2a), + TOBN(0xf58564c5, 0xcde3acec), TOBN(0x77141925, 0x13adb276), + TOBN(0x527d725d, 0x8a303f65), TOBN(0x55deb6c9, 0xe6f38f7b), + TOBN(0xfd5bb657, 0xb1fa70fb), TOBN(0xfa07f50f, 0xd8073a00), + TOBN(0xf72e3aa7, 0xbca02500), TOBN(0xf68f895d, 0x9975740d), + TOBN(0x30112060, 0x5cae2a6a), TOBN(0x01bd7218, 0x02874842), + TOBN(0x3d423891, 0x7ce47bd3), TOBN(0xa66663c1, 0x789544f6), + TOBN(0x864d05d7, 0x3272d838), TOBN(0xe22924f9, 0xfa6295c5), + TOBN(0x8189593f, 0x6c2fda32), TOBN(0x330d7189, 0xb184b544), + TOBN(0x79efa62c, 0xbde1f714), TOBN(0x35771c94, 0xe5cb1a63), + TOBN(0x2f4826b8, 0x641c8332), TOBN(0x00a894fb, 0xc8cee854), + TOBN(0xb4b9a39b, 0x36194d40), TOBN(0xe857a7c5, 0x77612601), + TOBN(0xf4209dd2, 0x4ecf2f58), TOBN(0x82b9e66d, 0x5a033487), + TOBN(0xc1e36934, 0xe4e8b9dd), TOBN(0xd2372c9d, 0xa42377d7), + TOBN(0x51dc94c7, 0x0e3ae43b), TOBN(0x4c57761e, 0x04474f6f), + TOBN(0xdcdacd0a, 0x1058a318), TOBN(0x369cf3f5, 0x78053a9a), + TOBN(0xc6c3de50, 0x31c68de2), TOBN(0x4653a576, 0x3c4b6d9f), + TOBN(0x1688dd5a, 0xaa4e5c97), TOBN(0x5be80aa1, 0xb7ab3c74), + TOBN(0x70cefe7c, 0xbc65c283), TOBN(0x57f95f13, 0x06867091), + TOBN(0xa39114e2, 0x4415503b), TOBN(0xc08ff7c6, 0x4cbb17e9), + TOBN(0x1eff674d, 0xd7dec966), TOBN(0x6d4690af, 0x53376f63), + TOBN(0xff6fe32e, 0xea74237b), TOBN(0xc436d17e, 0xcd57508e), + TOBN(0x15aa28e1, 0xedcc40fe), TOBN(0x0d769c04, 0x581bbb44), + TOBN(0xc240b6de, 0x34eaacda), TOBN(0xd9e116e8, 0x2ba0f1de), + TOBN(0xcbe45ec7, 0x79438e55), TOBN(0x91787c9d, 0x96f752d7), + TOBN(0x897f532b, 0xf129ac2f), TOBN(0xd307b7c8, 0x5a36e22c), + TOBN(0x91940675, 0x749fb8f3), TOBN(0xd14f95d0, 0x157fdb28), + TOBN(0xfe51d029, 0x6ae55043), TOBN(0x8931e98f, 0x44a87de1), + TOBN(0xe57f1cc6, 0x09e4fee2), TOBN(0x0d063b67, 0x4e072d92), + TOBN(0x70a998b9, 0xed0e4316), TOBN(0xe74a736b, 0x306aca46), + TOBN(0xecf0fbf2, 0x4fda97c7), TOBN(0xa40f65cb, 0x3e178d93), + TOBN(0x16253604, 0x16df4285), TOBN(0xb0c9babb, 0xd0c56ae2), + TOBN(0x73032b19, 0xcfc5cfc3), TOBN(0xe497e5c3, 0x09752056), + TOBN(0x12096bb4, 0x164bda96), TOBN(0x1ee42419, 0xa0b74da1), + TOBN(0x8fc36243, 0x403826ba), TOBN(0x0c8f0069, 0xdc09e660), + TOBN(0x8667e981, 0xc27253c9), TOBN(0x05a6aefb, 0x92b36a45), + TOBN(0xa62c4b36, 0x9cb7bb46), TOBN(0x8394f375, 0x11f7027b), + TOBN(0x747bc79c, 0x5f109d0f), TOBN(0xcad88a76, 0x5b8cc60a), + TOBN(0x80c5a66b, 0x58f09e68), TOBN(0xe753d451, 0xf6127eac), + TOBN(0xc44b74a1, 0x5b0ec6f5), TOBN(0x47989fe4, 0x5289b2b8), + TOBN(0x745f8484, 0x58d6fc73), TOBN(0xec362a6f, 0xf61c70ab), + TOBN(0x070c98a7, 0xb3a8ad41), TOBN(0x73a20fc0, 0x7b63db51), + TOBN(0xed2c2173, 0xf44c35f4), TOBN(0x8a56149d, 0x9acc9dca), + TOBN(0x98f17881, 0x9ac6e0f4), TOBN(0x360fdeaf, 0xa413b5ed), + TOBN(0x0625b8f4, 0xa300b0fd), TOBN(0xf1f4d76a, 0x5b3222d3), + TOBN(0x9d6f5109, 0x587f76b8), TOBN(0x8b4ee08d, 0x2317fdb5), + TOBN(0x88089bb7, 0x8c68b095), TOBN(0x95570e9a, 0x5808d9b9), + TOBN(0xa395c36f, 0x35d33ae7), TOBN(0x200ea123, 0x50bb5a94), + TOBN(0x20c789bd, 0x0bafe84b), TOBN(0x243ef52d, 0x0919276a), + TOBN(0x3934c577, 0xe23ae233), TOBN(0xb93807af, 0xa460d1ec), + TOBN(0xb72a53b1, 0xf8fa76a4), TOBN(0xd8914cb0, 0xc3ca4491), + TOBN(0x2e128494, 0x3fb42622), TOBN(0x3b2700ac, 0x500907d5), + TOBN(0xf370fb09, 0x1a95ec63), TOBN(0xf8f30be2, 0x31b6dfbd), + TOBN(0xf2b2f8d2, 0x69e55f15), TOBN(0x1fead851, 0xcc1323e9), + TOBN(0xfa366010, 0xd9e5eef6), TOBN(0x64d487b0, 0xe316107e), + TOBN(0x4c076b86, 0xd23ddc82), TOBN(0x03fd344c, 0x7e0143f0), + TOBN(0xa95362ff, 0x317af2c5), TOBN(0x0add3db7, 0xe18b7a4f), + TOBN(0x9c673e3f, 0x8260e01b), TOBN(0xfbeb49e5, 0x54a1cc91), + TOBN(0x91351bf2, 0x92f2e433), TOBN(0xc755e7ec, 0x851141eb), + TOBN(0xc9a95139, 0x29607745), TOBN(0x0ca07420, 0xa26f2b28), + TOBN(0xcb2790e7, 0x4bc6f9dd), TOBN(0x345bbb58, 0xadcaffc0), + TOBN(0xc65ea38c, 0xbe0f27a2), TOBN(0x67c24d7c, 0x641fcb56), + TOBN(0x2c25f0a7, 0xa9e2c757), TOBN(0x93f5cdb0, 0x16f16c49), + TOBN(0x2ca5a9d7, 0xc5ee30a1), TOBN(0xd1593635, 0xb909b729), + TOBN(0x804ce9f3, 0xdadeff48), TOBN(0xec464751, 0xb07c30c3), + TOBN(0x89d65ff3, 0x9e49af6a), TOBN(0xf2d6238a, 0x6f3d01bc), + TOBN(0x1095561e, 0x0bced843), TOBN(0x51789e12, 0xc8a13fd8), + TOBN(0xd633f929, 0x763231df), TOBN(0x46df9f7d, 0xe7cbddef), + TOBN(0x01c889c0, 0xcb265da8), TOBN(0xfce1ad10, 0xaf4336d2), + TOBN(0x8d110df6, 0xfc6a0a7e), TOBN(0xdd431b98, 0x6da425dc), + TOBN(0xcdc4aeab, 0x1834aabe), TOBN(0x84deb124, 0x8439b7fc), + TOBN(0x8796f169, 0x3c2a5998), TOBN(0x9b9247b4, 0x7947190d), + TOBN(0x55b9d9a5, 0x11597014), TOBN(0x7e9dd70d, 0x7b1566ee), + TOBN(0x94ad78f7, 0xcbcd5e64), TOBN(0x0359ac17, 0x9bd4c032), + TOBN(0x3b11baaf, 0x7cc222ae), TOBN(0xa6a6e284, 0xba78e812), + TOBN(0x8392053f, 0x24cea1a0), TOBN(0xc97bce4a, 0x33621491), + TOBN(0x7eb1db34, 0x35399ee9), TOBN(0x473f78ef, 0xece81ad1), + TOBN(0x41d72fe0, 0xf63d3d0d), TOBN(0xe620b880, 0xafab62fc), + TOBN(0x92096bc9, 0x93158383), TOBN(0x41a21357, 0x8f896f6c), + TOBN(0x1b5ee2fa, 0xc7dcfcab), TOBN(0x650acfde, 0x9546e007), + TOBN(0xc081b749, 0xb1b02e07), TOBN(0xda9e41a0, 0xf9eca03d), + TOBN(0x013ba727, 0x175a54ab), TOBN(0xca0cd190, 0xea5d8d10), + TOBN(0x85ea52c0, 0x95fd96a9), TOBN(0x2c591b9f, 0xbc5c3940), + TOBN(0x6fb4d4e4, 0x2bad4d5f), TOBN(0xfa4c3590, 0xfef0059b), + TOBN(0x6a10218a, 0xf5122294), TOBN(0x9a78a81a, 0xa85751d1), + TOBN(0x04f20579, 0xa98e84e7), TOBN(0xfe1242c0, 0x4997e5b5), + TOBN(0xe77a273b, 0xca21e1e4), TOBN(0xfcc8b1ef, 0x9411939d), + TOBN(0xe20ea302, 0x92d0487a), TOBN(0x1442dbec, 0x294b91fe), + TOBN(0x1f7a4afe, 0xbb6b0e8f), TOBN(0x1700ef74, 0x6889c318), + TOBN(0xf5bbffc3, 0x70f1fc62), TOBN(0x3b31d4b6, 0x69c79cca), + TOBN(0xe8bc2aab, 0xa7f6340d), TOBN(0xb0b08ab4, 0xa725e10a), + TOBN(0x44f05701, 0xae340050), TOBN(0xba4b3016, 0x1cf0c569), + TOBN(0x5aa29f83, 0xfbe19a51), TOBN(0x1b9ed428, 0xb71d752e), + TOBN(0x1666e54e, 0xeb4819f5), TOBN(0x616cdfed, 0x9e18b75b), + TOBN(0x112ed5be, 0x3ee27b0b), TOBN(0xfbf28319, 0x44c7de4d), + TOBN(0xd685ec85, 0xe0e60d84), TOBN(0x68037e30, 0x1db7ee78), + TOBN(0x5b65bdcd, 0x003c4d6e), TOBN(0x33e7363a, 0x93e29a6a), + TOBN(0x995b3a61, 0x08d0756c), TOBN(0xd727f85c, 0x2faf134b), + TOBN(0xfac6edf7, 0x1d337823), TOBN(0x99b9aa50, 0x0439b8b4), + TOBN(0x722eb104, 0xe2b4e075), TOBN(0x49987295, 0x437c4926), + TOBN(0xb1e4c0e4, 0x46a9b82d), TOBN(0xd0cb3197, 0x57a006f5), + TOBN(0xf3de0f7d, 0xd7808c56), TOBN(0xb5c54d8f, 0x51f89772), + TOBN(0x500a114a, 0xadbd31aa), TOBN(0x9afaaaa6, 0x295f6cab), + TOBN(0x94705e21, 0x04cf667a), TOBN(0xfc2a811b, 0x9d3935d7), + TOBN(0x560b0280, 0x6d09267c), TOBN(0xf19ed119, 0xf780e53b), + TOBN(0xf0227c09, 0x067b6269), TOBN(0x967b8533, 0x5caef599), + TOBN(0x155b9243, 0x68efeebc), TOBN(0xcd6d34f5, 0xc497bae6), + TOBN(0x1dd8d5d3, 0x6cceb370), TOBN(0x2aeac579, 0xa78d7bf9), + TOBN(0x5d65017d, 0x70b67a62), TOBN(0x70c8e44f, 0x17c53f67), + TOBN(0xd1fc0950, 0x86a34d09), TOBN(0xe0fca256, 0xe7134907), + TOBN(0xe24fa29c, 0x80fdd315), TOBN(0x2c4acd03, 0xd87499ad), + TOBN(0xbaaf7517, 0x3b5a9ba6), TOBN(0xb9cbe1f6, 0x12e51a51), + TOBN(0xd88edae3, 0x5e154897), TOBN(0xe4309c3c, 0x77b66ca0), + TOBN(0xf5555805, 0xf67f3746), TOBN(0x85fc37ba, 0xa36401ff), + TOBN(0xdf86e2ca, 0xd9499a53), TOBN(0x6270b2a3, 0xecbc955b), + TOBN(0xafae64f5, 0x974ad33b), TOBN(0x04d85977, 0xfe7b2df1), + TOBN(0x2a3db3ff, 0x4ab03f73), TOBN(0x0b87878a, 0x8702740a), + TOBN(0x6d263f01, 0x5a061732), TOBN(0xc25430ce, 0xa32a1901), + TOBN(0xf7ebab3d, 0xdb155018), TOBN(0x3a86f693, 0x63a9b78e), + TOBN(0x349ae368, 0xda9f3804), TOBN(0x470f07fe, 0xa164349c), + TOBN(0xd52f4cc9, 0x8562baa5), TOBN(0xc74a9e86, 0x2b290df3), + TOBN(0xd3a1aa35, 0x43471a24), TOBN(0x239446be, 0xb8194511), + TOBN(0xbec2dd00, 0x81dcd44d), TOBN(0xca3d7f0f, 0xc42ac82d), + TOBN(0x1f3db085, 0xfdaf4520), TOBN(0xbb6d3e80, 0x4549daf2), + TOBN(0xf5969d8a, 0x19ad5c42), TOBN(0x7052b13d, 0xdbfd1511), + TOBN(0x11890d1b, 0x682b9060), TOBN(0xa71d3883, 0xac34452c), + TOBN(0xa438055b, 0x783805b4), TOBN(0x43241277, 0x4725b23e), + TOBN(0xf20cf96e, 0x4901bbed), TOBN(0x6419c710, 0xf432a2bb), + TOBN(0x57a0fbb9, 0xdfa9cd7d), TOBN(0x589111e4, 0x00daa249), + TOBN(0x19809a33, 0x7b60554e), TOBN(0xea5f8887, 0xede283a4), + TOBN(0x2d713802, 0x503bfd35), TOBN(0x151bb0af, 0x585d2a53), + TOBN(0x40b08f74, 0x43b30ca8), TOBN(0xe10b5bba, 0xd9934583), + TOBN(0xe8a546d6, 0xb51110ad), TOBN(0x1dd50e66, 0x28e0b6c5), + TOBN(0x292e9d54, 0xcff2b821), TOBN(0x3882555d, 0x47281760), + TOBN(0x134838f8, 0x3724d6e3), TOBN(0xf2c679e0, 0x22ddcda1), + TOBN(0x40ee8815, 0x6d2a5768), TOBN(0x7f227bd2, 0x1c1e7e2d), + TOBN(0x487ba134, 0xd04ff443), TOBN(0x76e2ff3d, 0xc614e54b), + TOBN(0x36b88d6f, 0xa3177ec7), TOBN(0xbf731d51, 0x2328fff5), + TOBN(0x758caea2, 0x49ba158e), TOBN(0x5ab8ff4c, 0x02938188), + TOBN(0x33e16056, 0x35edc56d), TOBN(0x5a69d349, 0x7e940d79), + TOBN(0x6c4fd001, 0x03866dcb), TOBN(0x20a38f57, 0x4893cdef), + TOBN(0xfbf3e790, 0xfac3a15b), TOBN(0x6ed7ea2e, 0x7a4f8e6b), + TOBN(0xa663eb4f, 0xbc3aca86), TOBN(0x22061ea5, 0x080d53f7), + TOBN(0x2480dfe6, 0xf546783f), TOBN(0xd38bc6da, 0x5a0a641e), + TOBN(0xfb093cd1, 0x2ede8965), TOBN(0x89654db4, 0xacb455cf), + TOBN(0x413cbf9a, 0x26e1adee), TOBN(0x291f3764, 0x373294d4), + TOBN(0x00797257, 0x648083fe), TOBN(0x25f504d3, 0x208cc341), + TOBN(0x635a8e5e, 0xc3a0ee43), TOBN(0x70aaebca, 0x679898ff), + TOBN(0x9ee9f547, 0x5dc63d56), TOBN(0xce987966, 0xffb34d00), + TOBN(0xf9f86b19, 0x5e26310a), TOBN(0x9e435484, 0x382a8ca8), + TOBN(0x253bcb81, 0xc2352fe4), TOBN(0xa4eac8b0, 0x4474b571), + TOBN(0xc1b97512, 0xc1ad8cf8), TOBN(0x193b4e9e, 0x99e0b697), + TOBN(0x939d2716, 0x01e85df0), TOBN(0x4fb265b3, 0xcd44eafd), + TOBN(0x321e7dcd, 0xe51e1ae2), TOBN(0x8e3a8ca6, 0xe3d8b096), + TOBN(0x8de46cb0, 0x52604998), TOBN(0x91099ad8, 0x39072aa7), + TOBN(0x2617f91c, 0x93aa96b8), TOBN(0x0fc8716b, 0x7fca2e13), + TOBN(0xa7106f5e, 0x95328723), TOBN(0xd1c9c40b, 0x262e6522), + TOBN(0xb9bafe86, 0x42b7c094), TOBN(0x1873439d, 0x1543c021), + TOBN(0xe1baa5de, 0x5cbefd5d), TOBN(0xa363fc5e, 0x521e8aff), + TOBN(0xefe6320d, 0xf862eaac), TOBN(0x14419c63, 0x22c647dc), + TOBN(0x0e06707c, 0x4e46d428), TOBN(0xcb6c834f, 0x4a178f8f), + TOBN(0x0f993a45, 0xd30f917c), TOBN(0xd4c4b049, 0x9879afee), + TOBN(0xb6142a1e, 0x70500063), TOBN(0x7c9b41c3, 0xa5d9d605), + TOBN(0xbc00fc2f, 0x2f8ba2c7), TOBN(0x0966eb2f, 0x7c67aa28), + TOBN(0x13f7b516, 0x5a786972), TOBN(0x3bfb7557, 0x8a2fbba0), + TOBN(0x131c4f23, 0x5a2b9620), TOBN(0xbff3ed27, 0x6faf46be), + TOBN(0x9b4473d1, 0x7e172323), TOBN(0x421e8878, 0x339f6246), + TOBN(0x0fa8587a, 0x25a41632), TOBN(0xc0814124, 0xa35b6c93), + TOBN(0x2b18a9f5, 0x59ebb8db), TOBN(0x264e3357, 0x76edb29c), + TOBN(0xaf245ccd, 0xc87c51e2), TOBN(0x16b3015b, 0x501e6214), + TOBN(0xbb31c560, 0x0a3882ce), TOBN(0x6961bb94, 0xfec11e04), + TOBN(0x3b825b8d, 0xeff7a3a0), TOBN(0xbec33738, 0xb1df7326), + TOBN(0x68ad747c, 0x99604a1f), TOBN(0xd154c934, 0x9a3bd499), + TOBN(0xac33506f, 0x1cc7a906), TOBN(0x73bb5392, 0x6c560e8f), + TOBN(0x6428fcbe, 0x263e3944), TOBN(0xc11828d5, 0x1c387434), + TOBN(0x3cd04be1, 0x3e4b12ff), TOBN(0xc3aad9f9, 0x2d88667c), + TOBN(0xc52ddcf8, 0x248120cf), TOBN(0x985a892e, 0x2a389532), + TOBN(0xfbb4b21b, 0x3bb85fa0), TOBN(0xf95375e0, 0x8dfc6269), + TOBN(0xfb4fb06c, 0x7ee2acea), TOBN(0x6785426e, 0x309c4d1f), + TOBN(0x659b17c8, 0xd8ceb147), TOBN(0x9b649eee, 0xb70a5554), + TOBN(0x6b7fa0b5, 0xac6bc634), TOBN(0xd99fe2c7, 0x1d6e732f), + TOBN(0x30e6e762, 0x8d3abba2), TOBN(0x18fee6e7, 0xa797b799), + TOBN(0x5c9d360d, 0xc696464d), TOBN(0xe3baeb48, 0x27bfde12), + TOBN(0x2bf5db47, 0xf23206d5), TOBN(0x2f6d3420, 0x1d260152), + TOBN(0x17b87653, 0x3f8ff89a), TOBN(0x5157c30c, 0x378fa458), + TOBN(0x7517c5c5, 0x2d4fb936), TOBN(0xef22f7ac, 0xe6518cdc), + TOBN(0xdeb483e6, 0xbf847a64), TOBN(0xf5084558, 0x92e0fa89),} + , + {TOBN(0xab9659d8, 0xdf7304d4), TOBN(0xb71bcf1b, 0xff210e8e), + TOBN(0xa9a2438b, 0xd73fbd60), TOBN(0x4595cd1f, 0x5d11b4de), + TOBN(0x9c0d329a, 0x4835859d), TOBN(0x4a0f0d2d, 0x7dbb6e56), + TOBN(0xc6038e5e, 0xdf928a4e), TOBN(0xc9429621, 0x8f5ad154), + TOBN(0x91213462, 0xf23f2d92), TOBN(0x6cab71bd, 0x60b94078), + TOBN(0x6bdd0a63, 0x176cde20), TOBN(0x54c9b20c, 0xee4d54bc), + TOBN(0x3cd2d8aa, 0x9f2ac02f), TOBN(0x03f8e617, 0x206eedb0), + TOBN(0xc7f68e16, 0x93086434), TOBN(0x831469c5, 0x92dd3db9), + TOBN(0x8521df24, 0x8f981354), TOBN(0x587e23ec, 0x3588a259), + TOBN(0xcbedf281, 0xd7a0992c), TOBN(0x06930a55, 0x38961407), + TOBN(0x09320deb, 0xbe5bbe21), TOBN(0xa7ffa5b5, 0x2491817f), + TOBN(0xe6c8b4d9, 0x09065160), TOBN(0xac4f3992, 0xfff6d2a9), + TOBN(0x7aa7a158, 0x3ae9c1bd), TOBN(0xe0af6d98, 0xe37ce240), + TOBN(0xe54342d9, 0x28ab38b4), TOBN(0xe8b75007, 0x0a1c98ca), + TOBN(0xefce86af, 0xe02358f2), TOBN(0x31b8b856, 0xea921228), + TOBN(0x052a1912, 0x0a1c67fc), TOBN(0xb4069ea4, 0xe3aead59), + TOBN(0x3232d6e2, 0x7fa03cb3), TOBN(0xdb938e5b, 0x0fdd7d88), + TOBN(0x04c1d2cd, 0x2ccbfc5d), TOBN(0xd2f45c12, 0xaf3a580f), + TOBN(0x592620b5, 0x7883e614), TOBN(0x5fd27e68, 0xbe7c5f26), + TOBN(0x139e45a9, 0x1567e1e3), TOBN(0x2cc71d2d, 0x44d8aaaf), + TOBN(0x4a9090cd, 0xe36d0757), TOBN(0xf722d7b1, 0xd9a29382), + TOBN(0xfb7fb04c, 0x04b48ddf), TOBN(0x628ad2a7, 0xebe16f43), + TOBN(0xcd3fbfb5, 0x20226040), TOBN(0x6c34ecb1, 0x5104b6c4), + TOBN(0x30c0754e, 0xc903c188), TOBN(0xec336b08, 0x2d23cab0), + TOBN(0x473d62a2, 0x1e206ee5), TOBN(0xf1e27480, 0x8c49a633), + TOBN(0x87ab956c, 0xe9f6b2c3), TOBN(0x61830b48, 0x62b606ea), + TOBN(0x67cd6846, 0xe78e815f), TOBN(0xfe40139f, 0x4c02082a), + TOBN(0x52bbbfcb, 0x952ec365), TOBN(0x74c11642, 0x6b9836ab), + TOBN(0x9f51439e, 0x558df019), TOBN(0x230da4ba, 0xac712b27), + TOBN(0x518919e3, 0x55185a24), TOBN(0x4dcefcdd, 0x84b78f50), + TOBN(0xa7d90fb2, 0xa47d4c5a), TOBN(0x55ac9abf, 0xb30e009e), + TOBN(0xfd2fc359, 0x74eed273), TOBN(0xb72d824c, 0xdbea8faf), + TOBN(0xce721a74, 0x4513e2ca), TOBN(0x0b418612, 0x38240b2c), + TOBN(0x05199968, 0xd5baa450), TOBN(0xeb1757ed, 0x2b0e8c25), + TOBN(0x6ebc3e28, 0x3dfac6d5), TOBN(0xb2431e2e, 0x48a237f5), + TOBN(0x2acb5e23, 0x52f61499), TOBN(0x5558a2a7, 0xe06c936b), + TOBN(0xd213f923, 0xcbb13d1b), TOBN(0x98799f42, 0x5bfb9bfe), + TOBN(0x1ae8ddc9, 0x701144a9), TOBN(0x0b8b3bb6, 0x4c5595ee), + TOBN(0x0ea9ef2e, 0x3ecebb21), TOBN(0x17cb6c4b, 0x3671f9a7), + TOBN(0x47ef464f, 0x726f1d1f), TOBN(0x171b9484, 0x6943a276), + TOBN(0x51a4ae2d, 0x7ef0329c), TOBN(0x08509222, 0x91c4402a), + TOBN(0x64a61d35, 0xafd45bbc), TOBN(0x38f096fe, 0x3035a851), + TOBN(0xc7468b74, 0xa1dec027), TOBN(0xe8cf10e7, 0x4fc7dcba), + TOBN(0xea35ff40, 0xf4a06353), TOBN(0x0b4c0dfa, 0x8b77dd66), + TOBN(0x779b8552, 0xde7e5c19), TOBN(0xfab28609, 0xc1c0256c), + TOBN(0x64f58eee, 0xabd4743d), TOBN(0x4e8ef838, 0x7b6cc93b), + TOBN(0xee650d26, 0x4cb1bf3d), TOBN(0x4c1f9d09, 0x73dedf61), + TOBN(0xaef7c9d7, 0xbfb70ced), TOBN(0x1ec0507e, 0x1641de1e), + TOBN(0xcd7e5cc7, 0xcde45079), TOBN(0xde173c9a, 0x516ac9e4), + TOBN(0x517a8494, 0xc170315c), TOBN(0x438fd905, 0x91d8e8fb), + TOBN(0x5145c506, 0xc7d9630b), TOBN(0x6457a87b, 0xf47d4d75), + TOBN(0xd31646bf, 0x0d9a80e8), TOBN(0x453add2b, 0xcef3aabe), + TOBN(0xc9941109, 0xa607419d), TOBN(0xfaa71e62, 0xbb6bca80), + TOBN(0x34158c13, 0x07c431f3), TOBN(0x594abebc, 0x992bc47a), + TOBN(0x6dfea691, 0xeb78399f), TOBN(0x48aafb35, 0x3f42cba4), + TOBN(0xedcd65af, 0x077c04f0), TOBN(0x1a29a366, 0xe884491a), + TOBN(0x023a40e5, 0x1c21f2bf), TOBN(0xf99a513c, 0xa5057aee), + TOBN(0xa3fe7e25, 0xbcab072e), TOBN(0x8568d2e1, 0x40e32bcf), + TOBN(0x904594eb, 0xd3f69d9f), TOBN(0x181a9733, 0x07affab1), + TOBN(0xe4d68d76, 0xb6e330f4), TOBN(0x87a6dafb, 0xc75a7fc1), + TOBN(0x549db2b5, 0xef7d9289), TOBN(0x2480d4a8, 0x197f015a), + TOBN(0x61d5590b, 0xc40493b6), TOBN(0x3a55b52e, 0x6f780331), + TOBN(0x40eb8115, 0x309eadb0), TOBN(0xdea7de5a, 0x92e5c625), + TOBN(0x64d631f0, 0xcc6a3d5a), TOBN(0x9d5e9d7c, 0x93e8dd61), + TOBN(0xf297bef5, 0x206d3ffc), TOBN(0x23d5e033, 0x7d808bd4), + TOBN(0x4a4f6912, 0xd24cf5ba), TOBN(0xe4d8163b, 0x09cdaa8a), + TOBN(0x0e0de9ef, 0xd3082e8e), TOBN(0x4fe1246c, 0x0192f360), + TOBN(0x1f900150, 0x4b8eee0a), TOBN(0x5219da81, 0xf1da391b), + TOBN(0x7bf6a5c1, 0xf7ea25aa), TOBN(0xd165e6bf, 0xfbb07d5f), + TOBN(0xe3539361, 0x89e78671), TOBN(0xa3fcac89, 0x2bac4219), + TOBN(0xdfab6fd4, 0xf0baa8ab), TOBN(0x5a4adac1, 0xe2c1c2e5), + TOBN(0x6cd75e31, 0x40d85849), TOBN(0xce263fea, 0x19b39181), + TOBN(0xcb6803d3, 0x07032c72), TOBN(0x7f40d5ce, 0x790968c8), + TOBN(0xa6de86bd, 0xdce978f0), TOBN(0x25547c4f, 0x368f751c), + TOBN(0xb1e685fd, 0x65fb2a9e), TOBN(0xce69336f, 0x1eb9179c), + TOBN(0xb15d1c27, 0x12504442), TOBN(0xb7df465c, 0xb911a06b), + TOBN(0xb8d804a3, 0x315980cd), TOBN(0x693bc492, 0xfa3bebf7), + TOBN(0x3578aeee, 0x2253c504), TOBN(0x158de498, 0xcd2474a2), + TOBN(0x1331f5c7, 0xcfda8368), TOBN(0xd2d7bbb3, 0x78d7177e), + TOBN(0xdf61133a, 0xf3c1e46e), TOBN(0x5836ce7d, 0xd30e7be8), + TOBN(0x83084f19, 0x94f834cb), TOBN(0xd35653d4, 0x429ed782), + TOBN(0xa542f16f, 0x59e58243), TOBN(0xc2b52f65, 0x0470a22d), + TOBN(0xe3b6221b, 0x18f23d96), TOBN(0xcb05abac, 0x3f5252b4), + TOBN(0xca00938b, 0x87d61402), TOBN(0x2f186cdd, 0x411933e4), + TOBN(0xe042ece5, 0x9a29a5c5), TOBN(0xb19b3c07, 0x3b6c8402), + TOBN(0xc97667c7, 0x19d92684), TOBN(0xb5624622, 0xebc66372), + TOBN(0x0cb96e65, 0x3c04fa02), TOBN(0x83a7176c, 0x8eaa39aa), + TOBN(0x2033561d, 0xeaa1633f), TOBN(0x45a9d086, 0x4533df73), + TOBN(0xe0542c1d, 0x3dc090bc), TOBN(0x82c996ef, 0xaa59c167), + TOBN(0xe3f735e8, 0x0ee7fc4d), TOBN(0x7b179393, 0x7c35db79), + TOBN(0xb6419e25, 0xf8c5dbfd), TOBN(0x4d9d7a1e, 0x1f327b04), + TOBN(0x979f6f9b, 0x298dfca8), TOBN(0xc7c5dff1, 0x8de9366a), + TOBN(0x1b7a588d, 0x04c82bdd), TOBN(0x68005534, 0xf8319dfd), + TOBN(0xde8a55b5, 0xd8eb9580), TOBN(0x5ea886da, 0x8d5bca81), + TOBN(0xe8530a01, 0x252a0b4d), TOBN(0x1bffb4fe, 0x35eaa0a1), + TOBN(0x2ad828b1, 0xd8e99563), TOBN(0x7de96ef5, 0x95f9cd87), + TOBN(0x4abb2d0c, 0xd77d970c), TOBN(0x03cfb933, 0xd33ef9cb), + TOBN(0xb0547c01, 0x8b211fe9), TOBN(0x2fe64809, 0xa56ed1c6), + TOBN(0xcb7d5624, 0xc2ac98cc), TOBN(0x2a1372c0, 0x1a393e33), + TOBN(0xc8d1ec1c, 0x29660521), TOBN(0xf3d31b04, 0xb37ac3e9), + TOBN(0xa29ae9df, 0x5ece6e7c), TOBN(0x0603ac8f, 0x0facfb55), + TOBN(0xcfe85b7a, 0xdda233a5), TOBN(0xe618919f, 0xbd75f0b8), + TOBN(0xf555a3d2, 0x99bf1603), TOBN(0x1f43afc9, 0xf184255a), + TOBN(0xdcdaf341, 0x319a3e02), TOBN(0xd3b117ef, 0x03903a39), + TOBN(0xe095da13, 0x65d1d131), TOBN(0x86f16367, 0xc37ad03e), + TOBN(0x5f37389e, 0x462cd8dd), TOBN(0xc103fa04, 0xd67a60e6), + TOBN(0x57c34344, 0xf4b478f0), TOBN(0xce91edd8, 0xe117c98d), + TOBN(0x001777b0, 0x231fc12e), TOBN(0x11ae47f2, 0xb207bccb), + TOBN(0xd983cf8d, 0x20f8a242), TOBN(0x7aff5b1d, 0xf22e1ad8), + TOBN(0x68fd11d0, 0x7fc4feb3), TOBN(0x5d53ae90, 0xb0f1c3e1), + TOBN(0x50fb7905, 0xec041803), TOBN(0x85e3c977, 0x14404888), + TOBN(0x0e67faed, 0xac628d8f), TOBN(0x2e865150, 0x6668532c), + TOBN(0x15acaaa4, 0x6a67a6b0), TOBN(0xf4cdee25, 0xb25cec41), + TOBN(0x49ee565a, 0xe4c6701e), TOBN(0x2a04ca66, 0xfc7d63d8), + TOBN(0xeb105018, 0xef0543fb), TOBN(0xf709a4f5, 0xd1b0d81d), + TOBN(0x5b906ee6, 0x2915d333), TOBN(0xf4a87412, 0x96f1f0ab), + TOBN(0xb6b82fa7, 0x4d82f4c2), TOBN(0x90725a60, 0x6804efb3), + TOBN(0xbc82ec46, 0xadc3425e), TOBN(0xb7b80581, 0x2787843e), + TOBN(0xdf46d91c, 0xdd1fc74c), TOBN(0xdc1c62cb, 0xe783a6c4), + TOBN(0x59d1b9f3, 0x1a04cbba), TOBN(0xd87f6f72, 0x95e40764), + TOBN(0x02b4cfc1, 0x317f4a76), TOBN(0x8d2703eb, 0x91036bce), + TOBN(0x98206cc6, 0xa5e72a56), TOBN(0x57be9ed1, 0xcf53fb0f), + TOBN(0x09374571, 0xef0b17ac), TOBN(0x74b2655e, 0xd9181b38), + TOBN(0xc8f80ea8, 0x89935d0e), TOBN(0xc0d9e942, 0x91529936), + TOBN(0x19686041, 0x1e84e0e5), TOBN(0xa5db84d3, 0xaea34c93), + TOBN(0xf9d5bb19, 0x7073a732), TOBN(0xb8d2fe56, 0x6bcfd7c0), + TOBN(0x45775f36, 0xf3eb82fa), TOBN(0x8cb20ccc, 0xfdff8b58), + TOBN(0x1659b65f, 0x8374c110), TOBN(0xb8b4a422, 0x330c789a), + TOBN(0x75e3c3ea, 0x6fe8208b), TOBN(0xbd74b9e4, 0x286e78fe), + TOBN(0x0be2e81b, 0xd7d93a1a), TOBN(0x7ed06e27, 0xdd0a5aae), + TOBN(0x721f5a58, 0x6be8b800), TOBN(0x428299d1, 0xd846db28), + TOBN(0x95cb8e6b, 0x5be88ed3), TOBN(0xc3186b23, 0x1c034e11), + TOBN(0xa6312c9e, 0x8977d99b), TOBN(0xbe944331, 0x83f531e7), + TOBN(0x8232c0c2, 0x18d3b1d4), TOBN(0x617aae8b, 0xe1247b73), + TOBN(0x40153fc4, 0x282aec3b), TOBN(0xc6063d2f, 0xf7b8f823), + TOBN(0x68f10e58, 0x3304f94c), TOBN(0x31efae74, 0xee676346), + TOBN(0xbadb6c6d, 0x40a9b97c), TOBN(0x14702c63, 0x4f666256), + TOBN(0xdeb954f1, 0x5184b2e3), TOBN(0x5184a526, 0x94b6ca40), + TOBN(0xfff05337, 0x003c32ea), TOBN(0x5aa374dd, 0x205974c7), + TOBN(0x9a763854, 0x4b0dd71a), TOBN(0x459cd27f, 0xdeb947ec), + TOBN(0xa6e28161, 0x459c2b92), TOBN(0x2f020fa8, 0x75ee8ef5), + TOBN(0xb132ec2d, 0x30b06310), TOBN(0xc3e15899, 0xbc6a4530), + TOBN(0xdc5f53fe, 0xaa3f451a), TOBN(0x3a3c7f23, 0xc2d9acac), + TOBN(0x2ec2f892, 0x6b27e58b), TOBN(0x68466ee7, 0xd742799f), + TOBN(0x98324dd4, 0x1fa26613), TOBN(0xa2dc6dab, 0xbdc29d63), + TOBN(0xf9675faa, 0xd712d657), TOBN(0x813994be, 0x21fd8d15), + TOBN(0x5ccbb722, 0xfd4f7553), TOBN(0x5135ff8b, 0xf3a36b20), + TOBN(0x44be28af, 0x69559df5), TOBN(0x40b65bed, 0x9d41bf30), + TOBN(0xd98bf2a4, 0x3734e520), TOBN(0x5e3abbe3, 0x209bdcba), + TOBN(0x77c76553, 0xbc945b35), TOBN(0x5331c093, 0xc6ef14aa), + TOBN(0x518ffe29, 0x76b60c80), TOBN(0x2285593b, 0x7ace16f8), + TOBN(0xab1f64cc, 0xbe2b9784), TOBN(0xe8f2c0d9, 0xab2421b6), + TOBN(0x617d7174, 0xc1df065c), TOBN(0xafeeb5ab, 0x5f6578fa), + TOBN(0x16ff1329, 0x263b54a8), TOBN(0x45c55808, 0xc990dce3), + TOBN(0x42eab6c0, 0xecc8c177), TOBN(0x799ea9b5, 0x5982ecaa), + TOBN(0xf65da244, 0xb607ef8e), TOBN(0x8ab226ce, 0x32a3fc2c), + TOBN(0x745741e5, 0x7ea973dc), TOBN(0x5c00ca70, 0x20888f2e), + TOBN(0x7cdce3cf, 0x45fd9cf1), TOBN(0x8a741ef1, 0x5507f872), + TOBN(0x47c51c2f, 0x196b4cec), TOBN(0x70d08e43, 0xc97ea618), + TOBN(0x930da15c, 0x15b18a2b), TOBN(0x33b6c678, 0x2f610514), + TOBN(0xc662e4f8, 0x07ac9794), TOBN(0x1eccf050, 0xba06cb79), + TOBN(0x1ff08623, 0xe7d954e5), TOBN(0x6ef2c5fb, 0x24cf71c3), + TOBN(0xb2c063d2, 0x67978453), TOBN(0xa0cf3796, 0x1d654af8), + TOBN(0x7cb242ea, 0x7ebdaa37), TOBN(0x206e0b10, 0xb86747e0), + TOBN(0x481dae5f, 0xd5ecfefc), TOBN(0x07084fd8, 0xc2bff8fc), + TOBN(0x8040a01a, 0xea324596), TOBN(0x4c646980, 0xd4de4036), + TOBN(0x9eb8ab4e, 0xd65abfc3), TOBN(0xe01cb91f, 0x13541ec7), + TOBN(0x8f029adb, 0xfd695012), TOBN(0x9ae28483, 0x3c7569ec), + TOBN(0xa5614c9e, 0xa66d80a1), TOBN(0x680a3e44, 0x75f5f911), + TOBN(0x0c07b14d, 0xceba4fc1), TOBN(0x891c285b, 0xa13071c1), + TOBN(0xcac67ceb, 0x799ece3c), TOBN(0x29b910a9, 0x41e07e27), + TOBN(0x66bdb409, 0xf2e43123), TOBN(0x06f8b137, 0x7ac9ecbe), + TOBN(0x5981fafd, 0x38547090), TOBN(0x19ab8b9f, 0x85e3415d), + TOBN(0xfc28c194, 0xc7e31b27), TOBN(0x843be0aa, 0x6fbcbb42), + TOBN(0xf3b1ed43, 0xa6db836c), TOBN(0x2a1330e4, 0x01a45c05), + TOBN(0x4f19f3c5, 0x95c1a377), TOBN(0xa85f39d0, 0x44b5ee33), + TOBN(0x3da18e6d, 0x4ae52834), TOBN(0x5a403b39, 0x7423dcb0), + TOBN(0xbb555e0a, 0xf2374aef), TOBN(0x2ad599c4, 0x1e8ca111), + TOBN(0x1b3a2fb9, 0x014b3bf8), TOBN(0x73092684, 0xf66d5007), + TOBN(0x079f1426, 0xc4340102), TOBN(0x1827cf81, 0x8fddf4de), + TOBN(0xc83605f6, 0xf10ff927), TOBN(0xd3871451, 0x23739fc6), + TOBN(0x6d163450, 0xcac1c2cc), TOBN(0x6b521296, 0xa2ec1ac5), + TOBN(0x0606c4f9, 0x6e3cb4a5), TOBN(0xe47d3f41, 0x778abff7), + TOBN(0x425a8d5e, 0xbe8e3a45), TOBN(0x53ea9e97, 0xa6102160), + TOBN(0x477a106e, 0x39cbb688), TOBN(0x532401d2, 0xf3386d32), + TOBN(0x8e564f64, 0xb1b9b421), TOBN(0xca9b8388, 0x81dad33f), + TOBN(0xb1422b4e, 0x2093913e), TOBN(0x533d2f92, 0x69bc8112), + TOBN(0x3fa017be, 0xebe7b2c7), TOBN(0xb2767c4a, 0xcaf197c6), + TOBN(0xc925ff87, 0xaedbae9f), TOBN(0x7daf0eb9, 0x36880a54), + TOBN(0x9284ddf5, 0x9c4d0e71), TOBN(0x1581cf93, 0x316f8cf5), + TOBN(0x3eeca887, 0x3ac1f452), TOBN(0xb417fce9, 0xfb6aeffe), + TOBN(0xa5918046, 0xeefb8dc3), TOBN(0x73d318ac, 0x02209400), + TOBN(0xe800400f, 0x728693e5), TOBN(0xe87d814b, 0x339927ed), + TOBN(0x93e94d3b, 0x57ea9910), TOBN(0xff8a35b6, 0x2245fb69), + TOBN(0x043853d7, 0x7f200d34), TOBN(0x470f1e68, 0x0f653ce1), + TOBN(0x81ac05bd, 0x59a06379), TOBN(0xa14052c2, 0x03930c29), + TOBN(0x6b72fab5, 0x26bc2797), TOBN(0x13670d16, 0x99f16771), + TOBN(0x00170052, 0x1e3e48d1), TOBN(0x978fe401, 0xb7adf678), + TOBN(0x55ecfb92, 0xd41c5dd4), TOBN(0x5ff8e247, 0xc7b27da5), + TOBN(0xe7518272, 0x013fb606), TOBN(0x5768d7e5, 0x2f547a3c), + TOBN(0xbb24eaa3, 0x60017a5f), TOBN(0x6b18e6e4, 0x9c64ce9b), + TOBN(0xc225c655, 0x103dde07), TOBN(0xfc3672ae, 0x7592f7ea), + TOBN(0x9606ad77, 0xd06283a1), TOBN(0x542fc650, 0xe4d59d99), + TOBN(0xabb57c49, 0x2a40e7c2), TOBN(0xac948f13, 0xa8db9f55), + TOBN(0x6d4c9682, 0xb04465c3), TOBN(0xe3d062fa, 0x6468bd15), + TOBN(0xa51729ac, 0x5f318d7e), TOBN(0x1fc87df6, 0x9eb6fc95), + TOBN(0x63d146a8, 0x0591f652), TOBN(0xa861b8f7, 0x589621aa), + TOBN(0x59f5f15a, 0xce31348c), TOBN(0x8f663391, 0x440da6da), + TOBN(0xcfa778ac, 0xb591ffa3), TOBN(0x027ca9c5, 0x4cdfebce), + TOBN(0xbe8e05a5, 0x444ea6b3), TOBN(0x8aab4e69, 0xa78d8254), + TOBN(0x2437f04f, 0xb474d6b8), TOBN(0x6597ffd4, 0x045b3855), + TOBN(0xbb0aea4e, 0xca47ecaa), TOBN(0x568aae83, 0x85c7ebfc), + TOBN(0x0e966e64, 0xc73b2383), TOBN(0x49eb3447, 0xd17d8762), + TOBN(0xde107821, 0x8da05dab), TOBN(0x443d8baa, 0x016b7236), + TOBN(0x163b63a5, 0xea7610d6), TOBN(0xe47e4185, 0xce1ca979), + TOBN(0xae648b65, 0x80baa132), TOBN(0xebf53de2, 0x0e0d5b64), + TOBN(0x8d3bfcb4, 0xd3c8c1ca), TOBN(0x0d914ef3, 0x5d04b309), + TOBN(0x55ef6415, 0x3de7d395), TOBN(0xbde1666f, 0x26b850e8), + TOBN(0xdbe1ca6e, 0xd449ab19), TOBN(0x8902b322, 0xe89a2672), + TOBN(0xb1674b7e, 0xdacb7a53), TOBN(0x8e9faf6e, 0xf52523ff), + TOBN(0x6ba535da, 0x9a85788b), TOBN(0xd21f03ae, 0xbd0626d4), + TOBN(0x099f8c47, 0xe873dc64), TOBN(0xcda8564d, 0x018ec97e), + TOBN(0x3e8d7a5c, 0xde92c68c), TOBN(0x78e035a1, 0x73323cc4), + TOBN(0x3ef26275, 0xf880ff7c), TOBN(0xa4ee3dff, 0x273eedaa), + TOBN(0x58823507, 0xaf4e18f8), TOBN(0x967ec9b5, 0x0672f328), + TOBN(0x9ded19d9, 0x559d3186), TOBN(0x5e2ab3de, 0x6cdce39c), + TOBN(0xabad6e4d, 0x11c226df), TOBN(0xf9783f43, 0x87723014), + TOBN(0x9a49a0cf, 0x1a885719), TOBN(0xfc0c1a5a, 0x90da9dbf), + TOBN(0x8bbaec49, 0x571d92ac), TOBN(0x569e85fe, 0x4692517f), + TOBN(0x8333b014, 0xa14ea4af), TOBN(0x32f2a62f, 0x12e5c5ad), + TOBN(0x98c2ce3a, 0x06d89b85), TOBN(0xb90741aa, 0x2ff77a08), + TOBN(0x2530defc, 0x01f795a2), TOBN(0xd6e5ba0b, 0x84b3c199), + TOBN(0x7d8e8451, 0x12e4c936), TOBN(0xae419f7d, 0xbd0be17b), + TOBN(0xa583fc8c, 0x22262bc9), TOBN(0x6b842ac7, 0x91bfe2bd), + TOBN(0x33cef4e9, 0x440d6827), TOBN(0x5f69f4de, 0xef81fb14), + TOBN(0xf16cf6f6, 0x234fbb92), TOBN(0x76ae3fc3, 0xd9e7e158), + TOBN(0x4e89f6c2, 0xe9740b33), TOBN(0x677bc85d, 0x4962d6a1), + TOBN(0x6c6d8a7f, 0x68d10d15), TOBN(0x5f9a7224, 0x0257b1cd), + TOBN(0x7096b916, 0x4ad85961), TOBN(0x5f8c47f7, 0xe657ab4a), + TOBN(0xde57d7d0, 0xf7461d7e), TOBN(0x7eb6094d, 0x80ce5ee2), + TOBN(0x0b1e1dfd, 0x34190547), TOBN(0x8a394f43, 0xf05dd150), + TOBN(0x0a9eb24d, 0x97df44e6), TOBN(0x78ca06bf, 0x87675719), + TOBN(0x6f0b3462, 0x6ffeec22), TOBN(0x9d91bcea, 0x36cdd8fb), + TOBN(0xac83363c, 0xa105be47), TOBN(0x81ba76c1, 0x069710e3), + TOBN(0x3d1b24cb, 0x28c682c6), TOBN(0x27f25228, 0x8612575b), + TOBN(0xb587c779, 0xe8e66e98), TOBN(0x7b0c03e9, 0x405eb1fe), + TOBN(0xfdf0d030, 0x15b548e7), TOBN(0xa8be76e0, 0x38b36af7), + TOBN(0x4cdab04a, 0x4f310c40), TOBN(0x6287223e, 0xf47ecaec), + TOBN(0x678e6055, 0x8b399320), TOBN(0x61fe3fa6, 0xc01e4646), + TOBN(0xc482866b, 0x03261a5e), TOBN(0xdfcf45b8, 0x5c2f244a), + TOBN(0x8fab9a51, 0x2f684b43), TOBN(0xf796c654, 0xc7220a66), + TOBN(0x1d90707e, 0xf5afa58f), TOBN(0x2c421d97, 0x4fdbe0de), + TOBN(0xc4f4cda3, 0xaf2ebc2f), TOBN(0xa0af843d, 0xcb4efe24), + TOBN(0x53b857c1, 0x9ccd10b1), TOBN(0xddc9d1eb, 0x914d3e04), + TOBN(0x7bdec8bb, 0x62771deb), TOBN(0x829277aa, 0x91c5aa81), + TOBN(0x7af18dd6, 0x832391ae), TOBN(0x1740f316, 0xc71a84ca),} + , + {TOBN(0x8928e99a, 0xeeaf8c49), TOBN(0xee7aa73d, 0x6e24d728), + TOBN(0x4c5007c2, 0xe72b156c), TOBN(0x5fcf57c5, 0xed408a1d), + TOBN(0x9f719e39, 0xb6057604), TOBN(0x7d343c01, 0xc2868bbf), + TOBN(0x2cca254b, 0x7e103e2d), TOBN(0xe6eb38a9, 0xf131bea2), + TOBN(0xb33e624f, 0x8be762b4), TOBN(0x2a9ee4d1, 0x058e3413), + TOBN(0x968e6369, 0x67d805fa), TOBN(0x9848949b, 0x7db8bfd7), + TOBN(0x5308d7e5, 0xd23a8417), TOBN(0x892f3b1d, 0xf3e29da5), + TOBN(0xc95c139e, 0x3dee471f), TOBN(0x8631594d, 0xd757e089), + TOBN(0xe0c82a3c, 0xde918dcc), TOBN(0x2e7b5994, 0x26fdcf4b), + TOBN(0x82c50249, 0x32cb1b2d), TOBN(0xea613a9d, 0x7657ae07), + TOBN(0xc2eb5f6c, 0xf1fdc9f7), TOBN(0xb6eae8b8, 0x879fe682), + TOBN(0x253dfee0, 0x591cbc7f), TOBN(0x000da713, 0x3e1290e6), + TOBN(0x1083e2ea, 0x1f095615), TOBN(0x0a28ad77, 0x14e68c33), + TOBN(0x6bfc0252, 0x3d8818be), TOBN(0xb585113a, 0xf35850cd), + TOBN(0x7d935f0b, 0x30df8aa1), TOBN(0xaddda07c, 0x4ab7e3ac), + TOBN(0x92c34299, 0x552f00cb), TOBN(0xc33ed1de, 0x2909df6c), + TOBN(0x22c2195d, 0x80e87766), TOBN(0x9e99e6d8, 0x9ddf4ac0), + TOBN(0x09642e4e, 0x65e74934), TOBN(0x2610ffa2, 0xff1ff241), + TOBN(0x4d1d47d4, 0x751c8159), TOBN(0x697b4985, 0xaf3a9363), + TOBN(0x0318ca46, 0x87477c33), TOBN(0xa90cb565, 0x9441eff3), + TOBN(0x58bb3848, 0x36f024cb), TOBN(0x85be1f77, 0x36016168), + TOBN(0x6c59587c, 0xdc7e07f1), TOBN(0x191be071, 0xaf1d8f02), + TOBN(0xbf169fa5, 0xcca5e55c), TOBN(0x3864ba3c, 0xf7d04eac), + TOBN(0x915e367f, 0x8d7d05db), TOBN(0xb48a876d, 0xa6549e5d), + TOBN(0xef89c656, 0x580e40a2), TOBN(0xf194ed8c, 0x728068bc), + TOBN(0x74528045, 0xa47990c9), TOBN(0xf53fc7d7, 0x5e1a4649), + TOBN(0xbec5ae9b, 0x78593e7d), TOBN(0x2cac4ee3, 0x41db65d7), + TOBN(0xa8c1eb24, 0x04a3d39b), TOBN(0x53b7d634, 0x03f8f3ef), + TOBN(0x2dc40d48, 0x3e07113c), TOBN(0x6e4a5d39, 0x7d8b63ae), + TOBN(0x5582a94b, 0x79684c2b), TOBN(0x932b33d4, 0x622da26c), + TOBN(0xf534f651, 0x0dbbf08d), TOBN(0x211d07c9, 0x64c23a52), + TOBN(0x0eeece0f, 0xee5bdc9b), TOBN(0xdf178168, 0xf7015558), + TOBN(0xd4294635, 0x0a712229), TOBN(0x93cbe448, 0x09273f8c), + TOBN(0x00b095ef, 0x8f13bc83), TOBN(0xbb741972, 0x8798978c), + TOBN(0x9d7309a2, 0x56dbe6e7), TOBN(0xe578ec56, 0x5a5d39ec), + TOBN(0x3961151b, 0x851f9a31), TOBN(0x2da7715d, 0xe5709eb4), + TOBN(0x867f3017, 0x53dfabf0), TOBN(0x728d2078, 0xb8e39259), + TOBN(0x5c75a0cd, 0x815d9958), TOBN(0xf84867a6, 0x16603be1), + TOBN(0xc865b13d, 0x70e35b1c), TOBN(0x02414468, 0x19b03e2c), + TOBN(0xe46041da, 0xac1f3121), TOBN(0x7c9017ad, 0x6f028a7c), + TOBN(0xabc96de9, 0x0a482873), TOBN(0x4265d6b1, 0xb77e54d4), + TOBN(0x68c38e79, 0xa57d88e7), TOBN(0xd461d766, 0x9ce82de3), + TOBN(0x817a9ec5, 0x64a7e489), TOBN(0xcc5675cd, 0xa0def5f2), + TOBN(0x9a00e785, 0x985d494e), TOBN(0xc626833f, 0x1b03514a), + TOBN(0xabe7905a, 0x83cdd60e), TOBN(0x50602fb5, 0xa1170184), + TOBN(0x689886cd, 0xb023642a), TOBN(0xd568d090, 0xa6e1fb00), + TOBN(0x5b1922c7, 0x0259217f), TOBN(0x93831cd9, 0xc43141e4), + TOBN(0xdfca3587, 0x0c95f86e), TOBN(0xdec2057a, 0x568ae828), + TOBN(0xc44ea599, 0xf98a759a), TOBN(0x55a0a7a2, 0xf7c23c1d), + TOBN(0xd5ffb6e6, 0x94c4f687), TOBN(0x3563cce2, 0x12848478), + TOBN(0x812b3517, 0xe7b1fbe1), TOBN(0x8a7dc979, 0x4f7338e0), + TOBN(0x211ecee9, 0x52d048db), TOBN(0x2eea4056, 0xc86ea3b8), + TOBN(0xd8cb68a7, 0xba772b34), TOBN(0xe16ed341, 0x5f4e2541), + TOBN(0x9b32f6a6, 0x0fec14db), TOBN(0xeee376f7, 0x391698be), + TOBN(0xe9a7aa17, 0x83674c02), TOBN(0x65832f97, 0x5843022a), + TOBN(0x29f3a8da, 0x5ba4990f), TOBN(0x79a59c3a, 0xfb8e3216), + TOBN(0x9cdc4d2e, 0xbd19bb16), TOBN(0xc6c7cfd0, 0xb3262d86), + TOBN(0xd4ce14d0, 0x969c0b47), TOBN(0x1fa352b7, 0x13e56128), + TOBN(0x383d55b8, 0x973db6d3), TOBN(0x71836850, 0xe8e5b7bf), + TOBN(0xc7714596, 0xe6bb571f), TOBN(0x259df31f, 0x2d5b2dd2), + TOBN(0x568f8925, 0x913cc16d), TOBN(0x18bc5b6d, 0xe1a26f5a), + TOBN(0xdfa413be, 0xf5f499ae), TOBN(0xf8835dec, 0xc3f0ae84), + TOBN(0xb6e60bd8, 0x65a40ab0), TOBN(0x65596439, 0x194b377e), + TOBN(0xbcd85625, 0x92084a69), TOBN(0x5ce433b9, 0x4f23ede0), + TOBN(0xe8e8f04f, 0x6ad65143), TOBN(0x11511827, 0xd6e14af6), + TOBN(0x3d390a10, 0x8295c0c7), TOBN(0x71e29ee4, 0x621eba16), + TOBN(0xa588fc09, 0x63717b46), TOBN(0x02be02fe, 0xe06ad4a2), + TOBN(0x931558c6, 0x04c22b22), TOBN(0xbb4d4bd6, 0x12f3c849), + TOBN(0x54a4f496, 0x20efd662), TOBN(0x92ba6d20, 0xc5952d14), + TOBN(0x2db8ea1e, 0xcc9784c2), TOBN(0x81cc10ca, 0x4b353644), + TOBN(0x40b570ad, 0x4b4d7f6c), TOBN(0x5c9f1d96, 0x84a1dcd2), + TOBN(0x01379f81, 0x3147e797), TOBN(0xe5c6097b, 0x2bd499f5), + TOBN(0x40dcafa6, 0x328e5e20), TOBN(0xf7b5244a, 0x54815550), + TOBN(0xb9a4f118, 0x47bfc978), TOBN(0x0ea0e79f, 0xd25825b1), + TOBN(0xa50f96eb, 0x646c7ecf), TOBN(0xeb811493, 0x446dea9d), + TOBN(0x2af04677, 0xdfabcf69), TOBN(0xbe3a068f, 0xc713f6e8), + TOBN(0x860d523d, 0x42e06189), TOBN(0xbf077941, 0x4e3aff13), + TOBN(0x0b616dca, 0xc1b20650), TOBN(0xe66dd6d1, 0x2131300d), + TOBN(0xd4a0fd67, 0xff99abde), TOBN(0xc9903550, 0xc7aac50d), + TOBN(0x022ecf8b, 0x7c46b2d7), TOBN(0x3333b1e8, 0x3abf92af), + TOBN(0x11cc113c, 0x6c491c14), TOBN(0x05976688, 0x80dd3f88), + TOBN(0xf5b4d9e7, 0x29d932ed), TOBN(0xe982aad8, 0xa2c38b6d), + TOBN(0x6f925347, 0x8be0dcf0), TOBN(0x700080ae, 0x65ca53f2), + TOBN(0xd8131156, 0x443ca77f), TOBN(0xe92d6942, 0xec51f984), + TOBN(0xd2a08af8, 0x85dfe9ae), TOBN(0xd825d9a5, 0x4d2a86ca), + TOBN(0x2c53988d, 0x39dff020), TOBN(0xf38b135a, 0x430cdc40), + TOBN(0x0c918ae0, 0x62a7150b), TOBN(0xf31fd8de, 0x0c340e9b), + TOBN(0xafa0e7ae, 0x4dbbf02e), TOBN(0x5847fb2a, 0x5eba6239), + TOBN(0x6b1647dc, 0xdccbac8b), TOBN(0xb642aa78, 0x06f485c8), + TOBN(0x873f3765, 0x7038ecdf), TOBN(0x2ce5e865, 0xfa49d3fe), + TOBN(0xea223788, 0xc98c4400), TOBN(0x8104a8cd, 0xf1fa5279), + TOBN(0xbcf7cc7a, 0x06becfd7), TOBN(0x49424316, 0xc8f974ae), + TOBN(0xc0da65e7, 0x84d6365d), TOBN(0xbcb7443f, 0x8f759fb8), + TOBN(0x35c712b1, 0x7ae81930), TOBN(0x80428dff, 0x4c6e08ab), + TOBN(0xf19dafef, 0xa4faf843), TOBN(0xced8538d, 0xffa9855f), + TOBN(0x20ac409c, 0xbe3ac7ce), TOBN(0x358c1fb6, 0x882da71e), + TOBN(0xafa9c0e5, 0xfd349961), TOBN(0x2b2cfa51, 0x8421c2fc), + TOBN(0x2a80db17, 0xf3a28d38), TOBN(0xa8aba539, 0x5d138e7e), + TOBN(0x52012d1d, 0x6e96eb8d), TOBN(0x65d8dea0, 0xcbaf9622), + TOBN(0x57735447, 0xb264f56c), TOBN(0xbeebef3f, 0x1b6c8da2), + TOBN(0xfc346d98, 0xce785254), TOBN(0xd50e8d72, 0xbb64a161), + TOBN(0xc03567c7, 0x49794add), TOBN(0x15a76065, 0x752c7ef6), + TOBN(0x59f3a222, 0x961f23d6), TOBN(0x378e4438, 0x73ecc0b0), + TOBN(0xc74be434, 0x5a82fde4), TOBN(0xae509af2, 0xd8b9cf34), + TOBN(0x4a61ee46, 0x577f44a1), TOBN(0xe09b748c, 0xb611deeb), + TOBN(0xc0481b2c, 0xf5f7b884), TOBN(0x35626678, 0x61acfa6b), + TOBN(0x37f4c518, 0xbf8d21e6), TOBN(0x22d96531, 0xb205a76d), + TOBN(0x37fb85e1, 0x954073c0), TOBN(0xbceafe4f, 0x65b3a567), + TOBN(0xefecdef7, 0xbe42a582), TOBN(0xd3fc6080, 0x65046be6), + TOBN(0xc9af13c8, 0x09e8dba9), TOBN(0x1e6c9847, 0x641491ff), + TOBN(0x3b574925, 0xd30c31f7), TOBN(0xb7eb72ba, 0xac2a2122), + TOBN(0x776a0dac, 0xef0859e7), TOBN(0x06fec314, 0x21900942), + TOBN(0x2464bc10, 0xf8c22049), TOBN(0x9bfbcce7, 0x875ebf69), + TOBN(0xd7a88e2a, 0x4336326b), TOBN(0xda05261c, 0x5bc2acfa), + TOBN(0xc29f5bdc, 0xeba7efc8), TOBN(0x471237ca, 0x25dbbf2e), + TOBN(0xa72773f2, 0x2975f127), TOBN(0xdc744e8e, 0x04d0b326), + TOBN(0x38a7ed16, 0xa56edb73), TOBN(0x64357e37, 0x2c007e70), + TOBN(0xa167d15b, 0x5080b400), TOBN(0x07b41164, 0x23de4be1), + TOBN(0xb2d91e32, 0x74c89883), TOBN(0x3c162821, 0x2882e7ed), + TOBN(0xad6b36ba, 0x7503e482), TOBN(0x48434e8e, 0x0ea34331), + TOBN(0x79f4f24f, 0x2c7ae0b9), TOBN(0xc46fbf81, 0x1939b44a), + TOBN(0x76fefae8, 0x56595eb1), TOBN(0x417b66ab, 0xcd5f29c7), + TOBN(0x5f2332b2, 0xc5ceec20), TOBN(0xd69661ff, 0xe1a1cae2), + TOBN(0x5ede7e52, 0x9b0286e6), TOBN(0x9d062529, 0xe276b993), + TOBN(0x324794b0, 0x7e50122b), TOBN(0xdd744f8b, 0x4af07ca5), + TOBN(0x30a12f08, 0xd63fc97b), TOBN(0x39650f1a, 0x76626d9d), + TOBN(0x101b47f7, 0x1fa38477), TOBN(0x3d815f19, 0xd4dc124f), + TOBN(0x1569ae95, 0xb26eb58a), TOBN(0xc3cde188, 0x95fb1887), + TOBN(0x54e9f37b, 0xf9539a48), TOBN(0xb0100e06, 0x7408c1a5), + TOBN(0x821d9811, 0xea580cbb), TOBN(0x8af52d35, 0x86e50c56), + TOBN(0xdfbd9d47, 0xdbbf698b), TOBN(0x2961a1ea, 0x03dc1c73), + TOBN(0x203d38f8, 0xe76a5df8), TOBN(0x08a53a68, 0x6def707a), + TOBN(0x26eefb48, 0x1bee45d4), TOBN(0xb3cee346, 0x3c688036), + TOBN(0x463c5315, 0xc42f2469), TOBN(0x19d84d2e, 0x81378162), + TOBN(0x22d7c3c5, 0x1c4d349f), TOBN(0x65965844, 0x163d59c5), + TOBN(0xcf198c56, 0xb8abceae), TOBN(0x6fb1fb1b, 0x628559d5), + TOBN(0x8bbffd06, 0x07bf8fe3), TOBN(0x46259c58, 0x3467734b), + TOBN(0xd8953cea, 0x35f7f0d3), TOBN(0x1f0bece2, 0xd65b0ff1), + TOBN(0xf7d5b4b3, 0xf3c72914), TOBN(0x29e8ea95, 0x3cb53389), + TOBN(0x4a365626, 0x836b6d46), TOBN(0xe849f910, 0xea174fde), + TOBN(0x7ec62fbb, 0xf4737f21), TOBN(0xd8dba5ab, 0x6209f5ac), + TOBN(0x24b5d7a9, 0xa5f9adbe), TOBN(0x707d28f7, 0xa61dc768), + TOBN(0x7711460b, 0xcaa999ea), TOBN(0xba7b174d, 0x1c92e4cc), + TOBN(0x3c4bab66, 0x18d4bf2d), TOBN(0xb8f0c980, 0xeb8bd279), + TOBN(0x024bea9a, 0x324b4737), TOBN(0xfba9e423, 0x32a83bca), + TOBN(0x6e635643, 0xa232dced), TOBN(0x99619367, 0x2571c8ba), + TOBN(0xe8c9f357, 0x54b7032b), TOBN(0xf936b3ba, 0x2442d54a), + TOBN(0x2263f0f0, 0x8290c65a), TOBN(0x48989780, 0xee2c7fdb), + TOBN(0xadc5d55a, 0x13d4f95e), TOBN(0x737cff85, 0xad9b8500), + TOBN(0x271c557b, 0x8a73f43d), TOBN(0xbed617a4, 0xe18bc476), + TOBN(0x66245401, 0x7dfd8ab2), TOBN(0xae7b89ae, 0x3a2870aa), + TOBN(0x1b555f53, 0x23a7e545), TOBN(0x6791e247, 0xbe057e4c), + TOBN(0x860136ad, 0x324fa34d), TOBN(0xea111447, 0x4cbeae28), + TOBN(0x023a4270, 0xbedd3299), TOBN(0x3d5c3a7f, 0xc1c35c34), + TOBN(0xb0f6db67, 0x8d0412d2), TOBN(0xd92625e2, 0xfcdc6b9a), + TOBN(0x92ae5ccc, 0x4e28a982), TOBN(0xea251c36, 0x47a3ce7e), + TOBN(0x9d658932, 0x790691bf), TOBN(0xed610589, 0x06b736ae), + TOBN(0x712c2f04, 0xc0d63b6e), TOBN(0x5cf06fd5, 0xc63d488f), + TOBN(0x97363fac, 0xd9588e41), TOBN(0x1f9bf762, 0x2b93257e), + TOBN(0xa9d1ffc4, 0x667acace), TOBN(0x1cf4a1aa, 0x0a061ecf), + TOBN(0x40e48a49, 0xdc1818d0), TOBN(0x0643ff39, 0xa3621ab0), + TOBN(0x5768640c, 0xe39ef639), TOBN(0x1fc099ea, 0x04d86854), + TOBN(0x9130b9c3, 0xeccd28fd), TOBN(0xd743cbd2, 0x7eec54ab), + TOBN(0x052b146f, 0xe5b475b6), TOBN(0x058d9a82, 0x900a7d1f), + TOBN(0x65e02292, 0x91262b72), TOBN(0x96f924f9, 0xbb0edf03), + TOBN(0x5cfa59c8, 0xfe206842), TOBN(0xf6037004, 0x5eafa720), + TOBN(0x5f30699e, 0x18d7dd96), TOBN(0x381e8782, 0xcbab2495), + TOBN(0x91669b46, 0xdd8be949), TOBN(0xb40606f5, 0x26aae8ef), + TOBN(0x2812b839, 0xfc6751a4), TOBN(0x16196214, 0xfba800ef), + TOBN(0x4398d5ca, 0x4c1a2875), TOBN(0x720c00ee, 0x653d8349), + TOBN(0xc2699eb0, 0xd820007c), TOBN(0x880ee660, 0xa39b5825), + TOBN(0x70694694, 0x471f6984), TOBN(0xf7d16ea8, 0xe3dda99a), + TOBN(0x28d675b2, 0xc0519a23), TOBN(0x9ebf94fe, 0x4f6952e3), + TOBN(0xf28bb767, 0xa2294a8a), TOBN(0x85512b4d, 0xfe0af3f5), + TOBN(0x18958ba8, 0x99b16a0d), TOBN(0x95c2430c, 0xba7548a7), + TOBN(0xb30d1b10, 0xa16be615), TOBN(0xe3ebbb97, 0x85bfb74c), + TOBN(0xa3273cfe, 0x18549fdb), TOBN(0xf6e200bf, 0x4fcdb792), + TOBN(0x54a76e18, 0x83aba56c), TOBN(0x73ec66f6, 0x89ef6aa2), + TOBN(0x8d17add7, 0xd1b9a305), TOBN(0xa959c5b9, 0xb7ae1b9d), + TOBN(0x88643522, 0x6bcc094a), TOBN(0xcc5616c4, 0xd7d429b9), + TOBN(0xa6dada01, 0xe6a33f7c), TOBN(0xc6217a07, 0x9d4e70ad), + TOBN(0xd619a818, 0x09c15b7c), TOBN(0xea06b329, 0x0e80c854), + TOBN(0x174811ce, 0xa5f5e7b9), TOBN(0x66dfc310, 0x787c65f4), + TOBN(0x4ea7bd69, 0x3316ab54), TOBN(0xc12c4acb, 0x1dcc0f70), + TOBN(0xe4308d1a, 0x1e407dd9), TOBN(0xe8a3587c, 0x91afa997), + TOBN(0xea296c12, 0xab77b7a5), TOBN(0xb5ad49e4, 0x673c0d52), + TOBN(0x40f9b2b2, 0x7006085a), TOBN(0xa88ff340, 0x87bf6ec2), + TOBN(0x978603b1, 0x4e3066a6), TOBN(0xb3f99fc2, 0xb5e486e2), + TOBN(0x07b53f5e, 0xb2e63645), TOBN(0xbe57e547, 0x84c84232), + TOBN(0xd779c216, 0x7214d5cf), TOBN(0x617969cd, 0x029a3aca), + TOBN(0xd17668cd, 0x8a7017a0), TOBN(0x77b4d19a, 0xbe9b7ee8), + TOBN(0x58fd0e93, 0x9c161776), TOBN(0xa8c4f4ef, 0xd5968a72), + TOBN(0x296071cc, 0x67b3de77), TOBN(0xae3c0b8e, 0x634f7905), + TOBN(0x67e440c2, 0x8a7100c9), TOBN(0xbb8c3c1b, 0xeb4b9b42), + TOBN(0x6d71e8ea, 0xc51b3583), TOBN(0x7591f5af, 0x9525e642), + TOBN(0xf73a2f7b, 0x13f509f3), TOBN(0x618487aa, 0x5619ac9b), + TOBN(0x3a72e5f7, 0x9d61718a), TOBN(0x00413bcc, 0x7592d28c), + TOBN(0x7d9b11d3, 0x963c35cf), TOBN(0x77623bcf, 0xb90a46ed), + TOBN(0xdeef273b, 0xdcdd2a50), TOBN(0x4a741f9b, 0x0601846e), + TOBN(0x33b89e51, 0x0ec6e929), TOBN(0xcb02319f, 0x8b7f22cd), + TOBN(0xbbe1500d, 0x084bae24), TOBN(0x2f0ae8d7, 0x343d2693), + TOBN(0xacffb5f2, 0x7cdef811), TOBN(0xaa0c030a, 0x263fb94f), + TOBN(0x6eef0d61, 0xa0f442de), TOBN(0xf92e1817, 0x27b139d3), + TOBN(0x1ae6deb7, 0x0ad8bc28), TOBN(0xa89e38dc, 0xc0514130), + TOBN(0x81eeb865, 0xd2fdca23), TOBN(0x5a15ee08, 0xcc8ef895), + TOBN(0x768fa10a, 0x01905614), TOBN(0xeff5b8ef, 0x880ee19b), + TOBN(0xf0c0cabb, 0xcb1c8a0e), TOBN(0x2e1ee9cd, 0xb8c838f9), + TOBN(0x0587d8b8, 0x8a4a14c0), TOBN(0xf6f27896, 0x2ff698e5), + TOBN(0xed38ef1c, 0x89ee6256), TOBN(0xf44ee1fe, 0x6b353b45), + TOBN(0x9115c0c7, 0x70e903b3), TOBN(0xc78ec0a1, 0x818f31df), + TOBN(0x6c003324, 0xb7dccbc6), TOBN(0xd96dd1f3, 0x163bbc25), + TOBN(0x33aa82dd, 0x5cedd805), TOBN(0x123aae4f, 0x7f7eb2f1), + TOBN(0x1723fcf5, 0xa26262cd), TOBN(0x1f7f4d5d, 0x0060ebd5), + TOBN(0xf19c5c01, 0xb2eaa3af), TOBN(0x2ccb9b14, 0x9790accf), + TOBN(0x1f9c1cad, 0x52324aa6), TOBN(0x63200526, 0x7247df54), + TOBN(0x5732fe42, 0xbac96f82), TOBN(0x52fe771f, 0x01a1c384), + TOBN(0x546ca13d, 0xb1001684), TOBN(0xb56b4eee, 0xa1709f75), + TOBN(0x266545a9, 0xd5db8672), TOBN(0xed971c90, 0x1e8f3cfb), + TOBN(0x4e7d8691, 0xe3a07b29), TOBN(0x7570d9ec, 0xe4b696b9), + TOBN(0xdc5fa067, 0x7bc7e9ae), TOBN(0x68b44caf, 0xc82c4844), + TOBN(0x519d34b3, 0xbf44da80), TOBN(0x283834f9, 0x5ab32e66), + TOBN(0x6e608797, 0x6278a000), TOBN(0x1e62960e, 0x627312f6), + TOBN(0x9b87b27b, 0xe6901c55), TOBN(0x80e78538, 0x24fdbc1f), + TOBN(0xbbbc0951, 0x2facc27d), TOBN(0x06394239, 0xac143b5a), + TOBN(0x35bb4a40, 0x376c1944), TOBN(0x7cb62694, 0x63da1511), + TOBN(0xafd29161, 0xb7148a3b), TOBN(0xa6f9d9ed, 0x4e2ea2ee), + TOBN(0x15dc2ca2, 0x880dd212), TOBN(0x903c3813, 0xa61139a9), + TOBN(0x2aa7b46d, 0x6c0f8785), TOBN(0x36ce2871, 0x901c60ff), + TOBN(0xc683b028, 0xe10d9c12), TOBN(0x7573baa2, 0x032f33d3), + TOBN(0x87a9b1f6, 0x67a31b58), TOBN(0xfd3ed11a, 0xf4ffae12), + TOBN(0x83dcaa9a, 0x0cb2748e), TOBN(0x8239f018, 0x5d6fdf16), + TOBN(0xba67b49c, 0x72753941), TOBN(0x2beec455, 0xc321cb36), + TOBN(0x88015606, 0x3f8b84ce), TOBN(0x76417083, 0x8d38c86f), + TOBN(0x054f1ca7, 0x598953dd), TOBN(0xc939e110, 0x4e8e7429), + TOBN(0x9b1ac2b3, 0x5a914f2f), TOBN(0x39e35ed3, 0xe74b8f9c), + TOBN(0xd0debdb2, 0x781b2fb0), TOBN(0x1585638f, 0x2d997ba2), + TOBN(0x9c4b646e, 0x9e2fce99), TOBN(0x68a21081, 0x1e80857f), + TOBN(0x06d54e44, 0x3643b52a), TOBN(0xde8d6d63, 0x0d8eb843), + TOBN(0x70321563, 0x42146a0a), TOBN(0x8ba826f2, 0x5eaa3622), + TOBN(0x227a58bd, 0x86138787), TOBN(0x43b6c03c, 0x10281d37), + TOBN(0x6326afbb, 0xb54dde39), TOBN(0x744e5e8a, 0xdb6f2d5f), + TOBN(0x48b2a99a, 0xcff158e1), TOBN(0xa93c8fa0, 0xef87918f), + TOBN(0x2182f956, 0xde058c5c), TOBN(0x216235d2, 0x936f9e7a), + TOBN(0xace0c0db, 0xd2e31e67), TOBN(0xc96449bf, 0xf23ac3e7), + TOBN(0x7e9a2874, 0x170693bd), TOBN(0xa28e14fd, 0xa45e6335), + TOBN(0x5757f6b3, 0x56427344), TOBN(0x822e4556, 0xacf8edf9), + TOBN(0x2b7a6ee2, 0xe6a285cd), TOBN(0x5866f211, 0xa9df3af0), + TOBN(0x40dde2dd, 0xf845b844), TOBN(0x986c3726, 0x110e5e49), + TOBN(0x73680c2a, 0xf7172277), TOBN(0x57b94f0f, 0x0cccb244), + TOBN(0xbdff7267, 0x2d438ca7), TOBN(0xbad1ce11, 0xcf4663fd), + TOBN(0x9813ed9d, 0xd8f71cae), TOBN(0xf43272a6, 0x961fdaa6), + TOBN(0xbeff0119, 0xbd6d1637), TOBN(0xfebc4f91, 0x30361978), + TOBN(0x02b37a95, 0x2f41deff), TOBN(0x0e44a59a, 0xe63b89b7), + TOBN(0x673257dc, 0x143ff951), TOBN(0x19c02205, 0xd752baf4), + TOBN(0x46c23069, 0xc4b7d692), TOBN(0x2e6392c3, 0xfd1502ac), + TOBN(0x6057b1a2, 0x1b220846), TOBN(0xe51ff946, 0x0c1b5b63),} + , + {TOBN(0x6e85cb51, 0x566c5c43), TOBN(0xcff9c919, 0x3597f046), + TOBN(0x9354e90c, 0x4994d94a), TOBN(0xe0a39332, 0x2147927d), + TOBN(0x8427fac1, 0x0dc1eb2b), TOBN(0x88cfd8c2, 0x2ff319fa), + TOBN(0xe2d4e684, 0x01965274), TOBN(0xfa2e067d, 0x67aaa746), + TOBN(0xb6d92a7f, 0x3e5f9f11), TOBN(0x9afe153a, 0xd6cb3b8e), + TOBN(0x4d1a6dd7, 0xddf800bd), TOBN(0xf6c13cc0, 0xcaf17e19), + TOBN(0x15f6c58e, 0x325fc3ee), TOBN(0x71095400, 0xa31dc3b2), + TOBN(0x168e7c07, 0xafa3d3e7), TOBN(0x3f8417a1, 0x94c7ae2d), + TOBN(0xec234772, 0x813b230d), TOBN(0x634d0f5f, 0x17344427), + TOBN(0x11548ab1, 0xd77fc56a), TOBN(0x7fab1750, 0xce06af77), + TOBN(0xb62c10a7, 0x4f7c4f83), TOBN(0xa7d2edc4, 0x220a67d9), + TOBN(0x1c404170, 0x921209a0), TOBN(0x0b9815a0, 0xface59f0), + TOBN(0x2842589b, 0x319540c3), TOBN(0x18490f59, 0xa283d6f8), + TOBN(0xa2731f84, 0xdaae9fcb), TOBN(0x3db6d960, 0xc3683ba0), + TOBN(0xc85c63bb, 0x14611069), TOBN(0xb19436af, 0x0788bf05), + TOBN(0x905459df, 0x347460d2), TOBN(0x73f6e094, 0xe11a7db1), + TOBN(0xdc7f938e, 0xb6357f37), TOBN(0xc5d00f79, 0x2bd8aa62), + TOBN(0xc878dcb9, 0x2ca979fc), TOBN(0x37e83ed9, 0xeb023a99), + TOBN(0x6b23e273, 0x1560bf3d), TOBN(0x1086e459, 0x1d0fae61), + TOBN(0x78248316, 0x9a9414bd), TOBN(0x1b956bc0, 0xf0ea9ea1), + TOBN(0x7b85bb91, 0xc31b9c38), TOBN(0x0c5aa90b, 0x48ef57b5), + TOBN(0xdedeb169, 0xaf3bab6f), TOBN(0xe610ad73, 0x2d373685), + TOBN(0xf13870df, 0x02ba8e15), TOBN(0x0337edb6, 0x8ca7f771), + TOBN(0xe4acf747, 0xb62c036c), TOBN(0xd921d576, 0xb6b94e81), + TOBN(0xdbc86439, 0x2c422f7a), TOBN(0xfb635362, 0xed348898), + TOBN(0x83084668, 0xc45bfcd1), TOBN(0xc357c9e3, 0x2b315e11), + TOBN(0xb173b540, 0x5b2e5b8c), TOBN(0x7e946931, 0xe102b9a4), + TOBN(0x17c890eb, 0x7b0fb199), TOBN(0xec225a83, 0xd61b662b), + TOBN(0xf306a3c8, 0xee3c76cb), TOBN(0x3cf11623, 0xd32a1f6e), + TOBN(0xe6d5ab64, 0x6863e956), TOBN(0x3b8a4cbe, 0x5c005c26), + TOBN(0xdcd529a5, 0x9ce6bb27), TOBN(0xc4afaa52, 0x04d4b16f), + TOBN(0xb0624a26, 0x7923798d), TOBN(0x85e56df6, 0x6b307fab), + TOBN(0x0281893c, 0x2bf29698), TOBN(0x91fc19a4, 0xd7ce7603), + TOBN(0x75a5dca3, 0xad9a558f), TOBN(0x40ceb3fa, 0x4d50bf77), + TOBN(0x1baf6060, 0xbc9ba369), TOBN(0x927e1037, 0x597888c2), + TOBN(0xd936bf19, 0x86a34c07), TOBN(0xd4cf10c1, 0xc34ae980), + TOBN(0x3a3e5334, 0x859dd614), TOBN(0x9c475b5b, 0x18d0c8ee), + TOBN(0x63080d1f, 0x07cd51d5), TOBN(0xc9c0d0a6, 0xb88b4326), + TOBN(0x1ac98691, 0xc234296f), TOBN(0x2a0a83a4, 0x94887fb6), + TOBN(0x56511427, 0x0cea9cf2), TOBN(0x5230a6e8, 0xa24802f5), + TOBN(0xf7a2bf0f, 0x72e3d5c1), TOBN(0x37717446, 0x4f21439e), + TOBN(0xfedcbf25, 0x9ce30334), TOBN(0xe0030a78, 0x7ce202f9), + TOBN(0x6f2d9ebf, 0x1202e9ca), TOBN(0xe79dde6c, 0x75e6e591), + TOBN(0xf52072af, 0xf1dac4f8), TOBN(0x6c8d087e, 0xbb9b404d), + TOBN(0xad0fc73d, 0xbce913af), TOBN(0x909e587b, 0x458a07cb), + TOBN(0x1300da84, 0xd4f00c8a), TOBN(0x425cd048, 0xb54466ac), + TOBN(0xb59cb9be, 0x90e9d8bf), TOBN(0x991616db, 0x3e431b0e), + TOBN(0xd3aa117a, 0x531aecff), TOBN(0x91af92d3, 0x59f4dc3b), + TOBN(0x9b1ec292, 0xe93fda29), TOBN(0x76bb6c17, 0xe97d91bc), + TOBN(0x7509d95f, 0xaface1e6), TOBN(0x3653fe47, 0xbe855ae3), + TOBN(0x73180b28, 0x0f680e75), TOBN(0x75eefd1b, 0xeeb6c26c), + TOBN(0xa4cdf29f, 0xb66d4236), TOBN(0x2d70a997, 0x6b5821d8), + TOBN(0x7a3ee207, 0x20445c36), TOBN(0x71d1ac82, 0x59877174), + TOBN(0x0fc539f7, 0x949f73e9), TOBN(0xd05cf3d7, 0x982e3081), + TOBN(0x8758e20b, 0x7b1c7129), TOBN(0xffadcc20, 0x569e61f2), + TOBN(0xb05d3a2f, 0x59544c2d), TOBN(0xbe16f5c1, 0x9fff5e53), + TOBN(0x73cf65b8, 0xaad58135), TOBN(0x622c2119, 0x037aa5be), + TOBN(0x79373b3f, 0x646fd6a0), TOBN(0x0e029db5, 0x0d3978cf), + TOBN(0x8bdfc437, 0x94fba037), TOBN(0xaefbd687, 0x620797a6), + TOBN(0x3fa5382b, 0xbd30d38e), TOBN(0x7627cfbf, 0x585d7464), + TOBN(0xb2330fef, 0x4e4ca463), TOBN(0xbcef7287, 0x3566cc63), + TOBN(0xd161d2ca, 0xcf780900), TOBN(0x135dc539, 0x5b54827d), + TOBN(0x638f052e, 0x27bf1bc6), TOBN(0x10a224f0, 0x07dfa06c), + TOBN(0xe973586d, 0x6d3321da), TOBN(0x8b0c5738, 0x26152c8f), + TOBN(0x07ef4f2a, 0x34606074), TOBN(0x80fe7fe8, 0xa0f7047a), + TOBN(0x3d1a8152, 0xe1a0e306), TOBN(0x32cf43d8, 0x88da5222), + TOBN(0xbf89a95f, 0x5f02ffe6), TOBN(0x3d9eb9a4, 0x806ad3ea), + TOBN(0x012c17bb, 0x79c8e55e), TOBN(0xfdcd1a74, 0x99c81dac), + TOBN(0x7043178b, 0xb9556098), TOBN(0x4090a1df, 0x801c3886), + TOBN(0x759800ff, 0x9b67b912), TOBN(0x3e5c0304, 0x232620c8), + TOBN(0x4b9d3c4b, 0x70dceeca), TOBN(0xbb2d3c15, 0x181f648e), + TOBN(0xf981d837, 0x6e33345c), TOBN(0xb626289b, 0x0cf2297a), + TOBN(0x766ac659, 0x8baebdcf), TOBN(0x1a28ae09, 0x75df01e5), + TOBN(0xb71283da, 0x375876d8), TOBN(0x4865a96d, 0x607b9800), + TOBN(0x25dd1bcd, 0x237936b2), TOBN(0x332f4f4b, 0x60417494), + TOBN(0xd0923d68, 0x370a2147), TOBN(0x497f5dfb, 0xdc842203), + TOBN(0x9dc74cbd, 0x32be5e0f), TOBN(0x7475bcb7, 0x17a01375), + TOBN(0x438477c9, 0x50d872b1), TOBN(0xcec67879, 0xffe1d63d), + TOBN(0x9b006014, 0xd8578c70), TOBN(0xc9ad99a8, 0x78bb6b8b), + TOBN(0x6799008e, 0x11fb3806), TOBN(0xcfe81435, 0xcd44cab3), + TOBN(0xa2ee1582, 0x2f4fb344), TOBN(0xb8823450, 0x483fa6eb), + TOBN(0x622d323d, 0x652c7749), TOBN(0xd8474a98, 0xbeb0a15b), + TOBN(0xe43c154d, 0x5d1c00d0), TOBN(0x7fd581d9, 0x0e3e7aac), + TOBN(0x2b44c619, 0x2525ddf8), TOBN(0x67a033eb, 0xb8ae9739), + TOBN(0x113ffec1, 0x9ef2d2e4), TOBN(0x1bf6767e, 0xd5a0ea7f), + TOBN(0x57fff75e, 0x03714c0a), TOBN(0xa23c422e, 0x0a23e9ee), + TOBN(0xdd5f6b2d, 0x540f83af), TOBN(0xc2c2c27e, 0x55ea46a7), + TOBN(0xeb6b4246, 0x672a1208), TOBN(0xd13599f7, 0xae634f7a), + TOBN(0xcf914b5c, 0xd7b32c6e), TOBN(0x61a5a640, 0xeaf61814), + TOBN(0x8dc3df8b, 0x208a1bbb), TOBN(0xef627fd6, 0xb6d79aa5), + TOBN(0x44232ffc, 0xc4c86bc8), TOBN(0xe6f9231b, 0x061539fe), + TOBN(0x1d04f25a, 0x958b9533), TOBN(0x180cf934, 0x49e8c885), + TOBN(0x89689595, 0x9884aaf7), TOBN(0xb1959be3, 0x07b348a6), + TOBN(0x96250e57, 0x3c147c87), TOBN(0xae0efb3a, 0xdd0c61f8), + TOBN(0xed00745e, 0xca8c325e), TOBN(0x3c911696, 0xecff3f70), + TOBN(0x73acbc65, 0x319ad41d), TOBN(0x7b01a020, 0xf0b1c7ef), + TOBN(0xea32b293, 0x63a1483f), TOBN(0x89eabe71, 0x7a248f96), + TOBN(0x9c6231d3, 0x343157e5), TOBN(0x93a375e5, 0xdf3c546d), + TOBN(0xe76e9343, 0x6a2afe69), TOBN(0xc4f89100, 0xe166c88e), + TOBN(0x248efd0d, 0x4f872093), TOBN(0xae0eb3ea, 0x8fe0ea61), + TOBN(0xaf89790d, 0x9d79046e), TOBN(0x4d650f2d, 0x6cee0976), + TOBN(0xa3935d9a, 0x43071eca), TOBN(0x66fcd2c9, 0x283b0bfe), + TOBN(0x0e665eb5, 0x696605f1), TOBN(0xe77e5d07, 0xa54cd38d), + TOBN(0x90ee050a, 0x43d950cf), TOBN(0x86ddebda, 0xd32e69b5), + TOBN(0x6ad94a3d, 0xfddf7415), TOBN(0xf7fa1309, 0x3f6e8d5a), + TOBN(0xc4831d1d, 0xe9957f75), TOBN(0x7de28501, 0xd5817447), + TOBN(0x6f1d7078, 0x9e2aeb6b), TOBN(0xba2b9ff4, 0xf67a53c2), + TOBN(0x36963767, 0xdf9defc3), TOBN(0x479deed3, 0x0d38022c), + TOBN(0xd2edb89b, 0x3a8631e8), TOBN(0x8de855de, 0x7a213746), + TOBN(0xb2056cb7, 0xb00c5f11), TOBN(0xdeaefbd0, 0x2c9b85e4), + TOBN(0x03f39a8d, 0xd150892d), TOBN(0x37b84686, 0x218b7985), + TOBN(0x36296dd8, 0xb7375f1a), TOBN(0x472cd4b1, 0xb78e898e), + TOBN(0x15dff651, 0xe9f05de9), TOBN(0xd4045069, 0x2ce98ba9), + TOBN(0x8466a7ae, 0x9b38024c), TOBN(0xb910e700, 0xe5a6b5ef), + TOBN(0xae1c56ea, 0xb3aa8f0d), TOBN(0xbab2a507, 0x7eee74a6), + TOBN(0x0dca11e2, 0x4b4c4620), TOBN(0xfd896e2e, 0x4c47d1f4), + TOBN(0xeb45ae53, 0x308fbd93), TOBN(0x46cd5a2e, 0x02c36fda), + TOBN(0x6a3d4e90, 0xbaa48385), TOBN(0xdd55e62e, 0x9dbe9960), + TOBN(0xa1406aa0, 0x2a81ede7), TOBN(0x6860dd14, 0xf9274ea7), + TOBN(0xcfdcb0c2, 0x80414f86), TOBN(0xff410b10, 0x22f94327), + TOBN(0x5a33cc38, 0x49ad467b), TOBN(0xefb48b6c, 0x0a7335f1), + TOBN(0x14fb54a4, 0xb153a360), TOBN(0x604aa9d2, 0xb52469cc), + TOBN(0x5e9dc486, 0x754e48e9), TOBN(0x693cb455, 0x37471e8e), + TOBN(0xfb2fd7cd, 0x8d3b37b6), TOBN(0x63345e16, 0xcf09ff07), + TOBN(0x9910ba6b, 0x23a5d896), TOBN(0x1fe19e35, 0x7fe4364e), + TOBN(0x6e1da8c3, 0x9a33c677), TOBN(0x15b4488b, 0x29fd9fd0), + TOBN(0x1f439254, 0x1a1f22bf), TOBN(0x920a8a70, 0xab8163e8), + TOBN(0x3fd1b249, 0x07e5658e), TOBN(0xf2c4f79c, 0xb6ec839b), + TOBN(0x1abbc3d0, 0x4aa38d1b), TOBN(0x3b0db35c, 0xb5d9510e), + TOBN(0x1754ac78, 0x3e60dec0), TOBN(0x53272fd7, 0xea099b33), + TOBN(0x5fb0494f, 0x07a8e107), TOBN(0x4a89e137, 0x6a8191fa), + TOBN(0xa113b7f6, 0x3c4ad544), TOBN(0x88a2e909, 0x6cb9897b), + TOBN(0x17d55de3, 0xb44a3f84), TOBN(0xacb2f344, 0x17c6c690), + TOBN(0x32088168, 0x10232390), TOBN(0xf2e8a61f, 0x6c733bf7), + TOBN(0xa774aab6, 0x9c2d7652), TOBN(0xfb5307e3, 0xed95c5bc), + TOBN(0xa05c73c2, 0x4981f110), TOBN(0x1baae31c, 0xa39458c9), + TOBN(0x1def185b, 0xcbea62e7), TOBN(0xe8ac9eae, 0xeaf63059), + TOBN(0x098a8cfd, 0x9921851c), TOBN(0xd959c3f1, 0x3abe2f5b), + TOBN(0xa4f19525, 0x20e40ae5), TOBN(0x320789e3, 0x07a24aa1), + TOBN(0x259e6927, 0x7392b2bc), TOBN(0x58f6c667, 0x1918668b), + TOBN(0xce1db2bb, 0xc55d2d8b), TOBN(0x41d58bb7, 0xf4f6ca56), + TOBN(0x7650b680, 0x8f877614), TOBN(0x905e16ba, 0xf4c349ed), + TOBN(0xed415140, 0xf661acac), TOBN(0x3b8784f0, 0xcb2270af), + TOBN(0x3bc280ac, 0x8a402cba), TOBN(0xd53f7146, 0x0937921a), + TOBN(0xc03c8ee5, 0xe5681e83), TOBN(0x62126105, 0xf6ac9e4a), + TOBN(0x9503a53f, 0x936b1a38), TOBN(0x3d45e2d4, 0x782fecbd), + TOBN(0x69a5c439, 0x76e8ae98), TOBN(0xb53b2eeb, 0xbfb4b00e), + TOBN(0xf1674712, 0x72386c89), TOBN(0x30ca34a2, 0x4268bce4), + TOBN(0x7f1ed86c, 0x78341730), TOBN(0x8ef5beb8, 0xb525e248), + TOBN(0xbbc489fd, 0xb74fbf38), TOBN(0x38a92a0e, 0x91a0b382), + TOBN(0x7a77ba3f, 0x22433ccf), TOBN(0xde8362d6, 0xa29f05a9), + TOBN(0x7f6a30ea, 0x61189afc), TOBN(0x693b5505, 0x59ef114f), + TOBN(0x50266bc0, 0xcd1797a1), TOBN(0xea17b47e, 0xf4b7af2d), + TOBN(0xd6c4025c, 0x3df9483e), TOBN(0x8cbb9d9f, 0xa37b18c9), + TOBN(0x91cbfd9c, 0x4d8424cf), TOBN(0xdb7048f1, 0xab1c3506), + TOBN(0x9eaf641f, 0x028206a3), TOBN(0xf986f3f9, 0x25bdf6ce), + TOBN(0x262143b5, 0x224c08dc), TOBN(0x2bbb09b4, 0x81b50c91), + TOBN(0xc16ed709, 0xaca8c84f), TOBN(0xa6210d9d, 0xb2850ca8), + TOBN(0x6d8df67a, 0x09cb54d6), TOBN(0x91eef6e0, 0x500919a4), + TOBN(0x90f61381, 0x0f132857), TOBN(0x9acede47, 0xf8d5028b), + TOBN(0x844d1b71, 0x90b771c3), TOBN(0x563b71e4, 0xba6426be), + TOBN(0x2efa2e83, 0xbdb802ff), TOBN(0x3410cbab, 0xab5b4a41), + TOBN(0x555b2d26, 0x30da84dd), TOBN(0xd0711ae9, 0xee1cc29a), + TOBN(0xcf3e8c60, 0x2f547792), TOBN(0x03d7d5de, 0xdc678b35), + TOBN(0x071a2fa8, 0xced806b8), TOBN(0x222e6134, 0x697f1478), + TOBN(0xdc16fd5d, 0xabfcdbbf), TOBN(0x44912ebf, 0x121b53b8), + TOBN(0xac943674, 0x2496c27c), TOBN(0x8ea3176c, 0x1ffc26b0), + TOBN(0xb6e224ac, 0x13debf2c), TOBN(0x524cc235, 0xf372a832), + TOBN(0xd706e1d8, 0x9f6f1b18), TOBN(0x2552f005, 0x44cce35b), + TOBN(0x8c8326c2, 0xa88e31fc), TOBN(0xb5468b2c, 0xf9552047), + TOBN(0xce683e88, 0x3ff90f2b), TOBN(0x77947bdf, 0x2f0a5423), + TOBN(0xd0a1b28b, 0xed56e328), TOBN(0xaee35253, 0xc20134ac), + TOBN(0x7e98367d, 0x3567962f), TOBN(0x379ed61f, 0x8188bffb), + TOBN(0x73bba348, 0xfaf130a1), TOBN(0x6c1f75e1, 0x904ed734), + TOBN(0x18956642, 0x3b4a79fc), TOBN(0xf20bc83d, 0x54ef4493), + TOBN(0x836d425d, 0x9111eca1), TOBN(0xe5b5c318, 0x009a8dcf), + TOBN(0x3360b25d, 0x13221bc5), TOBN(0x707baad2, 0x6b3eeaf7), + TOBN(0xd7279ed8, 0x743a95a1), TOBN(0x7450a875, 0x969e809f), + TOBN(0x32b6bd53, 0xe5d0338f), TOBN(0x1e77f7af, 0x2b883bbc), + TOBN(0x90da12cc, 0x1063ecd0), TOBN(0xe2697b58, 0xc315be47), + TOBN(0x2771a5bd, 0xda85d534), TOBN(0x53e78c1f, 0xff980eea), + TOBN(0xadf1cf84, 0x900385e7), TOBN(0x7d3b14f6, 0xc9387b62), + TOBN(0x170e74b0, 0xcb8f2bd2), TOBN(0x2d50b486, 0x827fa993), + TOBN(0xcdbe8c9a, 0xf6f32bab), TOBN(0x55e906b0, 0xc3b93ab8), + TOBN(0x747f22fc, 0x8fe280d1), TOBN(0xcd8e0de5, 0xb2e114ab), + TOBN(0x5ab7dbeb, 0xe10b68b0), TOBN(0x9dc63a9c, 0xa480d4b2), + TOBN(0x78d4bc3b, 0x4be1495f), TOBN(0x25eb3db8, 0x9359122d), + TOBN(0x3f8ac05b, 0x0809cbdc), TOBN(0xbf4187bb, 0xd37c702f), + TOBN(0x84cea069, 0x1416a6a5), TOBN(0x8f860c79, 0x43ef881c), + TOBN(0x41311f8a, 0x38038a5d), TOBN(0xe78c2ec0, 0xfc612067), + TOBN(0x494d2e81, 0x5ad73581), TOBN(0xb4cc9e00, 0x59604097), + TOBN(0xff558aec, 0xf3612cba), TOBN(0x35beef7a, 0x9e36c39e), + TOBN(0x1845c7cf, 0xdbcf41b9), TOBN(0x5703662a, 0xaea997c0), + TOBN(0x8b925afe, 0xe402f6d8), TOBN(0xd0a1b1ae, 0x4dd72162), + TOBN(0x9f47b375, 0x03c41c4b), TOBN(0xa023829b, 0x0391d042), + TOBN(0x5f5045c3, 0x503b8b0a), TOBN(0x123c2688, 0x98c010e5), + TOBN(0x324ec0cc, 0x36ba06ee), TOBN(0xface3115, 0x3dd2cc0c), + TOBN(0xb364f3be, 0xf333e91f), TOBN(0xef8aff73, 0x28e832b0), + TOBN(0x1e9bad04, 0x2d05841b), TOBN(0x42f0e3df, 0x356a21e2), + TOBN(0xa3270bcb, 0x4add627e), TOBN(0xb09a8158, 0xd322e711), + TOBN(0x86e326a1, 0x0fee104a), TOBN(0xad7788f8, 0x3703f65d), + TOBN(0x7e765430, 0x47bc4833), TOBN(0x6cee582b, 0x2b9b893a), + TOBN(0x9cd2a167, 0xe8f55a7b), TOBN(0xefbee3c6, 0xd9e4190d), + TOBN(0x33ee7185, 0xd40c2e9d), TOBN(0x844cc9c5, 0xa380b548), + TOBN(0x323f8ecd, 0x66926e04), TOBN(0x0001e38f, 0x8110c1ba), + TOBN(0x8dbcac12, 0xfc6a7f07), TOBN(0xd65e1d58, 0x0cec0827), + TOBN(0xd2cd4141, 0xbe76ca2d), TOBN(0x7895cf5c, 0xe892f33a), + TOBN(0x956d230d, 0x367139d2), TOBN(0xa91abd3e, 0xd012c4c1), + TOBN(0x34fa4883, 0x87eb36bf), TOBN(0xc5f07102, 0x914b8fb4), + TOBN(0x90f0e579, 0xadb9c95f), TOBN(0xfe6ea8cb, 0x28888195), + TOBN(0x7b9b5065, 0xedfa9284), TOBN(0x6c510bd2, 0x2b8c8d65), + TOBN(0xd7b8ebef, 0xcbe8aafd), TOBN(0xedb3af98, 0x96b1da07), + TOBN(0x28ff779d, 0x6295d426), TOBN(0x0c4f6ac7, 0x3fa3ad7b), + TOBN(0xec44d054, 0x8b8e2604), TOBN(0x9b32a66d, 0x8b0050e1), + TOBN(0x1f943366, 0xf0476ce2), TOBN(0x7554d953, 0xa602c7b4), + TOBN(0xbe35aca6, 0x524f2809), TOBN(0xb6881229, 0xfd4edbea), + TOBN(0xe8cd0c8f, 0x508efb63), TOBN(0x9eb5b5c8, 0x6abcefc7), + TOBN(0xf5621f5f, 0xb441ab4f), TOBN(0x79e6c046, 0xb76a2b22), + TOBN(0x74a4792c, 0xe37a1f69), TOBN(0xcbd252cb, 0x03542b60), + TOBN(0x785f65d5, 0xb3c20bd3), TOBN(0x8dea6143, 0x4fabc60c), + TOBN(0x45e21446, 0xde673629), TOBN(0x57f7aa1e, 0x703c2d21), + TOBN(0xa0e99b7f, 0x98c868c7), TOBN(0x4e42f66d, 0x8b641676), + TOBN(0x602884dc, 0x91077896), TOBN(0xa0d690cf, 0xc2c9885b), + TOBN(0xfeb4da33, 0x3b9a5187), TOBN(0x5f789598, 0x153c87ee), + TOBN(0x2192dd47, 0x52b16dba), TOBN(0xdeefc0e6, 0x3524c1b1), + TOBN(0x465ea76e, 0xe4383693), TOBN(0x79401711, 0x361b8d98), + TOBN(0xa5f9ace9, 0xf21a15cb), TOBN(0x73d26163, 0xefee9aeb), + TOBN(0xcca844b3, 0xe677016c), TOBN(0x6c122b07, 0x57eaee06), + TOBN(0xb782dce7, 0x15f09690), TOBN(0x508b9b12, 0x2dfc0fc9), + TOBN(0x9015ab4b, 0x65d89fc6), TOBN(0x5e79dab7, 0xd6d5bb0f), + TOBN(0x64f021f0, 0x6c775aa2), TOBN(0xdf09d8cc, 0x37c7eca1), + TOBN(0x9a761367, 0xef2fa506), TOBN(0xed4ca476, 0x5b81eec6), + TOBN(0x262ede36, 0x10bbb8b5), TOBN(0x0737ce83, 0x0641ada3), + TOBN(0x4c94288a, 0xe9831ccc), TOBN(0x487fc1ce, 0x8065e635), + TOBN(0xb13d7ab3, 0xb8bb3659), TOBN(0xdea5df3e, 0x855e4120), + TOBN(0xb9a18573, 0x85eb0244), TOBN(0x1a1b8ea3, 0xa7cfe0a3), + TOBN(0x3b837119, 0x67b0867c), TOBN(0x8d5e0d08, 0x9d364520), + TOBN(0x52dccc1e, 0xd930f0e3), TOBN(0xefbbcec7, 0xbf20bbaf), + TOBN(0x99cffcab, 0x0263ad10), TOBN(0xd8199e6d, 0xfcd18f8a), + TOBN(0x64e2773f, 0xe9f10617), TOBN(0x0079e8e1, 0x08704848), + TOBN(0x1169989f, 0x8a342283), TOBN(0x8097799c, 0xa83012e6), + TOBN(0xece966cb, 0x8a6a9001), TOBN(0x93b3afef, 0x072ac7fc), + TOBN(0xe6893a2a, 0x2db3d5ba), TOBN(0x263dc462, 0x89bf4fdc), + TOBN(0x8852dfc9, 0xe0396673), TOBN(0x7ac70895, 0x3af362b6), + TOBN(0xbb9cce4d, 0x5c2f342b), TOBN(0xbf80907a, 0xb52d7aae), + TOBN(0x97f3d3cd, 0x2161bcd0), TOBN(0xb25b0834, 0x0962744d), + TOBN(0xc5b18ea5, 0x6c3a1dda), TOBN(0xfe4ec7eb, 0x06c92317), + TOBN(0xb787b890, 0xad1c4afe), TOBN(0xdccd9a92, 0x0ede801a), + TOBN(0x9ac6ddda, 0xdb58da1f), TOBN(0x22bbc12f, 0xb8cae6ee), + TOBN(0xc6f8bced, 0x815c4a43), TOBN(0x8105a92c, 0xf96480c7), + TOBN(0x0dc3dbf3, 0x7a859d51), TOBN(0xe3ec7ce6, 0x3041196b), + TOBN(0xd9f64b25, 0x0d1067c9), TOBN(0xf2321321, 0x3d1f8dd8), + TOBN(0x8b5c619c, 0x76497ee8), TOBN(0x5d2b0ac6, 0xc717370e), + TOBN(0x98204cb6, 0x4fcf68e1), TOBN(0x0bdec211, 0x62bc6792), + TOBN(0x6973ccef, 0xa63b1011), TOBN(0xf9e3fa97, 0xe0de1ac5), + TOBN(0x5efb693e, 0x3d0e0c8b), TOBN(0x037248e9, 0xd2d4fcb4),} + , + {TOBN(0x80802dc9, 0x1ec34f9e), TOBN(0xd8772d35, 0x33810603), + TOBN(0x3f06d66c, 0x530cb4f3), TOBN(0x7be5ed0d, 0xc475c129), + TOBN(0xcb9e3c19, 0x31e82b10), TOBN(0xc63d2857, 0xc9ff6b4c), + TOBN(0xb92118c6, 0x92a1b45e), TOBN(0x0aec4414, 0x7285bbca), + TOBN(0xfc189ae7, 0x1e29a3ef), TOBN(0xcbe906f0, 0x4c93302e), + TOBN(0xd0107914, 0xceaae10e), TOBN(0xb7a23f34, 0xb68e19f8), + TOBN(0xe9d875c2, 0xefd2119d), TOBN(0x03198c6e, 0xfcadc9c8), + TOBN(0x65591bf6, 0x4da17113), TOBN(0x3cf0bbf8, 0x3d443038), + TOBN(0xae485bb7, 0x2b724759), TOBN(0x945353e1, 0xb2d4c63a), + TOBN(0x82159d07, 0xde7d6f2c), TOBN(0x389caef3, 0x4ec5b109), + TOBN(0x4a8ebb53, 0xdb65ef14), TOBN(0x2dc2cb7e, 0xdd99de43), + TOBN(0x816fa3ed, 0x83f2405f), TOBN(0x73429bb9, 0xc14208a3), + TOBN(0xb618d590, 0xb01e6e27), TOBN(0x047e2ccd, 0xe180b2dc), + TOBN(0xd1b299b5, 0x04aea4a9), TOBN(0x412c9e1e, 0x9fa403a4), + TOBN(0x88d28a36, 0x79407552), TOBN(0x49c50136, 0xf332b8e3), + TOBN(0x3a1b6fcc, 0xe668de19), TOBN(0x178851bc, 0x75122b97), + TOBN(0xb1e13752, 0xfb85fa4c), TOBN(0xd61257ce, 0x383c8ce9), + TOBN(0xd43da670, 0xd2f74dae), TOBN(0xa35aa23f, 0xbf846bbb), + TOBN(0x5e74235d, 0x4421fc83), TOBN(0xf6df8ee0, 0xc363473b), + TOBN(0x34d7f52a, 0x3c4aa158), TOBN(0x50d05aab, 0x9bc6d22e), + TOBN(0x8c56e735, 0xa64785f4), TOBN(0xbc56637b, 0x5f29cd07), + TOBN(0x53b2bb80, 0x3ee35067), TOBN(0x50235a0f, 0xdc919270), + TOBN(0x191ab6d8, 0xf2c4aa65), TOBN(0xc3475831, 0x8396023b), + TOBN(0x80400ba5, 0xf0f805ba), TOBN(0x8881065b, 0x5ec0f80f), + TOBN(0xc370e522, 0xcc1b5e83), TOBN(0xde2d4ad1, 0x860b8bfb), + TOBN(0xad364df0, 0x67b256df), TOBN(0x8f12502e, 0xe0138997), + TOBN(0x503fa0dc, 0x7783920a), TOBN(0xe80014ad, 0xc0bc866a), + TOBN(0x3f89b744, 0xd3064ba6), TOBN(0x03511dcd, 0xcba5dba5), + TOBN(0x197dd46d, 0x95a7b1a2), TOBN(0x9c4e7ad6, 0x3c6341fb), + TOBN(0x426eca29, 0x484c2ece), TOBN(0x9211e489, 0xde7f4f8a), + TOBN(0x14997f6e, 0xc78ef1f4), TOBN(0x2b2c0910, 0x06574586), + TOBN(0x17286a6e, 0x1c3eede8), TOBN(0x25f92e47, 0x0f60e018), + TOBN(0x805c5646, 0x31890a36), TOBN(0x703ef600, 0x57feea5b), + TOBN(0x389f747c, 0xaf3c3030), TOBN(0xe0e5daeb, 0x54dd3739), + TOBN(0xfe24a4c3, 0xc9c9f155), TOBN(0x7e4bf176, 0xb5393962), + TOBN(0x37183de2, 0xaf20bf29), TOBN(0x4a1bd7b5, 0xf95a8c3b), + TOBN(0xa83b9699, 0x46191d3d), TOBN(0x281fc8dd, 0x7b87f257), + TOBN(0xb18e2c13, 0x54107588), TOBN(0x6372def7, 0x9b2bafe8), + TOBN(0xdaf4bb48, 0x0d8972ca), TOBN(0x3f2dd4b7, 0x56167a3f), + TOBN(0x1eace32d, 0x84310cf4), TOBN(0xe3bcefaf, 0xe42700aa), + TOBN(0x5fe5691e, 0xd785e73d), TOBN(0xa5db5ab6, 0x2ea60467), + TOBN(0x02e23d41, 0xdfc6514a), TOBN(0x35e8048e, 0xe03c3665), + TOBN(0x3f8b118f, 0x1adaa0f8), TOBN(0x28ec3b45, 0x84ce1a5a), + TOBN(0xe8cacc6e, 0x2c6646b8), TOBN(0x1343d185, 0xdbd0e40f), + TOBN(0xe5d7f844, 0xcaaa358c), TOBN(0x1a1db7e4, 0x9924182a), + TOBN(0xd64cd42d, 0x9c875d9a), TOBN(0xb37b515f, 0x042eeec8), + TOBN(0x4d4dd409, 0x7b165fbe), TOBN(0xfc322ed9, 0xe206eff3), + TOBN(0x7dee4102, 0x59b7e17e), TOBN(0x55a481c0, 0x8236ca00), + TOBN(0x8c885312, 0xc23fc975), TOBN(0x15715806, 0x05d6297b), + TOBN(0xa078868e, 0xf78edd39), TOBN(0x956b31e0, 0x03c45e52), + TOBN(0x470275d5, 0xff7b33a6), TOBN(0xc8d5dc3a, 0x0c7e673f), + TOBN(0x419227b4, 0x7e2f2598), TOBN(0x8b37b634, 0x4c14a975), + TOBN(0xd0667ed6, 0x8b11888c), TOBN(0x5e0e8c3e, 0x803e25dc), + TOBN(0x34e5d0dc, 0xb987a24a), TOBN(0x9f40ac3b, 0xae920323), + TOBN(0x5463de95, 0x34e0f63a), TOBN(0xa128bf92, 0x6b6328f9), + TOBN(0x491ccd7c, 0xda64f1b7), TOBN(0x7ef1ec27, 0xc47bde35), + TOBN(0xa857240f, 0xa36a2737), TOBN(0x35dc1366, 0x63621bc1), + TOBN(0x7a3a6453, 0xd4fb6897), TOBN(0x80f1a439, 0xc929319d), + TOBN(0xfc18274b, 0xf8cb0ba0), TOBN(0xb0b53766, 0x8078c5eb), + TOBN(0xfb0d4924, 0x1e01d0ef), TOBN(0x50d7c67d, 0x372ab09c), + TOBN(0xb4e370af, 0x3aeac968), TOBN(0xe4f7fee9, 0xc4b63266), + TOBN(0xb4acd4c2, 0xe3ac5664), TOBN(0xf8910bd2, 0xceb38cbf), + TOBN(0x1c3ae50c, 0xc9c0726e), TOBN(0x15309569, 0xd97b40bf), + TOBN(0x70884b7f, 0xfd5a5a1b), TOBN(0x3890896a, 0xef8314cd), + TOBN(0x58e1515c, 0xa5618c93), TOBN(0xe665432b, 0x77d942d1), + TOBN(0xb32181bf, 0xb6f767a8), TOBN(0x753794e8, 0x3a604110), + TOBN(0x09afeb7c, 0xe8c0dbcc), TOBN(0x31e02613, 0x598673a3), + TOBN(0x5d98e557, 0x7d46db00), TOBN(0xfc21fb8c, 0x9d985b28), + TOBN(0xc9040116, 0xb0843e0b), TOBN(0x53b1b3a8, 0x69b04531), + TOBN(0xdd1649f0, 0x85d7d830), TOBN(0xbb3bcc87, 0xcb7427e8), + TOBN(0x77261100, 0xc93dce83), TOBN(0x7e79da61, 0xa1922a2a), + TOBN(0x587a2b02, 0xf3149ce8), TOBN(0x147e1384, 0xde92ec83), + TOBN(0x484c83d3, 0xaf077f30), TOBN(0xea78f844, 0x0658b53a), + TOBN(0x912076c2, 0x027aec53), TOBN(0xf34714e3, 0x93c8177d), + TOBN(0x37ef5d15, 0xc2376c84), TOBN(0x8315b659, 0x3d1aa783), + TOBN(0x3a75c484, 0xef852a90), TOBN(0x0ba0c58a, 0x16086bd4), + TOBN(0x29688d7a, 0x529a6d48), TOBN(0x9c7f250d, 0xc2f19203), + TOBN(0x123042fb, 0x682e2df9), TOBN(0x2b7587e7, 0xad8121bc), + TOBN(0x30fc0233, 0xe0182a65), TOBN(0xb82ecf87, 0xe3e1128a), + TOBN(0x71682861, 0x93fb098f), TOBN(0x043e21ae, 0x85e9e6a7), + TOBN(0xab5b49d6, 0x66c834ea), TOBN(0x3be43e18, 0x47414287), + TOBN(0xf40fb859, 0x219a2a47), TOBN(0x0e6559e9, 0xcc58df3c), + TOBN(0xfe1dfe8e, 0x0c6615b4), TOBN(0x14abc8fd, 0x56459d70), + TOBN(0x7be0fa8e, 0x05de0386), TOBN(0x8e63ef68, 0xe9035c7c), + TOBN(0x116401b4, 0x53b31e91), TOBN(0x0cba7ad4, 0x4436b4d8), + TOBN(0x9151f9a0, 0x107afd66), TOBN(0xafaca8d0, 0x1f0ee4c4), + TOBN(0x75fe5c1d, 0x9ee9761c), TOBN(0x3497a16b, 0xf0c0588f), + TOBN(0x3ee2bebd, 0x0304804c), TOBN(0xa8fb9a60, 0xc2c990b9), + TOBN(0xd14d32fe, 0x39251114), TOBN(0x36bf25bc, 0xcac73366), + TOBN(0xc9562c66, 0xdba7495c), TOBN(0x324d301b, 0x46ad348b), + TOBN(0x9f46620c, 0xd670407e), TOBN(0x0ea8d4f1, 0xe3733a01), + TOBN(0xd396d532, 0xb0c324e0), TOBN(0x5b211a0e, 0x03c317cd), + TOBN(0x090d7d20, 0x5ffe7b37), TOBN(0x3b7f3efb, 0x1747d2da), + TOBN(0xa2cb525f, 0xb54fc519), TOBN(0x6e220932, 0xf66a971e), + TOBN(0xddc160df, 0xb486d440), TOBN(0x7fcfec46, 0x3fe13465), + TOBN(0x83da7e4e, 0x76e4c151), TOBN(0xd6fa48a1, 0xd8d302b5), + TOBN(0xc6304f26, 0x5872cd88), TOBN(0x806c1d3c, 0x278b90a1), + TOBN(0x3553e725, 0xcaf0bc1c), TOBN(0xff59e603, 0xbb9d8d5c), + TOBN(0xa4550f32, 0x7a0b85dd), TOBN(0xdec5720a, 0x93ecc217), + TOBN(0x0b88b741, 0x69d62213), TOBN(0x7212f245, 0x5b365955), + TOBN(0x20764111, 0xb5cae787), TOBN(0x13cb7f58, 0x1dfd3124), + TOBN(0x2dca77da, 0x1175aefb), TOBN(0xeb75466b, 0xffaae775), + TOBN(0x74d76f3b, 0xdb6cff32), TOBN(0x7440f37a, 0x61fcda9a), + TOBN(0x1bb3ac92, 0xb525028b), TOBN(0x20fbf8f7, 0xa1975f29), + TOBN(0x982692e1, 0xdf83097f), TOBN(0x28738f6c, 0x554b0800), + TOBN(0xdc703717, 0xa2ce2f2f), TOBN(0x7913b93c, 0x40814194), + TOBN(0x04924593, 0x1fe89636), TOBN(0x7b98443f, 0xf78834a6), + TOBN(0x11c6ab01, 0x5114a5a1), TOBN(0x60deb383, 0xffba5f4c), + TOBN(0x4caa54c6, 0x01a982e6), TOBN(0x1dd35e11, 0x3491cd26), + TOBN(0x973c315f, 0x7cbd6b05), TOBN(0xcab00775, 0x52494724), + TOBN(0x04659b1f, 0x6565e15a), TOBN(0xbf30f529, 0x8c8fb026), + TOBN(0xfc21641b, 0xa8a0de37), TOBN(0xe9c7a366, 0xfa5e5114), + TOBN(0xdb849ca5, 0x52f03ad8), TOBN(0xc7e8dbe9, 0x024e35c0), + TOBN(0xa1a2bbac, 0xcfc3c789), TOBN(0xbf733e7d, 0x9c26f262), + TOBN(0x882ffbf5, 0xb8444823), TOBN(0xb7224e88, 0x6bf8483b), + TOBN(0x53023b8b, 0x65bef640), TOBN(0xaabfec91, 0xd4d5f8cd), + TOBN(0xa40e1510, 0x079ea1bd), TOBN(0x1ad9addc, 0xd05d5d26), + TOBN(0xdb3f2eab, 0x13e68d4f), TOBN(0x1cff1ae2, 0x640f803f), + TOBN(0xe0e7b749, 0xd4cee117), TOBN(0x8e9f275b, 0x4036d909), + TOBN(0xce34e31d, 0x8f4d4c38), TOBN(0x22b37f69, 0xd75130fc), + TOBN(0x83e0f1fd, 0xb4014604), TOBN(0xa8ce9919, 0x89415078), + TOBN(0x82375b75, 0x41792efe), TOBN(0x4f59bf5c, 0x97d4515b), + TOBN(0xac4f324f, 0x923a277d), TOBN(0xd9bc9b7d, 0x650f3406), + TOBN(0xc6fa87d1, 0x8a39bc51), TOBN(0x82588530, 0x5ccc108f), + TOBN(0x5ced3c9f, 0x82e4c634), TOBN(0x8efb8314, 0x3a4464f8), + TOBN(0xe706381b, 0x7a1dca25), TOBN(0x6cd15a3c, 0x5a2a412b), + TOBN(0x9347a8fd, 0xbfcd8fb5), TOBN(0x31db2eef, 0x6e54cd22), + TOBN(0xc4aeb11e, 0xf8d8932f), TOBN(0x11e7c1ed, 0x344411af), + TOBN(0x2653050c, 0xdc9a151e), TOBN(0x9edbfc08, 0x3bb0a859), + TOBN(0x926c81c7, 0xfd5691e7), TOBN(0x9c1b2342, 0x6f39019a), + TOBN(0x64a81c8b, 0x7f8474b9), TOBN(0x90657c07, 0x01761819), + TOBN(0x390b3331, 0x55e0375a), TOBN(0xc676c626, 0xb6ebc47d), + TOBN(0x51623247, 0xb7d6dee8), TOBN(0x0948d927, 0x79659313), + TOBN(0x99700161, 0xe9ab35ed), TOBN(0x06cc32b4, 0x8ddde408), + TOBN(0x6f2fd664, 0x061ef338), TOBN(0x1606fa02, 0xc202e9ed), + TOBN(0x55388bc1, 0x929ba99b), TOBN(0xc4428c5e, 0x1e81df69), + TOBN(0xce2028ae, 0xf91b0b2a), TOBN(0xce870a23, 0xf03dfd3f), + TOBN(0x66ec2c87, 0x0affe8ed), TOBN(0xb205fb46, 0x284d0c00), + TOBN(0xbf5dffe7, 0x44cefa48), TOBN(0xb6fc37a8, 0xa19876d7), + TOBN(0xbecfa84c, 0x08b72863), TOBN(0xd7205ff5, 0x2576374f), + TOBN(0x80330d32, 0x8887de41), TOBN(0x5de0df0c, 0x869ea534), + TOBN(0x13f42753, 0x3c56ea17), TOBN(0xeb1f6069, 0x452b1a78), + TOBN(0x50474396, 0xe30ea15c), TOBN(0x575816a1, 0xc1494125), + TOBN(0xbe1ce55b, 0xfe6bb38f), TOBN(0xb901a948, 0x96ae30f7), + TOBN(0xe5af0f08, 0xd8fc3548), TOBN(0x5010b5d0, 0xd73bfd08), + TOBN(0x993d2880, 0x53fe655a), TOBN(0x99f2630b, 0x1c1309fd), + TOBN(0xd8677baf, 0xb4e3b76f), TOBN(0x14e51ddc, 0xb840784b), + TOBN(0x326c750c, 0xbf0092ce), TOBN(0xc83d306b, 0xf528320f), + TOBN(0xc4456715, 0x77d4715c), TOBN(0xd30019f9, 0x6b703235), + TOBN(0x207ccb2e, 0xd669e986), TOBN(0x57c824af, 0xf6dbfc28), + TOBN(0xf0eb532f, 0xd8f92a23), TOBN(0x4a557fd4, 0x9bb98fd2), + TOBN(0xa57acea7, 0xc1e6199a), TOBN(0x0c663820, 0x8b94b1ed), + TOBN(0x9b42be8f, 0xf83a9266), TOBN(0xc7741c97, 0x0101bd45), + TOBN(0x95770c11, 0x07bd9ceb), TOBN(0x1f50250a, 0x8b2e0744), + TOBN(0xf762eec8, 0x1477b654), TOBN(0xc65b900e, 0x15efe59a), + TOBN(0x88c96148, 0x9546a897), TOBN(0x7e8025b3, 0xc30b4d7c), + TOBN(0xae4065ef, 0x12045cf9), TOBN(0x6fcb2caf, 0x9ccce8bd), + TOBN(0x1fa0ba4e, 0xf2cf6525), TOBN(0xf683125d, 0xcb72c312), + TOBN(0xa01da4ea, 0xe312410e), TOBN(0x67e28677, 0x6cd8e830), + TOBN(0xabd95752, 0x98fb3f07), TOBN(0x05f11e11, 0xeef649a5), + TOBN(0xba47faef, 0x9d3472c2), TOBN(0x3adff697, 0xc77d1345), + TOBN(0x4761fa04, 0xdd15afee), TOBN(0x64f1f61a, 0xb9e69462), + TOBN(0xfa691fab, 0x9bfb9093), TOBN(0x3df8ae8f, 0xa1133dfe), + TOBN(0xcd5f8967, 0x58cc710d), TOBN(0xfbb88d50, 0x16c7fe79), + TOBN(0x8e011b4c, 0xe88c50d1), TOBN(0x7532e807, 0xa8771c4f), + TOBN(0x64c78a48, 0xe2278ee4), TOBN(0x0b283e83, 0x3845072a), + TOBN(0x98a6f291, 0x49e69274), TOBN(0xb96e9668, 0x1868b21c), + TOBN(0x38f0adc2, 0xb1a8908e), TOBN(0x90afcff7, 0x1feb829d), + TOBN(0x9915a383, 0x210b0856), TOBN(0xa5a80602, 0xdef04889), + TOBN(0x800e9af9, 0x7c64d509), TOBN(0x81382d0b, 0xb8996f6f), + TOBN(0x490eba53, 0x81927e27), TOBN(0x46c63b32, 0x4af50182), + TOBN(0x784c5fd9, 0xd3ad62ce), TOBN(0xe4fa1870, 0xf8ae8736), + TOBN(0x4ec9d0bc, 0xd7466b25), TOBN(0x84ddbe1a, 0xdb235c65), + TOBN(0x5e2645ee, 0x163c1688), TOBN(0x570bd00e, 0x00eba747), + TOBN(0xfa51b629, 0x128bfa0f), TOBN(0x92fce1bd, 0x6c1d3b68), + TOBN(0x3e7361dc, 0xb66778b1), TOBN(0x9c7d249d, 0x5561d2bb), + TOBN(0xa40b28bf, 0x0bbc6229), TOBN(0x1c83c05e, 0xdfd91497), + TOBN(0x5f9f5154, 0xf083df05), TOBN(0xbac38b3c, 0xeee66c9d), + TOBN(0xf71db7e3, 0xec0dfcfd), TOBN(0xf2ecda8e, 0x8b0a8416), + TOBN(0x52fddd86, 0x7812aa66), TOBN(0x2896ef10, 0x4e6f4272), + TOBN(0xff27186a, 0x0fe9a745), TOBN(0x08249fcd, 0x49ca70db), + TOBN(0x7425a2e6, 0x441cac49), TOBN(0xf4a0885a, 0xece5ff57), + TOBN(0x6e2cb731, 0x7d7ead58), TOBN(0xf96cf7d6, 0x1898d104), + TOBN(0xafe67c9d, 0x4f2c9a89), TOBN(0x89895a50, 0x1c7bf5bc), + TOBN(0xdc7cb8e5, 0x573cecfa), TOBN(0x66497eae, 0xd15f03e6), + TOBN(0x6bc0de69, 0x3f084420), TOBN(0x323b9b36, 0xacd532b0), + TOBN(0xcfed390a, 0x0115a3c1), TOBN(0x9414c40b, 0x2d65ca0e), + TOBN(0x641406bd, 0x2f530c78), TOBN(0x29369a44, 0x833438f2), + TOBN(0x996884f5, 0x903fa271), TOBN(0xe6da0fd2, 0xb9da921e), + TOBN(0xa6f2f269, 0x5db01e54), TOBN(0x1ee3e9bd, 0x6876214e), + TOBN(0xa26e181c, 0xe27a9497), TOBN(0x36d254e4, 0x8e215e04), + TOBN(0x42f32a6c, 0x252cabca), TOBN(0x99481487, 0x80b57614), + TOBN(0x4c4dfe69, 0x40d9cae1), TOBN(0x05869580, 0x11a10f09), + TOBN(0xca287b57, 0x3491b64b), TOBN(0x77862d5d, 0x3fd4a53b), + TOBN(0xbf94856e, 0x50349126), TOBN(0x2be30bd1, 0x71c5268f), + TOBN(0x10393f19, 0xcbb650a6), TOBN(0x639531fe, 0x778cf9fd), + TOBN(0x02556a11, 0xb2935359), TOBN(0xda38aa96, 0xaf8c126e), + TOBN(0x47dbe6c2, 0x0960167f), TOBN(0x37bbabb6, 0x501901cd), + TOBN(0xb6e979e0, 0x2c947778), TOBN(0xd69a5175, 0x7a1a1dc6), + TOBN(0xc3ed5095, 0x9d9faf0c), TOBN(0x4dd9c096, 0x1d5fa5f0), + TOBN(0xa0c4304d, 0x64f16ea8), TOBN(0x8b1cac16, 0x7e718623), + TOBN(0x0b576546, 0x7c67f03e), TOBN(0x559cf5ad, 0xcbd88c01), + TOBN(0x074877bb, 0x0e2af19a), TOBN(0x1f717ec1, 0xa1228c92), + TOBN(0x70bcb800, 0x326e8920), TOBN(0xec6e2c5c, 0x4f312804), + TOBN(0x426aea7d, 0x3fca4752), TOBN(0xf12c0949, 0x2211f62a), + TOBN(0x24beecd8, 0x7be7b6b5), TOBN(0xb77eaf4c, 0x36d7a27d), + TOBN(0x154c2781, 0xfda78fd3), TOBN(0x848a83b0, 0x264eeabe), + TOBN(0x81287ef0, 0x4ffe2bc4), TOBN(0x7b6d88c6, 0xb6b6fc2a), + TOBN(0x805fb947, 0xce417d99), TOBN(0x4b93dcc3, 0x8b916cc4), + TOBN(0x72e65bb3, 0x21273323), TOBN(0xbcc1badd, 0x6ea9886e), + TOBN(0x0e223011, 0x4bc5ee85), TOBN(0xa561be74, 0xc18ee1e4), + TOBN(0x762fd2d4, 0xa6bcf1f1), TOBN(0x50e6a5a4, 0x95231489), + TOBN(0xca96001f, 0xa00b500b), TOBN(0x5c098cfc, 0x5d7dcdf5), + TOBN(0xa64e2d2e, 0x8c446a85), TOBN(0xbae9bcf1, 0x971f3c62), + TOBN(0x4ec22683, 0x8435a2c5), TOBN(0x8ceaed6c, 0x4bad4643), + TOBN(0xe9f8fb47, 0xccccf4e3), TOBN(0xbd4f3fa4, 0x1ce3b21e), + TOBN(0xd79fb110, 0xa3db3292), TOBN(0xe28a37da, 0xb536c66a), + TOBN(0x279ce87b, 0x8e49e6a9), TOBN(0x70ccfe8d, 0xfdcec8e3), + TOBN(0x2193e4e0, 0x3ba464b2), TOBN(0x0f39d60e, 0xaca9a398), + TOBN(0x7d7932af, 0xf82c12ab), TOBN(0xd8ff50ed, 0x91e7e0f7), + TOBN(0xea961058, 0xfa28a7e0), TOBN(0xc726cf25, 0x0bf5ec74), + TOBN(0xe74d55c8, 0xdb229666), TOBN(0x0bd9abbf, 0xa57f5799), + TOBN(0x7479ef07, 0x4dfc47b3), TOBN(0xd9c65fc3, 0x0c52f91d), + TOBN(0x8e0283fe, 0x36a8bde2), TOBN(0xa32a8b5e, 0x7d4b7280), + TOBN(0x6a677c61, 0x12e83233), TOBN(0x0fbb3512, 0xdcc9bf28), + TOBN(0x562e8ea5, 0x0d780f61), TOBN(0x0db8b22b, 0x1dc4e89c), + TOBN(0x0a6fd1fb, 0x89be0144), TOBN(0x8c77d246, 0xca57113b), + TOBN(0x4639075d, 0xff09c91c), TOBN(0x5b47b17f, 0x5060824c), + TOBN(0x58aea2b0, 0x16287b52), TOBN(0xa1343520, 0xd0cd8eb0), + TOBN(0x6148b4d0, 0xc5d58573), TOBN(0xdd2b6170, 0x291c68ae), + TOBN(0xa61b3929, 0x1da3b3b7), TOBN(0x5f946d79, 0x08c4ac10), + TOBN(0x4105d4a5, 0x7217d583), TOBN(0x5061da3d, 0x25e6de5e), + TOBN(0x3113940d, 0xec1b4991), TOBN(0xf12195e1, 0x36f485ae), + TOBN(0xa7507fb2, 0x731a2ee0), TOBN(0x95057a8e, 0x6e9e196e), + TOBN(0xa3c2c911, 0x2e130136), TOBN(0x97dfbb36, 0x33c60d15), + TOBN(0xcaf3c581, 0xb300ee2b), TOBN(0x77f25d90, 0xf4bac8b8), + TOBN(0xdb1c4f98, 0x6d840cd6), TOBN(0x471d62c0, 0xe634288c), + TOBN(0x8ec2f85e, 0xcec8a161), TOBN(0x41f37cbc, 0xfa6f4ae2), + TOBN(0x6793a20f, 0x4b709985), TOBN(0x7a7bd33b, 0xefa8985b), + TOBN(0x2c6a3fbd, 0x938e6446), TOBN(0x19042619, 0x2a8d47c1), + TOBN(0x16848667, 0xcc36975f), TOBN(0x02acf168, 0x9d5f1dfb), + TOBN(0x62d41ad4, 0x613baa94), TOBN(0xb56fbb92, 0x9f684670), + TOBN(0xce610d0d, 0xe9e40569), TOBN(0x7b99c65f, 0x35489fef), + TOBN(0x0c88ad1b, 0x3df18b97), TOBN(0x81b7d9be, 0x5d0e9edb), + TOBN(0xd85218c0, 0xc716cc0a), TOBN(0xf4b5ff90, 0x85691c49), + TOBN(0xa4fd666b, 0xce356ac6), TOBN(0x17c72895, 0x4b327a7a), + TOBN(0xf93d5085, 0xda6be7de), TOBN(0xff71530e, 0x3301d34e), + TOBN(0x4cd96442, 0xd8f448e8), TOBN(0x9283d331, 0x2ed18ffa), + TOBN(0x4d33dd99, 0x2a849870), TOBN(0xa716964b, 0x41576335), + TOBN(0xff5e3a9b, 0x179be0e5), TOBN(0x5b9d6b1b, 0x83b13632), + TOBN(0x3b8bd7d4, 0xa52f313b), TOBN(0xc9dd95a0, 0x637a4660), + TOBN(0x30035962, 0x0b3e218f), TOBN(0xce1481a3, 0xc7b28a3c), + TOBN(0xab41b43a, 0x43228d83), TOBN(0x24ae1c30, 0x4ad63f99), + TOBN(0x8e525f1a, 0x46a51229), TOBN(0x14af860f, 0xcd26d2b4), + TOBN(0xd6baef61, 0x3f714aa1), TOBN(0xf51865ad, 0xeb78795e), + TOBN(0xd3e21fce, 0xe6a9d694), TOBN(0x82ceb1dd, 0x8a37b527)} +}; diff --git a/drivers/builtin_openssl2/crypto/ec/ectest.c b/drivers/builtin_openssl2/crypto/ec/ectest.c deleted file mode 100644 index efab0b07b1..0000000000 --- a/drivers/builtin_openssl2/crypto/ec/ectest.c +++ /dev/null @@ -1,1861 +0,0 @@ -/* crypto/ec/ectest.c */ -/* - * Originally written by Bodo Moeller for the OpenSSL project. - */ -/* ==================================================================== - * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * Portions of the attached software ("Contribution") are developed by - * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. - * - * The Contribution is licensed pursuant to the OpenSSL open source - * license provided above. - * - * The elliptic curve binary polynomial software is originally written by - * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. - * - */ - -#include -#include -#ifdef FLAT_INC -# include "e_os.h" -#else -# include "../e_os.h" -#endif -#include -#include - -#ifdef OPENSSL_NO_EC -int main(int argc, char *argv[]) -{ - puts("Elliptic curves are disabled."); - return 0; -} -#else - -# include -# ifndef OPENSSL_NO_ENGINE -# include -# endif -# include -# include -# include -# include -# include -# include - -# if defined(_MSC_VER) && defined(_MIPS_) && (_MSC_VER/100==12) -/* suppress "too big too optimize" warning */ -# pragma warning(disable:4959) -# endif - -# define ABORT do { \ - fflush(stdout); \ - fprintf(stderr, "%s:%d: ABORT\n", __FILE__, __LINE__); \ - ERR_print_errors_fp(stderr); \ - EXIT(1); \ -} while (0) - -# define TIMING_BASE_PT 0 -# define TIMING_RAND_PT 1 -# define TIMING_SIMUL 2 - -# if 0 -static void timings(EC_GROUP *group, int type, BN_CTX *ctx) -{ - clock_t clck; - int i, j; - BIGNUM *s; - BIGNUM *r[10], *r0[10]; - EC_POINT *P; - - s = BN_new(); - if (s == NULL) - ABORT; - - fprintf(stdout, "Timings for %d-bit field, ", EC_GROUP_get_degree(group)); - if (!EC_GROUP_get_order(group, s, ctx)) - ABORT; - fprintf(stdout, "%d-bit scalars ", (int)BN_num_bits(s)); - fflush(stdout); - - P = EC_POINT_new(group); - if (P == NULL) - ABORT; - EC_POINT_copy(P, EC_GROUP_get0_generator(group)); - - for (i = 0; i < 10; i++) { - if ((r[i] = BN_new()) == NULL) - ABORT; - if (!BN_pseudo_rand(r[i], BN_num_bits(s), 0, 0)) - ABORT; - if (type != TIMING_BASE_PT) { - if ((r0[i] = BN_new()) == NULL) - ABORT; - if (!BN_pseudo_rand(r0[i], BN_num_bits(s), 0, 0)) - ABORT; - } - } - - clck = clock(); - for (i = 0; i < 10; i++) { - for (j = 0; j < 10; j++) { - if (!EC_POINT_mul - (group, P, (type != TIMING_RAND_PT) ? r[i] : NULL, - (type != TIMING_BASE_PT) ? P : NULL, - (type != TIMING_BASE_PT) ? r0[i] : NULL, ctx)) - ABORT; - } - } - clck = clock() - clck; - - fprintf(stdout, "\n"); - -# ifdef CLOCKS_PER_SEC - /* - * "To determine the time in seconds, the value returned by the clock - * function should be divided by the value of the macro CLOCKS_PER_SEC." - * -- ISO/IEC 9899 - */ -# define UNIT "s" -# else - /* - * "`CLOCKS_PER_SEC' undeclared (first use this function)" -- cc on - * NeXTstep/OpenStep - */ -# define UNIT "units" -# define CLOCKS_PER_SEC 1 -# endif - - if (type == TIMING_BASE_PT) { - fprintf(stdout, "%i %s in %.2f " UNIT "\n", i * j, - "base point multiplications", (double)clck / CLOCKS_PER_SEC); - } else if (type == TIMING_RAND_PT) { - fprintf(stdout, "%i %s in %.2f " UNIT "\n", i * j, - "random point multiplications", - (double)clck / CLOCKS_PER_SEC); - } else if (type == TIMING_SIMUL) { - fprintf(stdout, "%i %s in %.2f " UNIT "\n", i * j, - "s*P+t*Q operations", (double)clck / CLOCKS_PER_SEC); - } - fprintf(stdout, "average: %.4f " UNIT "\n", - (double)clck / (CLOCKS_PER_SEC * i * j)); - - EC_POINT_free(P); - BN_free(s); - for (i = 0; i < 10; i++) { - BN_free(r[i]); - if (type != TIMING_BASE_PT) - BN_free(r0[i]); - } -} -# endif - -/* test multiplication with group order, long and negative scalars */ -static void group_order_tests(EC_GROUP *group) -{ - BIGNUM *n1, *n2, *order; - EC_POINT *P = EC_POINT_new(group); - EC_POINT *Q = EC_POINT_new(group); - BN_CTX *ctx = BN_CTX_new(); - int i; - - n1 = BN_new(); - n2 = BN_new(); - order = BN_new(); - fprintf(stdout, "verify group order ..."); - fflush(stdout); - if (!EC_GROUP_get_order(group, order, ctx)) - ABORT; - if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) - ABORT; - fprintf(stdout, "."); - fflush(stdout); - if (!EC_GROUP_precompute_mult(group, ctx)) - ABORT; - if (!EC_POINT_mul(group, Q, order, NULL, NULL, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) - ABORT; - fprintf(stdout, " ok\n"); - fprintf(stdout, "long/negative scalar tests "); - for (i = 1; i <= 2; i++) { - const BIGNUM *scalars[6]; - const EC_POINT *points[6]; - - fprintf(stdout, i == 1 ? - "allowing precomputation ... " : - "without precomputation ... "); - if (!BN_set_word(n1, i)) - ABORT; - /* - * If i == 1, P will be the predefined generator for which - * EC_GROUP_precompute_mult has set up precomputation. - */ - if (!EC_POINT_mul(group, P, n1, NULL, NULL, ctx)) - ABORT; - - if (!BN_one(n1)) - ABORT; - /* n1 = 1 - order */ - if (!BN_sub(n1, n1, order)) - ABORT; - if (!EC_POINT_mul(group, Q, NULL, P, n1, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, Q, P, ctx)) - ABORT; - - /* n2 = 1 + order */ - if (!BN_add(n2, order, BN_value_one())) - ABORT; - if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, Q, P, ctx)) - ABORT; - - /* n2 = (1 - order) * (1 + order) = 1 - order^2 */ - if (!BN_mul(n2, n1, n2, ctx)) - ABORT; - if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, Q, P, ctx)) - ABORT; - - /* n2 = order^2 - 1 */ - BN_set_negative(n2, 0); - if (!EC_POINT_mul(group, Q, NULL, P, n2, ctx)) - ABORT; - /* Add P to verify the result. */ - if (!EC_POINT_add(group, Q, Q, P, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, Q)) - ABORT; - - /* Exercise EC_POINTs_mul, including corner cases. */ - if (EC_POINT_is_at_infinity(group, P)) - ABORT; - scalars[0] = n1; - points[0] = Q; /* => infinity */ - scalars[1] = n2; - points[1] = P; /* => -P */ - scalars[2] = n1; - points[2] = Q; /* => infinity */ - scalars[3] = n2; - points[3] = Q; /* => infinity */ - scalars[4] = n1; - points[4] = P; /* => P */ - scalars[5] = n2; - points[5] = Q; /* => infinity */ - if (!EC_POINTs_mul(group, P, NULL, 6, points, scalars, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - } - fprintf(stdout, "ok\n"); - - EC_POINT_free(P); - EC_POINT_free(Q); - BN_free(n1); - BN_free(n2); - BN_free(order); - BN_CTX_free(ctx); -} - -static void prime_field_tests(void) -{ - BN_CTX *ctx = NULL; - BIGNUM *p, *a, *b; - EC_GROUP *group; - EC_GROUP *P_160 = NULL, *P_192 = NULL, *P_224 = NULL, *P_256 = - NULL, *P_384 = NULL, *P_521 = NULL; - EC_POINT *P, *Q, *R; - BIGNUM *x, *y, *z; - unsigned char buf[100]; - size_t i, len; - int k; - -# if 1 /* optional */ - ctx = BN_CTX_new(); - if (!ctx) - ABORT; -# endif - - p = BN_new(); - a = BN_new(); - b = BN_new(); - if (!p || !a || !b) - ABORT; - - if (!BN_hex2bn(&p, "17")) - ABORT; - if (!BN_hex2bn(&a, "1")) - ABORT; - if (!BN_hex2bn(&b, "1")) - ABORT; - - group = EC_GROUP_new(EC_GFp_mont_method()); /* applications should use - * EC_GROUP_new_curve_GFp so - * that the library gets to - * choose the EC_METHOD */ - if (!group) - ABORT; - - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - { - EC_GROUP *tmp; - tmp = EC_GROUP_new(EC_GROUP_method_of(group)); - if (!tmp) - ABORT; - if (!EC_GROUP_copy(tmp, group)) - ABORT; - EC_GROUP_free(group); - group = tmp; - } - - if (!EC_GROUP_get_curve_GFp(group, p, a, b, ctx)) - ABORT; - - fprintf(stdout, - "Curve defined by Weierstrass equation\n y^2 = x^3 + a*x + b (mod 0x"); - BN_print_fp(stdout, p); - fprintf(stdout, ")\n a = 0x"); - BN_print_fp(stdout, a); - fprintf(stdout, "\n b = 0x"); - BN_print_fp(stdout, b); - fprintf(stdout, "\n"); - - P = EC_POINT_new(group); - Q = EC_POINT_new(group); - R = EC_POINT_new(group); - if (!P || !Q || !R) - ABORT; - - if (!EC_POINT_set_to_infinity(group, P)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - buf[0] = 0; - if (!EC_POINT_oct2point(group, Q, buf, 1, ctx)) - ABORT; - - if (!EC_POINT_add(group, P, P, Q, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - x = BN_new(); - y = BN_new(); - z = BN_new(); - if (!x || !y || !z) - ABORT; - - if (!BN_hex2bn(&x, "D")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, Q, x, 1, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, Q, ctx) <= 0) { - if (!EC_POINT_get_affine_coordinates_GFp(group, Q, x, y, ctx)) - ABORT; - fprintf(stderr, "Point is not on curve: x = 0x"); - BN_print_fp(stderr, x); - fprintf(stderr, ", y = 0x"); - BN_print_fp(stderr, y); - fprintf(stderr, "\n"); - ABORT; - } - - fprintf(stdout, "A cyclic subgroup:\n"); - k = 100; - do { - if (k-- == 0) - ABORT; - - if (EC_POINT_is_at_infinity(group, P)) - fprintf(stdout, " point at infinity\n"); - else { - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - - fprintf(stdout, " x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, ", y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - } - - if (!EC_POINT_copy(R, P)) - ABORT; - if (!EC_POINT_add(group, P, P, Q, ctx)) - ABORT; - -# if 0 /* optional */ - { - EC_POINT *points[3]; - - points[0] = R; - points[1] = Q; - points[2] = P; - if (!EC_POINTs_make_affine(group, 2, points, ctx)) - ABORT; - } -# endif - - } - while (!EC_POINT_is_at_infinity(group, P)); - - if (!EC_POINT_add(group, P, Q, R, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_COMPRESSED, buf, - sizeof buf, ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "Generator as octet string, compressed form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); - - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_UNCOMPRESSED, buf, - sizeof buf, ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "\nGenerator as octet string, uncompressed form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); - - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_HYBRID, buf, sizeof buf, - ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "\nGenerator as octet string, hybrid form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); - - if (!EC_POINT_get_Jprojective_coordinates_GFp(group, R, x, y, z, ctx)) - ABORT; - fprintf(stdout, - "\nA representation of the inverse of that generator in\nJacobian projective coordinates:\n X = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, ", Y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, ", Z = 0x"); - BN_print_fp(stdout, z); - fprintf(stdout, "\n"); - - if (!EC_POINT_invert(group, P, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, R, ctx)) - ABORT; - - /* - * Curve secp160r1 (Certicom Research SEC 2 Version 1.0, section 2.4.2, - * 2000) -- not a NIST curve, but commonly used - */ - - if (!BN_hex2bn(&p, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFF")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF7FFFFFFC")) - ABORT; - if (!BN_hex2bn(&b, "1C97BEFC54BD7A8B65ACF89F81D4D4ADC565FA45")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn(&x, "4A96B5688EF573284664698968C38BB913CBFC82")) - ABORT; - if (!BN_hex2bn(&y, "23a628553168947d59dcc912042351377ac5fb32")) - ABORT; - if (!EC_POINT_set_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn(&z, "0100000000000000000001F4C8F927AED3CA752257")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nSEC2 curve secp160r1 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn(&z, "23a628553168947d59dcc912042351377ac5fb32")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 160) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_160 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_160, group)) - ABORT; - - /* Curve P-192 (FIPS PUB 186-2, App. 6) */ - - if (!BN_hex2bn(&p, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFC")) - ABORT; - if (!BN_hex2bn(&b, "64210519E59C80E70FA7E9AB72243049FEB8DEECC146B9B1")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn(&x, "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn(&z, "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nNIST curve P-192 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn(&z, "07192B95FFC8DA78631011ED6B24CDD573F977A11E794811")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 192) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_192 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_192, group)) - ABORT; - - /* Curve P-224 (FIPS PUB 186-2, App. 6) */ - - if (!BN_hex2bn - (&p, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn - (&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE")) - ABORT; - if (!BN_hex2bn - (&b, "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn - (&x, "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 0, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn - (&z, "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nNIST curve P-224 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn - (&z, "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 224) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_224 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_224, group)) - ABORT; - - /* Curve P-256 (FIPS PUB 186-2, App. 6) */ - - if (!BN_hex2bn - (&p, - "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn - (&a, - "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC")) - ABORT; - if (!BN_hex2bn - (&b, - "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn - (&x, - "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn(&z, "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E" - "84F3B9CAC2FC632551")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nNIST curve P-256 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn - (&z, - "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 256) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_256 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_256, group)) - ABORT; - - /* Curve P-384 (FIPS PUB 186-2, App. 6) */ - - if (!BN_hex2bn(&p, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC")) - ABORT; - if (!BN_hex2bn(&b, "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141" - "120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn(&x, "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B" - "9859F741E082542A385502F25DBF55296C3A545E3872760AB7")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn(&z, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nNIST curve P-384 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn(&z, "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A14" - "7CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 384) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_384 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_384, group)) - ABORT; - - /* Curve P-521 (FIPS PUB 186-2, App. 6) */ - - if (!BN_hex2bn(&p, "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFFFFFFFFFFFF")) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn(&a, "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFFFFFFFFFFFC")) - ABORT; - if (!BN_hex2bn(&b, "051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B" - "315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573" - "DF883D2C34F1EF451FD46B503F00")) - ABORT; - if (!EC_GROUP_set_curve_GFp(group, p, a, b, ctx)) - ABORT; - - if (!BN_hex2bn(&x, "C6858E06B70404E9CD9E3ECB662395B4429C648139053F" - "B521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B" - "3C1856A429BF97E7E31C2E5BD66")) - ABORT; - if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 0, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!BN_hex2bn(&z, "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" - "FFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5" - "C9B8899C47AEBB6FB71E91386409")) - ABORT; - if (!EC_GROUP_set_generator(group, P, z, BN_value_one())) - ABORT; - - if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) - ABORT; - fprintf(stdout, "\nNIST curve P-521 -- Generator:\n x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, "\n y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - /* G_y value taken from the standard: */ - if (!BN_hex2bn(&z, "11839296A789A3BC0045C8A5FB42C7D1BD998F54449579" - "B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C" - "7086A272C24088BE94769FD16650")) - ABORT; - if (0 != BN_cmp(y, z)) - ABORT; - - fprintf(stdout, "verify degree ..."); - if (EC_GROUP_get_degree(group) != 521) - ABORT; - fprintf(stdout, " ok\n"); - - group_order_tests(group); - - if (!(P_521 = EC_GROUP_new(EC_GROUP_method_of(group)))) - ABORT; - if (!EC_GROUP_copy(P_521, group)) - ABORT; - - /* more tests using the last curve */ - - if (!EC_POINT_copy(Q, P)) - ABORT; - if (EC_POINT_is_at_infinity(group, Q)) - ABORT; - if (!EC_POINT_dbl(group, P, P, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!EC_POINT_invert(group, Q, ctx)) - ABORT; /* P = -2Q */ - - if (!EC_POINT_add(group, R, P, Q, ctx)) - ABORT; - if (!EC_POINT_add(group, R, R, Q, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, R)) - ABORT; /* R = P + 2Q */ - - { - const EC_POINT *points[4]; - const BIGNUM *scalars[4]; - BIGNUM scalar3; - - if (EC_POINT_is_at_infinity(group, Q)) - ABORT; - points[0] = Q; - points[1] = Q; - points[2] = Q; - points[3] = Q; - - if (!EC_GROUP_get_order(group, z, ctx)) - ABORT; - if (!BN_add(y, z, BN_value_one())) - ABORT; - if (BN_is_odd(y)) - ABORT; - if (!BN_rshift1(y, y)) - ABORT; - scalars[0] = y; /* (group order + 1)/2, so y*Q + y*Q = Q */ - scalars[1] = y; - - fprintf(stdout, "combined multiplication ..."); - fflush(stdout); - - /* z is still the group order */ - if (!EC_POINTs_mul(group, P, NULL, 2, points, scalars, ctx)) - ABORT; - if (!EC_POINTs_mul(group, R, z, 2, points, scalars, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, R, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, R, Q, ctx)) - ABORT; - - fprintf(stdout, "."); - fflush(stdout); - - if (!BN_pseudo_rand(y, BN_num_bits(y), 0, 0)) - ABORT; - if (!BN_add(z, z, y)) - ABORT; - BN_set_negative(z, 1); - scalars[0] = y; - scalars[1] = z; /* z = -(order + y) */ - - if (!EC_POINTs_mul(group, P, NULL, 2, points, scalars, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - fprintf(stdout, "."); - fflush(stdout); - - if (!BN_pseudo_rand(x, BN_num_bits(y) - 1, 0, 0)) - ABORT; - if (!BN_add(z, x, y)) - ABORT; - BN_set_negative(z, 1); - scalars[0] = x; - scalars[1] = y; - scalars[2] = z; /* z = -(x+y) */ - - BN_init(&scalar3); - BN_zero(&scalar3); - scalars[3] = &scalar3; - - if (!EC_POINTs_mul(group, P, NULL, 4, points, scalars, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - fprintf(stdout, " ok\n\n"); - - BN_free(&scalar3); - } - -# if 0 - timings(P_160, TIMING_BASE_PT, ctx); - timings(P_160, TIMING_RAND_PT, ctx); - timings(P_160, TIMING_SIMUL, ctx); - timings(P_192, TIMING_BASE_PT, ctx); - timings(P_192, TIMING_RAND_PT, ctx); - timings(P_192, TIMING_SIMUL, ctx); - timings(P_224, TIMING_BASE_PT, ctx); - timings(P_224, TIMING_RAND_PT, ctx); - timings(P_224, TIMING_SIMUL, ctx); - timings(P_256, TIMING_BASE_PT, ctx); - timings(P_256, TIMING_RAND_PT, ctx); - timings(P_256, TIMING_SIMUL, ctx); - timings(P_384, TIMING_BASE_PT, ctx); - timings(P_384, TIMING_RAND_PT, ctx); - timings(P_384, TIMING_SIMUL, ctx); - timings(P_521, TIMING_BASE_PT, ctx); - timings(P_521, TIMING_RAND_PT, ctx); - timings(P_521, TIMING_SIMUL, ctx); -# endif - - if (ctx) - BN_CTX_free(ctx); - BN_free(p); - BN_free(a); - BN_free(b); - EC_GROUP_free(group); - EC_POINT_free(P); - EC_POINT_free(Q); - EC_POINT_free(R); - BN_free(x); - BN_free(y); - BN_free(z); - - if (P_160) - EC_GROUP_free(P_160); - if (P_192) - EC_GROUP_free(P_192); - if (P_224) - EC_GROUP_free(P_224); - if (P_256) - EC_GROUP_free(P_256); - if (P_384) - EC_GROUP_free(P_384); - if (P_521) - EC_GROUP_free(P_521); - -} - -/* Change test based on whether binary point compression is enabled or not. */ -# ifdef OPENSSL_EC_BIN_PT_COMP -# define CHAR2_CURVE_TEST_INTERNAL(_name, _p, _a, _b, _x, _y, _y_bit, _order, _cof, _degree, _variable) \ - if (!BN_hex2bn(&x, _x)) ABORT; \ - if (!EC_POINT_set_compressed_coordinates_GF2m(group, P, x, _y_bit, ctx)) ABORT; \ - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; \ - if (!BN_hex2bn(&z, _order)) ABORT; \ - if (!BN_hex2bn(&cof, _cof)) ABORT; \ - if (!EC_GROUP_set_generator(group, P, z, cof)) ABORT; \ - if (!EC_POINT_get_affine_coordinates_GF2m(group, P, x, y, ctx)) ABORT; \ - fprintf(stdout, "\n%s -- Generator:\n x = 0x", _name); \ - BN_print_fp(stdout, x); \ - fprintf(stdout, "\n y = 0x"); \ - BN_print_fp(stdout, y); \ - fprintf(stdout, "\n"); \ - /* G_y value taken from the standard: */ \ - if (!BN_hex2bn(&z, _y)) ABORT; \ - if (0 != BN_cmp(y, z)) ABORT; -# else -# define CHAR2_CURVE_TEST_INTERNAL(_name, _p, _a, _b, _x, _y, _y_bit, _order, _cof, _degree, _variable) \ - if (!BN_hex2bn(&x, _x)) ABORT; \ - if (!BN_hex2bn(&y, _y)) ABORT; \ - if (!EC_POINT_set_affine_coordinates_GF2m(group, P, x, y, ctx)) ABORT; \ - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; \ - if (!BN_hex2bn(&z, _order)) ABORT; \ - if (!BN_hex2bn(&cof, _cof)) ABORT; \ - if (!EC_GROUP_set_generator(group, P, z, cof)) ABORT; \ - fprintf(stdout, "\n%s -- Generator:\n x = 0x", _name); \ - BN_print_fp(stdout, x); \ - fprintf(stdout, "\n y = 0x"); \ - BN_print_fp(stdout, y); \ - fprintf(stdout, "\n"); -# endif - -# define CHAR2_CURVE_TEST(_name, _p, _a, _b, _x, _y, _y_bit, _order, _cof, _degree, _variable) \ - if (!BN_hex2bn(&p, _p)) ABORT; \ - if (!BN_hex2bn(&a, _a)) ABORT; \ - if (!BN_hex2bn(&b, _b)) ABORT; \ - if (!EC_GROUP_set_curve_GF2m(group, p, a, b, ctx)) ABORT; \ - CHAR2_CURVE_TEST_INTERNAL(_name, _p, _a, _b, _x, _y, _y_bit, _order, _cof, _degree, _variable) \ - fprintf(stdout, "verify degree ..."); \ - if (EC_GROUP_get_degree(group) != _degree) ABORT; \ - fprintf(stdout, " ok\n"); \ - group_order_tests(group); \ - if (!(_variable = EC_GROUP_new(EC_GROUP_method_of(group)))) ABORT; \ - if (!EC_GROUP_copy(_variable, group)) ABORT; \ - -# ifndef OPENSSL_NO_EC2M - -static void char2_field_tests(void) -{ - BN_CTX *ctx = NULL; - BIGNUM *p, *a, *b; - EC_GROUP *group; - EC_GROUP *C2_K163 = NULL, *C2_K233 = NULL, *C2_K283 = NULL, *C2_K409 = - NULL, *C2_K571 = NULL; - EC_GROUP *C2_B163 = NULL, *C2_B233 = NULL, *C2_B283 = NULL, *C2_B409 = - NULL, *C2_B571 = NULL; - EC_POINT *P, *Q, *R; - BIGNUM *x, *y, *z, *cof; - unsigned char buf[100]; - size_t i, len; - int k; - -# if 1 /* optional */ - ctx = BN_CTX_new(); - if (!ctx) - ABORT; -# endif - - p = BN_new(); - a = BN_new(); - b = BN_new(); - if (!p || !a || !b) - ABORT; - - if (!BN_hex2bn(&p, "13")) - ABORT; - if (!BN_hex2bn(&a, "3")) - ABORT; - if (!BN_hex2bn(&b, "1")) - ABORT; - - group = EC_GROUP_new(EC_GF2m_simple_method()); /* applications should use - * EC_GROUP_new_curve_GF2m - * so that the library gets - * to choose the EC_METHOD */ - if (!group) - ABORT; - if (!EC_GROUP_set_curve_GF2m(group, p, a, b, ctx)) - ABORT; - - { - EC_GROUP *tmp; - tmp = EC_GROUP_new(EC_GROUP_method_of(group)); - if (!tmp) - ABORT; - if (!EC_GROUP_copy(tmp, group)) - ABORT; - EC_GROUP_free(group); - group = tmp; - } - - if (!EC_GROUP_get_curve_GF2m(group, p, a, b, ctx)) - ABORT; - - fprintf(stdout, - "Curve defined by Weierstrass equation\n y^2 + x*y = x^3 + a*x^2 + b (mod 0x"); - BN_print_fp(stdout, p); - fprintf(stdout, ")\n a = 0x"); - BN_print_fp(stdout, a); - fprintf(stdout, "\n b = 0x"); - BN_print_fp(stdout, b); - fprintf(stdout, "\n(0x... means binary polynomial)\n"); - - P = EC_POINT_new(group); - Q = EC_POINT_new(group); - R = EC_POINT_new(group); - if (!P || !Q || !R) - ABORT; - - if (!EC_POINT_set_to_infinity(group, P)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - buf[0] = 0; - if (!EC_POINT_oct2point(group, Q, buf, 1, ctx)) - ABORT; - - if (!EC_POINT_add(group, P, P, Q, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - x = BN_new(); - y = BN_new(); - z = BN_new(); - cof = BN_new(); - if (!x || !y || !z || !cof) - ABORT; - - if (!BN_hex2bn(&x, "6")) - ABORT; -/* Change test based on whether binary point compression is enabled or not. */ -# ifdef OPENSSL_EC_BIN_PT_COMP - if (!EC_POINT_set_compressed_coordinates_GF2m(group, Q, x, 1, ctx)) - ABORT; -# else - if (!BN_hex2bn(&y, "8")) - ABORT; - if (!EC_POINT_set_affine_coordinates_GF2m(group, Q, x, y, ctx)) - ABORT; -# endif - if (EC_POINT_is_on_curve(group, Q, ctx) <= 0) { -/* Change test based on whether binary point compression is enabled or not. */ -# ifdef OPENSSL_EC_BIN_PT_COMP - if (!EC_POINT_get_affine_coordinates_GF2m(group, Q, x, y, ctx)) - ABORT; -# endif - fprintf(stderr, "Point is not on curve: x = 0x"); - BN_print_fp(stderr, x); - fprintf(stderr, ", y = 0x"); - BN_print_fp(stderr, y); - fprintf(stderr, "\n"); - ABORT; - } - - fprintf(stdout, "A cyclic subgroup:\n"); - k = 100; - do { - if (k-- == 0) - ABORT; - - if (EC_POINT_is_at_infinity(group, P)) - fprintf(stdout, " point at infinity\n"); - else { - if (!EC_POINT_get_affine_coordinates_GF2m(group, P, x, y, ctx)) - ABORT; - - fprintf(stdout, " x = 0x"); - BN_print_fp(stdout, x); - fprintf(stdout, ", y = 0x"); - BN_print_fp(stdout, y); - fprintf(stdout, "\n"); - } - - if (!EC_POINT_copy(R, P)) - ABORT; - if (!EC_POINT_add(group, P, P, Q, ctx)) - ABORT; - } - while (!EC_POINT_is_at_infinity(group, P)); - - if (!EC_POINT_add(group, P, Q, R, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - -/* Change test based on whether binary point compression is enabled or not. */ -# ifdef OPENSSL_EC_BIN_PT_COMP - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_COMPRESSED, buf, - sizeof buf, ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "Generator as octet string, compressed form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); -# endif - - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_UNCOMPRESSED, buf, - sizeof buf, ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "\nGenerator as octet string, uncompressed form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); - -/* Change test based on whether binary point compression is enabled or not. */ -# ifdef OPENSSL_EC_BIN_PT_COMP - len = - EC_POINT_point2oct(group, Q, POINT_CONVERSION_HYBRID, buf, sizeof buf, - ctx); - if (len == 0) - ABORT; - if (!EC_POINT_oct2point(group, P, buf, len, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, Q, ctx)) - ABORT; - fprintf(stdout, "\nGenerator as octet string, hybrid form:\n "); - for (i = 0; i < len; i++) - fprintf(stdout, "%02X", buf[i]); -# endif - - fprintf(stdout, "\n"); - - if (!EC_POINT_invert(group, P, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, R, ctx)) - ABORT; - - /* Curve K-163 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve K-163", - "0800000000000000000000000000000000000000C9", - "1", - "1", - "02FE13C0537BBC11ACAA07D793DE4E6D5E5C94EEE8", - "0289070FB05D38FF58321F2E800536D538CCDAA3D9", - 1, "04000000000000000000020108A2E0CC0D99F8A5EF", "2", 163, C2_K163); - - /* Curve B-163 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve B-163", - "0800000000000000000000000000000000000000C9", - "1", - "020A601907B8C953CA1481EB10512F78744A3205FD", - "03F0EBA16286A2D57EA0991168D4994637E8343E36", - "00D51FBC6C71A0094FA2CDD545B11C5C0C797324F1", - 1, "040000000000000000000292FE77E70C12A4234C33", "2", 163, C2_B163); - - /* Curve K-233 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve K-233", - "020000000000000000000000000000000000000004000000000000000001", - "0", - "1", - "017232BA853A7E731AF129F22FF4149563A419C26BF50A4C9D6EEFAD6126", - "01DB537DECE819B7F70F555A67C427A8CD9BF18AEB9B56E0C11056FAE6A3", - 0, - "008000000000000000000000000000069D5BB915BCD46EFB1AD5F173ABDF", - "4", 233, C2_K233); - - /* Curve B-233 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve B-233", - "020000000000000000000000000000000000000004000000000000000001", - "000000000000000000000000000000000000000000000000000000000001", - "0066647EDE6C332C7F8C0923BB58213B333B20E9CE4281FE115F7D8F90AD", - "00FAC9DFCBAC8313BB2139F1BB755FEF65BC391F8B36F8F8EB7371FD558B", - "01006A08A41903350678E58528BEBF8A0BEFF867A7CA36716F7E01F81052", - 1, - "01000000000000000000000000000013E974E72F8A6922031D2603CFE0D7", - "2", 233, C2_B233); - - /* Curve K-283 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve K-283", - "0800000000000000000000000000000000000000000000000000000000000000000010A1", - "0", - "1", - "0503213F78CA44883F1A3B8162F188E553CD265F23C1567A16876913B0C2AC2458492836", - "01CCDA380F1C9E318D90F95D07E5426FE87E45C0E8184698E45962364E34116177DD2259", - 0, - "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE9AE2ED07577265DFF7F94451E061E163C61", - "4", 283, C2_K283); - - /* Curve B-283 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve B-283", - "0800000000000000000000000000000000000000000000000000000000000000000010A1", - "000000000000000000000000000000000000000000000000000000000000000000000001", - "027B680AC8B8596DA5A4AF8A19A0303FCA97FD7645309FA2A581485AF6263E313B79A2F5", - "05F939258DB7DD90E1934F8C70B0DFEC2EED25B8557EAC9C80E2E198F8CDBECD86B12053", - "03676854FE24141CB98FE6D4B20D02B4516FF702350EDDB0826779C813F0DF45BE8112F4", - 1, - "03FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEF90399660FC938A90165B042A7CEFADB307", - "2", 283, C2_B283); - - /* Curve K-409 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve K-409", - "02000000000000000000000000000000000000000000000000000000000000000000000000000000008000000000000000000001", - "0", - "1", - "0060F05F658F49C1AD3AB1890F7184210EFD0987E307C84C27ACCFB8F9F67CC2C460189EB5AAAA62EE222EB1B35540CFE9023746", - "01E369050B7C4E42ACBA1DACBF04299C3460782F918EA427E6325165E9EA10E3DA5F6C42E9C55215AA9CA27A5863EC48D8E0286B", - 1, - "007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE5F83B2D4EA20400EC4557D5ED3E3E7CA5B4B5C83B8E01E5FCF", - "4", 409, C2_K409); - - /* Curve B-409 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve B-409", - "02000000000000000000000000000000000000000000000000000000000000000000000000000000008000000000000000000001", - "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", - "0021A5C2C8EE9FEB5C4B9A753B7B476B7FD6422EF1F3DD674761FA99D6AC27C8A9A197B272822F6CD57A55AA4F50AE317B13545F", - "015D4860D088DDB3496B0C6064756260441CDE4AF1771D4DB01FFE5B34E59703DC255A868A1180515603AEAB60794E54BB7996A7", - "0061B1CFAB6BE5F32BBFA78324ED106A7636B9C5A7BD198D0158AA4F5488D08F38514F1FDF4B4F40D2181B3681C364BA0273C706", - 1, - "010000000000000000000000000000000000000000000000000001E2AAD6A612F33307BE5FA47C3C9E052F838164CD37D9A21173", - "2", 409, C2_B409); - - /* Curve K-571 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve K-571", - "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000425", - "0", - "1", - "026EB7A859923FBC82189631F8103FE4AC9CA2970012D5D46024804801841CA44370958493B205E647DA304DB4CEB08CBBD1BA39494776FB988B47174DCA88C7E2945283A01C8972", - "0349DC807F4FBF374F4AEADE3BCA95314DD58CEC9F307A54FFC61EFC006D8A2C9D4979C0AC44AEA74FBEBBB9F772AEDCB620B01A7BA7AF1B320430C8591984F601CD4C143EF1C7A3", - 0, - "020000000000000000000000000000000000000000000000000000000000000000000000131850E1F19A63E4B391A8DB917F4138B630D84BE5D639381E91DEB45CFE778F637C1001", - "4", 571, C2_K571); - - /* Curve B-571 (FIPS PUB 186-2, App. 6) */ - CHAR2_CURVE_TEST - ("NIST curve B-571", - "80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000425", - "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", - "02F40E7E2221F295DE297117B7F3D62F5C6A97FFCB8CEFF1CD6BA8CE4A9A18AD84FFABBD8EFA59332BE7AD6756A66E294AFD185A78FF12AA520E4DE739BACA0C7FFEFF7F2955727A", - "0303001D34B856296C16C0D40D3CD7750A93D1D2955FA80AA5F40FC8DB7B2ABDBDE53950F4C0D293CDD711A35B67FB1499AE60038614F1394ABFA3B4C850D927E1E7769C8EEC2D19", - "037BF27342DA639B6DCCFFFEB73D69D78C6C27A6009CBBCA1980F8533921E8A684423E43BAB08A576291AF8F461BB2A8B3531D2F0485C19B16E2F1516E23DD3C1A4827AF1B8AC15B", - 1, - "03FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE661CE18FF55987308059B186823851EC7DD9CA1161DE93D5174D66E8382E9BB2FE84E47", - "2", 571, C2_B571); - - /* more tests using the last curve */ - - if (!EC_POINT_copy(Q, P)) - ABORT; - if (EC_POINT_is_at_infinity(group, Q)) - ABORT; - if (!EC_POINT_dbl(group, P, P, ctx)) - ABORT; - if (EC_POINT_is_on_curve(group, P, ctx) <= 0) - ABORT; - if (!EC_POINT_invert(group, Q, ctx)) - ABORT; /* P = -2Q */ - - if (!EC_POINT_add(group, R, P, Q, ctx)) - ABORT; - if (!EC_POINT_add(group, R, R, Q, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, R)) - ABORT; /* R = P + 2Q */ - - { - const EC_POINT *points[3]; - const BIGNUM *scalars[3]; - - if (EC_POINT_is_at_infinity(group, Q)) - ABORT; - points[0] = Q; - points[1] = Q; - points[2] = Q; - - if (!BN_add(y, z, BN_value_one())) - ABORT; - if (BN_is_odd(y)) - ABORT; - if (!BN_rshift1(y, y)) - ABORT; - scalars[0] = y; /* (group order + 1)/2, so y*Q + y*Q = Q */ - scalars[1] = y; - - fprintf(stdout, "combined multiplication ..."); - fflush(stdout); - - /* z is still the group order */ - if (!EC_POINTs_mul(group, P, NULL, 2, points, scalars, ctx)) - ABORT; - if (!EC_POINTs_mul(group, R, z, 2, points, scalars, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, P, R, ctx)) - ABORT; - if (0 != EC_POINT_cmp(group, R, Q, ctx)) - ABORT; - - fprintf(stdout, "."); - fflush(stdout); - - if (!BN_pseudo_rand(y, BN_num_bits(y), 0, 0)) - ABORT; - if (!BN_add(z, z, y)) - ABORT; - BN_set_negative(z, 1); - scalars[0] = y; - scalars[1] = z; /* z = -(order + y) */ - - if (!EC_POINTs_mul(group, P, NULL, 2, points, scalars, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - fprintf(stdout, "."); - fflush(stdout); - - if (!BN_pseudo_rand(x, BN_num_bits(y) - 1, 0, 0)) - ABORT; - if (!BN_add(z, x, y)) - ABORT; - BN_set_negative(z, 1); - scalars[0] = x; - scalars[1] = y; - scalars[2] = z; /* z = -(x+y) */ - - if (!EC_POINTs_mul(group, P, NULL, 3, points, scalars, ctx)) - ABORT; - if (!EC_POINT_is_at_infinity(group, P)) - ABORT; - - fprintf(stdout, " ok\n\n"); - } - -# if 0 - timings(C2_K163, TIMING_BASE_PT, ctx); - timings(C2_K163, TIMING_RAND_PT, ctx); - timings(C2_K163, TIMING_SIMUL, ctx); - timings(C2_B163, TIMING_BASE_PT, ctx); - timings(C2_B163, TIMING_RAND_PT, ctx); - timings(C2_B163, TIMING_SIMUL, ctx); - timings(C2_K233, TIMING_BASE_PT, ctx); - timings(C2_K233, TIMING_RAND_PT, ctx); - timings(C2_K233, TIMING_SIMUL, ctx); - timings(C2_B233, TIMING_BASE_PT, ctx); - timings(C2_B233, TIMING_RAND_PT, ctx); - timings(C2_B233, TIMING_SIMUL, ctx); - timings(C2_K283, TIMING_BASE_PT, ctx); - timings(C2_K283, TIMING_RAND_PT, ctx); - timings(C2_K283, TIMING_SIMUL, ctx); - timings(C2_B283, TIMING_BASE_PT, ctx); - timings(C2_B283, TIMING_RAND_PT, ctx); - timings(C2_B283, TIMING_SIMUL, ctx); - timings(C2_K409, TIMING_BASE_PT, ctx); - timings(C2_K409, TIMING_RAND_PT, ctx); - timings(C2_K409, TIMING_SIMUL, ctx); - timings(C2_B409, TIMING_BASE_PT, ctx); - timings(C2_B409, TIMING_RAND_PT, ctx); - timings(C2_B409, TIMING_SIMUL, ctx); - timings(C2_K571, TIMING_BASE_PT, ctx); - timings(C2_K571, TIMING_RAND_PT, ctx); - timings(C2_K571, TIMING_SIMUL, ctx); - timings(C2_B571, TIMING_BASE_PT, ctx); - timings(C2_B571, TIMING_RAND_PT, ctx); - timings(C2_B571, TIMING_SIMUL, ctx); -# endif - - if (ctx) - BN_CTX_free(ctx); - BN_free(p); - BN_free(a); - BN_free(b); - EC_GROUP_free(group); - EC_POINT_free(P); - EC_POINT_free(Q); - EC_POINT_free(R); - BN_free(x); - BN_free(y); - BN_free(z); - BN_free(cof); - - if (C2_K163) - EC_GROUP_free(C2_K163); - if (C2_B163) - EC_GROUP_free(C2_B163); - if (C2_K233) - EC_GROUP_free(C2_K233); - if (C2_B233) - EC_GROUP_free(C2_B233); - if (C2_K283) - EC_GROUP_free(C2_K283); - if (C2_B283) - EC_GROUP_free(C2_B283); - if (C2_K409) - EC_GROUP_free(C2_K409); - if (C2_B409) - EC_GROUP_free(C2_B409); - if (C2_K571) - EC_GROUP_free(C2_K571); - if (C2_B571) - EC_GROUP_free(C2_B571); - -} -# endif - -static void internal_curve_test(void) -{ - EC_builtin_curve *curves = NULL; - size_t crv_len = 0, n = 0; - int ok = 1; - - crv_len = EC_get_builtin_curves(NULL, 0); - - curves = OPENSSL_malloc(sizeof(EC_builtin_curve) * crv_len); - - if (curves == NULL) - return; - - if (!EC_get_builtin_curves(curves, crv_len)) { - OPENSSL_free(curves); - return; - } - - fprintf(stdout, "testing internal curves: "); - - for (n = 0; n < crv_len; n++) { - EC_GROUP *group = NULL; - int nid = curves[n].nid; - if ((group = EC_GROUP_new_by_curve_name(nid)) == NULL) { - ok = 0; - fprintf(stdout, "\nEC_GROUP_new_curve_name() failed with" - " curve %s\n", OBJ_nid2sn(nid)); - /* try next curve */ - continue; - } - if (!EC_GROUP_check(group, NULL)) { - ok = 0; - fprintf(stdout, "\nEC_GROUP_check() failed with" - " curve %s\n", OBJ_nid2sn(nid)); - EC_GROUP_free(group); - /* try the next curve */ - continue; - } - fprintf(stdout, "."); - fflush(stdout); - EC_GROUP_free(group); - } - if (ok) - fprintf(stdout, " ok\n\n"); - else { - fprintf(stdout, " failed\n\n"); - ABORT; - } - OPENSSL_free(curves); - return; -} - -# ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 -/* - * nistp_test_params contains magic numbers for testing our optimized - * implementations of several NIST curves with characteristic > 3. - */ -struct nistp_test_params { - const EC_METHOD *(*meth) (); - int degree; - /* - * Qx, Qy and D are taken from - * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/ECDSA_Prime.pdf - * Otherwise, values are standard curve parameters from FIPS 180-3 - */ - const char *p, *a, *b, *Qx, *Qy, *Gx, *Gy, *order, *d; -}; - -static const struct nistp_test_params nistp_tests_params[] = { - { - /* P-224 */ - EC_GFp_nistp224_method, - 224, - /* p */ - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", - /* a */ - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", - /* b */ - "B4050A850C04B3ABF54132565044B0B7D7BFD8BA270B39432355FFB4", - /* Qx */ - "E84FB0B8E7000CB657D7973CF6B42ED78B301674276DF744AF130B3E", - /* Qy */ - "4376675C6FC5612C21A0FF2D2A89D2987DF7A2BC52183B5982298555", - /* Gx */ - "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21", - /* Gy */ - "BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34", - /* order */ - "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D", - /* d */ - "3F0C488E987C80BE0FEE521F8D90BE6034EC69AE11CA72AA777481E8", - }, - { - /* P-256 */ - EC_GFp_nistp256_method, - 256, - /* p */ - "ffffffff00000001000000000000000000000000ffffffffffffffffffffffff", - /* a */ - "ffffffff00000001000000000000000000000000fffffffffffffffffffffffc", - /* b */ - "5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", - /* Qx */ - "b7e08afdfe94bad3f1dc8c734798ba1c62b3a0ad1e9ea2a38201cd0889bc7a19", - /* Qy */ - "3603f747959dbf7a4bb226e41928729063adc7ae43529e61b563bbc606cc5e09", - /* Gx */ - "6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", - /* Gy */ - "4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", - /* order */ - "ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", - /* d */ - "c477f9f65c22cce20657faa5b2d1d8122336f851a508a1ed04e479c34985bf96", - }, - { - /* P-521 */ - EC_GFp_nistp521_method, - 521, - /* p */ - "1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", - /* a */ - "1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc", - /* b */ - "051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", - /* Qx */ - "0098e91eef9a68452822309c52fab453f5f117c1da8ed796b255e9ab8f6410cca16e59df403a6bdc6ca467a37056b1e54b3005d8ac030decfeb68df18b171885d5c4", - /* Qy */ - "0164350c321aecfc1cca1ba4364c9b15656150b4b78d6a48d7d28e7f31985ef17be8554376b72900712c4b83ad668327231526e313f5f092999a4632fd50d946bc2e", - /* Gx */ - "c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", - /* Gy */ - "11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", - /* order */ - "1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", - /* d */ - "0100085f47b8e1b8b11b7eb33028c0b2888e304bfc98501955b45bba1478dc184eeedf09b86a5f7c21994406072787205e69a63709fe35aa93ba333514b24f961722", - }, -}; - -static void nistp_single_test(const struct nistp_test_params *test) -{ - BN_CTX *ctx; - BIGNUM *p, *a, *b, *x, *y, *n, *m, *order; - EC_GROUP *NISTP; - EC_POINT *G, *P, *Q, *Q_CHECK; - - fprintf(stdout, "\nNIST curve P-%d (optimised implementation):\n", - test->degree); - ctx = BN_CTX_new(); - p = BN_new(); - a = BN_new(); - b = BN_new(); - x = BN_new(); - y = BN_new(); - m = BN_new(); - n = BN_new(); - order = BN_new(); - - NISTP = EC_GROUP_new(test->meth()); - if (!NISTP) - ABORT; - if (!BN_hex2bn(&p, test->p)) - ABORT; - if (1 != BN_is_prime_ex(p, BN_prime_checks, ctx, NULL)) - ABORT; - if (!BN_hex2bn(&a, test->a)) - ABORT; - if (!BN_hex2bn(&b, test->b)) - ABORT; - if (!EC_GROUP_set_curve_GFp(NISTP, p, a, b, ctx)) - ABORT; - G = EC_POINT_new(NISTP); - P = EC_POINT_new(NISTP); - Q = EC_POINT_new(NISTP); - Q_CHECK = EC_POINT_new(NISTP); - if (!BN_hex2bn(&x, test->Qx)) - ABORT; - if (!BN_hex2bn(&y, test->Qy)) - ABORT; - if (!EC_POINT_set_affine_coordinates_GFp(NISTP, Q_CHECK, x, y, ctx)) - ABORT; - if (!BN_hex2bn(&x, test->Gx)) - ABORT; - if (!BN_hex2bn(&y, test->Gy)) - ABORT; - if (!EC_POINT_set_affine_coordinates_GFp(NISTP, G, x, y, ctx)) - ABORT; - if (!BN_hex2bn(&order, test->order)) - ABORT; - if (!EC_GROUP_set_generator(NISTP, G, order, BN_value_one())) - ABORT; - - fprintf(stdout, "verify degree ... "); - if (EC_GROUP_get_degree(NISTP) != test->degree) - ABORT; - fprintf(stdout, "ok\n"); - - fprintf(stdout, "NIST test vectors ... "); - if (!BN_hex2bn(&n, test->d)) - ABORT; - /* fixed point multiplication */ - EC_POINT_mul(NISTP, Q, n, NULL, NULL, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - /* random point multiplication */ - EC_POINT_mul(NISTP, Q, NULL, G, n, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - - /* set generator to P = 2*G, where G is the standard generator */ - if (!EC_POINT_dbl(NISTP, P, G, ctx)) - ABORT; - if (!EC_GROUP_set_generator(NISTP, P, order, BN_value_one())) - ABORT; - /* set the scalar to m=n/2, where n is the NIST test scalar */ - if (!BN_rshift(m, n, 1)) - ABORT; - - /* test the non-standard generator */ - /* fixed point multiplication */ - EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - /* random point multiplication */ - EC_POINT_mul(NISTP, Q, NULL, P, m, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - - /* now repeat all tests with precomputation */ - if (!EC_GROUP_precompute_mult(NISTP, ctx)) - ABORT; - - /* fixed point multiplication */ - EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - /* random point multiplication */ - EC_POINT_mul(NISTP, Q, NULL, P, m, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - - /* reset generator */ - if (!EC_GROUP_set_generator(NISTP, G, order, BN_value_one())) - ABORT; - /* fixed point multiplication */ - EC_POINT_mul(NISTP, Q, n, NULL, NULL, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - /* random point multiplication */ - EC_POINT_mul(NISTP, Q, NULL, G, n, ctx); - if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx)) - ABORT; - - fprintf(stdout, "ok\n"); - group_order_tests(NISTP); -# if 0 - timings(NISTP, TIMING_BASE_PT, ctx); - timings(NISTP, TIMING_RAND_PT, ctx); -# endif - EC_GROUP_free(NISTP); - EC_POINT_free(G); - EC_POINT_free(P); - EC_POINT_free(Q); - EC_POINT_free(Q_CHECK); - BN_free(n); - BN_free(m); - BN_free(p); - BN_free(a); - BN_free(b); - BN_free(x); - BN_free(y); - BN_free(order); - BN_CTX_free(ctx); -} - -static void nistp_tests() -{ - unsigned i; - - for (i = 0; - i < sizeof(nistp_tests_params) / sizeof(struct nistp_test_params); - i++) { - nistp_single_test(&nistp_tests_params[i]); - } -} -# endif - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -int main(int argc, char *argv[]) -{ - - /* enable memory leak checking unless explicitly disabled */ - if (!((getenv("OPENSSL_DEBUG_MEMORY") != NULL) - && (0 == strcmp(getenv("OPENSSL_DEBUG_MEMORY"), "off")))) { - CRYPTO_malloc_debug_init(); - CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); - } else { - /* OPENSSL_DEBUG_MEMORY=off */ - CRYPTO_set_mem_debug_functions(0, 0, 0, 0, 0); - } - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - ERR_load_crypto_strings(); - - RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_generate_prime may fail */ - - prime_field_tests(); - puts(""); -# ifndef OPENSSL_NO_EC2M - char2_field_tests(); -# endif -# ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 - nistp_tests(); -# endif - /* test the internal curves */ - internal_curve_test(); - -# ifndef OPENSSL_NO_ENGINE - ENGINE_cleanup(); -# endif - CRYPTO_cleanup_all_ex_data(); - ERR_free_strings(); - ERR_remove_thread_state(NULL); - CRYPTO_mem_leaks_fp(stderr); - - return 0; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/ecdh/ecdhtest.c b/drivers/builtin_openssl2/crypto/ecdh/ecdhtest.c deleted file mode 100644 index 996321d1ee..0000000000 --- a/drivers/builtin_openssl2/crypto/ecdh/ecdhtest.c +++ /dev/null @@ -1,410 +0,0 @@ -/* crypto/ecdh/ecdhtest.c */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * The Elliptic Curve Public-Key Crypto Library (ECC Code) included - * herein is developed by SUN MICROSYSTEMS, INC., and is contributed - * to the OpenSSL project. - * - * The ECC Code is licensed pursuant to the OpenSSL open source - * license provided below. - * - * The ECDH software is originally written by Douglas Stebila of - * Sun Microsystems Laboratories. - * - */ -/* ==================================================================== - * Copyright (c) 1998-2003 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include -#include -#include - -#include "../e_os.h" - -#include /* for OPENSSL_NO_ECDH */ -#include -#include -#include -#include -#include -#include -#include - -#ifdef OPENSSL_NO_ECDH -int main(int argc, char *argv[]) -{ - printf("No ECDH support\n"); - return (0); -} -#else -# include -# include - -# ifdef OPENSSL_SYS_WIN16 -# define MS_CALLBACK _far _loadds -# else -# define MS_CALLBACK -# endif - -# if 0 -static void MS_CALLBACK cb(int p, int n, void *arg); -# endif - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -static const int KDF1_SHA1_len = 20; -static void *KDF1_SHA1(const void *in, size_t inlen, void *out, - size_t *outlen) -{ -# ifndef OPENSSL_NO_SHA - if (*outlen < SHA_DIGEST_LENGTH) - return NULL; - else - *outlen = SHA_DIGEST_LENGTH; - return SHA1(in, inlen, out); -# else - return NULL; -# endif -} - -static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out) -{ - EC_KEY *a = NULL; - EC_KEY *b = NULL; - BIGNUM *x_a = NULL, *y_a = NULL, *x_b = NULL, *y_b = NULL; - char buf[12]; - unsigned char *abuf = NULL, *bbuf = NULL; - int i, alen, blen, aout, bout, ret = 0; - const EC_GROUP *group; - - a = EC_KEY_new_by_curve_name(nid); - b = EC_KEY_new_by_curve_name(nid); - if (a == NULL || b == NULL) - goto err; - - group = EC_KEY_get0_group(a); - - if ((x_a = BN_new()) == NULL) - goto err; - if ((y_a = BN_new()) == NULL) - goto err; - if ((x_b = BN_new()) == NULL) - goto err; - if ((y_b = BN_new()) == NULL) - goto err; - - BIO_puts(out, "Testing key generation with "); - BIO_puts(out, text); -# ifdef NOISY - BIO_puts(out, "\n"); -# else - (void)BIO_flush(out); -# endif - - if (!EC_KEY_generate_key(a)) - goto err; - - if (EC_METHOD_get_field_type(EC_GROUP_method_of(group)) == - NID_X9_62_prime_field) { - if (!EC_POINT_get_affine_coordinates_GFp - (group, EC_KEY_get0_public_key(a), x_a, y_a, ctx)) - goto err; - } -# ifndef OPENSSL_NO_EC2M - else { - if (!EC_POINT_get_affine_coordinates_GF2m(group, - EC_KEY_get0_public_key(a), - x_a, y_a, ctx)) - goto err; - } -# endif -# ifdef NOISY - BIO_puts(out, " pri 1="); - BN_print(out, a->priv_key); - BIO_puts(out, "\n pub 1="); - BN_print(out, x_a); - BIO_puts(out, ","); - BN_print(out, y_a); - BIO_puts(out, "\n"); -# else - BIO_printf(out, " ."); - (void)BIO_flush(out); -# endif - - if (!EC_KEY_generate_key(b)) - goto err; - - if (EC_METHOD_get_field_type(EC_GROUP_method_of(group)) == - NID_X9_62_prime_field) { - if (!EC_POINT_get_affine_coordinates_GFp - (group, EC_KEY_get0_public_key(b), x_b, y_b, ctx)) - goto err; - } -# ifndef OPENSSL_NO_EC2M - else { - if (!EC_POINT_get_affine_coordinates_GF2m(group, - EC_KEY_get0_public_key(b), - x_b, y_b, ctx)) - goto err; - } -# endif - -# ifdef NOISY - BIO_puts(out, " pri 2="); - BN_print(out, b->priv_key); - BIO_puts(out, "\n pub 2="); - BN_print(out, x_b); - BIO_puts(out, ","); - BN_print(out, y_b); - BIO_puts(out, "\n"); -# else - BIO_printf(out, "."); - (void)BIO_flush(out); -# endif - - alen = KDF1_SHA1_len; - abuf = (unsigned char *)OPENSSL_malloc(alen); - aout = - ECDH_compute_key(abuf, alen, EC_KEY_get0_public_key(b), a, KDF1_SHA1); - -# ifdef NOISY - BIO_puts(out, " key1 ="); - for (i = 0; i < aout; i++) { - sprintf(buf, "%02X", abuf[i]); - BIO_puts(out, buf); - } - BIO_puts(out, "\n"); -# else - BIO_printf(out, "."); - (void)BIO_flush(out); -# endif - - blen = KDF1_SHA1_len; - bbuf = (unsigned char *)OPENSSL_malloc(blen); - bout = - ECDH_compute_key(bbuf, blen, EC_KEY_get0_public_key(a), b, KDF1_SHA1); - -# ifdef NOISY - BIO_puts(out, " key2 ="); - for (i = 0; i < bout; i++) { - sprintf(buf, "%02X", bbuf[i]); - BIO_puts(out, buf); - } - BIO_puts(out, "\n"); -# else - BIO_printf(out, "."); - (void)BIO_flush(out); -# endif - - if ((aout < 4) || (bout != aout) || (memcmp(abuf, bbuf, aout) != 0)) { -# ifndef NOISY - BIO_printf(out, " failed\n\n"); - BIO_printf(out, "key a:\n"); - BIO_printf(out, "private key: "); - BN_print(out, EC_KEY_get0_private_key(a)); - BIO_printf(out, "\n"); - BIO_printf(out, "public key (x,y): "); - BN_print(out, x_a); - BIO_printf(out, ","); - BN_print(out, y_a); - BIO_printf(out, "\nkey b:\n"); - BIO_printf(out, "private key: "); - BN_print(out, EC_KEY_get0_private_key(b)); - BIO_printf(out, "\n"); - BIO_printf(out, "public key (x,y): "); - BN_print(out, x_b); - BIO_printf(out, ","); - BN_print(out, y_b); - BIO_printf(out, "\n"); - BIO_printf(out, "generated key a: "); - for (i = 0; i < bout; i++) { - sprintf(buf, "%02X", bbuf[i]); - BIO_puts(out, buf); - } - BIO_printf(out, "\n"); - BIO_printf(out, "generated key b: "); - for (i = 0; i < aout; i++) { - sprintf(buf, "%02X", abuf[i]); - BIO_puts(out, buf); - } - BIO_printf(out, "\n"); -# endif - fprintf(stderr, "Error in ECDH routines\n"); - ret = 0; - } else { -# ifndef NOISY - BIO_printf(out, " ok\n"); -# endif - ret = 1; - } - err: - ERR_print_errors_fp(stderr); - - if (abuf != NULL) - OPENSSL_free(abuf); - if (bbuf != NULL) - OPENSSL_free(bbuf); - if (x_a) - BN_free(x_a); - if (y_a) - BN_free(y_a); - if (x_b) - BN_free(x_b); - if (y_b) - BN_free(y_b); - if (b) - EC_KEY_free(b); - if (a) - EC_KEY_free(a); - return (ret); -} - -int main(int argc, char *argv[]) -{ - BN_CTX *ctx = NULL; - int ret = 1; - BIO *out; - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - -# ifdef OPENSSL_SYS_WIN32 - CRYPTO_malloc_init(); -# endif - - RAND_seed(rnd_seed, sizeof rnd_seed); - - out = BIO_new(BIO_s_file()); - if (out == NULL) - EXIT(1); - BIO_set_fp(out, stdout, BIO_NOCLOSE); - - if ((ctx = BN_CTX_new()) == NULL) - goto err; - - /* NIST PRIME CURVES TESTS */ - if (!test_ecdh_curve - (NID_X9_62_prime192v1, "NIST Prime-Curve P-192", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_secp224r1, "NIST Prime-Curve P-224", ctx, out)) - goto err; - if (!test_ecdh_curve - (NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) - goto err; -# ifndef OPENSSL_NO_EC2M - /* NIST BINARY CURVES TESTS */ - if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect233k1, "NIST Binary-Curve K-233", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect233r1, "NIST Binary-Curve B-233", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect283k1, "NIST Binary-Curve K-283", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect283r1, "NIST Binary-Curve B-283", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect409k1, "NIST Binary-Curve K-409", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) - goto err; - if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) - goto err; -# endif - - ret = 0; - - err: - ERR_print_errors_fp(stderr); - if (ctx) - BN_CTX_free(ctx); - BIO_free(out); - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - CRYPTO_mem_leaks_fp(stderr); - EXIT(ret); - return (ret); -} - -# if 0 -static void MS_CALLBACK cb(int p, int n, void *arg) -{ - char c = '*'; - - if (p == 0) - c = '.'; - if (p == 1) - c = '+'; - if (p == 2) - c = '*'; - if (p == 3) - c = '\n'; - BIO_write((BIO *)arg, &c, 1); - (void)BIO_flush((BIO *)arg); -# ifdef LINT - p = n; -# endif -} -# endif -#endif diff --git a/drivers/builtin_openssl2/crypto/ecdh/ech_kdf.c b/drivers/builtin_openssl2/crypto/ecdh/ech_kdf.c new file mode 100644 index 0000000000..ac722ac9ee --- /dev/null +++ b/drivers/builtin_openssl2/crypto/ecdh/ech_kdf.c @@ -0,0 +1,111 @@ +/* crypto/ecdh/ec_kdf.c */ +/* + * Written by Stephen Henson for the OpenSSL project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#define OPENSSL_FIPSAPI + +#include +#include +#include + +/* Key derivation function from X9.62/SECG */ +/* Way more than we will ever need */ +#define ECDH_KDF_MAX (1 << 30) + +int ECDH_KDF_X9_62(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + const unsigned char *sinfo, size_t sinfolen, + const EVP_MD *md) +{ + EVP_MD_CTX mctx; + int rv = 0; + unsigned int i; + size_t mdlen; + unsigned char ctr[4]; + if (sinfolen > ECDH_KDF_MAX || outlen > ECDH_KDF_MAX + || Zlen > ECDH_KDF_MAX) + return 0; + mdlen = EVP_MD_size(md); + EVP_MD_CTX_init(&mctx); + for (i = 1;; i++) { + unsigned char mtmp[EVP_MAX_MD_SIZE]; + EVP_DigestInit_ex(&mctx, md, NULL); + ctr[3] = i & 0xFF; + ctr[2] = (i >> 8) & 0xFF; + ctr[1] = (i >> 16) & 0xFF; + ctr[0] = (i >> 24) & 0xFF; + if (!EVP_DigestUpdate(&mctx, Z, Zlen)) + goto err; + if (!EVP_DigestUpdate(&mctx, ctr, sizeof(ctr))) + goto err; + if (!EVP_DigestUpdate(&mctx, sinfo, sinfolen)) + goto err; + if (outlen >= mdlen) { + if (!EVP_DigestFinal(&mctx, out, NULL)) + goto err; + outlen -= mdlen; + if (outlen == 0) + break; + out += mdlen; + } else { + if (!EVP_DigestFinal(&mctx, mtmp, NULL)) + goto err; + memcpy(out, mtmp, outlen); + OPENSSL_cleanse(mtmp, mdlen); + break; + } + } + rv = 1; + err: + EVP_MD_CTX_cleanup(&mctx); + return rv; +} diff --git a/drivers/builtin_openssl2/crypto/ecdh/ech_ossl.c b/drivers/builtin_openssl2/crypto/ecdh/ech_ossl.c index d448b19a52..df115cc262 100644 --- a/drivers/builtin_openssl2/crypto/ecdh/ech_ossl.c +++ b/drivers/builtin_openssl2/crypto/ecdh/ech_ossl.c @@ -138,6 +138,16 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, } group = EC_KEY_get0_group(ecdh); + + if (EC_KEY_get_flags(ecdh) & EC_FLAG_COFACTOR_ECDH) { + if (!EC_GROUP_get_cofactor(group, x, ctx) || + !BN_mul(x, x, priv_key, ctx)) { + ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE); + goto err; + } + priv_key = x; + } + if ((tmp = EC_POINT_new(group)) == NULL) { ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE); goto err; diff --git a/drivers/builtin_openssl2/crypto/ecdsa/ecdsatest.c b/drivers/builtin_openssl2/crypto/ecdsa/ecdsatest.c deleted file mode 100644 index 0f301f86d9..0000000000 --- a/drivers/builtin_openssl2/crypto/ecdsa/ecdsatest.c +++ /dev/null @@ -1,556 +0,0 @@ -/* crypto/ecdsa/ecdsatest.c */ -/* - * Written by Nils Larsch for the OpenSSL project. - */ -/* ==================================================================== - * Copyright (c) 2000-2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * Portions of the attached software ("Contribution") are developed by - * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project. - * - * The Contribution is licensed pursuant to the OpenSSL open source - * license provided above. - * - * The elliptic curve binary polynomial software is originally written by - * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories. - * - */ - -#include -#include -#include - -#include /* To see if OPENSSL_NO_ECDSA is defined */ - -#ifdef OPENSSL_NO_ECDSA -int main(int argc, char *argv[]) -{ - puts("Elliptic curves are disabled."); - return 0; -} -#else - -# include -# include -# include -# include -# include -# ifndef OPENSSL_NO_ENGINE -# include -# endif -# include -# include - -static const char rnd_seed[] = "string to make the random number generator " - "think it has entropy"; - -/* declaration of the test functions */ -int x9_62_tests(BIO *); -int x9_62_test_internal(BIO *out, int nid, const char *r, const char *s); -int test_builtin(BIO *); - -/* functions to change the RAND_METHOD */ -int change_rand(void); -int restore_rand(void); -int fbytes(unsigned char *buf, int num); - -RAND_METHOD fake_rand; -const RAND_METHOD *old_rand; - -int change_rand(void) -{ - /* save old rand method */ - if ((old_rand = RAND_get_rand_method()) == NULL) - return 0; - - fake_rand.seed = old_rand->seed; - fake_rand.cleanup = old_rand->cleanup; - fake_rand.add = old_rand->add; - fake_rand.status = old_rand->status; - /* use own random function */ - fake_rand.bytes = fbytes; - fake_rand.pseudorand = old_rand->bytes; - /* set new RAND_METHOD */ - if (!RAND_set_rand_method(&fake_rand)) - return 0; - return 1; -} - -int restore_rand(void) -{ - if (!RAND_set_rand_method(old_rand)) - return 0; - else - return 1; -} - -static int fbytes_counter = 0; -static const char *numbers[8] = { - "651056770906015076056810763456358567190100156695615665659", - "6140507067065001063065065565667405560006161556565665656654", - "8763001015071075675010661307616710783570106710677817767166" - "71676178726717", - "7000000175690566466555057817571571075705015757757057795755" - "55657156756655", - "1275552191113212300012030439187146164646146646466749494799", - "1542725565216523985789236956265265265235675811949404040041", - "1456427555219115346513212300075341203043918714616464614664" - "64667494947990", - "1712787255652165239672857892369562652652652356758119494040" - "40041670216363" -}; - -int fbytes(unsigned char *buf, int num) -{ - int ret; - BIGNUM *tmp = NULL; - - if (fbytes_counter >= 8) - return 0; - tmp = BN_new(); - if (!tmp) - return 0; - if (!BN_dec2bn(&tmp, numbers[fbytes_counter])) { - BN_free(tmp); - return 0; - } - fbytes_counter++; - if (num != BN_num_bytes(tmp) || !BN_bn2bin(tmp, buf)) - ret = 0; - else - ret = 1; - if (tmp) - BN_free(tmp); - return ret; -} - -/* some tests from the X9.62 draft */ -int x9_62_test_internal(BIO *out, int nid, const char *r_in, const char *s_in) -{ - int ret = 0; - const char message[] = "abc"; - unsigned char digest[20]; - unsigned int dgst_len = 0; - EVP_MD_CTX md_ctx; - EC_KEY *key = NULL; - ECDSA_SIG *signature = NULL; - BIGNUM *r = NULL, *s = NULL; - - EVP_MD_CTX_init(&md_ctx); - /* get the message digest */ - EVP_DigestInit(&md_ctx, EVP_ecdsa()); - EVP_DigestUpdate(&md_ctx, (const void *)message, 3); - EVP_DigestFinal(&md_ctx, digest, &dgst_len); - - BIO_printf(out, "testing %s: ", OBJ_nid2sn(nid)); - /* create the key */ - if ((key = EC_KEY_new_by_curve_name(nid)) == NULL) - goto x962_int_err; - if (!EC_KEY_generate_key(key)) - goto x962_int_err; - BIO_printf(out, "."); - (void)BIO_flush(out); - /* create the signature */ - signature = ECDSA_do_sign(digest, 20, key); - if (signature == NULL) - goto x962_int_err; - BIO_printf(out, "."); - (void)BIO_flush(out); - /* compare the created signature with the expected signature */ - if ((r = BN_new()) == NULL || (s = BN_new()) == NULL) - goto x962_int_err; - if (!BN_dec2bn(&r, r_in) || !BN_dec2bn(&s, s_in)) - goto x962_int_err; - if (BN_cmp(signature->r, r) || BN_cmp(signature->s, s)) - goto x962_int_err; - BIO_printf(out, "."); - (void)BIO_flush(out); - /* verify the signature */ - if (ECDSA_do_verify(digest, 20, signature, key) != 1) - goto x962_int_err; - BIO_printf(out, "."); - (void)BIO_flush(out); - - BIO_printf(out, " ok\n"); - ret = 1; - x962_int_err: - if (!ret) - BIO_printf(out, " failed\n"); - if (key) - EC_KEY_free(key); - if (signature) - ECDSA_SIG_free(signature); - if (r) - BN_free(r); - if (s) - BN_free(s); - EVP_MD_CTX_cleanup(&md_ctx); - return ret; -} - -int x9_62_tests(BIO *out) -{ - int ret = 0; - - BIO_printf(out, "some tests from X9.62:\n"); - - /* set own rand method */ - if (!change_rand()) - goto x962_err; - - if (!x9_62_test_internal(out, NID_X9_62_prime192v1, - "3342403536405981729393488334694600415596881826869351677613", - "5735822328888155254683894997897571951568553642892029982342")) - goto x962_err; - if (!x9_62_test_internal(out, NID_X9_62_prime239v1, - "3086361431751678114926225473006680188549593787585317781474" - "62058306432176", - "3238135532097973577080787768312505059318910517550078427819" - "78505179448783")) - goto x962_err; -# ifndef OPENSSL_NO_EC2M - if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1, - "87194383164871543355722284926904419997237591535066528048", - "308992691965804947361541664549085895292153777025772063598")) - goto x962_err; - if (!x9_62_test_internal(out, NID_X9_62_c2tnb239v1, - "2159633321041961198501834003903461262881815148684178964245" - "5876922391552", - "1970303740007316867383349976549972270528498040721988191026" - "49413465737174")) - goto x962_err; -# endif - ret = 1; - x962_err: - if (!restore_rand()) - ret = 0; - return ret; -} - -int test_builtin(BIO *out) -{ - EC_builtin_curve *curves = NULL; - size_t crv_len = 0, n = 0; - EC_KEY *eckey = NULL, *wrong_eckey = NULL; - EC_GROUP *group; - ECDSA_SIG *ecdsa_sig = NULL; - unsigned char digest[20], wrong_digest[20]; - unsigned char *signature = NULL; - const unsigned char *sig_ptr; - unsigned char *sig_ptr2; - unsigned char *raw_buf = NULL; - unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len; - int nid, ret = 0; - - /* fill digest values with some random data */ - if (RAND_pseudo_bytes(digest, 20) <= 0 || - RAND_pseudo_bytes(wrong_digest, 20) <= 0) { - BIO_printf(out, "ERROR: unable to get random data\n"); - goto builtin_err; - } - - /* - * create and verify a ecdsa signature with every availble curve (with ) - */ - BIO_printf(out, "\ntesting ECDSA_sign() and ECDSA_verify() " - "with some internal curves:\n"); - - /* get a list of all internal curves */ - crv_len = EC_get_builtin_curves(NULL, 0); - - curves = OPENSSL_malloc(sizeof(EC_builtin_curve) * crv_len); - - if (curves == NULL) { - BIO_printf(out, "malloc error\n"); - goto builtin_err; - } - - if (!EC_get_builtin_curves(curves, crv_len)) { - BIO_printf(out, "unable to get internal curves\n"); - goto builtin_err; - } - - /* now create and verify a signature for every curve */ - for (n = 0; n < crv_len; n++) { - unsigned char dirt, offset; - - nid = curves[n].nid; - if (nid == NID_ipsec4) - continue; - /* create new ecdsa key (== EC_KEY) */ - if ((eckey = EC_KEY_new()) == NULL) - goto builtin_err; - group = EC_GROUP_new_by_curve_name(nid); - if (group == NULL) - goto builtin_err; - if (EC_KEY_set_group(eckey, group) == 0) - goto builtin_err; - EC_GROUP_free(group); - degree = EC_GROUP_get_degree(EC_KEY_get0_group(eckey)); - if (degree < 160) - /* drop the curve */ - { - EC_KEY_free(eckey); - eckey = NULL; - continue; - } - BIO_printf(out, "%s: ", OBJ_nid2sn(nid)); - /* create key */ - if (!EC_KEY_generate_key(eckey)) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - /* create second key */ - if ((wrong_eckey = EC_KEY_new()) == NULL) - goto builtin_err; - group = EC_GROUP_new_by_curve_name(nid); - if (group == NULL) - goto builtin_err; - if (EC_KEY_set_group(wrong_eckey, group) == 0) - goto builtin_err; - EC_GROUP_free(group); - if (!EC_KEY_generate_key(wrong_eckey)) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - - BIO_printf(out, "."); - (void)BIO_flush(out); - /* check key */ - if (!EC_KEY_check_key(eckey)) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - /* create signature */ - sig_len = ECDSA_size(eckey); - if ((signature = OPENSSL_malloc(sig_len)) == NULL) - goto builtin_err; - if (!ECDSA_sign(0, digest, 20, signature, &sig_len, eckey)) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - /* verify signature */ - if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - /* verify signature with the wrong key */ - if (ECDSA_verify(0, digest, 20, signature, sig_len, wrong_eckey) == 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - /* wrong digest */ - if (ECDSA_verify(0, wrong_digest, 20, signature, sig_len, eckey) == 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - /* wrong length */ - if (ECDSA_verify(0, digest, 20, signature, sig_len - 1, eckey) == 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - - /* - * Modify a single byte of the signature: to ensure we don't garble - * the ASN1 structure, we read the raw signature and modify a byte in - * one of the bignums directly. - */ - sig_ptr = signature; - if ((ecdsa_sig = d2i_ECDSA_SIG(NULL, &sig_ptr, sig_len)) == NULL) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - - /* Store the two BIGNUMs in raw_buf. */ - r_len = BN_num_bytes(ecdsa_sig->r); - s_len = BN_num_bytes(ecdsa_sig->s); - bn_len = (degree + 7) / 8; - if ((r_len > bn_len) || (s_len > bn_len)) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - buf_len = 2 * bn_len; - if ((raw_buf = OPENSSL_malloc(buf_len)) == NULL) - goto builtin_err; - /* Pad the bignums with leading zeroes. */ - memset(raw_buf, 0, buf_len); - BN_bn2bin(ecdsa_sig->r, raw_buf + bn_len - r_len); - BN_bn2bin(ecdsa_sig->s, raw_buf + buf_len - s_len); - - /* Modify a single byte in the buffer. */ - offset = raw_buf[10] % buf_len; - dirt = raw_buf[11] ? raw_buf[11] : 1; - raw_buf[offset] ^= dirt; - /* Now read the BIGNUMs back in from raw_buf. */ - if ((BN_bin2bn(raw_buf, bn_len, ecdsa_sig->r) == NULL) || - (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) - goto builtin_err; - - sig_ptr2 = signature; - sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2); - if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - /* - * Sanity check: undo the modification and verify signature. - */ - raw_buf[offset] ^= dirt; - if ((BN_bin2bn(raw_buf, bn_len, ecdsa_sig->r) == NULL) || - (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) - goto builtin_err; - - sig_ptr2 = signature; - sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2); - if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1) { - BIO_printf(out, " failed\n"); - goto builtin_err; - } - BIO_printf(out, "."); - (void)BIO_flush(out); - - BIO_printf(out, " ok\n"); - /* cleanup */ - /* clean bogus errors */ - ERR_clear_error(); - OPENSSL_free(signature); - signature = NULL; - EC_KEY_free(eckey); - eckey = NULL; - EC_KEY_free(wrong_eckey); - wrong_eckey = NULL; - ECDSA_SIG_free(ecdsa_sig); - ecdsa_sig = NULL; - OPENSSL_free(raw_buf); - raw_buf = NULL; - } - - ret = 1; - builtin_err: - if (eckey) - EC_KEY_free(eckey); - if (wrong_eckey) - EC_KEY_free(wrong_eckey); - if (ecdsa_sig) - ECDSA_SIG_free(ecdsa_sig); - if (signature) - OPENSSL_free(signature); - if (raw_buf) - OPENSSL_free(raw_buf); - if (curves) - OPENSSL_free(curves); - - return ret; -} - -int main(void) -{ - int ret = 1; - BIO *out; - - out = BIO_new_fp(stdout, BIO_NOCLOSE); - - /* enable memory leak checking unless explicitly disabled */ - if (!((getenv("OPENSSL_DEBUG_MEMORY") != NULL) && - (0 == strcmp(getenv("OPENSSL_DEBUG_MEMORY"), "off")))) { - CRYPTO_malloc_debug_init(); - CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); - } else { - /* OPENSSL_DEBUG_MEMORY=off */ - CRYPTO_set_mem_debug_functions(0, 0, 0, 0, 0); - } - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - ERR_load_crypto_strings(); - - /* initialize the prng */ - RAND_seed(rnd_seed, sizeof(rnd_seed)); - - /* the tests */ - if (!x9_62_tests(out)) - goto err; - if (!test_builtin(out)) - goto err; - - ret = 0; - err: - if (ret) - BIO_printf(out, "\nECDSA test failed\n"); - else - BIO_printf(out, "\nECDSA test passed\n"); - if (ret) - ERR_print_errors(out); - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - ERR_free_strings(); - CRYPTO_mem_leaks(out); - if (out != NULL) - BIO_free(out); - return ret; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/ecdsa/ecs_err.c b/drivers/builtin_openssl2/crypto/ecdsa/ecs_err.c index 6fc64a0077..f1fa7b55f9 100644 --- a/drivers/builtin_openssl2/crypto/ecdsa/ecs_err.c +++ b/drivers/builtin_openssl2/crypto/ecdsa/ecs_err.c @@ -74,6 +74,7 @@ static ERR_STRING_DATA ECDSA_str_functs[] = { {ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"}, + {ERR_FUNC(ECDSA_F_ECDSA_METHOD_NEW), "ECDSA_METHOD_new"}, {ERR_FUNC(ECDSA_F_ECDSA_SIGN_SETUP), "ECDSA_sign_setup"}, {0, NULL} }; diff --git a/drivers/builtin_openssl2/crypto/ecdsa/ecs_lib.c b/drivers/builtin_openssl2/crypto/ecdsa/ecs_lib.c index 0f2d3432f6..8dc1dda462 100644 --- a/drivers/builtin_openssl2/crypto/ecdsa/ecs_lib.c +++ b/drivers/builtin_openssl2/crypto/ecdsa/ecs_lib.c @@ -275,3 +275,80 @@ void *ECDSA_get_ex_data(EC_KEY *d, int idx) return NULL; return (CRYPTO_get_ex_data(&ecdsa->ex_data, idx)); } + +ECDSA_METHOD *ECDSA_METHOD_new(const ECDSA_METHOD *ecdsa_meth) +{ + ECDSA_METHOD *ret; + + ret = OPENSSL_malloc(sizeof(ECDSA_METHOD)); + if (ret == NULL) { + ECDSAerr(ECDSA_F_ECDSA_METHOD_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } + + if (ecdsa_meth) + *ret = *ecdsa_meth; + else { + ret->ecdsa_sign_setup = 0; + ret->ecdsa_do_sign = 0; + ret->ecdsa_do_verify = 0; + ret->name = NULL; + ret->flags = 0; + } + ret->flags |= ECDSA_METHOD_FLAG_ALLOCATED; + return ret; +} + +void ECDSA_METHOD_set_sign(ECDSA_METHOD *ecdsa_method, + ECDSA_SIG *(*ecdsa_do_sign) (const unsigned char + *dgst, int dgst_len, + const BIGNUM *inv, + const BIGNUM *rp, + EC_KEY *eckey)) +{ + ecdsa_method->ecdsa_do_sign = ecdsa_do_sign; +} + +void ECDSA_METHOD_set_sign_setup(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_sign_setup) (EC_KEY *eckey, + BN_CTX *ctx, + BIGNUM **kinv, + BIGNUM **r)) +{ + ecdsa_method->ecdsa_sign_setup = ecdsa_sign_setup; +} + +void ECDSA_METHOD_set_verify(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_do_verify) (const unsigned char + *dgst, int dgst_len, + const ECDSA_SIG *sig, + EC_KEY *eckey)) +{ + ecdsa_method->ecdsa_do_verify = ecdsa_do_verify; +} + +void ECDSA_METHOD_set_flags(ECDSA_METHOD *ecdsa_method, int flags) +{ + ecdsa_method->flags = flags | ECDSA_METHOD_FLAG_ALLOCATED; +} + +void ECDSA_METHOD_set_name(ECDSA_METHOD *ecdsa_method, char *name) +{ + ecdsa_method->name = name; +} + +void ECDSA_METHOD_free(ECDSA_METHOD *ecdsa_method) +{ + if (ecdsa_method->flags & ECDSA_METHOD_FLAG_ALLOCATED) + OPENSSL_free(ecdsa_method); +} + +void ECDSA_METHOD_set_app_data(ECDSA_METHOD *ecdsa_method, void *app) +{ + ecdsa_method->app_data = app; +} + +void *ECDSA_METHOD_get_app_data(ECDSA_METHOD *ecdsa_method) +{ + return ecdsa_method->app_data; +} diff --git a/drivers/builtin_openssl2/crypto/ecdsa/ecs_locl.h b/drivers/builtin_openssl2/crypto/ecdsa/ecs_locl.h index 76b2caf1f4..d3a5efc547 100644 --- a/drivers/builtin_openssl2/crypto/ecdsa/ecs_locl.h +++ b/drivers/builtin_openssl2/crypto/ecdsa/ecs_locl.h @@ -79,9 +79,13 @@ struct ecdsa_method { int (*finish) (EC_KEY *eckey); # endif int flags; - char *app_data; + void *app_data; }; +/* The ECDSA_METHOD was allocated and can be freed */ + +# define ECDSA_METHOD_FLAG_ALLOCATED 0x2 + /* * If this flag is set the ECDSA method is FIPS compliant and can be used in * FIPS mode. This is set in the validated module method. If an application diff --git a/drivers/builtin_openssl2/crypto/ecdsa/ecs_ossl.c b/drivers/builtin_openssl2/crypto/ecdsa/ecs_ossl.c index 4c5fa6b926..dd769609be 100644 --- a/drivers/builtin_openssl2/crypto/ecdsa/ecs_ossl.c +++ b/drivers/builtin_openssl2/crypto/ecdsa/ecs_ossl.c @@ -179,10 +179,32 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, while (BN_is_zero(r)); /* compute the inverse of k */ - if (!BN_mod_inverse(k, k, order, ctx)) { - ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; + if (EC_GROUP_get_mont_data(group) != NULL) { + /* + * We want inverse in constant time, therefore we utilize the fact + * order must be prime and use Fermats Little Theorem instead. + */ + if (!BN_set_word(X, 2)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + if (!BN_mod_sub(X, order, X, order, ctx)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + BN_set_flags(X, BN_FLG_CONSTTIME); + if (!BN_mod_exp_mont_consttime + (k, k, X, order, ctx, EC_GROUP_get_mont_data(group))) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + } else { + if (!BN_mod_inverse(k, k, order, ctx)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } } + /* clear old values if necessary */ if (*rp != NULL) BN_clear_free(*rp); diff --git a/drivers/builtin_openssl2/crypto/engine/eng_all.c b/drivers/builtin_openssl2/crypto/engine/eng_all.c index 66c4374d5e..48ad0d26b4 100644 --- a/drivers/builtin_openssl2/crypto/engine/eng_all.c +++ b/drivers/builtin_openssl2/crypto/engine/eng_all.c @@ -76,9 +76,6 @@ void ENGINE_load_builtin_engines(void) #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) ENGINE_load_cryptodev(); #endif -#ifndef OPENSSL_NO_RSAX - ENGINE_load_rsax(); -#endif #ifndef OPENSSL_NO_RDRAND ENGINE_load_rdrand(); #endif diff --git a/drivers/builtin_openssl2/crypto/engine/eng_cryptodev.c b/drivers/builtin_openssl2/crypto/engine/eng_cryptodev.c index a8a24d0543..8fb9c3373d 100644 --- a/drivers/builtin_openssl2/crypto/engine/eng_cryptodev.c +++ b/drivers/builtin_openssl2/crypto/engine/eng_cryptodev.c @@ -54,10 +54,10 @@ void ENGINE_load_cryptodev(void) # include # include -# include -# include -# include -# include +# include +# include +# include +# include # include # include # include @@ -160,6 +160,17 @@ static struct { { CRYPTO_AES_CBC, NID_aes_256_cbc, 16, 32, }, +# ifdef CRYPTO_AES_CTR + { + CRYPTO_AES_CTR, NID_aes_128_ctr, 14, 16, + }, + { + CRYPTO_AES_CTR, NID_aes_192_ctr, 14, 24, + }, + { + CRYPTO_AES_CTR, NID_aes_256_ctr, 14, 32, + }, +# endif { CRYPTO_BLF_CBC, NID_bf_cbc, 8, 16, }, @@ -630,6 +641,46 @@ const EVP_CIPHER cryptodev_aes_256_cbc = { NULL }; +# ifdef CRYPTO_AES_CTR +const EVP_CIPHER cryptodev_aes_ctr = { + NID_aes_128_ctr, + 16, 16, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; + +const EVP_CIPHER cryptodev_aes_ctr_192 = { + NID_aes_192_ctr, + 16, 24, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; + +const EVP_CIPHER cryptodev_aes_ctr_256 = { + NID_aes_256_ctr, + 16, 32, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; +# endif /* * Registered by the ENGINE when used to find out how to deal with * a particular NID in the ENGINE. this says what we'll do at the @@ -667,6 +718,17 @@ cryptodev_engine_ciphers(ENGINE *e, const EVP_CIPHER **cipher, case NID_aes_256_cbc: *cipher = &cryptodev_aes_256_cbc; break; +# ifdef CRYPTO_AES_CTR + case NID_aes_128_ctr: + *cipher = &cryptodev_aes_ctr; + break; + case NID_aes_192_ctr: + *cipher = &cryptodev_aes_ctr_192; + break; + case NID_aes_256_ctr: + *cipher = &cryptodev_aes_ctr_256; + break; +# endif default: *cipher = NULL; break; diff --git a/drivers/builtin_openssl2/crypto/engine/eng_dyn.c b/drivers/builtin_openssl2/crypto/engine/eng_dyn.c index 3169b09ad8..40f30e9d58 100644 --- a/drivers/builtin_openssl2/crypto/engine/eng_dyn.c +++ b/drivers/builtin_openssl2/crypto/engine/eng_dyn.c @@ -243,8 +243,10 @@ static int dynamic_set_data_ctx(ENGINE *e, dynamic_data_ctx **ctx) * If we lost the race to set the context, c is non-NULL and *ctx is the * context of the thread that won. */ - if (c) + if (c) { + sk_OPENSSL_STRING_free(c->dirs); OPENSSL_free(c); + } return 1; } diff --git a/drivers/builtin_openssl2/crypto/engine/eng_rsax.c b/drivers/builtin_openssl2/crypto/engine/eng_rsax.c deleted file mode 100644 index 86ee9d8939..0000000000 --- a/drivers/builtin_openssl2/crypto/engine/eng_rsax.c +++ /dev/null @@ -1,701 +0,0 @@ -/* crypto/engine/eng_rsax.c */ -/* Copyright (c) 2010-2010 Intel Corp. - * Author: Vinodh.Gopal@intel.com - * Jim Guilford - * Erdinc.Ozturk@intel.com - * Maxim.Perminov@intel.com - * Ying.Huang@intel.com - * - * More information about algorithm used can be found at: - * http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf - */ -/* ==================================================================== - * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - */ - -#include - -#include -#include -#include -#include -#include -#ifndef OPENSSL_NO_RSA -# include -#endif -#include -#include - -/* RSAX is available **ONLY* on x86_64 CPUs */ -#undef COMPILE_RSAX - -#if (defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_AMD64) || defined (_M_X64)) && !defined(OPENSSL_NO_ASM) -# define COMPILE_RSAX -static ENGINE *ENGINE_rsax(void); -#endif - -void ENGINE_load_rsax(void) -{ -/* On non-x86 CPUs it just returns. */ -#ifdef COMPILE_RSAX - ENGINE *toadd = ENGINE_rsax(); - if (!toadd) - return; - ENGINE_add(toadd); - ENGINE_free(toadd); - ERR_clear_error(); -#endif -} - -#ifdef COMPILE_RSAX -# define E_RSAX_LIB_NAME "rsax engine" - -static int e_rsax_destroy(ENGINE *e); -static int e_rsax_init(ENGINE *e); -static int e_rsax_finish(ENGINE *e); -static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void)); - -# ifndef OPENSSL_NO_RSA -/* RSA stuff */ -static int e_rsax_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa, - BN_CTX *ctx); -static int e_rsax_rsa_finish(RSA *r); -# endif - -static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = { - {0, NULL, NULL, 0} -}; - -# ifndef OPENSSL_NO_RSA -/* Our internal RSA_METHOD that we provide pointers to */ -static RSA_METHOD e_rsax_rsa = { - "Intel RSA-X method", - NULL, - NULL, - NULL, - NULL, - e_rsax_rsa_mod_exp, - NULL, - NULL, - e_rsax_rsa_finish, - RSA_FLAG_CACHE_PUBLIC | RSA_FLAG_CACHE_PRIVATE, - NULL, - NULL, - NULL -}; -# endif - -/* Constants used when creating the ENGINE */ -static const char *engine_e_rsax_id = "rsax"; -static const char *engine_e_rsax_name = "RSAX engine support"; - -/* This internal function is used by ENGINE_rsax() */ -static int bind_helper(ENGINE *e) -{ -# ifndef OPENSSL_NO_RSA - const RSA_METHOD *meth1; -# endif - if (!ENGINE_set_id(e, engine_e_rsax_id) || - !ENGINE_set_name(e, engine_e_rsax_name) || -# ifndef OPENSSL_NO_RSA - !ENGINE_set_RSA(e, &e_rsax_rsa) || -# endif - !ENGINE_set_destroy_function(e, e_rsax_destroy) || - !ENGINE_set_init_function(e, e_rsax_init) || - !ENGINE_set_finish_function(e, e_rsax_finish) || - !ENGINE_set_ctrl_function(e, e_rsax_ctrl) || - !ENGINE_set_cmd_defns(e, e_rsax_cmd_defns)) - return 0; - -# ifndef OPENSSL_NO_RSA - meth1 = RSA_PKCS1_SSLeay(); - e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc; - e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec; - e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc; - e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec; - e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp; -# endif - return 1; -} - -static ENGINE *ENGINE_rsax(void) -{ - ENGINE *ret = ENGINE_new(); - if (!ret) - return NULL; - if (!bind_helper(ret)) { - ENGINE_free(ret); - return NULL; - } - return ret; -} - -# ifndef OPENSSL_NO_RSA -/* Used to attach our own key-data to an RSA structure */ -static int rsax_ex_data_idx = -1; -# endif - -static int e_rsax_destroy(ENGINE *e) -{ - return 1; -} - -/* (de)initialisation functions. */ -static int e_rsax_init(ENGINE *e) -{ -# ifndef OPENSSL_NO_RSA - if (rsax_ex_data_idx == -1) - rsax_ex_data_idx = RSA_get_ex_new_index(0, NULL, NULL, NULL, NULL); -# endif - if (rsax_ex_data_idx == -1) - return 0; - return 1; -} - -static int e_rsax_finish(ENGINE *e) -{ - return 1; -} - -static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void)) -{ - int to_return = 1; - - switch (cmd) { - /* The command isn't understood by this engine */ - default: - to_return = 0; - break; - } - - return to_return; -} - -# ifndef OPENSSL_NO_RSA - -# ifdef _WIN32 -typedef unsigned __int64 UINT64; -# else -typedef unsigned long long UINT64; -# endif -typedef unsigned short UINT16; - -/* - * Table t is interleaved in the following manner: The order in memory is - * t[0][0], t[0][1], ..., t[0][7], t[1][0], ... A particular 512-bit value is - * stored in t[][index] rather than the more normal t[index][]; i.e. the - * qwords of a particular entry in t are not adjacent in memory - */ - -/* Init BIGNUM b from the interleaved UINT64 array */ -static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array); - -/* - * Extract array elements from BIGNUM b To set the whole array from b, call - * with n=8 - */ -static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n, - UINT64 *array); - -struct mod_ctx_512 { - UINT64 t[8][8]; - UINT64 m[8]; - UINT64 m1[8]; /* 2^278 % m */ - UINT64 m2[8]; /* 2^640 % m */ - UINT64 k1[2]; /* (- 1/m) % 2^128 */ -}; - -static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data); - -void mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */ - UINT64 *g, /* 512 bits, 8 qwords */ - UINT64 *exp, /* 512 bits, 8 qwords */ - struct mod_ctx_512 *data); - -typedef struct st_e_rsax_mod_ctx { - UINT64 type; - union { - struct mod_ctx_512 b512; - } ctx; - -} E_RSAX_MOD_CTX; - -static E_RSAX_MOD_CTX *e_rsax_get_ctx(RSA *rsa, int idx, BIGNUM *m) -{ - E_RSAX_MOD_CTX *hptr; - - if (idx < 0 || idx > 2) - return NULL; - - hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx); - if (!hptr) { - hptr = OPENSSL_malloc(3 * sizeof(E_RSAX_MOD_CTX)); - if (!hptr) - return NULL; - hptr[2].type = hptr[1].type = hptr[0].type = 0; - RSA_set_ex_data(rsa, rsax_ex_data_idx, hptr); - } - - if (hptr[idx].type == (UINT64)BN_num_bits(m)) - return hptr + idx; - - if (BN_num_bits(m) == 512) { - UINT64 _m[8]; - bn_extract_to_array_512(m, 8, _m); - memset(&hptr[idx].ctx.b512, 0, sizeof(struct mod_ctx_512)); - mod_exp_pre_compute_data_512(_m, &hptr[idx].ctx.b512); - } - - hptr[idx].type = BN_num_bits(m); - return hptr + idx; -} - -static int e_rsax_rsa_finish(RSA *rsa) -{ - E_RSAX_MOD_CTX *hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx); - if (hptr) { - OPENSSL_free(hptr); - RSA_set_ex_data(rsa, rsax_ex_data_idx, NULL); - } - if (rsa->_method_mod_n) - BN_MONT_CTX_free(rsa->_method_mod_n); - if (rsa->_method_mod_p) - BN_MONT_CTX_free(rsa->_method_mod_p); - if (rsa->_method_mod_q) - BN_MONT_CTX_free(rsa->_method_mod_q); - return 1; -} - -static int e_rsax_bn_mod_exp(BIGNUM *r, const BIGNUM *g, const BIGNUM *e, - const BIGNUM *m, BN_CTX *ctx, - BN_MONT_CTX *in_mont, - E_RSAX_MOD_CTX *rsax_mod_ctx) -{ - if (rsax_mod_ctx && BN_get_flags(e, BN_FLG_CONSTTIME) != 0) { - if (BN_num_bits(m) == 512) { - UINT64 _r[8]; - UINT64 _g[8]; - UINT64 _e[8]; - - /* Init the arrays from the BIGNUMs */ - bn_extract_to_array_512(g, 8, _g); - bn_extract_to_array_512(e, 8, _e); - - mod_exp_512(_r, _g, _e, &rsax_mod_ctx->ctx.b512); - /* Return the result in the BIGNUM */ - interleaved_array_to_bn_512(r, _r); - return 1; - } - } - - return BN_mod_exp_mont(r, g, e, m, ctx, in_mont); -} - -/* - * Declares for the Intel CIAP 512-bit / CRT / 1024 bit RSA modular - * exponentiation routine precalculations and a structure to hold the - * necessary values. These files are meant to live in crypto/rsa/ in the - * target openssl. - */ - -/* - * Local method: extracts a piece from a BIGNUM, to fit it into - * an array. Call with n=8 to extract an entire 512-bit BIGNUM - */ -static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n, - UINT64 *array) -{ - int i; - UINT64 tmp; - unsigned char bn_buff[64]; - memset(bn_buff, 0, 64); - if (BN_num_bytes(b) > 64) { - printf("Can't support this byte size\n"); - return 0; - } - if (BN_num_bytes(b) != 0) { - if (!BN_bn2bin(b, bn_buff + (64 - BN_num_bytes(b)))) { - printf("Error's in bn2bin\n"); - /* We have to error, here */ - return 0; - } - } - while (n-- > 0) { - array[n] = 0; - for (i = 7; i >= 0; i--) { - tmp = bn_buff[63 - (n * 8 + i)]; - array[n] |= tmp << (8 * i); - } - } - return 1; -} - -/* Init a 512-bit BIGNUM from the UINT64*_ (8 * 64) interleaved array */ -static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array) -{ - unsigned char tmp[64]; - int n = 8; - int i; - while (n-- > 0) { - for (i = 7; i >= 0; i--) { - tmp[63 - (n * 8 + i)] = (unsigned char)(array[n] >> (8 * i)); - }} - BN_bin2bn(tmp, 64, b); - return 0; -} - -/* The main 512bit precompute call */ -static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data) -{ - BIGNUM two_768, two_640, two_128, two_512, tmp, _m, tmp2; - - /* We need a BN_CTX for the modulo functions */ - BN_CTX *ctx; - /* Some tmps */ - UINT64 _t[8]; - int i, j, ret = 0; - - /* Init _m with m */ - BN_init(&_m); - interleaved_array_to_bn_512(&_m, m); - memset(_t, 0, 64); - - /* Inits */ - BN_init(&two_768); - BN_init(&two_640); - BN_init(&two_128); - BN_init(&two_512); - BN_init(&tmp); - BN_init(&tmp2); - - /* Create our context */ - if ((ctx = BN_CTX_new()) == NULL) { - goto err; - } - BN_CTX_start(ctx); - - /* - * For production, if you care, these only need to be set once, - * and may be made constants. - */ - BN_lshift(&two_768, BN_value_one(), 768); - BN_lshift(&two_640, BN_value_one(), 640); - BN_lshift(&two_128, BN_value_one(), 128); - BN_lshift(&two_512, BN_value_one(), 512); - - if (0 == (m[7] & 0x8000000000000000)) { - goto err; - } - if (0 == (m[0] & 0x1)) { /* Odd modulus required for Mont */ - goto err; - } - - /* Precompute m1 */ - BN_mod(&tmp, &two_768, &_m, ctx); - if (!bn_extract_to_array_512(&tmp, 8, &data->m1[0])) { - goto err; - } - - /* Precompute m2 */ - BN_mod(&tmp, &two_640, &_m, ctx); - if (!bn_extract_to_array_512(&tmp, 8, &data->m2[0])) { - goto err; - } - - /* - * Precompute k1, a 128b number = ((-1)* m-1 ) mod 2128; k1 should - * be non-negative. - */ - BN_mod_inverse(&tmp, &_m, &two_128, ctx); - if (!BN_is_zero(&tmp)) { - BN_sub(&tmp, &two_128, &tmp); - } - if (!bn_extract_to_array_512(&tmp, 2, &data->k1[0])) { - goto err; - } - - /* Precompute t */ - for (i = 0; i < 8; i++) { - BN_zero(&tmp); - if (i & 1) { - BN_add(&tmp, &two_512, &tmp); - } - if (i & 2) { - BN_add(&tmp, &two_512, &tmp); - } - if (i & 4) { - BN_add(&tmp, &two_640, &tmp); - } - - BN_nnmod(&tmp2, &tmp, &_m, ctx); - if (!bn_extract_to_array_512(&tmp2, 8, _t)) { - goto err; - } - for (j = 0; j < 8; j++) - data->t[j][i] = _t[j]; - } - - /* Precompute m */ - for (i = 0; i < 8; i++) { - data->m[i] = m[i]; - } - - ret = 1; - - err: - /* Cleanup */ - if (ctx != NULL) { - BN_CTX_end(ctx); - BN_CTX_free(ctx); - } - BN_free(&two_768); - BN_free(&two_640); - BN_free(&two_128); - BN_free(&two_512); - BN_free(&tmp); - BN_free(&tmp2); - BN_free(&_m); - - return ret; -} - -static int e_rsax_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, - BN_CTX *ctx) -{ - BIGNUM *r1, *m1, *vrfy; - BIGNUM local_dmp1, local_dmq1, local_c, local_r1; - BIGNUM *dmp1, *dmq1, *c, *pr1; - int ret = 0; - - BN_CTX_start(ctx); - r1 = BN_CTX_get(ctx); - m1 = BN_CTX_get(ctx); - vrfy = BN_CTX_get(ctx); - - { - BIGNUM local_p, local_q; - BIGNUM *p = NULL, *q = NULL; - int error = 0; - - /* - * Make sure BN_mod_inverse in Montgomery intialization uses the - * BN_FLG_CONSTTIME flag (unless RSA_FLAG_NO_CONSTTIME is set) - */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - BN_init(&local_p); - p = &local_p; - BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME); - - BN_init(&local_q); - q = &local_q; - BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME); - } else { - p = rsa->p; - q = rsa->q; - } - - if (rsa->flags & RSA_FLAG_CACHE_PRIVATE) { - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_p, CRYPTO_LOCK_RSA, p, ctx)) - error = 1; - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx)) - error = 1; - } - - /* clean up */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - BN_free(&local_p); - BN_free(&local_q); - } - if (error) - goto err; - } - - if (rsa->flags & RSA_FLAG_CACHE_PUBLIC) - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx)) - goto err; - - /* compute I mod q */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - c = &local_c; - BN_with_flags(c, I, BN_FLG_CONSTTIME); - if (!BN_mod(r1, c, rsa->q, ctx)) - goto err; - } else { - if (!BN_mod(r1, I, rsa->q, ctx)) - goto err; - } - - /* compute r1^dmq1 mod q */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - dmq1 = &local_dmq1; - BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME); - } else - dmq1 = rsa->dmq1; - - if (!e_rsax_bn_mod_exp(m1, r1, dmq1, rsa->q, ctx, - rsa->_method_mod_q, e_rsax_get_ctx(rsa, 0, - rsa->q))) - goto err; - - /* compute I mod p */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - c = &local_c; - BN_with_flags(c, I, BN_FLG_CONSTTIME); - if (!BN_mod(r1, c, rsa->p, ctx)) - goto err; - } else { - if (!BN_mod(r1, I, rsa->p, ctx)) - goto err; - } - - /* compute r1^dmp1 mod p */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - dmp1 = &local_dmp1; - BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME); - } else - dmp1 = rsa->dmp1; - - if (!e_rsax_bn_mod_exp(r0, r1, dmp1, rsa->p, ctx, - rsa->_method_mod_p, e_rsax_get_ctx(rsa, 1, - rsa->p))) - goto err; - - if (!BN_sub(r0, r0, m1)) - goto err; - /* - * This will help stop the size of r0 increasing, which does affect the - * multiply if it optimised for a power of 2 size - */ - if (BN_is_negative(r0)) - if (!BN_add(r0, r0, rsa->p)) - goto err; - - if (!BN_mul(r1, r0, rsa->iqmp, ctx)) - goto err; - - /* Turn BN_FLG_CONSTTIME flag on before division operation */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - pr1 = &local_r1; - BN_with_flags(pr1, r1, BN_FLG_CONSTTIME); - } else - pr1 = r1; - if (!BN_mod(r0, pr1, rsa->p, ctx)) - goto err; - - /* - * If p < q it is occasionally possible for the correction of adding 'p' - * if r0 is negative above to leave the result still negative. This can - * break the private key operations: the following second correction - * should *always* correct this rare occurrence. This will *never* happen - * with OpenSSL generated keys because they ensure p > q [steve] - */ - if (BN_is_negative(r0)) - if (!BN_add(r0, r0, rsa->p)) - goto err; - if (!BN_mul(r1, r0, rsa->q, ctx)) - goto err; - if (!BN_add(r0, r1, m1)) - goto err; - - if (rsa->e && rsa->n) { - if (!e_rsax_bn_mod_exp - (vrfy, r0, rsa->e, rsa->n, ctx, rsa->_method_mod_n, - e_rsax_get_ctx(rsa, 2, rsa->n))) - goto err; - - /* - * If 'I' was greater than (or equal to) rsa->n, the operation will - * be equivalent to using 'I mod n'. However, the result of the - * verify will *always* be less than 'n' so we don't check for - * absolute equality, just congruency. - */ - if (!BN_sub(vrfy, vrfy, I)) - goto err; - if (!BN_mod(vrfy, vrfy, rsa->n, ctx)) - goto err; - if (BN_is_negative(vrfy)) - if (!BN_add(vrfy, vrfy, rsa->n)) - goto err; - if (!BN_is_zero(vrfy)) { - /* - * 'I' and 'vrfy' aren't congruent mod n. Don't leak - * miscalculated CRT output, just do a raw (slower) mod_exp and - * return that instead. - */ - - BIGNUM local_d; - BIGNUM *d = NULL; - - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - d = &local_d; - BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME); - } else - d = rsa->d; - if (!e_rsax_bn_mod_exp(r0, I, d, rsa->n, ctx, - rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, - rsa->n))) - goto err; - } - } - ret = 1; - - err: - BN_CTX_end(ctx); - - return ret; -} -# endif /* !OPENSSL_NO_RSA */ -#endif /* !COMPILE_RSAX */ diff --git a/drivers/builtin_openssl2/crypto/engine/enginetest.c b/drivers/builtin_openssl2/crypto/engine/enginetest.c deleted file mode 100644 index ab7c0c00dd..0000000000 --- a/drivers/builtin_openssl2/crypto/engine/enginetest.c +++ /dev/null @@ -1,269 +0,0 @@ -/* crypto/engine/enginetest.c */ -/* - * Written by Geoff Thorpe (geoff@geoffthorpe.net) for the OpenSSL project - * 2000. - */ -/* ==================================================================== - * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include -#include -#include - -#ifdef OPENSSL_NO_ENGINE -int main(int argc, char *argv[]) -{ - printf("No ENGINE support\n"); - return (0); -} -#else -# include -# include -# include -# include - -static void display_engine_list(void) -{ - ENGINE *h; - int loop; - - h = ENGINE_get_first(); - loop = 0; - printf("listing available engine types\n"); - while (h) { - printf("engine %i, id = \"%s\", name = \"%s\"\n", - loop++, ENGINE_get_id(h), ENGINE_get_name(h)); - h = ENGINE_get_next(h); - } - printf("end of list\n"); - /* - * ENGINE_get_first() increases the struct_ref counter, so we must call - * ENGINE_free() to decrease it again - */ - ENGINE_free(h); -} - -int main(int argc, char *argv[]) -{ - ENGINE *block[512]; - char buf[256]; - const char *id, *name; - ENGINE *ptr; - int loop; - int to_return = 1; - ENGINE *new_h1 = NULL; - ENGINE *new_h2 = NULL; - ENGINE *new_h3 = NULL; - ENGINE *new_h4 = NULL; - - /* enable memory leak checking unless explicitly disabled */ - if (!((getenv("OPENSSL_DEBUG_MEMORY") != NULL) - && (0 == strcmp(getenv("OPENSSL_DEBUG_MEMORY"), "off")))) { - CRYPTO_malloc_debug_init(); - CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); - } else { - /* OPENSSL_DEBUG_MEMORY=off */ - CRYPTO_set_mem_debug_functions(0, 0, 0, 0, 0); - } - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - ERR_load_crypto_strings(); - - memset(block, 0, 512 * sizeof(ENGINE *)); - if (((new_h1 = ENGINE_new()) == NULL) || - !ENGINE_set_id(new_h1, "test_id0") || - !ENGINE_set_name(new_h1, "First test item") || - ((new_h2 = ENGINE_new()) == NULL) || - !ENGINE_set_id(new_h2, "test_id1") || - !ENGINE_set_name(new_h2, "Second test item") || - ((new_h3 = ENGINE_new()) == NULL) || - !ENGINE_set_id(new_h3, "test_id2") || - !ENGINE_set_name(new_h3, "Third test item") || - ((new_h4 = ENGINE_new()) == NULL) || - !ENGINE_set_id(new_h4, "test_id3") || - !ENGINE_set_name(new_h4, "Fourth test item")) { - printf("Couldn't set up test ENGINE structures\n"); - goto end; - } - printf("\nenginetest beginning\n\n"); - display_engine_list(); - if (!ENGINE_add(new_h1)) { - printf("Add failed!\n"); - goto end; - } - display_engine_list(); - ptr = ENGINE_get_first(); - if (!ENGINE_remove(ptr)) { - printf("Remove failed!\n"); - goto end; - } - if (ptr) - ENGINE_free(ptr); - display_engine_list(); - if (!ENGINE_add(new_h3) || !ENGINE_add(new_h2)) { - printf("Add failed!\n"); - goto end; - } - display_engine_list(); - if (!ENGINE_remove(new_h2)) { - printf("Remove failed!\n"); - goto end; - } - display_engine_list(); - if (!ENGINE_add(new_h4)) { - printf("Add failed!\n"); - goto end; - } - display_engine_list(); - if (ENGINE_add(new_h3)) { - printf("Add *should* have failed but didn't!\n"); - goto end; - } else - printf("Add that should fail did.\n"); - ERR_clear_error(); - if (ENGINE_remove(new_h2)) { - printf("Remove *should* have failed but didn't!\n"); - goto end; - } else - printf("Remove that should fail did.\n"); - ERR_clear_error(); - if (!ENGINE_remove(new_h3)) { - printf("Remove failed!\n"); - goto end; - } - display_engine_list(); - if (!ENGINE_remove(new_h4)) { - printf("Remove failed!\n"); - goto end; - } - display_engine_list(); - /* - * Depending on whether there's any hardware support compiled in, this - * remove may be destined to fail. - */ - ptr = ENGINE_get_first(); - if (ptr) - if (!ENGINE_remove(ptr)) - printf("Remove failed!i - probably no hardware " - "support present.\n"); - if (ptr) - ENGINE_free(ptr); - display_engine_list(); - if (!ENGINE_add(new_h1) || !ENGINE_remove(new_h1)) { - printf("Couldn't add and remove to an empty list!\n"); - goto end; - } else - printf("Successfully added and removed to an empty list!\n"); - printf("About to beef up the engine-type list\n"); - for (loop = 0; loop < 512; loop++) { - sprintf(buf, "id%i", loop); - id = BUF_strdup(buf); - sprintf(buf, "Fake engine type %i", loop); - name = BUF_strdup(buf); - if (((block[loop] = ENGINE_new()) == NULL) || - !ENGINE_set_id(block[loop], id) || - !ENGINE_set_name(block[loop], name)) { - printf("Couldn't create block of ENGINE structures.\n" - "I'll probably also core-dump now, damn.\n"); - goto end; - } - } - for (loop = 0; loop < 512; loop++) { - if (!ENGINE_add(block[loop])) { - printf("\nAdding stopped at %i, (%s,%s)\n", - loop, ENGINE_get_id(block[loop]), - ENGINE_get_name(block[loop])); - goto cleanup_loop; - } else - printf("."); - fflush(stdout); - } - cleanup_loop: - printf("\nAbout to empty the engine-type list\n"); - while ((ptr = ENGINE_get_first()) != NULL) { - if (!ENGINE_remove(ptr)) { - printf("\nRemove failed!\n"); - goto end; - } - ENGINE_free(ptr); - printf("."); - fflush(stdout); - } - for (loop = 0; loop < 512; loop++) { - OPENSSL_free((void *)ENGINE_get_id(block[loop])); - OPENSSL_free((void *)ENGINE_get_name(block[loop])); - } - printf("\nTests completed happily\n"); - to_return = 0; - end: - if (to_return) - ERR_print_errors_fp(stderr); - if (new_h1) - ENGINE_free(new_h1); - if (new_h2) - ENGINE_free(new_h2); - if (new_h3) - ENGINE_free(new_h3); - if (new_h4) - ENGINE_free(new_h4); - for (loop = 0; loop < 512; loop++) - if (block[loop]) - ENGINE_free(block[loop]); - ENGINE_cleanup(); - CRYPTO_cleanup_all_ex_data(); - ERR_free_strings(); - ERR_remove_thread_state(NULL); - CRYPTO_mem_leaks_fp(stderr); - return to_return; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/err/openssl.ec b/drivers/builtin_openssl2/crypto/err/openssl.ec index bafbc35d30..139afe3234 100644 --- a/drivers/builtin_openssl2/crypto/err/openssl.ec +++ b/drivers/builtin_openssl2/crypto/err/openssl.ec @@ -41,6 +41,7 @@ L NONE crypto/x509/x509_vfy.h NONE L NONE crypto/ec/ec_lcl.h NONE L NONE crypto/asn1/asn_lcl.h NONE L NONE crypto/cms/cms_lcl.h NONE +L NONE ssl/ssl_locl.h NONE F RSAREF_F_RSA_BN2BIN diff --git a/drivers/builtin_openssl2/crypto/evp/c_allc.c b/drivers/builtin_openssl2/crypto/evp/c_allc.c index 3097c2112e..280e58408f 100644 --- a/drivers/builtin_openssl2/crypto/evp/c_allc.c +++ b/drivers/builtin_openssl2/crypto/evp/c_allc.c @@ -93,6 +93,7 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_des_ecb()); EVP_add_cipher(EVP_des_ede()); EVP_add_cipher(EVP_des_ede3()); + EVP_add_cipher(EVP_des_ede3_wrap()); #endif #ifndef OPENSSL_NO_RC4 @@ -172,6 +173,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_128_ctr()); EVP_add_cipher(EVP_aes_128_gcm()); EVP_add_cipher(EVP_aes_128_xts()); + EVP_add_cipher(EVP_aes_128_ccm()); + EVP_add_cipher(EVP_aes_128_wrap()); EVP_add_cipher_alias(SN_aes_128_cbc, "AES128"); EVP_add_cipher_alias(SN_aes_128_cbc, "aes128"); EVP_add_cipher(EVP_aes_192_ecb()); @@ -182,6 +185,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_192_ofb()); EVP_add_cipher(EVP_aes_192_ctr()); EVP_add_cipher(EVP_aes_192_gcm()); + EVP_add_cipher(EVP_aes_192_ccm()); + EVP_add_cipher(EVP_aes_192_wrap()); EVP_add_cipher_alias(SN_aes_192_cbc, "AES192"); EVP_add_cipher_alias(SN_aes_192_cbc, "aes192"); EVP_add_cipher(EVP_aes_256_ecb()); @@ -193,12 +198,18 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_256_ctr()); EVP_add_cipher(EVP_aes_256_gcm()); EVP_add_cipher(EVP_aes_256_xts()); + EVP_add_cipher(EVP_aes_256_ccm()); + EVP_add_cipher(EVP_aes_256_wrap()); EVP_add_cipher_alias(SN_aes_256_cbc, "AES256"); EVP_add_cipher_alias(SN_aes_256_cbc, "aes256"); # if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1()); EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1()); # endif +# if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) + EVP_add_cipher(EVP_aes_128_cbc_hmac_sha256()); + EVP_add_cipher(EVP_aes_256_cbc_hmac_sha256()); +# endif #endif #ifndef OPENSSL_NO_CAMELLIA diff --git a/drivers/builtin_openssl2/crypto/evp/digest.c b/drivers/builtin_openssl2/crypto/evp/digest.c index 2e202c8fe0..f2643f3248 100644 --- a/drivers/builtin_openssl2/crypto/evp/digest.c +++ b/drivers/builtin_openssl2/crypto/evp/digest.c @@ -119,6 +119,7 @@ #ifdef OPENSSL_FIPS # include +# include "evp_locl.h" #endif void EVP_MD_CTX_init(EVP_MD_CTX *ctx) @@ -145,6 +146,17 @@ int EVP_DigestInit(EVP_MD_CTX *ctx, const EVP_MD *type) int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl) { EVP_MD_CTX_clear_flags(ctx, EVP_MD_CTX_FLAG_CLEANED); +#ifdef OPENSSL_FIPS + /* If FIPS mode switch to approved implementation if possible */ + if (FIPS_mode()) { + const EVP_MD *fipsmd; + if (type) { + fipsmd = evp_get_fips_md(type); + if (fipsmd) + type = fipsmd; + } + } +#endif #ifndef OPENSSL_NO_ENGINE /* * Whether it's nice or not, "Inits" can be used on "Final"'d contexts so diff --git a/drivers/builtin_openssl2/crypto/evp/e_aes.c b/drivers/builtin_openssl2/crypto/evp/e_aes.c index a4327fcb05..1734a823c1 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_aes.c +++ b/drivers/builtin_openssl2/crypto/evp/e_aes.c @@ -57,12 +57,17 @@ # include # include # include "evp_locl.h" -# ifndef OPENSSL_FIPS -# include "modes_lcl.h" -# include +# include "modes_lcl.h" +# include + +# undef EVP_CIPH_FLAG_FIPS +# define EVP_CIPH_FLAG_FIPS 0 typedef struct { - AES_KEY ks; + union { + double align; + AES_KEY ks; + } ks; block128_f block; union { cbc128_f cbc; @@ -71,7 +76,10 @@ typedef struct { } EVP_AES_KEY; typedef struct { - AES_KEY ks; /* AES key schedule to use */ + union { + double align; + AES_KEY ks; + } ks; /* AES key schedule to use */ int key_set; /* Set if key initialised */ int iv_set; /* Set if an iv is set */ GCM128_CONTEXT gcm; @@ -84,7 +92,10 @@ typedef struct { } EVP_AES_GCM_CTX; typedef struct { - AES_KEY ks1, ks2; /* AES key schedules to use */ + union { + double align; + AES_KEY ks; + } ks1, ks2; /* AES key schedules to use */ XTS128_CONTEXT xts; void (*stream) (const unsigned char *in, unsigned char *out, size_t length, @@ -93,7 +104,10 @@ typedef struct { } EVP_AES_XTS_CTX; typedef struct { - AES_KEY ks; /* AES key schedule to use */ + union { + double align; + AES_KEY ks; + } ks; /* AES key schedule to use */ int key_set; /* Set if key initialised */ int iv_set; /* Set if an iv is set */ int tag_set; /* Set if tag is valid */ @@ -103,9 +117,9 @@ typedef struct { ccm128_f str; } EVP_AES_CCM_CTX; -# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) +# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) -# ifdef VPAES_ASM +# ifdef VPAES_ASM int vpaes_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); int vpaes_set_decrypt_key(const unsigned char *userKey, int bits, @@ -120,8 +134,8 @@ void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *out, size_t length, const AES_KEY *key, unsigned char *ivec, int enc); -# endif -# ifdef BSAES_ASM +# endif +# ifdef BSAES_ASM void bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out, size_t length, const AES_KEY *key, unsigned char ivec[16], int enc); @@ -134,40 +148,54 @@ void bsaes_xts_encrypt(const unsigned char *inp, unsigned char *out, void bsaes_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); -# endif -# ifdef AES_CTR_ASM +# endif +# ifdef AES_CTR_ASM void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, size_t blocks, const AES_KEY *key, const unsigned char ivec[AES_BLOCK_SIZE]); -# endif -# ifdef AES_XTS_ASM +# endif +# ifdef AES_XTS_ASM void AES_xts_encrypt(const char *inp, char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); void AES_xts_decrypt(const char *inp, char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); +# endif + +# if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# ifdef VPAES_ASM +# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC) # endif +# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207) +# define HWAES_set_encrypt_key aes_p8_set_encrypt_key +# define HWAES_set_decrypt_key aes_p8_set_decrypt_key +# define HWAES_encrypt aes_p8_encrypt +# define HWAES_decrypt aes_p8_decrypt +# define HWAES_cbc_encrypt aes_p8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks +# endif -# if defined(AES_ASM) && !defined(I386_ONLY) && ( \ +# if defined(AES_ASM) && !defined(I386_ONLY) && ( \ ((defined(__i386) || defined(__i386__) || \ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[]; -# ifdef VPAES_ASM -# define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) -# endif -# ifdef BSAES_ASM -# define BSAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) -# endif +# ifdef VPAES_ASM +# define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +# endif +# ifdef BSAES_ASM +# define BSAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +# endif /* * AES-NI section */ -# define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) +# define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); @@ -218,6 +246,26 @@ void aesni_ccm64_decrypt_blocks(const unsigned char *in, const unsigned char ivec[16], unsigned char cmac[16]); +# if defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) +size_t aesni_gcm_encrypt(const unsigned char *in, + unsigned char *out, + size_t len, + const void *key, unsigned char ivec[16], u64 *Xi); +# define AES_gcm_encrypt aesni_gcm_encrypt +size_t aesni_gcm_decrypt(const unsigned char *in, + unsigned char *out, + size_t len, + const void *key, unsigned char ivec[16], u64 *Xi); +# define AES_gcm_decrypt aesni_gcm_decrypt +void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *in, + size_t len); +# define AES_GCM_ASM(gctx) (gctx->ctr==aesni_ctr32_encrypt_blocks && \ + gctx->gcm.ghash==gcm_ghash_avx) +# define AES_GCM_ASM2(gctx) (gctx->gcm.block==(block128_f)aesni_encrypt && \ + gctx->gcm.ghash==gcm_ghash_avx) +# undef AES_GCM_ASM2 /* minor size optimization */ +# endif + static int aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { @@ -271,23 +319,23 @@ static int aesni_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -# define aesni_ofb_cipher aes_ofb_cipher +# define aesni_ofb_cipher aes_ofb_cipher static int aesni_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb_cipher aes_cfb_cipher +# define aesni_cfb_cipher aes_cfb_cipher static int aesni_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb8_cipher aes_cfb8_cipher +# define aesni_cfb8_cipher aes_cfb8_cipher static int aesni_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb1_cipher aes_cfb1_cipher +# define aesni_cfb1_cipher aes_cfb1_cipher static int aesni_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_ctr_cipher aes_ctr_cipher +# define aesni_ctr_cipher aes_ctr_cipher static int aesni_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -298,7 +346,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (!iv && !key) return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt); gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; /* @@ -323,7 +371,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_gcm_cipher aes_gcm_cipher +# define aesni_gcm_cipher aes_gcm_cipher static int aesni_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -337,17 +385,17 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (key) { /* key_len is two AES keys */ if (enc) { - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) aesni_encrypt; xctx->stream = aesni_xts_encrypt; } else { - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) aesni_decrypt; xctx->stream = aesni_xts_decrypt; } aesni_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) aesni_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -361,7 +409,7 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_xts_cipher aes_xts_cipher +# define aesni_xts_cipher aes_xts_cipher static int aesni_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -372,7 +420,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (!iv && !key) return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) aesni_encrypt); cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks : @@ -386,11 +434,11 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_ccm_cipher aes_ccm_cipher +# define aesni_ccm_cipher aes_ccm_cipher static int aesni_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ static const EVP_CIPHER aesni_##keylen##_##mode = { \ nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ flags|EVP_CIPH_##MODE##_MODE, \ @@ -411,7 +459,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } -# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ static const EVP_CIPHER aesni_##keylen##_##mode = { \ nid##_##keylen##_##mode,blocksize, \ (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ @@ -433,9 +481,378 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } +# elif defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) + +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_AES) + +void aes_t4_set_encrypt_key(const unsigned char *key, int bits, AES_KEY *ks); +void aes_t4_set_decrypt_key(const unsigned char *key, int bits, AES_KEY *ks); +void aes_t4_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void aes_t4_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +/* + * Key-length specific subroutines were chosen for following reason. + * Each SPARC T4 core can execute up to 8 threads which share core's + * resources. Loading as much key material to registers allows to + * minimize references to shared memory interface, as well as amount + * of instructions in inner loops [much needed on T4]. But then having + * non-key-length specific routines would require conditional branches + * either in inner loops or on subroutines' entries. Former is hardly + * acceptable, while latter means code size increase to size occupied + * by multiple key-length specfic subroutines, so why fight? + */ +void aes128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_xts_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes128_t4_xts_decrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes256_t4_xts_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes256_t4_xts_decrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); + +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + int ret, mode, bits; + EVP_AES_KEY *dat = (EVP_AES_KEY *) ctx->cipher_data; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + bits = ctx->key_len * 8; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + ret = 0; + aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); + dat->block = (block128_f) aes_t4_decrypt; + switch (bits) { + case 128: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) aes128_t4_cbc_decrypt : NULL; + break; + case 192: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) aes192_t4_cbc_decrypt : NULL; + break; + case 256: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) aes256_t4_cbc_decrypt : NULL; + break; + default: + ret = -1; + } + } else { + ret = 0; + aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); + dat->block = (block128_f) aes_t4_encrypt; + switch (bits) { + case 128: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes128_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes128_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 192: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes192_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes192_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 256: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes256_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes256_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + default: + ret = -1; + } + } + + if (ret < 0) { + EVPerr(EVP_F_AES_T4_INIT_KEY, EVP_R_AES_KEY_SETUP_FAILED); + return 0; + } + + return 1; +} + +# define aes_t4_cbc_cipher aes_cbc_cipher +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ecb_cipher aes_ecb_cipher +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ofb_cipher aes_ofb_cipher +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb_cipher aes_cfb_cipher +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb8_cipher aes_cfb8_cipher +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb1_cipher aes_cfb1_cipher +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ctr_cipher aes_ctr_cipher +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + int bits = ctx->key_len * 8; + aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, + (block128_f) aes_t4_encrypt); + switch (bits) { + case 128: + gctx->ctr = (ctr128_f) aes128_t4_ctr32_encrypt; + break; + case 192: + gctx->ctr = (ctr128_f) aes192_t4_ctr32_encrypt; + break; + case 256: + gctx->ctr = (ctr128_f) aes256_t4_ctr32_encrypt; + break; + default: + return 0; + } + /* + * If we have an iv can set it directly, otherwise use saved IV. + */ + if (iv == NULL && gctx->iv_set) + iv = gctx->iv; + if (iv) { + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + gctx->iv_set = 1; + } + gctx->key_set = 1; + } else { + /* If key set use IV, otherwise copy */ + if (gctx->key_set) + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + else + memcpy(gctx->iv, iv, gctx->ivlen); + gctx->iv_set = 1; + gctx->iv_gen = 0; + } + return 1; +} + +# define aes_t4_gcm_cipher aes_gcm_cipher +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; + if (!iv && !key) + return 1; + + if (key) { + int bits = ctx->key_len * 4; + xctx->stream = NULL; + /* key_len is two AES keys */ + if (enc) { + aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) aes_t4_encrypt; + switch (bits) { + case 128: + xctx->stream = aes128_t4_xts_encrypt; + break; +# if 0 /* not yet */ + case 192: + xctx->stream = aes192_t4_xts_encrypt; + break; +# endif + case 256: + xctx->stream = aes256_t4_xts_encrypt; + break; + default: + return 0; + } + } else { + aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) aes_t4_decrypt; + switch (bits) { + case 128: + xctx->stream = aes128_t4_xts_decrypt; + break; +# if 0 /* not yet */ + case 192: + xctx->stream = aes192_t4_xts_decrypt; + break; +# endif + case 256: + xctx->stream = aes256_t4_xts_decrypt; + break; + default: + return 0; + } + } + + aes_t4_set_encrypt_key(key + ctx->key_len / 2, + ctx->key_len * 4, &xctx->ks2.ks); + xctx->xts.block2 = (block128_f) aes_t4_encrypt; + + xctx->xts.key1 = &xctx->ks1; + } + + if (iv) { + xctx->xts.key2 = &xctx->ks2; + memcpy(ctx->iv, iv, 16); + } + + return 1; +} + +# define aes_t4_xts_cipher aes_xts_cipher +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + int bits = ctx->key_len * 8; + aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks); + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f) aes_t4_encrypt); +# if 0 /* not yet */ + switch (bits) { + case 128: + cctx->str = enc ? (ccm128_f) aes128_t4_ccm64_encrypt : + (ccm128_f) ae128_t4_ccm64_decrypt; + break; + case 192: + cctx->str = enc ? (ccm128_f) aes192_t4_ccm64_encrypt : + (ccm128_f) ae192_t4_ccm64_decrypt; + break; + case 256: + cctx->str = enc ? (ccm128_f) aes256_t4_ccm64_encrypt : + (ccm128_f) ae256_t4_ccm64_decrypt; + break; + default: + return 0; + } # else + cctx->str = NULL; +# endif + cctx->key_set = 1; + } + if (iv) { + memcpy(ctx->iv, iv, 15 - cctx->L); + cctx->iv_set = 1; + } + return 1; +} -# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +# define aes_t4_ccm_cipher aes_ccm_cipher +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_t4_init_key, \ + aes_t4_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_init_key, \ + aes_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } + +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_t4_##mode##_init_key, \ + aes_t4_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_##mode##_init_key, \ + aes_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } + +# else + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ static const EVP_CIPHER aes_##keylen##_##mode = { \ nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ flags|EVP_CIPH_##MODE##_MODE, \ @@ -447,7 +864,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } -# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ static const EVP_CIPHER aes_##keylen##_##mode = { \ nid##_##keylen##_##mode,blocksize, \ (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ @@ -459,9 +876,42 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ NULL,NULL,aes_##mode##_ctrl,NULL }; \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } +# endif + +# if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) +# include "arm_arch.h" +# if __ARM_MAX_ARCH__>=7 +# if defined(BSAES_ASM) +# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) +# define HWAES_set_encrypt_key aes_v8_set_encrypt_key +# define HWAES_set_decrypt_key aes_v8_set_decrypt_key +# define HWAES_encrypt aes_v8_encrypt +# define HWAES_decrypt aes_v8_decrypt +# define HWAES_cbc_encrypt aes_v8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks # endif +# endif + +# if defined(HWAES_CAPABLE) +int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +void HWAES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, const int enc); +void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + const unsigned char ivec[16]); +# endif -# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ +# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ @@ -479,51 +929,80 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + ret = HWAES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); + dat->block = (block128_f) HWAES_decrypt; + dat->stream.cbc = NULL; +# ifdef HWAES_cbc_encrypt + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt; +# endif + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) { - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_decrypt; dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) vpaes_decrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) vpaes_cbc_encrypt : NULL; } else -# endif +# endif { - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_decrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) AES_cbc_encrypt : NULL; } else -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + ret = HWAES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); + dat->block = (block128_f) HWAES_encrypt; + dat->stream.cbc = NULL; +# ifdef HWAES_cbc_encrypt + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt; + else +# endif +# ifdef HWAES_ctr32_encrypt_blocks + if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks; + else +# endif + (void)0; /* terminate potentially open 'else' */ + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode == EVP_CIPH_CTR_MODE) { - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_encrypt; dat->stream.ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) vpaes_encrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) vpaes_cbc_encrypt : NULL; } else -# endif +# endif { - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_encrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) AES_cbc_encrypt : NULL; -# ifdef AES_CTR_ASM +# ifdef AES_CTR_ASM if (mode == EVP_CIPH_CTR_MODE) dat->stream.ctr = (ctr128_f) AES_ctr32_encrypt; -# endif +# endif } if (ret < 0) { @@ -544,7 +1023,7 @@ static int aes_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, else if (ctx->encrypt) CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); else - CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ctx->iv, dat->block); return 1; } @@ -680,11 +1159,6 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) case EVP_CTRL_GCM_SET_IVLEN: if (arg <= 0) return 0; -# ifdef OPENSSL_FIPS - if (FIPS_module_mode() && !(c->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) - && arg < 12) - return 0; -# endif /* Allocate memory for IV if needed */ if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) { if (gctx->iv != c->iv) @@ -805,34 +1279,47 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { do { -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + HWAES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, + (block128_f) HWAES_encrypt); +# ifdef HWAES_ctr32_encrypt_blocks + gctx->ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks; +# else + gctx->ctr = NULL; +# endif + break; + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE) { - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) AES_encrypt); gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; break; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) vpaes_encrypt); gctx->ctr = NULL; break; } else -# endif +# endif (void)0; /* terminate potentially open 'else' */ - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) AES_encrypt); -# ifdef AES_CTR_ASM +# ifdef AES_CTR_ASM gctx->ctr = (ctr128_f) AES_ctr32_encrypt; -# else +# else gctx->ctr = NULL; -# endif +# endif } while (0); /* @@ -891,11 +1378,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (ctx->encrypt) { /* Encrypt payload */ if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 32 && AES_GCM_ASM(gctx)) { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_encrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) goto err; } else { - if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 32 && AES_GCM_ASM2(gctx)) { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_encrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif + if (CRYPTO_gcm128_encrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) goto err; } out += len; @@ -905,11 +1419,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } else { /* Decrypt */ if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 16 && AES_GCM_ASM(gctx)) { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_decrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) goto err; } else { - if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 16 && AES_GCM_ASM2(gctx)) { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_decrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif + if (CRYPTO_gcm128_decrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) goto err; } /* Retrieve tag */ @@ -947,20 +1488,90 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return -1; } else if (ctx->encrypt) { if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 32 && AES_GCM_ASM(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_encrypt(in + res, + out + res, len - res, + gctx->gcm.key, gctx->gcm.Yi.c, + gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) return -1; } else { - if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 32 && AES_GCM_ASM2(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_encrypt(in + res, + out + res, len - res, + gctx->gcm.key, gctx->gcm.Yi.c, + gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif + if (CRYPTO_gcm128_encrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) return -1; } } else { if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 16 && AES_GCM_ASM(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_decrypt(in + res, + out + res, len - res, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) return -1; } else { - if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 16 && AES_GCM_ASM2(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_decrypt(in + res, + out + res, len - res, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif + if (CRYPTO_gcm128_decrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) return -1; } } @@ -983,7 +1594,7 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } -# define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ +# define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ | EVP_CIPH_CUSTOM_COPY) @@ -1032,47 +1643,69 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (key) do { -# ifdef AES_XTS_ASM +# ifdef AES_XTS_ASM xctx->stream = enc ? AES_xts_encrypt : AES_xts_decrypt; -# else +# else xctx->stream = NULL; -# endif +# endif /* key_len is two AES keys */ -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + if (enc) { + HWAES_set_encrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) HWAES_encrypt; + } else { + HWAES_set_decrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) HWAES_decrypt; + } + + HWAES_set_encrypt_key(key + ctx->key_len / 2, + ctx->key_len * 4, &xctx->ks2.ks); + xctx->xts.block2 = (block128_f) HWAES_encrypt; + + xctx->xts.key1 = &xctx->ks1; + break; + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE) xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt; else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { if (enc) { - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_encrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); xctx->xts.block1 = (block128_f) vpaes_encrypt; } else { - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_decrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); xctx->xts.block1 = (block128_f) vpaes_decrypt; } vpaes_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) vpaes_encrypt; xctx->xts.key1 = &xctx->ks1; break; } else -# endif +# endif (void)0; /* terminate potentially open 'else' */ if (enc) { - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) AES_encrypt; } else { - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) AES_decrypt; } AES_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) AES_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1094,14 +1727,6 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 0; if (!out || !in || len < AES_BLOCK_SIZE) return 0; -# ifdef OPENSSL_FIPS - /* Requirement of SP800-38E */ - if (FIPS_module_mode() && !(ctx->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) && - (len > (1UL << 20) * 16)) { - EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE); - return 0; - } -# endif if (xctx->stream) (*xctx->stream) (in, out, len, xctx->xts.key1, xctx->xts.key2, ctx->iv); @@ -1111,9 +1736,9 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -# define aes_xts_cleanup NULL +# define aes_xts_cleanup NULL -# define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ +# define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ | EVP_CIPH_CUSTOM_COPY) @@ -1191,17 +1816,28 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) do { -# ifdef VPAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + HWAES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); + + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f) HWAES_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + break; + } else +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) vpaes_encrypt); cctx->str = NULL; cctx->key_set = 1; break; } -# endif - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); +# endif + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) AES_encrypt); cctx->str = NULL; @@ -1274,7 +1910,7 @@ static int aes_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } -# define aes_ccm_cleanup NULL +# define aes_ccm_cleanup NULL BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) @@ -1282,5 +1918,107 @@ BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) -# endif #endif +typedef struct { + union { + double align; + AES_KEY ks; + } ks; + /* Indicates if IV has been set */ + unsigned char *iv; +} EVP_AES_WRAP_CTX; + +static int aes_wrap_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + if (ctx->encrypt) + AES_set_encrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); + else + AES_set_decrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); + if (!iv) + wctx->iv = NULL; + } + if (iv) { + memcpy(ctx->iv, iv, 8); + wctx->iv = ctx->iv; + } + return 1; +} + +static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inlen) +{ + EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; + size_t rv; + if (!in) + return 0; + if (inlen % 8) + return -1; + if (ctx->encrypt && inlen < 8) + return -1; + if (!ctx->encrypt && inlen < 16) + return -1; + if (!out) { + if (ctx->encrypt) + return inlen + 8; + else + return inlen - 8; + } + if (ctx->encrypt) + rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv, out, in, inlen, + (block128_f) AES_encrypt); + else + rv = CRYPTO_128_unwrap(&wctx->ks.ks, wctx->iv, out, in, inlen, + (block128_f) AES_decrypt); + return rv ? (int)rv : -1; +} + +#define WRAP_FLAGS (EVP_CIPH_WRAP_MODE \ + | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1) + +static const EVP_CIPHER aes_128_wrap = { + NID_id_aes128_wrap, + 8, 16, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_128_wrap(void) +{ + return &aes_128_wrap; +} + +static const EVP_CIPHER aes_192_wrap = { + NID_id_aes192_wrap, + 8, 24, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_192_wrap(void) +{ + return &aes_192_wrap; +} + +static const EVP_CIPHER aes_256_wrap = { + NID_id_aes256_wrap, + 8, 32, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_256_wrap(void) +{ + return &aes_256_wrap; +} diff --git a/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha1.c b/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha1.c index d1f5928f62..8330964ee1 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -58,7 +58,8 @@ # include # include # include -# include "evp_locl.h" +# include +# include "modes_lcl.h" # ifndef EVP_CIPH_FLAG_AEAD_CIPHER # define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 @@ -70,6 +71,10 @@ # define EVP_CIPH_FLAG_DEFAULT_ASN1 0 # endif +# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +# endif + # define TLS1_1_VERSION 0x0302 typedef struct { @@ -89,11 +94,7 @@ typedef struct { defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -# if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC) -# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) -# endif - -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[]; # define AESNI_CAPABLE (1<<(57-32)) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, @@ -110,6 +111,10 @@ void aesni_cbc_sha1_enc(const void *inp, void *out, size_t blocks, const AES_KEY *key, unsigned char iv[16], SHA_CTX *ctx, const void *in0); +void aesni256_cbc_sha1_dec(const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA_CTX *ctx, const void *in0); + # define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data) static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, @@ -134,6 +139,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, } # define STITCHED_CALL +# undef STITCHED_DECRYPT_CALL # if !defined(STITCHED_CALL) # define aes_off 0 @@ -177,6 +183,275 @@ static void sha1_update(SHA_CTX *c, const void *data, size_t len) # endif # define SHA1_Update sha1_update +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + +typedef struct { + unsigned int A[8], B[8], C[8], D[8], E[8]; +} SHA1_MB_CTX; +typedef struct { + const unsigned char *ptr; + int blocks; +} HASH_DESC; + +void sha1_multi_block(SHA1_MB_CTX *, const HASH_DESC *, int); + +typedef struct { + const unsigned char *inp; + unsigned char *out; + int blocks; + u64 iv[2]; +} CIPH_DESC; + +void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int); + +static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, + unsigned char *out, + const unsigned char *inp, + size_t inp_len, int n4x) +{ /* n4x is 1 or 2 */ + HASH_DESC hash_d[8], edges[8]; + CIPH_DESC ciph_d[8]; + unsigned char storage[sizeof(SHA1_MB_CTX) + 32]; + union { + u64 q[16]; + u32 d[32]; + u8 c[128]; + } blocks[8]; + SHA1_MB_CTX *ctx; + unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed = + 0; + size_t ret = 0; + u8 *IVs; +# if defined(BSWAP8) + u64 seqnum; +# endif + + /* ask for IVs in bulk */ + if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0) + return 0; + + ctx = (SHA1_MB_CTX *) (storage + 32 - ((size_t)storage % 32)); /* align */ + + frag = (unsigned int)inp_len >> (1 + n4x); + last = (unsigned int)inp_len + frag - (frag << (1 + n4x)); + if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 20 + 16) & -16); + + /* populate descriptors with pointers and IVs */ + hash_d[0].ptr = inp; + ciph_d[0].inp = inp; + /* 5+16 is place for header and explicit IV */ + ciph_d[0].out = out + 5 + 16; + memcpy(ciph_d[0].out - 16, IVs, 16); + memcpy(ciph_d[0].iv, IVs, 16); + IVs += 16; + + for (i = 1; i < x4; i++) { + ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag; + ciph_d[i].out = ciph_d[i - 1].out + packlen; + memcpy(ciph_d[i].out - 16, IVs, 16); + memcpy(ciph_d[i].iv, IVs, 16); + IVs += 16; + } + +# if defined(BSWAP8) + memcpy(blocks[0].c, key->md.data, 8); + seqnum = BSWAP8(blocks[0].q[0]); +# endif + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag); +# if !defined(BSWAP8) + unsigned int carry, j; +# endif + + ctx->A[i] = key->md.h0; + ctx->B[i] = key->md.h1; + ctx->C[i] = key->md.h2; + ctx->D[i] = key->md.h3; + ctx->E[i] = key->md.h4; + + /* fix seqnum */ +# if defined(BSWAP8) + blocks[i].q[0] = BSWAP8(seqnum + i); +# else + for (carry = i, j = 8; j--;) { + blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry; + carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1); + } +# endif + blocks[i].c[8] = ((u8 *)key->md.data)[8]; + blocks[i].c[9] = ((u8 *)key->md.data)[9]; + blocks[i].c[10] = ((u8 *)key->md.data)[10]; + /* fix length */ + blocks[i].c[11] = (u8)(len >> 8); + blocks[i].c[12] = (u8)(len); + + memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13); + hash_d[i].ptr += 64 - 13; + hash_d[i].blocks = (len - (64 - 13)) / 64; + + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* hash 13-byte headers and first 64-13 bytes of inputs */ + sha1_multi_block(ctx, edges, n4x); + /* hash bulk inputs */ +# define MAXCHUNKSIZE 2048 +# if MAXCHUNKSIZE%64 +# error "MAXCHUNKSIZE is not divisible by 64" +# elif MAXCHUNKSIZE + /* + * goal is to minimize pressure on L1 cache by moving in shorter steps, + * so that hashed data is still in the cache by the time we encrypt it + */ + minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64; + if (minblocks > MAXCHUNKSIZE / 64) { + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + } + do { + sha1_multi_block(ctx, edges, n4x); + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE; + hash_d[i].blocks -= MAXCHUNKSIZE / 64; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].inp += MAXCHUNKSIZE; + ciph_d[i].out += MAXCHUNKSIZE; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16); + } + processed += MAXCHUNKSIZE; + minblocks -= MAXCHUNKSIZE / 64; + } while (minblocks > MAXCHUNKSIZE / 64); + } +# endif +# undef MAXCHUNKSIZE + sha1_multi_block(ctx, hash_d, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag), + off = hash_d[i].blocks * 64; + const unsigned char *ptr = hash_d[i].ptr + off; + + off = (len - processed) - (64 - 13) - off; /* remainder actually */ + memcpy(blocks[i].c, ptr, off); + blocks[i].c[off] = 0x80; + len += 64 + 13; /* 64 is HMAC header */ + len *= 8; /* convert to bits */ + if (off < (64 - 8)) { +# ifdef BSWAP4 + blocks[i].d[15] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 60, len); +# endif + edges[i].blocks = 1; + } else { +# ifdef BSWAP4 + blocks[i].d[31] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 124, len); +# endif + edges[i].blocks = 2; + } + edges[i].ptr = blocks[i].c; + } + + /* hash input tails and finalize */ + sha1_multi_block(ctx, edges, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { +# ifdef BSWAP4 + blocks[i].d[0] = BSWAP4(ctx->A[i]); + ctx->A[i] = key->tail.h0; + blocks[i].d[1] = BSWAP4(ctx->B[i]); + ctx->B[i] = key->tail.h1; + blocks[i].d[2] = BSWAP4(ctx->C[i]); + ctx->C[i] = key->tail.h2; + blocks[i].d[3] = BSWAP4(ctx->D[i]); + ctx->D[i] = key->tail.h3; + blocks[i].d[4] = BSWAP4(ctx->E[i]); + ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + blocks[i].d[15] = BSWAP4((64 + 20) * 8); +# else + PUTU32(blocks[i].c + 0, ctx->A[i]); + ctx->A[i] = key->tail.h0; + PUTU32(blocks[i].c + 4, ctx->B[i]); + ctx->B[i] = key->tail.h1; + PUTU32(blocks[i].c + 8, ctx->C[i]); + ctx->C[i] = key->tail.h2; + PUTU32(blocks[i].c + 12, ctx->D[i]); + ctx->D[i] = key->tail.h3; + PUTU32(blocks[i].c + 16, ctx->E[i]); + ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + PUTU32(blocks[i].c + 60, (64 + 20) * 8); +# endif + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* finalize MACs */ + sha1_multi_block(ctx, edges, n4x); + + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag), pad, j; + unsigned char *out0 = out; + + memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed); + ciph_d[i].inp = ciph_d[i].out; + + out += 5 + 16 + len; + + /* write MAC */ + PUTU32(out + 0, ctx->A[i]); + PUTU32(out + 4, ctx->B[i]); + PUTU32(out + 8, ctx->C[i]); + PUTU32(out + 12, ctx->D[i]); + PUTU32(out + 16, ctx->E[i]); + out += 20; + len += 20; + + /* pad */ + pad = 15 - len % 16; + for (j = 0; j <= pad; j++) + *(out++) = pad; + len += pad + 1; + + ciph_d[i].blocks = (len - processed) / 16; + len += 16; /* account for explicit iv */ + + /* arrange header */ + out0[0] = ((u8 *)key->md.data)[8]; + out0[1] = ((u8 *)key->md.data)[9]; + out0[2] = ((u8 *)key->md.data)[10]; + out0[3] = (u8)(len >> 8); + out0[4] = (u8)(len); + + ret += len + 5; + inp += frag; + } + + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + OPENSSL_cleanse(blocks, sizeof(blocks)); + OPENSSL_cleanse(ctx, sizeof(*ctx)); + + return ret; +} +# endif + static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len) { @@ -257,10 +532,7 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* arrange cache line alignment */ pmac = (void *)(((size_t)mac.c + 31) & ((size_t)0 - 32)); - /* decrypt HMAC|padding at once */ - aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); - - if (plen) { /* "TLS" mode of operation */ + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ size_t inp_len, mask, j, i; unsigned int res, maxpad, pad, bitlen; int ret = 1; @@ -268,17 +540,37 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, unsigned int u[SHA_LBLOCK]; unsigned char c[SHA_CBLOCK]; } *data = (void *)key->md.data; +# if defined(STITCHED_DECRYPT_CALL) + unsigned char tail_iv[AES_BLOCK_SIZE]; + int stitch = 0; +# endif if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3]) - >= TLS1_1_VERSION) - iv = AES_BLOCK_SIZE; - - if (len < (iv + SHA_DIGEST_LENGTH + 1)) + >= TLS1_1_VERSION) { + if (len < (AES_BLOCK_SIZE + SHA_DIGEST_LENGTH + 1)) + return 0; + + /* omit explicit iv */ + memcpy(ctx->iv, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } else if (len < (SHA_DIGEST_LENGTH + 1)) return 0; - /* omit explicit iv */ - out += iv; - len -= iv; +# if defined(STITCHED_DECRYPT_CALL) + if (len >= 1024 && ctx->key_len == 32) { + /* decrypt last block */ + memcpy(tail_iv, in + len - 2 * AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + aesni_cbc_encrypt(in + len - AES_BLOCK_SIZE, + out + len - AES_BLOCK_SIZE, AES_BLOCK_SIZE, + &key->ks, tail_iv, 0); + stitch = 1; + } else +# endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); /* figure out payload length */ pad = out[len - 1]; @@ -298,6 +590,29 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, key->md = key->head; SHA1_Update(&key->md, key->aux.tls_aad, plen); +# if defined(STITCHED_DECRYPT_CALL) + if (stitch) { + blocks = (len - (256 + 32 + SHA_CBLOCK)) / SHA_CBLOCK; + aes_off = len - AES_BLOCK_SIZE - blocks * SHA_CBLOCK; + sha_off = SHA_CBLOCK - plen; + + aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0); + + SHA1_Update(&key->md, out, sha_off); + aesni256_cbc_sha1_dec(in + aes_off, + out + aes_off, blocks, &key->ks, + ctx->iv, &key->md, out + sha_off); + + sha_off += blocks *= SHA_CBLOCK; + out += sha_off; + len -= sha_off; + inp_len -= sha_off; + + key->md.Nl += (blocks << 3); /* at most 18 bits */ + memcpy(ctx->iv, tail_iv, AES_BLOCK_SIZE); + } +# endif + # if 1 len -= SHA_DIGEST_LENGTH; /* amend mac */ if (len >= (256 + SHA_CBLOCK)) { @@ -311,8 +626,8 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* but pretend as if we hashed padded payload */ bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */ -# ifdef BSWAP - bitlen = BSWAP(bitlen); +# ifdef BSWAP4 + bitlen = BSWAP4(bitlen); # else mac.c[0] = 0; mac.c[1] = (unsigned char)(bitlen >> 16); @@ -376,12 +691,12 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, pmac->u[3] |= key->md.h3 & mask; pmac->u[4] |= key->md.h4 & mask; -# ifdef BSWAP - pmac->u[0] = BSWAP(pmac->u[0]); - pmac->u[1] = BSWAP(pmac->u[1]); - pmac->u[2] = BSWAP(pmac->u[2]); - pmac->u[3] = BSWAP(pmac->u[3]); - pmac->u[4] = BSWAP(pmac->u[4]); +# ifdef BSWAP4 + pmac->u[0] = BSWAP4(pmac->u[0]); + pmac->u[1] = BSWAP4(pmac->u[1]); + pmac->u[2] = BSWAP4(pmac->u[2]); + pmac->u[3] = BSWAP4(pmac->u[3]); + pmac->u[4] = BSWAP4(pmac->u[4]); # else for (i = 0; i < 5; i++) { res = pmac->u[i]; @@ -458,6 +773,33 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, # endif return ret; } else { +# if defined(STITCHED_DECRYPT_CALL) + if (len >= 1024 && ctx->key_len == 32) { + if (sha_off %= SHA_CBLOCK) + blocks = (len - 3 * SHA_CBLOCK) / SHA_CBLOCK; + else + blocks = (len - 2 * SHA_CBLOCK) / SHA_CBLOCK; + aes_off = len - blocks * SHA_CBLOCK; + + aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0); + SHA1_Update(&key->md, out, sha_off); + aesni256_cbc_sha1_dec(in + aes_off, + out + aes_off, blocks, &key->ks, + ctx->iv, &key->md, out + sha_off); + + sha_off += blocks *= SHA_CBLOCK; + out += sha_off; + len -= sha_off; + + key->md.Nh += blocks >> 29; + key->md.Nl += blocks <<= 3; + if (key->md.Nl < (unsigned int)blocks) + key->md.Nh++; + } else +# endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); + SHA1_Update(&key->md, out, len); } } @@ -531,6 +873,70 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, return SHA_DIGEST_LENGTH; } } +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5 + 16 + ((arg + 20 + 16) & -16)); + case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + unsigned int n4x = 1, x4; + unsigned int frag, last, packlen, inp_len; + + if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) + return -1; + + inp_len = param->inp[11] << 8 | param->inp[12]; + + if (ctx->encrypt) { + if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION) + return -1; + + if (inp_len) { + if (inp_len < 4096) + return 0; /* too short */ + + if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5)) + n4x = 2; /* AVX2 */ + } else if ((n4x = param->interleave / 4) && n4x <= 2) + inp_len = param->len; + else + return -1; + + key->md = key->head; + SHA1_Update(&key->md, param->inp, 13); + + x4 = 4 * n4x; + n4x += 1; + + frag = inp_len >> n4x; + last = inp_len + frag - (frag << n4x); + if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 20 + 16) & -16); + packlen = (packlen << n4x) - packlen; + packlen += 5 + 16 + ((last + 20 + 16) & -16); + + param->interleave = x4; + + return (int)packlen; + } else + return -1; /* not yet */ + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + + return (int)tls1_1_multi_block_encrypt(key, param->out, + param->inp, param->len, + param->interleave / 4); + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: +# endif default: return -1; } @@ -544,7 +950,7 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher = { # endif 16, 16, 16, EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | - EVP_CIPH_FLAG_AEAD_CIPHER, + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL, @@ -563,7 +969,7 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = { # endif 16, 32, 16, EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | - EVP_CIPH_FLAG_AEAD_CIPHER, + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL, diff --git a/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha256.c b/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha256.c new file mode 100644 index 0000000000..37800213c7 --- /dev/null +++ b/drivers/builtin_openssl2/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -0,0 +1,982 @@ +/* ==================================================================== + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include + +#include +#include + +#if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA256) + +# include +# include +# include +# include +# include +# include "modes_lcl.h" + +# ifndef EVP_CIPH_FLAG_AEAD_CIPHER +# define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 +# define EVP_CTRL_AEAD_TLS1_AAD 0x16 +# define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +# endif + +# if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1) +# define EVP_CIPH_FLAG_DEFAULT_ASN1 0 +# endif + +# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +# endif + +# define TLS1_1_VERSION 0x0302 + +typedef struct { + AES_KEY ks; + SHA256_CTX head, tail, md; + size_t payload_length; /* AAD length in decrypt case */ + union { + unsigned int tls_ver; + unsigned char tls_aad[16]; /* 13 used */ + } aux; +} EVP_AES_HMAC_SHA256; + +# define NO_PAYLOAD_LENGTH ((size_t)-1) + +# if defined(AES_ASM) && ( \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) + +extern unsigned int OPENSSL_ia32cap_P[]; +# define AESNI_CAPABLE (1<<(57-32)) + +int aesni_set_encrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); +int aesni_set_decrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); + +void aesni_cbc_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, unsigned char *ivec, int enc); + +int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA256_CTX *ctx, const void *in0); + +# define data(ctx) ((EVP_AES_HMAC_SHA256 *)(ctx)->cipher_data) + +static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx, + const unsigned char *inkey, + const unsigned char *iv, int enc) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + int ret; + + if (enc) + memset(&key->ks, 0, sizeof(key->ks.rd_key)), + ret = aesni_set_encrypt_key(inkey, ctx->key_len * 8, &key->ks); + else + ret = aesni_set_decrypt_key(inkey, ctx->key_len * 8, &key->ks); + + SHA256_Init(&key->head); /* handy when benchmarking */ + key->tail = key->head; + key->md = key->head; + + key->payload_length = NO_PAYLOAD_LENGTH; + + return ret < 0 ? 0 : 1; +} + +# define STITCHED_CALL + +# if !defined(STITCHED_CALL) +# define aes_off 0 +# endif + +void sha256_block_data_order(void *c, const void *p, size_t len); + +static void sha256_update(SHA256_CTX *c, const void *data, size_t len) +{ + const unsigned char *ptr = data; + size_t res; + + if ((res = c->num)) { + res = SHA256_CBLOCK - res; + if (len < res) + res = len; + SHA256_Update(c, ptr, res); + ptr += res; + len -= res; + } + + res = len % SHA256_CBLOCK; + len -= res; + + if (len) { + sha256_block_data_order(c, ptr, len / SHA256_CBLOCK); + + ptr += len; + c->Nh += len >> 29; + c->Nl += len <<= 3; + if (c->Nl < (unsigned int)len) + c->Nh++; + } + + if (res) + SHA256_Update(c, ptr, res); +} + +# ifdef SHA256_Update +# undef SHA256_Update +# endif +# define SHA256_Update sha256_update + +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + +typedef struct { + unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8]; +} SHA256_MB_CTX; +typedef struct { + const unsigned char *ptr; + int blocks; +} HASH_DESC; + +void sha256_multi_block(SHA256_MB_CTX *, const HASH_DESC *, int); + +typedef struct { + const unsigned char *inp; + unsigned char *out; + int blocks; + u64 iv[2]; +} CIPH_DESC; + +void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int); + +static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, + unsigned char *out, + const unsigned char *inp, + size_t inp_len, int n4x) +{ /* n4x is 1 or 2 */ + HASH_DESC hash_d[8], edges[8]; + CIPH_DESC ciph_d[8]; + unsigned char storage[sizeof(SHA256_MB_CTX) + 32]; + union { + u64 q[16]; + u32 d[32]; + u8 c[128]; + } blocks[8]; + SHA256_MB_CTX *ctx; + unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed = + 0; + size_t ret = 0; + u8 *IVs; +# if defined(BSWAP8) + u64 seqnum; +# endif + + /* ask for IVs in bulk */ + if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0) + return 0; + + /* align */ + ctx = (SHA256_MB_CTX *) (storage + 32 - ((size_t)storage % 32)); + + frag = (unsigned int)inp_len >> (1 + n4x); + last = (unsigned int)inp_len + frag - (frag << (1 + n4x)); + if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 32 + 16) & -16); + + /* populate descriptors with pointers and IVs */ + hash_d[0].ptr = inp; + ciph_d[0].inp = inp; + /* 5+16 is place for header and explicit IV */ + ciph_d[0].out = out + 5 + 16; + memcpy(ciph_d[0].out - 16, IVs, 16); + memcpy(ciph_d[0].iv, IVs, 16); + IVs += 16; + + for (i = 1; i < x4; i++) { + ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag; + ciph_d[i].out = ciph_d[i - 1].out + packlen; + memcpy(ciph_d[i].out - 16, IVs, 16); + memcpy(ciph_d[i].iv, IVs, 16); + IVs += 16; + } + +# if defined(BSWAP8) + memcpy(blocks[0].c, key->md.data, 8); + seqnum = BSWAP8(blocks[0].q[0]); +# endif + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag); +# if !defined(BSWAP8) + unsigned int carry, j; +# endif + + ctx->A[i] = key->md.h[0]; + ctx->B[i] = key->md.h[1]; + ctx->C[i] = key->md.h[2]; + ctx->D[i] = key->md.h[3]; + ctx->E[i] = key->md.h[4]; + ctx->F[i] = key->md.h[5]; + ctx->G[i] = key->md.h[6]; + ctx->H[i] = key->md.h[7]; + + /* fix seqnum */ +# if defined(BSWAP8) + blocks[i].q[0] = BSWAP8(seqnum + i); +# else + for (carry = i, j = 8; j--;) { + blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry; + carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1); + } +# endif + blocks[i].c[8] = ((u8 *)key->md.data)[8]; + blocks[i].c[9] = ((u8 *)key->md.data)[9]; + blocks[i].c[10] = ((u8 *)key->md.data)[10]; + /* fix length */ + blocks[i].c[11] = (u8)(len >> 8); + blocks[i].c[12] = (u8)(len); + + memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13); + hash_d[i].ptr += 64 - 13; + hash_d[i].blocks = (len - (64 - 13)) / 64; + + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* hash 13-byte headers and first 64-13 bytes of inputs */ + sha256_multi_block(ctx, edges, n4x); + /* hash bulk inputs */ +# define MAXCHUNKSIZE 2048 +# if MAXCHUNKSIZE%64 +# error "MAXCHUNKSIZE is not divisible by 64" +# elif MAXCHUNKSIZE + /* + * goal is to minimize pressure on L1 cache by moving in shorter steps, + * so that hashed data is still in the cache by the time we encrypt it + */ + minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64; + if (minblocks > MAXCHUNKSIZE / 64) { + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + } + do { + sha256_multi_block(ctx, edges, n4x); + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE; + hash_d[i].blocks -= MAXCHUNKSIZE / 64; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].inp += MAXCHUNKSIZE; + ciph_d[i].out += MAXCHUNKSIZE; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16); + } + processed += MAXCHUNKSIZE; + minblocks -= MAXCHUNKSIZE / 64; + } while (minblocks > MAXCHUNKSIZE / 64); + } +# endif +# undef MAXCHUNKSIZE + sha256_multi_block(ctx, hash_d, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag), + off = hash_d[i].blocks * 64; + const unsigned char *ptr = hash_d[i].ptr + off; + + off = (len - processed) - (64 - 13) - off; /* remainder actually */ + memcpy(blocks[i].c, ptr, off); + blocks[i].c[off] = 0x80; + len += 64 + 13; /* 64 is HMAC header */ + len *= 8; /* convert to bits */ + if (off < (64 - 8)) { +# ifdef BSWAP4 + blocks[i].d[15] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 60, len); +# endif + edges[i].blocks = 1; + } else { +# ifdef BSWAP4 + blocks[i].d[31] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 124, len); +# endif + edges[i].blocks = 2; + } + edges[i].ptr = blocks[i].c; + } + + /* hash input tails and finalize */ + sha256_multi_block(ctx, edges, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { +# ifdef BSWAP4 + blocks[i].d[0] = BSWAP4(ctx->A[i]); + ctx->A[i] = key->tail.h[0]; + blocks[i].d[1] = BSWAP4(ctx->B[i]); + ctx->B[i] = key->tail.h[1]; + blocks[i].d[2] = BSWAP4(ctx->C[i]); + ctx->C[i] = key->tail.h[2]; + blocks[i].d[3] = BSWAP4(ctx->D[i]); + ctx->D[i] = key->tail.h[3]; + blocks[i].d[4] = BSWAP4(ctx->E[i]); + ctx->E[i] = key->tail.h[4]; + blocks[i].d[5] = BSWAP4(ctx->F[i]); + ctx->F[i] = key->tail.h[5]; + blocks[i].d[6] = BSWAP4(ctx->G[i]); + ctx->G[i] = key->tail.h[6]; + blocks[i].d[7] = BSWAP4(ctx->H[i]); + ctx->H[i] = key->tail.h[7]; + blocks[i].c[32] = 0x80; + blocks[i].d[15] = BSWAP4((64 + 32) * 8); +# else + PUTU32(blocks[i].c + 0, ctx->A[i]); + ctx->A[i] = key->tail.h[0]; + PUTU32(blocks[i].c + 4, ctx->B[i]); + ctx->B[i] = key->tail.h[1]; + PUTU32(blocks[i].c + 8, ctx->C[i]); + ctx->C[i] = key->tail.h[2]; + PUTU32(blocks[i].c + 12, ctx->D[i]); + ctx->D[i] = key->tail.h[3]; + PUTU32(blocks[i].c + 16, ctx->E[i]); + ctx->E[i] = key->tail.h[4]; + PUTU32(blocks[i].c + 20, ctx->F[i]); + ctx->F[i] = key->tail.h[5]; + PUTU32(blocks[i].c + 24, ctx->G[i]); + ctx->G[i] = key->tail.h[6]; + PUTU32(blocks[i].c + 28, ctx->H[i]); + ctx->H[i] = key->tail.h[7]; + blocks[i].c[32] = 0x80; + PUTU32(blocks[i].c + 60, (64 + 32) * 8); +# endif + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* finalize MACs */ + sha256_multi_block(ctx, edges, n4x); + + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? last : frag), pad, j; + unsigned char *out0 = out; + + memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed); + ciph_d[i].inp = ciph_d[i].out; + + out += 5 + 16 + len; + + /* write MAC */ + PUTU32(out + 0, ctx->A[i]); + PUTU32(out + 4, ctx->B[i]); + PUTU32(out + 8, ctx->C[i]); + PUTU32(out + 12, ctx->D[i]); + PUTU32(out + 16, ctx->E[i]); + PUTU32(out + 20, ctx->F[i]); + PUTU32(out + 24, ctx->G[i]); + PUTU32(out + 28, ctx->H[i]); + out += 32; + len += 32; + + /* pad */ + pad = 15 - len % 16; + for (j = 0; j <= pad; j++) + *(out++) = pad; + len += pad + 1; + + ciph_d[i].blocks = (len - processed) / 16; + len += 16; /* account for explicit iv */ + + /* arrange header */ + out0[0] = ((u8 *)key->md.data)[8]; + out0[1] = ((u8 *)key->md.data)[9]; + out0[2] = ((u8 *)key->md.data)[10]; + out0[3] = (u8)(len >> 8); + out0[4] = (u8)(len); + + ret += len + 5; + inp += frag; + } + + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + OPENSSL_cleanse(blocks, sizeof(blocks)); + OPENSSL_cleanse(ctx, sizeof(*ctx)); + + return ret; +} +# endif + +static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, + unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + unsigned int l; + size_t plen = key->payload_length, iv = 0, /* explicit IV in TLS 1.1 and + * later */ + sha_off = 0; +# if defined(STITCHED_CALL) + size_t aes_off = 0, blocks; + + sha_off = SHA256_CBLOCK - key->md.num; +# endif + + key->payload_length = NO_PAYLOAD_LENGTH; + + if (len % AES_BLOCK_SIZE) + return 0; + + if (ctx->encrypt) { + if (plen == NO_PAYLOAD_LENGTH) + plen = len; + else if (len != + ((plen + SHA256_DIGEST_LENGTH + + AES_BLOCK_SIZE) & -AES_BLOCK_SIZE)) + return 0; + else if (key->aux.tls_ver >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + +# if defined(STITCHED_CALL) + /* + * Assembly stitch handles AVX-capable processors, but its + * performance is not optimal on AMD Jaguar, ~40% worse, for + * unknown reasons. Incidentally processor in question supports + * AVX, but not AMD-specific XOP extension, which can be used + * to identify it and avoid stitch invocation. So that after we + * establish that current CPU supports AVX, we even see if it's + * either even XOP-capable Bulldozer-based or GenuineIntel one. + */ + if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */ + ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */ + | (OPENSSL_ia32cap_P[0] & (1<<30))) && /* "Intel CPU"? */ + plen > (sha_off + iv) && + (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) { + SHA256_Update(&key->md, in + iv, sha_off); + + (void)aesni_cbc_sha256_enc(in, out, blocks, &key->ks, + ctx->iv, &key->md, in + iv + sha_off); + blocks *= SHA256_CBLOCK; + aes_off += blocks; + sha_off += blocks; + key->md.Nh += blocks >> 29; + key->md.Nl += blocks <<= 3; + if (key->md.Nl < (unsigned int)blocks) + key->md.Nh++; + } else { + sha_off = 0; + } +# endif + sha_off += iv; + SHA256_Update(&key->md, in + sha_off, plen - sha_off); + + if (plen != len) { /* "TLS" mode of operation */ + if (in != out) + memcpy(out + aes_off, in + aes_off, plen - aes_off); + + /* calculate HMAC and append it to payload */ + SHA256_Final(out + plen, &key->md); + key->md = key->tail; + SHA256_Update(&key->md, out + plen, SHA256_DIGEST_LENGTH); + SHA256_Final(out + plen, &key->md); + + /* pad the payload|hmac */ + plen += SHA256_DIGEST_LENGTH; + for (l = len - plen - 1; plen < len; plen++) + out[plen] = l; + /* encrypt HMAC|padding at once */ + aesni_cbc_encrypt(out + aes_off, out + aes_off, len - aes_off, + &key->ks, ctx->iv, 1); + } else { + aesni_cbc_encrypt(in + aes_off, out + aes_off, len - aes_off, + &key->ks, ctx->iv, 1); + } + } else { + union { + unsigned int u[SHA256_DIGEST_LENGTH / sizeof(unsigned int)]; + unsigned char c[64 + SHA256_DIGEST_LENGTH]; + } mac, *pmac; + + /* arrange cache line alignment */ + pmac = (void *)(((size_t)mac.c + 63) & ((size_t)0 - 64)); + + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); + + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ + size_t inp_len, mask, j, i; + unsigned int res, maxpad, pad, bitlen; + int ret = 1; + union { + unsigned int u[SHA_LBLOCK]; + unsigned char c[SHA256_CBLOCK]; + } *data = (void *)key->md.data; + + if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3]) + >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + + if (len < (iv + SHA256_DIGEST_LENGTH + 1)) + return 0; + + /* omit explicit iv */ + out += iv; + len -= iv; + + /* figure out payload length */ + pad = out[len - 1]; + maxpad = len - (SHA256_DIGEST_LENGTH + 1); + maxpad |= (255 - maxpad) >> (sizeof(maxpad) * 8 - 8); + maxpad &= 255; + + inp_len = len - (SHA256_DIGEST_LENGTH + pad + 1); + mask = (0 - ((inp_len - len) >> (sizeof(inp_len) * 8 - 1))); + inp_len &= mask; + ret &= (int)mask; + + key->aux.tls_aad[plen - 2] = inp_len >> 8; + key->aux.tls_aad[plen - 1] = inp_len; + + /* calculate HMAC */ + key->md = key->head; + SHA256_Update(&key->md, key->aux.tls_aad, plen); + +# if 1 + len -= SHA256_DIGEST_LENGTH; /* amend mac */ + if (len >= (256 + SHA256_CBLOCK)) { + j = (len - (256 + SHA256_CBLOCK)) & (0 - SHA256_CBLOCK); + j += SHA256_CBLOCK - key->md.num; + SHA256_Update(&key->md, out, j); + out += j; + len -= j; + inp_len -= j; + } + + /* but pretend as if we hashed padded payload */ + bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */ +# ifdef BSWAP4 + bitlen = BSWAP4(bitlen); +# else + mac.c[0] = 0; + mac.c[1] = (unsigned char)(bitlen >> 16); + mac.c[2] = (unsigned char)(bitlen >> 8); + mac.c[3] = (unsigned char)bitlen; + bitlen = mac.u[0]; +# endif + + pmac->u[0] = 0; + pmac->u[1] = 0; + pmac->u[2] = 0; + pmac->u[3] = 0; + pmac->u[4] = 0; + pmac->u[5] = 0; + pmac->u[6] = 0; + pmac->u[7] = 0; + + for (res = key->md.num, j = 0; j < len; j++) { + size_t c = out[j]; + mask = (j - inp_len) >> (sizeof(j) * 8 - 8); + c &= mask; + c |= 0x80 & ~mask & ~((inp_len - j) >> (sizeof(j) * 8 - 8)); + data->c[res++] = (unsigned char)c; + + if (res != SHA256_CBLOCK) + continue; + + /* j is not incremented yet */ + mask = 0 - ((inp_len + 7 - j) >> (sizeof(j) * 8 - 1)); + data->u[SHA_LBLOCK - 1] |= bitlen & mask; + sha256_block_data_order(&key->md, data, 1); + mask &= 0 - ((j - inp_len - 72) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + res = 0; + } + + for (i = res; i < SHA256_CBLOCK; i++, j++) + data->c[i] = 0; + + if (res > SHA256_CBLOCK - 8) { + mask = 0 - ((inp_len + 8 - j) >> (sizeof(j) * 8 - 1)); + data->u[SHA_LBLOCK - 1] |= bitlen & mask; + sha256_block_data_order(&key->md, data, 1); + mask &= 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + + memset(data, 0, SHA256_CBLOCK); + j += 64; + } + data->u[SHA_LBLOCK - 1] = bitlen; + sha256_block_data_order(&key->md, data, 1); + mask = 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + +# ifdef BSWAP4 + pmac->u[0] = BSWAP4(pmac->u[0]); + pmac->u[1] = BSWAP4(pmac->u[1]); + pmac->u[2] = BSWAP4(pmac->u[2]); + pmac->u[3] = BSWAP4(pmac->u[3]); + pmac->u[4] = BSWAP4(pmac->u[4]); + pmac->u[5] = BSWAP4(pmac->u[5]); + pmac->u[6] = BSWAP4(pmac->u[6]); + pmac->u[7] = BSWAP4(pmac->u[7]); +# else + for (i = 0; i < 8; i++) { + res = pmac->u[i]; + pmac->c[4 * i + 0] = (unsigned char)(res >> 24); + pmac->c[4 * i + 1] = (unsigned char)(res >> 16); + pmac->c[4 * i + 2] = (unsigned char)(res >> 8); + pmac->c[4 * i + 3] = (unsigned char)res; + } +# endif + len += SHA256_DIGEST_LENGTH; +# else + SHA256_Update(&key->md, out, inp_len); + res = key->md.num; + SHA256_Final(pmac->c, &key->md); + + { + unsigned int inp_blocks, pad_blocks; + + /* but pretend as if we hashed padded payload */ + inp_blocks = + 1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1)); + res += (unsigned int)(len - inp_len); + pad_blocks = res / SHA256_CBLOCK; + res %= SHA256_CBLOCK; + pad_blocks += + 1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1)); + for (; inp_blocks < pad_blocks; inp_blocks++) + sha1_block_data_order(&key->md, data, 1); + } +# endif + key->md = key->tail; + SHA256_Update(&key->md, pmac->c, SHA256_DIGEST_LENGTH); + SHA256_Final(pmac->c, &key->md); + + /* verify HMAC */ + out += inp_len; + len -= inp_len; +# if 1 + { + unsigned char *p = + out + len - 1 - maxpad - SHA256_DIGEST_LENGTH; + size_t off = out - p; + unsigned int c, cmask; + + maxpad += SHA256_DIGEST_LENGTH; + for (res = 0, i = 0, j = 0; j < maxpad; j++) { + c = p[j]; + cmask = + ((int)(j - off - SHA256_DIGEST_LENGTH)) >> + (sizeof(int) * 8 - 1); + res |= (c ^ pad) & ~cmask; /* ... and padding */ + cmask &= ((int)(off - 1 - j)) >> (sizeof(int) * 8 - 1); + res |= (c ^ pmac->c[i]) & cmask; + i += 1 & cmask; + } + maxpad -= SHA256_DIGEST_LENGTH; + + res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1)); + ret &= (int)~res; + } +# else + for (res = 0, i = 0; i < SHA256_DIGEST_LENGTH; i++) + res |= out[i] ^ pmac->c[i]; + res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1)); + ret &= (int)~res; + + /* verify padding */ + pad = (pad & ~res) | (maxpad & res); + out = out + len - 1 - pad; + for (res = 0, i = 0; i < pad; i++) + res |= out[i] ^ pad; + + res = (0 - res) >> (sizeof(res) * 8 - 1); + ret &= (int)~res; +# endif + return ret; + } else { + SHA256_Update(&key->md, out, len); + } + } + + return 1; +} + +static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, + void *ptr) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + + switch (type) { + case EVP_CTRL_AEAD_SET_MAC_KEY: + { + unsigned int i; + unsigned char hmac_key[64]; + + memset(hmac_key, 0, sizeof(hmac_key)); + + if (arg > (int)sizeof(hmac_key)) { + SHA256_Init(&key->head); + SHA256_Update(&key->head, ptr, arg); + SHA256_Final(hmac_key, &key->head); + } else { + memcpy(hmac_key, ptr, arg); + } + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36; /* ipad */ + SHA256_Init(&key->head); + SHA256_Update(&key->head, hmac_key, sizeof(hmac_key)); + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */ + SHA256_Init(&key->tail); + SHA256_Update(&key->tail, hmac_key, sizeof(hmac_key)); + + OPENSSL_cleanse(hmac_key, sizeof(hmac_key)); + + return 1; + } + case EVP_CTRL_AEAD_TLS1_AAD: + { + unsigned char *p = ptr; + unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + + if (arg != EVP_AEAD_TLS1_AAD_LEN) + return -1; + + if (ctx->encrypt) { + key->payload_length = len; + if ((key->aux.tls_ver = + p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) { + len -= AES_BLOCK_SIZE; + p[arg - 2] = len >> 8; + p[arg - 1] = len; + } + key->md = key->head; + SHA256_Update(&key->md, p, arg); + + return (int)(((len + SHA256_DIGEST_LENGTH + + AES_BLOCK_SIZE) & -AES_BLOCK_SIZE) + - len); + } else { + memcpy(key->aux.tls_aad, ptr, arg); + key->payload_length = arg; + + return SHA256_DIGEST_LENGTH; + } + } +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5 + 16 + ((arg + 32 + 16) & -16)); + case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + unsigned int n4x = 1, x4; + unsigned int frag, last, packlen, inp_len; + + if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) + return -1; + + inp_len = param->inp[11] << 8 | param->inp[12]; + + if (ctx->encrypt) { + if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION) + return -1; + + if (inp_len) { + if (inp_len < 4096) + return 0; /* too short */ + + if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5)) + n4x = 2; /* AVX2 */ + } else if ((n4x = param->interleave / 4) && n4x <= 2) + inp_len = param->len; + else + return -1; + + key->md = key->head; + SHA256_Update(&key->md, param->inp, 13); + + x4 = 4 * n4x; + n4x += 1; + + frag = inp_len >> n4x; + last = inp_len + frag - (frag << n4x); + if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 32 + 16) & -16); + packlen = (packlen << n4x) - packlen; + packlen += 5 + 16 + ((last + 32 + 16) & -16); + + param->interleave = x4; + + return (int)packlen; + } else + return -1; /* not yet */ + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + + return (int)tls1_1_multi_block_encrypt(key, param->out, + param->inp, param->len, + param->interleave / 4); + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: +# endif + default: + return -1; + } +} + +static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher = { +# ifdef NID_aes_128_cbc_hmac_sha256 + NID_aes_128_cbc_hmac_sha256, +# else + NID_undef, +# endif + 16, 16, 16, + EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, + aesni_cbc_hmac_sha256_init_key, + aesni_cbc_hmac_sha256_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA256), + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha256_ctrl, + NULL +}; + +static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher = { +# ifdef NID_aes_256_cbc_hmac_sha256 + NID_aes_256_cbc_hmac_sha256, +# else + NID_undef, +# endif + 16, 32, 16, + EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, + aesni_cbc_hmac_sha256_init_key, + aesni_cbc_hmac_sha256_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA256), + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha256_ctrl, + NULL +}; + +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void) +{ + return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && + aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? + &aesni_128_cbc_hmac_sha256_cipher : NULL); +} + +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void) +{ + return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && + aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? + &aesni_256_cbc_hmac_sha256_cipher : NULL); +} +# else +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void) +{ + return NULL; +} + +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void) +{ + return NULL; +} +# endif +#endif diff --git a/drivers/builtin_openssl2/crypto/evp/e_camellia.c b/drivers/builtin_openssl2/crypto/evp/e_camellia.c index f7b135dae8..f273f9c947 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_camellia.c +++ b/drivers/builtin_openssl2/crypto/evp/e_camellia.c @@ -61,6 +61,7 @@ # include # include # include "evp_locl.h" +# include "modes_lcl.h" static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); @@ -68,48 +69,322 @@ static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, /* Camellia subkey Structure */ typedef struct { CAMELLIA_KEY ks; + block128_f block; + union { + cbc128_f cbc; + ctr128_f ctr; + } stream; } EVP_CAMELLIA_KEY; +# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) + /* Attribute operation for Camellia */ # define data(ctx) EVP_C_DATA(EVP_CAMELLIA_KEY,ctx) -IMPLEMENT_BLOCK_CIPHER(camellia_128, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_128, 16, 16, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) - IMPLEMENT_BLOCK_CIPHER(camellia_192, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_192, 16, 24, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) - IMPLEMENT_BLOCK_CIPHER(camellia_256, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_256, 16, 32, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) -# define IMPLEMENT_CAMELLIA_CFBR(ksize,cbits) IMPLEMENT_CFBR(camellia,Camellia,EVP_CAMELLIA_KEY,ks,ksize,cbits,16) - IMPLEMENT_CAMELLIA_CFBR(128, 1) - IMPLEMENT_CAMELLIA_CFBR(192, 1) - IMPLEMENT_CAMELLIA_CFBR(256, 1) - - IMPLEMENT_CAMELLIA_CFBR(128, 8) - IMPLEMENT_CAMELLIA_CFBR(192, 8) - IMPLEMENT_CAMELLIA_CFBR(256, 8) +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_CMLL_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_CAMELLIA) + +void cmll_t4_set_key(const unsigned char *key, int bits, CAMELLIA_KEY *ks); +void cmll_t4_encrypt(const unsigned char *in, unsigned char *out, + const CAMELLIA_KEY *key); +void cmll_t4_decrypt(const unsigned char *in, unsigned char *out, + const CAMELLIA_KEY *key); + +void cmll128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const CAMELLIA_KEY *key, + unsigned char *ivec); + +static int cmll_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + int ret, mode, bits; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + bits = ctx->key_len * 8; + + cmll_t4_set_key(key, bits, &dat->ks); + + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + ret = 0; + dat->block = (block128_f) cmll_t4_decrypt; + switch (bits) { + case 128: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) cmll128_t4_cbc_decrypt : NULL; + break; + case 192: + case 256: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) cmll256_t4_cbc_decrypt : NULL; + break; + default: + ret = -1; + } + } else { + ret = 0; + dat->block = (block128_f) cmll_t4_encrypt; + switch (bits) { + case 128: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) cmll128_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) cmll128_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 192: + case 256: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) cmll256_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) cmll256_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + default: + ret = -1; + } + } + + if (ret < 0) { + EVPerr(EVP_F_CMLL_T4_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED); + return 0; + } + + return 1; +} + +# define cmll_t4_cbc_cipher camellia_cbc_cipher +static int cmll_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_ecb_cipher camellia_ecb_cipher +static int cmll_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); +# define cmll_t4_ofb_cipher camellia_ofb_cipher +static int cmll_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb_cipher camellia_cfb_cipher +static int cmll_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb8_cipher camellia_cfb8_cipher +static int cmll_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb1_cipher camellia_cfb1_cipher +static int cmll_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_ctr_cipher camellia_ctr_cipher +static int cmll_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER cmll_t4_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + cmll_t4_init_key, \ + cmll_t4_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER camellia_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + camellia_init_key, \ + camellia_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \ +{ return SPARC_CMLL_CAPABLE?&cmll_t4_##keylen##_##mode:&camellia_##keylen##_##mode; } + +# else + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER camellia_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + camellia_init_key, \ + camellia_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \ +{ return &camellia_##keylen##_##mode; } + +# endif + +# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ + BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb1,cfb1,CFB,flags) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb8,cfb8,CFB,flags) +# if 0 /* not yet, missing NID */ +BLOCK_CIPHER_generic(nid, keylen, 1, 16, ctr, ctr, CTR, flags) +# endif /* The subkey for Camellia is generated. */ static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { - int ret; - - ret = Camellia_set_key(key, ctx->key_len * 8, ctx->cipher_data); + int ret, mode; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + ret = Camellia_set_key(key, ctx->key_len * 8, &dat->ks); if (ret < 0) { EVPerr(EVP_F_CAMELLIA_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED); return 0; } + mode = ctx->cipher->flags & EVP_CIPH_MODE; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + dat->block = (block128_f) Camellia_decrypt; + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) Camellia_cbc_encrypt : NULL; + } else { + dat->block = (block128_f) Camellia_encrypt; + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) Camellia_cbc_encrypt : NULL; + } + return 1; } +static int camellia_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (dat->stream.cbc) + (*dat->stream.cbc) (in, out, len, &dat->ks, ctx->iv, ctx->encrypt); + else if (ctx->encrypt) + CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + else + CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + + return 1; +} + +static int camellia_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = ctx->cipher->block_size; + size_t i; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (len < bl) + return 1; + + for (i = 0, len -= bl; i <= len; i += bl) + (*dat->block) (in + i, out + i, &dat->ks); + + return 1; +} + +static int camellia_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_ofb128_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, dat->block); + return 1; +} + +static int camellia_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_cfb128_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; +} + +static int camellia_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_cfb128_8_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; +} + +static int camellia_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (ctx->flags & EVP_CIPH_FLAG_LENGTH_BITS) { + CRYPTO_cfb128_1_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; + } + + while (len >= MAXBITCHUNK) { + CRYPTO_cfb128_1_encrypt(in, out, MAXBITCHUNK * 8, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + len -= MAXBITCHUNK; + } + if (len) + CRYPTO_cfb128_1_encrypt(in, out, len * 8, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + + return 1; +} + +# if 0 /* not yet, missing NID */ +static int camellia_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + unsigned int num = ctx->num; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (dat->stream.ctr) + CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks, + ctx->iv, ctx->buf, &num, dat->stream.ctr); + else + CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, + ctx->iv, ctx->buf, &num, dat->block); + ctx->num = (size_t)num; + return 1; +} +# endif + +BLOCK_CIPHER_generic_pack(NID_camellia, 128, 0) + BLOCK_CIPHER_generic_pack(NID_camellia, 192, 0) + BLOCK_CIPHER_generic_pack(NID_camellia, 256, 0) #else # ifdef PEDANTIC diff --git a/drivers/builtin_openssl2/crypto/evp/e_des.c b/drivers/builtin_openssl2/crypto/evp/e_des.c index ea1a4c4228..8ca65cd03a 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_des.c +++ b/drivers/builtin_openssl2/crypto/evp/e_des.c @@ -65,6 +65,33 @@ # include # include +typedef struct { + union { + double align; + DES_key_schedule ks; + } ks; + union { + void (*cbc) (const void *, void *, size_t, + const DES_key_schedule *, unsigned char *); + } stream; +} EVP_DES_KEY; + +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ----------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_cbc_encrypt(const void *inp, void *out, size_t len, + const DES_key_schedule *ks, unsigned char iv[8]); +void des_t4_cbc_decrypt(const void *inp, void *out, size_t len, + const DES_key_schedule *ks, unsigned char iv[8]); +# endif + static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); @@ -102,6 +129,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { + EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; + + if (dat->stream.cbc != NULL) { + (*dat->stream.cbc) (in, out, inl, &dat->ks.ks, ctx->iv); + return 1; + } while (inl >= EVP_MAXCHUNK) { DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, (DES_cblock *)ctx->iv, ctx->encrypt); @@ -179,16 +212,15 @@ static int des_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, EVP_CIPH_RAND_KEY, des_init_key, NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) + BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1, + EVP_CIPH_RAND_KEY, des_init_key, NULL, + EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1, - EVP_CIPH_RAND_KEY, des_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) - -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8, + BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8, EVP_CIPH_RAND_KEY, des_init_key, NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) @@ -196,8 +228,22 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; + EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; + + dat->stream.cbc = NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(key, &dat->ks.ks); + dat->stream.cbc = enc ? des_t4_cbc_encrypt : des_t4_cbc_decrypt; + return 1; + } + } +# endif # ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(deskey, ctx->cipher_data) != 0) + if (DES_set_key_checked(deskey, dat->ks.ks) != 0) return 0; # else DES_set_key_unchecked(deskey, ctx->cipher_data); diff --git a/drivers/builtin_openssl2/crypto/evp/e_des3.c b/drivers/builtin_openssl2/crypto/evp/e_des3.c index 1272305799..0e910d6d80 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_des3.c +++ b/drivers/builtin_openssl2/crypto/evp/e_des3.c @@ -65,7 +65,39 @@ # include # include -# ifndef OPENSSL_FIPS +/* Block use of implementations in FIPS mode */ +# undef EVP_CIPH_FLAG_FIPS +# define EVP_CIPH_FLAG_FIPS 0 + +typedef struct { + union { + double align; + DES_key_schedule ks[3]; + } ks; + union { + void (*cbc) (const void *, void *, size_t, + const DES_key_schedule *, unsigned char *); + } stream; +} DES_EDE_KEY; +# define ks1 ks.ks[0] +# define ks2 ks.ks[1] +# define ks3 ks.ks[2] + +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_ede3_cbc_encrypt(const void *inp, void *out, size_t len, + const DES_key_schedule ks[3], unsigned char iv[8]); +void des_t4_ede3_cbc_decrypt(const void *inp, void *out, size_t len, + const DES_key_schedule ks[3], unsigned char iv[8]); +# endif static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); @@ -75,13 +107,7 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); -typedef struct { - DES_key_schedule ks1; /* key schedule */ - DES_key_schedule ks2; /* key schedule (for ede) */ - DES_key_schedule ks3; /* key schedule (for ede3) */ -} DES_EDE_KEY; - -# define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) +# define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) /* * Because of various casts and different args can't use @@ -123,7 +149,9 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { -# ifdef KSSL_DEBUG + DES_EDE_KEY *dat = data(ctx); + +# ifdef KSSL_DEBUG { int i; fprintf(stderr, "des_ede_cbc_cipher(ctx=%p, buflen=%d)\n", ctx, @@ -133,21 +161,24 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, fprintf(stderr, "%02X", ctx->iv[i]); fprintf(stderr, "\n"); } -# endif /* KSSL_DEBUG */ +# endif /* KSSL_DEBUG */ + if (dat->stream.cbc) { + (*dat->stream.cbc) (in, out, inl, dat->ks.ks, ctx->iv); + return 1; + } + while (inl >= EVP_MAXCHUNK) { DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, - &data(ctx)->ks1, &data(ctx)->ks2, - &data(ctx)->ks3, (DES_cblock *)ctx->iv, - ctx->encrypt); + &dat->ks1, &dat->ks2, &dat->ks3, + (DES_cblock *)ctx->iv, ctx->encrypt); inl -= EVP_MAXCHUNK; in += EVP_MAXCHUNK; out += EVP_MAXCHUNK; } if (inl) DES_ede3_cbc_encrypt(in, out, (long)inl, - &data(ctx)->ks1, &data(ctx)->ks2, - &data(ctx)->ks3, (DES_cblock *)ctx->iv, - ctx->encrypt); + &dat->ks1, &dat->ks2, &dat->ks3, + (DES_cblock *)ctx->iv, ctx->encrypt); return 1; } @@ -215,39 +246,57 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64, - EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) -# define des_ede3_cfb64_cipher des_ede_cfb64_cipher -# define des_ede3_ofb_cipher des_ede_ofb_cipher -# define des_ede3_cbc_cipher des_ede_cbc_cipher -# define des_ede3_ecb_cipher des_ede_ecb_cipher + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_DEFAULT_ASN1, + des_ede_init_key, NULL, NULL, NULL, des3_ctrl) +# define des_ede3_cfb64_cipher des_ede_cfb64_cipher +# define des_ede3_ofb_cipher des_ede_ofb_cipher +# define des_ede3_cbc_cipher des_ede_cbc_cipher +# define des_ede3_ecb_cipher des_ede_ecb_cipher BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, NULL, + des3_ctrl) BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, + NULL, des3_ctrl) BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, + NULL, des3_ctrl) static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; -# ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) - || DES_set_key_checked(&deskey[1], &data(ctx)->ks2)) + DES_EDE_KEY *dat = data(ctx); + + dat->stream.cbc = NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(&deskey[0], &dat->ks1); + des_t4_key_expand(&deskey[1], &dat->ks2); + memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1)); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +# endif +# ifdef EVP_CHECK_DES_KEY + if (DES_set_key_checked(&deskey[0], &dat->ks1) + || DES_set_key_checked(&deskey[1], &dat->ks2)) return 0; -# else - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); -# endif - memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1)); +# else + DES_set_key_unchecked(&deskey[0], &dat->ks1); + DES_set_key_unchecked(&deskey[1], &dat->ks2); +# endif + memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1)); return 1; } @@ -255,7 +304,9 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; -# ifdef KSSL_DEBUG + DES_EDE_KEY *dat = data(ctx); + +# ifdef KSSL_DEBUG { int i; fprintf(stderr, "des_ede3_init_key(ctx=%p)\n", ctx); @@ -270,18 +321,33 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, fprintf(stderr, "\n"); } } -# endif /* KSSL_DEBUG */ +# endif /* KSSL_DEBUG */ + + dat->stream.cbc = NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; -# ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) - || DES_set_key_checked(&deskey[1], &data(ctx)->ks2) - || DES_set_key_checked(&deskey[2], &data(ctx)->ks3)) + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(&deskey[0], &dat->ks1); + des_t4_key_expand(&deskey[1], &dat->ks2); + des_t4_key_expand(&deskey[2], &dat->ks3); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +# endif +# ifdef EVP_CHECK_DES_KEY + if (DES_set_key_checked(&deskey[0], &dat->ks1) + || DES_set_key_checked(&deskey[1], &dat->ks2) + || DES_set_key_checked(&deskey[2], &dat->ks3)) return 0; -# else - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); - DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3); -# endif +# else + DES_set_key_unchecked(&deskey[0], &dat->ks1); + DES_set_key_unchecked(&deskey[1], &dat->ks2); + DES_set_key_unchecked(&deskey[2], &dat->ks3); +# endif return 1; } @@ -315,5 +381,115 @@ const EVP_CIPHER *EVP_des_ede3(void) { return &des_ede3_ecb; } + +# ifndef OPENSSL_NO_SHA + +# include + +static const unsigned char wrap_iv[8] = + { 0x4a, 0xdd, 0xa2, 0x2c, 0x79, 0xe8, 0x21, 0x05 }; + +static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + unsigned char icv[8], iv[8], sha1tmp[SHA_DIGEST_LENGTH]; + int rv = -1; + if (inl < 24) + return -1; + if (out == NULL) + return inl - 16; + memcpy(ctx->iv, wrap_iv, 8); + /* Decrypt first block which will end up as icv */ + des_ede_cbc_cipher(ctx, icv, in, 8); + /* Decrypt central blocks */ + /* + * If decrypting in place move whole output along a block so the next + * des_ede_cbc_cipher is in place. + */ + if (out == in) { + memmove(out, out + 8, inl - 8); + in -= 8; + } + des_ede_cbc_cipher(ctx, out, in + 8, inl - 16); + /* Decrypt final block which will be IV */ + des_ede_cbc_cipher(ctx, iv, in + inl - 8, 8); + /* Reverse order of everything */ + BUF_reverse(icv, NULL, 8); + BUF_reverse(out, NULL, inl - 16); + BUF_reverse(ctx->iv, iv, 8); + /* Decrypt again using new IV */ + des_ede_cbc_cipher(ctx, out, out, inl - 16); + des_ede_cbc_cipher(ctx, icv, icv, 8); + /* Work out SHA1 hash of first portion */ + SHA1(out, inl - 16, sha1tmp); + + if (!CRYPTO_memcmp(sha1tmp, icv, 8)) + rv = inl - 16; + OPENSSL_cleanse(icv, 8); + OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); + OPENSSL_cleanse(iv, 8); + OPENSSL_cleanse(ctx->iv, 8); + if (rv == -1) + OPENSSL_cleanse(out, inl - 16); + + return rv; +} + +static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + unsigned char sha1tmp[SHA_DIGEST_LENGTH]; + if (out == NULL) + return inl + 16; + /* Copy input to output buffer + 8 so we have space for IV */ + memmove(out + 8, in, inl); + /* Work out ICV */ + SHA1(in, inl, sha1tmp); + memcpy(out + inl + 8, sha1tmp, 8); + OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); + /* Generate random IV */ + if (RAND_bytes(ctx->iv, 8) <= 0) + return -1; + memcpy(out, ctx->iv, 8); + /* Encrypt everything after IV in place */ + des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8); + BUF_reverse(out, NULL, inl + 16); + memcpy(ctx->iv, wrap_iv, 8); + des_ede_cbc_cipher(ctx, out, out, inl + 16); + return inl + 16; +} + +static int des_ede3_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + /* + * Sanity check input length: we typically only wrap keys so EVP_MAXCHUNK + * is more than will ever be needed. Also input length must be a multiple + * of 8 bits. + */ + if (inl >= EVP_MAXCHUNK || inl % 8) + return -1; + if (ctx->encrypt) + return des_ede3_wrap(ctx, out, in, inl); + else + return des_ede3_unwrap(ctx, out, in, inl); +} + +static const EVP_CIPHER des3_wrap = { + NID_id_smime_alg_CMS3DESwrap, + 8, 24, 0, + EVP_CIPH_WRAP_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER + | EVP_CIPH_FLAG_DEFAULT_ASN1, + des_ede3_init_key, des_ede3_wrap_cipher, + NULL, + sizeof(DES_EDE_KEY), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_des_ede3_wrap(void) +{ + return &des3_wrap; +} + # endif #endif diff --git a/drivers/builtin_openssl2/crypto/evp/e_null.c b/drivers/builtin_openssl2/crypto/evp/e_null.c index af90ce326b..599fcb808d 100644 --- a/drivers/builtin_openssl2/crypto/evp/e_null.c +++ b/drivers/builtin_openssl2/crypto/evp/e_null.c @@ -61,8 +61,6 @@ #include #include -#ifndef OPENSSL_FIPS - static int null_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, @@ -100,4 +98,3 @@ static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, memcpy((char *)out, (const char *)in, inl); return 1; } -#endif diff --git a/drivers/builtin_openssl2/crypto/evp/evp_enc.c b/drivers/builtin_openssl2/crypto/evp/evp_enc.c index 4e983c4bda..65f0e0244d 100644 --- a/drivers/builtin_openssl2/crypto/evp/evp_enc.c +++ b/drivers/builtin_openssl2/crypto/evp/evp_enc.c @@ -169,8 +169,14 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, #endif #ifdef OPENSSL_FIPS - if (FIPS_mode()) + if (FIPS_mode()) { + const EVP_CIPHER *fcipher; + if (cipher) + fcipher = evp_get_fips_cipher(cipher); + if (fcipher) + cipher = fcipher; return FIPS_cipherinit(ctx, cipher, key, iv, enc); + } #endif ctx->cipher = cipher; if (ctx->cipher->ctx_size) { @@ -183,7 +189,8 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ctx->cipher_data = NULL; } ctx->key_len = cipher->key_len; - ctx->flags = 0; + /* Preserve wrap enable flag, zero everything else */ + ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW; if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) { if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) { EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_INITIALIZATION_ERROR); @@ -206,6 +213,12 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, || ctx->cipher->block_size == 8 || ctx->cipher->block_size == 16); + if (!(ctx->flags & EVP_CIPHER_CTX_FLAG_WRAP_ALLOW) + && EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_WRAP_MODE) { + EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_WRAP_MODE_NOT_ALLOWED); + return 0; + } + if (!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) { switch (EVP_CIPHER_CTX_mode(ctx)) { diff --git a/drivers/builtin_openssl2/crypto/evp/evp_err.c b/drivers/builtin_openssl2/crypto/evp/evp_err.c index 686a69958d..15cf5532b3 100644 --- a/drivers/builtin_openssl2/crypto/evp/evp_err.c +++ b/drivers/builtin_openssl2/crypto/evp/evp_err.c @@ -1,6 +1,6 @@ /* crypto/evp/evp_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,11 +73,13 @@ static ERR_STRING_DATA EVP_str_functs[] = { {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, + {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"}, {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, {ERR_FUNC(EVP_F_CMAC_INIT), "CMAC_INIT"}, + {ERR_FUNC(EVP_F_CMLL_T4_INIT_KEY), "CMLL_T4_INIT_KEY"}, {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, {ERR_FUNC(EVP_F_DO_SIGVER_INIT), "DO_SIGVER_INIT"}, {ERR_FUNC(EVP_F_DSAPKEY2PKCS8), "DSAPKEY2PKCS8"}, @@ -232,6 +234,7 @@ static ERR_STRING_DATA EVP_str_reasons[] = { {ERR_REASON(EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM), "unsupported private key algorithm"}, {ERR_REASON(EVP_R_UNSUPPORTED_SALT_TYPE), "unsupported salt type"}, + {ERR_REASON(EVP_R_WRAP_MODE_NOT_ALLOWED), "wrap mode not allowed"}, {ERR_REASON(EVP_R_WRONG_FINAL_BLOCK_LENGTH), "wrong final block length"}, {ERR_REASON(EVP_R_WRONG_PUBLIC_KEY_TYPE), "wrong public key type"}, {0, NULL} diff --git a/drivers/builtin_openssl2/crypto/evp/evp_fips.c b/drivers/builtin_openssl2/crypto/evp/evp_fips.c deleted file mode 100644 index 71a32fca8d..0000000000 --- a/drivers/builtin_openssl2/crypto/evp/evp_fips.c +++ /dev/null @@ -1,310 +0,0 @@ -/* crypto/evp/evp_fips.c */ -/* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -#include - -#ifdef OPENSSL_FIPS -# include - -const EVP_CIPHER *EVP_aes_128_cbc(void) -{ - return FIPS_evp_aes_128_cbc(); -} - -const EVP_CIPHER *EVP_aes_128_ccm(void) -{ - return FIPS_evp_aes_128_ccm(); -} - -const EVP_CIPHER *EVP_aes_128_cfb1(void) -{ - return FIPS_evp_aes_128_cfb1(); -} - -const EVP_CIPHER *EVP_aes_128_cfb128(void) -{ - return FIPS_evp_aes_128_cfb128(); -} - -const EVP_CIPHER *EVP_aes_128_cfb8(void) -{ - return FIPS_evp_aes_128_cfb8(); -} - -const EVP_CIPHER *EVP_aes_128_ctr(void) -{ - return FIPS_evp_aes_128_ctr(); -} - -const EVP_CIPHER *EVP_aes_128_ecb(void) -{ - return FIPS_evp_aes_128_ecb(); -} - -const EVP_CIPHER *EVP_aes_128_gcm(void) -{ - return FIPS_evp_aes_128_gcm(); -} - -const EVP_CIPHER *EVP_aes_128_ofb(void) -{ - return FIPS_evp_aes_128_ofb(); -} - -const EVP_CIPHER *EVP_aes_128_xts(void) -{ - return FIPS_evp_aes_128_xts(); -} - -const EVP_CIPHER *EVP_aes_192_cbc(void) -{ - return FIPS_evp_aes_192_cbc(); -} - -const EVP_CIPHER *EVP_aes_192_ccm(void) -{ - return FIPS_evp_aes_192_ccm(); -} - -const EVP_CIPHER *EVP_aes_192_cfb1(void) -{ - return FIPS_evp_aes_192_cfb1(); -} - -const EVP_CIPHER *EVP_aes_192_cfb128(void) -{ - return FIPS_evp_aes_192_cfb128(); -} - -const EVP_CIPHER *EVP_aes_192_cfb8(void) -{ - return FIPS_evp_aes_192_cfb8(); -} - -const EVP_CIPHER *EVP_aes_192_ctr(void) -{ - return FIPS_evp_aes_192_ctr(); -} - -const EVP_CIPHER *EVP_aes_192_ecb(void) -{ - return FIPS_evp_aes_192_ecb(); -} - -const EVP_CIPHER *EVP_aes_192_gcm(void) -{ - return FIPS_evp_aes_192_gcm(); -} - -const EVP_CIPHER *EVP_aes_192_ofb(void) -{ - return FIPS_evp_aes_192_ofb(); -} - -const EVP_CIPHER *EVP_aes_256_cbc(void) -{ - return FIPS_evp_aes_256_cbc(); -} - -const EVP_CIPHER *EVP_aes_256_ccm(void) -{ - return FIPS_evp_aes_256_ccm(); -} - -const EVP_CIPHER *EVP_aes_256_cfb1(void) -{ - return FIPS_evp_aes_256_cfb1(); -} - -const EVP_CIPHER *EVP_aes_256_cfb128(void) -{ - return FIPS_evp_aes_256_cfb128(); -} - -const EVP_CIPHER *EVP_aes_256_cfb8(void) -{ - return FIPS_evp_aes_256_cfb8(); -} - -const EVP_CIPHER *EVP_aes_256_ctr(void) -{ - return FIPS_evp_aes_256_ctr(); -} - -const EVP_CIPHER *EVP_aes_256_ecb(void) -{ - return FIPS_evp_aes_256_ecb(); -} - -const EVP_CIPHER *EVP_aes_256_gcm(void) -{ - return FIPS_evp_aes_256_gcm(); -} - -const EVP_CIPHER *EVP_aes_256_ofb(void) -{ - return FIPS_evp_aes_256_ofb(); -} - -const EVP_CIPHER *EVP_aes_256_xts(void) -{ - return FIPS_evp_aes_256_xts(); -} - -const EVP_CIPHER *EVP_des_ede(void) -{ - return FIPS_evp_des_ede(); -} - -const EVP_CIPHER *EVP_des_ede3(void) -{ - return FIPS_evp_des_ede3(); -} - -const EVP_CIPHER *EVP_des_ede3_cbc(void) -{ - return FIPS_evp_des_ede3_cbc(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb1(void) -{ - return FIPS_evp_des_ede3_cfb1(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb64(void) -{ - return FIPS_evp_des_ede3_cfb64(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb8(void) -{ - return FIPS_evp_des_ede3_cfb8(); -} - -const EVP_CIPHER *EVP_des_ede3_ecb(void) -{ - return FIPS_evp_des_ede3_ecb(); -} - -const EVP_CIPHER *EVP_des_ede3_ofb(void) -{ - return FIPS_evp_des_ede3_ofb(); -} - -const EVP_CIPHER *EVP_des_ede_cbc(void) -{ - return FIPS_evp_des_ede_cbc(); -} - -const EVP_CIPHER *EVP_des_ede_cfb64(void) -{ - return FIPS_evp_des_ede_cfb64(); -} - -const EVP_CIPHER *EVP_des_ede_ecb(void) -{ - return FIPS_evp_des_ede_ecb(); -} - -const EVP_CIPHER *EVP_des_ede_ofb(void) -{ - return FIPS_evp_des_ede_ofb(); -} - -const EVP_CIPHER *EVP_enc_null(void) -{ - return FIPS_evp_enc_null(); -} - -const EVP_MD *EVP_sha1(void) -{ - return FIPS_evp_sha1(); -} - -const EVP_MD *EVP_sha224(void) -{ - return FIPS_evp_sha224(); -} - -const EVP_MD *EVP_sha256(void) -{ - return FIPS_evp_sha256(); -} - -const EVP_MD *EVP_sha384(void) -{ - return FIPS_evp_sha384(); -} - -const EVP_MD *EVP_sha512(void) -{ - return FIPS_evp_sha512(); -} - -const EVP_MD *EVP_dss(void) -{ - return FIPS_evp_dss(); -} - -const EVP_MD *EVP_dss1(void) -{ - return FIPS_evp_dss1(); -} - -const EVP_MD *EVP_ecdsa(void) -{ - return FIPS_evp_ecdsa(); -} - -#endif diff --git a/drivers/builtin_openssl2/crypto/evp/evp_lib.c b/drivers/builtin_openssl2/crypto/evp/evp_lib.c index b16d623a93..7e0bab90d4 100644 --- a/drivers/builtin_openssl2/crypto/evp/evp_lib.c +++ b/drivers/builtin_openssl2/crypto/evp/evp_lib.c @@ -60,6 +60,10 @@ #include "cryptlib.h" #include #include +#ifdef OPENSSL_FIPS +# include +# include "evp_locl.h" +#endif int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type) { @@ -69,6 +73,11 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type) ret = c->cipher->set_asn1_parameters(c, type); else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) { switch (EVP_CIPHER_CTX_mode(c)) { + case EVP_CIPH_WRAP_MODE: + if (EVP_CIPHER_CTX_nid(c) == NID_id_smime_alg_CMS3DESwrap) + ASN1_TYPE_set(type, V_ASN1_NULL, NULL); + ret = 1; + break; case EVP_CIPH_GCM_MODE: case EVP_CIPH_CCM_MODE: @@ -93,6 +102,10 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type) else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) { switch (EVP_CIPHER_CTX_mode(c)) { + case EVP_CIPH_WRAP_MODE: + ret = 1; + break; + case EVP_CIPH_GCM_MODE: case EVP_CIPH_CCM_MODE: case EVP_CIPH_XTS_MODE: @@ -221,12 +234,22 @@ const EVP_CIPHER *EVP_CIPHER_CTX_cipher(const EVP_CIPHER_CTX *ctx) unsigned long EVP_CIPHER_flags(const EVP_CIPHER *cipher) { +#ifdef OPENSSL_FIPS + const EVP_CIPHER *fcipher; + fcipher = evp_get_fips_cipher(cipher); + if (fcipher && fcipher->flags & EVP_CIPH_FLAG_FIPS) + return cipher->flags | EVP_CIPH_FLAG_FIPS; +#endif return cipher->flags; } unsigned long EVP_CIPHER_CTX_flags(const EVP_CIPHER_CTX *ctx) { +#ifdef OPENSSL_FIPS + return EVP_CIPHER_flags(ctx->cipher); +#else return ctx->cipher->flags; +#endif } void *EVP_CIPHER_CTX_get_app_data(const EVP_CIPHER_CTX *ctx) @@ -293,8 +316,40 @@ int EVP_MD_size(const EVP_MD *md) return md->md_size; } +#ifdef OPENSSL_FIPS + +const EVP_MD *evp_get_fips_md(const EVP_MD *md) +{ + int nid = EVP_MD_type(md); + if (nid == NID_dsa) + return FIPS_evp_dss1(); + else if (nid == NID_dsaWithSHA) + return FIPS_evp_dss(); + else if (nid == NID_ecdsa_with_SHA1) + return FIPS_evp_ecdsa(); + else + return FIPS_get_digestbynid(nid); +} + +const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher) +{ + int nid = cipher->nid; + if (nid == NID_undef) + return FIPS_evp_enc_null(); + else + return FIPS_get_cipherbynid(nid); +} + +#endif + unsigned long EVP_MD_flags(const EVP_MD *md) { +#ifdef OPENSSL_FIPS + const EVP_MD *fmd; + fmd = evp_get_fips_md(md); + if (fmd && fmd->flags & EVP_MD_FLAG_FIPS) + return md->flags | EVP_MD_FLAG_FIPS; +#endif return md->flags; } diff --git a/drivers/builtin_openssl2/crypto/evp/evp_locl.h b/drivers/builtin_openssl2/crypto/evp/evp_locl.h index 980dadab23..2bb709a065 100644 --- a/drivers/builtin_openssl2/crypto/evp/evp_locl.h +++ b/drivers/builtin_openssl2/crypto/evp/evp_locl.h @@ -333,6 +333,9 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, const EVP_CIPHER *c, const EVP_MD *md, int en_de); +const EVP_MD *evp_get_fips_md(const EVP_MD *md); +const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher); + #ifdef OPENSSL_FIPS # ifdef OPENSSL_DOING_MAKEDEPEND diff --git a/drivers/builtin_openssl2/crypto/evp/evp_test.c b/drivers/builtin_openssl2/crypto/evp/evp_test.c deleted file mode 100644 index d06b4ee509..0000000000 --- a/drivers/builtin_openssl2/crypto/evp/evp_test.c +++ /dev/null @@ -1,424 +0,0 @@ -/* Written by Ben Laurie, 2001 */ -/* - * Copyright (c) 2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "../e_os.h" - -#include -#include -#ifndef OPENSSL_NO_ENGINE -# include -#endif -#include -#include - -static void hexdump(FILE *f, const char *title, const unsigned char *s, int l) -{ - int n = 0; - - fprintf(f, "%s", title); - for (; n < l; ++n) { - if ((n % 16) == 0) - fprintf(f, "\n%04x", n); - fprintf(f, " %02x", s[n]); - } - fprintf(f, "\n"); -} - -static int convert(unsigned char *s) -{ - unsigned char *d; - - for (d = s; *s; s += 2, ++d) { - unsigned int n; - - if (!s[1]) { - fprintf(stderr, "Odd number of hex digits!"); - EXIT(4); - } - sscanf((char *)s, "%2x", &n); - *d = (unsigned char)n; - } - return s - d; -} - -static char *sstrsep(char **string, const char *delim) -{ - char isdelim[256]; - char *token = *string; - - if (**string == 0) - return NULL; - - memset(isdelim, 0, 256); - isdelim[0] = 1; - - while (*delim) { - isdelim[(unsigned char)(*delim)] = 1; - delim++; - } - - while (!isdelim[(unsigned char)(**string)]) { - (*string)++; - } - - if (**string) { - **string = 0; - (*string)++; - } - - return token; -} - -static unsigned char *ustrsep(char **p, const char *sep) -{ - return (unsigned char *)sstrsep(p, sep); -} - -static int test1_exit(int ec) -{ - EXIT(ec); - return (0); /* To keep some compilers quiet */ -} - -static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, - const unsigned char *iv, int in, - const unsigned char *plaintext, int pn, - const unsigned char *ciphertext, int cn, int encdec) -{ - EVP_CIPHER_CTX ctx; - unsigned char out[4096]; - int outl, outl2; - - printf("Testing cipher %s%s\n", EVP_CIPHER_name(c), - (encdec == - 1 ? "(encrypt)" : (encdec == - 0 ? "(decrypt)" : "(encrypt/decrypt)"))); - hexdump(stdout, "Key", key, kn); - if (in) - hexdump(stdout, "IV", iv, in); - hexdump(stdout, "Plaintext", plaintext, pn); - hexdump(stdout, "Ciphertext", ciphertext, cn); - - if (kn != c->key_len) { - fprintf(stderr, "Key length doesn't match, got %d expected %lu\n", kn, - (unsigned long)c->key_len); - test1_exit(5); - } - EVP_CIPHER_CTX_init(&ctx); - if (encdec != 0) { - if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, iv)) { - fprintf(stderr, "EncryptInit failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(10); - } - EVP_CIPHER_CTX_set_padding(&ctx, 0); - - if (!EVP_EncryptUpdate(&ctx, out, &outl, plaintext, pn)) { - fprintf(stderr, "Encrypt failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(6); - } - if (!EVP_EncryptFinal_ex(&ctx, out + outl, &outl2)) { - fprintf(stderr, "EncryptFinal failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(7); - } - - if (outl + outl2 != cn) { - fprintf(stderr, "Ciphertext length mismatch got %d expected %d\n", - outl + outl2, cn); - test1_exit(8); - } - - if (memcmp(out, ciphertext, cn)) { - fprintf(stderr, "Ciphertext mismatch\n"); - hexdump(stderr, "Got", out, cn); - hexdump(stderr, "Expected", ciphertext, cn); - test1_exit(9); - } - } - - if (encdec <= 0) { - if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, iv)) { - fprintf(stderr, "DecryptInit failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(11); - } - EVP_CIPHER_CTX_set_padding(&ctx, 0); - - if (!EVP_DecryptUpdate(&ctx, out, &outl, ciphertext, cn)) { - fprintf(stderr, "Decrypt failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(6); - } - if (!EVP_DecryptFinal_ex(&ctx, out + outl, &outl2)) { - fprintf(stderr, "DecryptFinal failed\n"); - ERR_print_errors_fp(stderr); - test1_exit(7); - } - - if (outl + outl2 != pn) { - fprintf(stderr, "Plaintext length mismatch got %d expected %d\n", - outl + outl2, pn); - test1_exit(8); - } - - if (memcmp(out, plaintext, pn)) { - fprintf(stderr, "Plaintext mismatch\n"); - hexdump(stderr, "Got", out, pn); - hexdump(stderr, "Expected", plaintext, pn); - test1_exit(9); - } - } - - EVP_CIPHER_CTX_cleanup(&ctx); - - printf("\n"); -} - -static int test_cipher(const char *cipher, const unsigned char *key, int kn, - const unsigned char *iv, int in, - const unsigned char *plaintext, int pn, - const unsigned char *ciphertext, int cn, int encdec) -{ - const EVP_CIPHER *c; - - c = EVP_get_cipherbyname(cipher); - if (!c) - return 0; - - test1(c, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec); - - return 1; -} - -static int test_digest(const char *digest, - const unsigned char *plaintext, int pn, - const unsigned char *ciphertext, unsigned int cn) -{ - const EVP_MD *d; - EVP_MD_CTX ctx; - unsigned char md[EVP_MAX_MD_SIZE]; - unsigned int mdn; - - d = EVP_get_digestbyname(digest); - if (!d) - return 0; - - printf("Testing digest %s\n", EVP_MD_name(d)); - hexdump(stdout, "Plaintext", plaintext, pn); - hexdump(stdout, "Digest", ciphertext, cn); - - EVP_MD_CTX_init(&ctx); - if (!EVP_DigestInit_ex(&ctx, d, NULL)) { - fprintf(stderr, "DigestInit failed\n"); - ERR_print_errors_fp(stderr); - EXIT(100); - } - if (!EVP_DigestUpdate(&ctx, plaintext, pn)) { - fprintf(stderr, "DigestUpdate failed\n"); - ERR_print_errors_fp(stderr); - EXIT(101); - } - if (!EVP_DigestFinal_ex(&ctx, md, &mdn)) { - fprintf(stderr, "DigestFinal failed\n"); - ERR_print_errors_fp(stderr); - EXIT(101); - } - EVP_MD_CTX_cleanup(&ctx); - - if (mdn != cn) { - fprintf(stderr, "Digest length mismatch, got %d expected %d\n", mdn, - cn); - EXIT(102); - } - - if (memcmp(md, ciphertext, cn)) { - fprintf(stderr, "Digest mismatch\n"); - hexdump(stderr, "Got", md, cn); - hexdump(stderr, "Expected", ciphertext, cn); - EXIT(103); - } - - printf("\n"); - - EVP_MD_CTX_cleanup(&ctx); - - return 1; -} - -int main(int argc, char **argv) -{ - const char *szTestFile; - FILE *f; - - if (argc != 2) { - fprintf(stderr, "%s \n", argv[0]); - EXIT(1); - } - CRYPTO_malloc_debug_init(); - CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - szTestFile = argv[1]; - - f = fopen(szTestFile, "r"); - if (!f) { - perror(szTestFile); - EXIT(2); - } - - /* Load up the software EVP_CIPHER and EVP_MD definitions */ - OpenSSL_add_all_ciphers(); - OpenSSL_add_all_digests(); -#ifndef OPENSSL_NO_ENGINE - /* Load all compiled-in ENGINEs */ - ENGINE_load_builtin_engines(); -#endif -#if 0 - OPENSSL_config(); -#endif -#ifndef OPENSSL_NO_ENGINE - /* - * Register all available ENGINE implementations of ciphers and digests. - * This could perhaps be changed to "ENGINE_register_all_complete()"? - */ - ENGINE_register_all_ciphers(); - ENGINE_register_all_digests(); - /* - * If we add command-line options, this statement should be switchable. - * It'll prevent ENGINEs being ENGINE_init()ialised for cipher/digest use - * if they weren't already initialised. - */ - /* ENGINE_set_cipher_flags(ENGINE_CIPHER_FLAG_NOINIT); */ -#endif - - for (;;) { - char line[4096]; - char *p; - char *cipher; - unsigned char *iv, *key, *plaintext, *ciphertext; - int encdec; - int kn, in, pn, cn; - - if (!fgets((char *)line, sizeof line, f)) - break; - if (line[0] == '#' || line[0] == '\n') - continue; - p = line; - cipher = sstrsep(&p, ":"); - key = ustrsep(&p, ":"); - iv = ustrsep(&p, ":"); - plaintext = ustrsep(&p, ":"); - ciphertext = ustrsep(&p, ":"); - if (p[-1] == '\n') { - p[-1] = '\0'; - encdec = -1; - } else { - encdec = atoi(sstrsep(&p, "\n")); - } - - kn = convert(key); - in = convert(iv); - pn = convert(plaintext); - cn = convert(ciphertext); - - if (!test_cipher - (cipher, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec) - && !test_digest(cipher, plaintext, pn, ciphertext, cn)) { -#ifdef OPENSSL_NO_AES - if (strstr(cipher, "AES") == cipher) { - fprintf(stdout, "Cipher disabled, skipping %s\n", cipher); - continue; - } -#endif -#ifdef OPENSSL_NO_DES - if (strstr(cipher, "DES") == cipher) { - fprintf(stdout, "Cipher disabled, skipping %s\n", cipher); - continue; - } -#endif -#ifdef OPENSSL_NO_RC4 - if (strstr(cipher, "RC4") == cipher) { - fprintf(stdout, "Cipher disabled, skipping %s\n", cipher); - continue; - } -#endif -#ifdef OPENSSL_NO_CAMELLIA - if (strstr(cipher, "CAMELLIA") == cipher) { - fprintf(stdout, "Cipher disabled, skipping %s\n", cipher); - continue; - } -#endif -#ifdef OPENSSL_NO_SEED - if (strstr(cipher, "SEED") == cipher) { - fprintf(stdout, "Cipher disabled, skipping %s\n", cipher); - continue; - } -#endif - fprintf(stderr, "Can't find %s\n", cipher); - EXIT(3); - } - } - fclose(f); - -#ifndef OPENSSL_NO_ENGINE - ENGINE_cleanup(); -#endif - EVP_cleanup(); - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - ERR_free_strings(); - CRYPTO_mem_leaks_fp(stderr); - - return 0; -} diff --git a/drivers/builtin_openssl2/crypto/evp/evptests.txt b/drivers/builtin_openssl2/crypto/evp/evptests.txt index c273707c14..4e9958b3b5 100644 --- a/drivers/builtin_openssl2/crypto/evp/evptests.txt +++ b/drivers/builtin_openssl2/crypto/evp/evptests.txt @@ -1,4 +1,5 @@ #cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt) +#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt) #digest:::input:output # SHA(1) tests (from shatest.c) @@ -332,3 +333,69 @@ SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBA SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1 SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1 SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1 + +# AES CCM 256 bit key +aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf + +# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb +aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050 +aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435 +aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9 +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919 +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2 +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a +# local add-ons, primarily streaming ghash tests +# 128 bytes aad +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492 +# 48 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4 +# 80 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163 +# 128 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1 +# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF +aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017 +# 80 bytes plaintext, submitted by Intel +aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d + +# AES XTS test vectors from IEEE Std 1619-2007 +aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e +aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0 +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d017b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e170881c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd + +aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcfecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151 +aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f6650b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb0872d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803 +aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb469778317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826 +aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1ad03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220 +aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9 + +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac +aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0aeda70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3 +# AES wrap tests from RFC3394 +id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21 diff --git a/drivers/builtin_openssl2/crypto/evp/m_dss.c b/drivers/builtin_openssl2/crypto/evp/m_dss.c index f22ba522c2..147844862d 100644 --- a/drivers/builtin_openssl2/crypto/evp/m_dss.c +++ b/drivers/builtin_openssl2/crypto/evp/m_dss.c @@ -66,7 +66,6 @@ #endif #ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_FIPS static int init(EVP_MD_CTX *ctx) { @@ -102,5 +101,4 @@ const EVP_MD *EVP_dss(void) { return (&dsa_md); } -# endif #endif diff --git a/drivers/builtin_openssl2/crypto/evp/m_dss1.c b/drivers/builtin_openssl2/crypto/evp/m_dss1.c index 976148e436..e36fabff70 100644 --- a/drivers/builtin_openssl2/crypto/evp/m_dss1.c +++ b/drivers/builtin_openssl2/crypto/evp/m_dss1.c @@ -68,8 +68,6 @@ # include # endif -# ifndef OPENSSL_FIPS - static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); @@ -104,5 +102,4 @@ const EVP_MD *EVP_dss1(void) { return (&dss1_md); } -# endif #endif diff --git a/drivers/builtin_openssl2/crypto/evp/m_ecdsa.c b/drivers/builtin_openssl2/crypto/evp/m_ecdsa.c index d11a13a6d7..803d314955 100644 --- a/drivers/builtin_openssl2/crypto/evp/m_ecdsa.c +++ b/drivers/builtin_openssl2/crypto/evp/m_ecdsa.c @@ -116,7 +116,6 @@ #include #ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_FIPS static int init(EVP_MD_CTX *ctx) { @@ -152,5 +151,4 @@ const EVP_MD *EVP_ecdsa(void) { return (&ecdsa_md); } -# endif #endif diff --git a/drivers/builtin_openssl2/crypto/evp/m_sha1.c b/drivers/builtin_openssl2/crypto/evp/m_sha1.c index 0cc63555ad..a74e6b7794 100644 --- a/drivers/builtin_openssl2/crypto/evp/m_sha1.c +++ b/drivers/builtin_openssl2/crypto/evp/m_sha1.c @@ -59,16 +59,14 @@ #include #include "cryptlib.h" -#ifndef OPENSSL_FIPS +#ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_NO_SHA - -# include -# include -# include -# ifndef OPENSSL_NO_RSA -# include -# endif +# include +# include +# include +# ifndef OPENSSL_NO_RSA +# include +# endif static int init(EVP_MD_CTX *ctx) { @@ -104,9 +102,9 @@ const EVP_MD *EVP_sha1(void) { return (&sha1_md); } -# endif +#endif -# ifndef OPENSSL_NO_SHA256 +#ifndef OPENSSL_NO_SHA256 static int init224(EVP_MD_CTX *ctx) { return SHA224_Init(ctx->md_data); @@ -171,9 +169,9 @@ const EVP_MD *EVP_sha256(void) { return (&sha256_md); } -# endif /* ifndef OPENSSL_NO_SHA256 */ +#endif /* ifndef OPENSSL_NO_SHA256 */ -# ifndef OPENSSL_NO_SHA512 +#ifndef OPENSSL_NO_SHA512 static int init384(EVP_MD_CTX *ctx) { return SHA384_Init(ctx->md_data); @@ -234,6 +232,4 @@ const EVP_MD *EVP_sha512(void) { return (&sha512_md); } -# endif /* ifndef OPENSSL_NO_SHA512 */ - -#endif +#endif /* ifndef OPENSSL_NO_SHA512 */ diff --git a/drivers/builtin_openssl2/crypto/evp/m_sigver.c b/drivers/builtin_openssl2/crypto/evp/m_sigver.c index e153a1833e..4492d207f2 100644 --- a/drivers/builtin_openssl2/crypto/evp/m_sigver.c +++ b/drivers/builtin_openssl2/crypto/evp/m_sigver.c @@ -73,15 +73,18 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, if (ctx->pctx == NULL) return 0; - if (type == NULL) { - int def_nid; - if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0) - type = EVP_get_digestbynid(def_nid); - } + if (!(ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM)) { - if (type == NULL) { - EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST); - return 0; + if (type == NULL) { + int def_nid; + if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0) + type = EVP_get_digestbynid(def_nid); + } + + if (type == NULL) { + EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST); + return 0; + } } if (ver) { @@ -103,6 +106,8 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, return 0; if (pctx) *pctx = ctx->pctx; + if (ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM) + return 1; if (!EVP_DigestInit_ex(ctx, type, e)) return 0; return 1; @@ -124,7 +129,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret, size_t *siglen) { int sctx, r = 0; - if (ctx->pctx->pmeth->signctx) + EVP_PKEY_CTX *pctx = ctx->pctx; + if (pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM) { + EVP_PKEY_CTX *dctx; + if (!sigret) + return pctx->pmeth->signctx(pctx, sigret, siglen, ctx); + dctx = EVP_PKEY_CTX_dup(ctx->pctx); + if (!dctx) + return 0; + r = dctx->pmeth->signctx(dctx, sigret, siglen, ctx); + EVP_PKEY_CTX_free(dctx); + return r; + } + if (pctx->pmeth->signctx) sctx = 1; else sctx = 0; @@ -147,20 +164,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret, return 0; } else { if (sctx) { - if (ctx->pctx->pmeth->signctx(ctx->pctx, sigret, siglen, ctx) <= - 0) + if (pctx->pmeth->signctx(pctx, sigret, siglen, ctx) <= 0) return 0; } else { int s = EVP_MD_size(ctx->digest); - if (s < 0 - || EVP_PKEY_sign(ctx->pctx, sigret, siglen, NULL, s) <= 0) + if (s < 0 || EVP_PKEY_sign(pctx, sigret, siglen, NULL, s) <= 0) return 0; } } return 1; } -int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen) +int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sig, + size_t siglen) { EVP_MD_CTX tmp_ctx; unsigned char md[EVP_MAX_MD_SIZE]; diff --git a/drivers/builtin_openssl2/crypto/evp/p_lib.c b/drivers/builtin_openssl2/crypto/evp/p_lib.c index 375f561258..c0171244d5 100644 --- a/drivers/builtin_openssl2/crypto/evp/p_lib.c +++ b/drivers/builtin_openssl2/crypto/evp/p_lib.c @@ -337,7 +337,7 @@ int EVP_PKEY_set1_DH(EVP_PKEY *pkey, DH *key) DH *EVP_PKEY_get1_DH(EVP_PKEY *pkey) { - if (pkey->type != EVP_PKEY_DH) { + if (pkey->type != EVP_PKEY_DH && pkey->type != EVP_PKEY_DHX) { EVPerr(EVP_F_EVP_PKEY_GET1_DH, EVP_R_EXPECTING_A_DH_KEY); return NULL; } diff --git a/drivers/builtin_openssl2/crypto/evp/pmeth_lib.c b/drivers/builtin_openssl2/crypto/evp/pmeth_lib.c index ae8bccb0c6..9f81d10021 100644 --- a/drivers/builtin_openssl2/crypto/evp/pmeth_lib.c +++ b/drivers/builtin_openssl2/crypto/evp/pmeth_lib.c @@ -75,6 +75,7 @@ STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL; extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth; extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth, cmac_pkey_meth; +extern const EVP_PKEY_METHOD dhx_pkey_meth; static const EVP_PKEY_METHOD *standard_methods[] = { #ifndef OPENSSL_NO_RSA @@ -90,7 +91,10 @@ static const EVP_PKEY_METHOD *standard_methods[] = { &ec_pkey_meth, #endif &hmac_pkey_meth, - &cmac_pkey_meth + &cmac_pkey_meth, +#ifndef OPENSSL_NO_DH + &dhx_pkey_meth +#endif }; DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *, diff --git a/drivers/builtin_openssl2/crypto/hmac/hm_ameth.c b/drivers/builtin_openssl2/crypto/hmac/hm_ameth.c index cf147437ea..944c6c857b 100644 --- a/drivers/builtin_openssl2/crypto/hmac/hm_ameth.c +++ b/drivers/builtin_openssl2/crypto/hmac/hm_ameth.c @@ -87,7 +87,7 @@ static int hmac_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) { switch (op) { case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 1; default: diff --git a/drivers/builtin_openssl2/crypto/hmac/hmac.c b/drivers/builtin_openssl2/crypto/hmac/hmac.c index 33d88be117..51a0a3efcd 100644 --- a/drivers/builtin_openssl2/crypto/hmac/hmac.c +++ b/drivers/builtin_openssl2/crypto/hmac/hmac.c @@ -72,6 +72,16 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, unsigned char pad[HMAC_MAX_MD_CBLOCK]; #ifdef OPENSSL_FIPS + /* If FIPS mode switch to approved implementation if possible */ + if (FIPS_mode()) { + const EVP_MD *fipsmd; + if (md) { + fipsmd = FIPS_get_digestbynid(EVP_MD_type(md)); + if (fipsmd) + md = fipsmd; + } + } + if (FIPS_mode()) { /* If we have an ENGINE need to allow non FIPS */ if ((impl || ctx->i_ctx.engine) diff --git a/drivers/builtin_openssl2/crypto/hmac/hmactest.c b/drivers/builtin_openssl2/crypto/hmac/hmactest.c deleted file mode 100644 index 271d0ebf26..0000000000 --- a/drivers/builtin_openssl2/crypto/hmac/hmactest.c +++ /dev/null @@ -1,332 +0,0 @@ -/* crypto/hmac/hmactest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_HMAC -int main(int argc, char *argv[]) -{ - printf("No HMAC support\n"); - return (0); -} -#else -# include -# ifndef OPENSSL_NO_MD5 -# include -# endif - -# ifdef CHARSET_EBCDIC -# include -# endif - -# ifndef OPENSSL_NO_MD5 -static struct test_st { - unsigned char key[16]; - int key_len; - unsigned char data[64]; - int data_len; - unsigned char *digest; -} test[8] = { - { - "", 0, "More text test vectors to stuff up EBCDIC machines :-)", 54, - (unsigned char *)"e9139d1e6ee064ef8cf514fc7dc83e86", - }, - { - { - 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, - 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, - }, 16, "Hi There", 8, - (unsigned char *)"9294727a3638bb1c13f48ef8158bfc9d", - }, - { - "Jefe", 4, "what do ya want for nothing?", 28, - (unsigned char *)"750c783e6ab0b503eaa86e310a5db738", - }, - { - { - 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, - }, 16, { - 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, - 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, - 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, - 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, - 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd - }, 50, (unsigned char *)"56be34521d144c88dbb8c733f0e8b3f6", - }, - { - "", 0, "My test data", 12, - (unsigned char *)"61afdecb95429ef494d61fdee15990cabf0826fc" - }, - { - "", 0, "My test data", 12, - (unsigned char *)"2274b195d90ce8e03406f4b526a47e0787a88a65479938f1a5baa3ce0f079776" - }, - { - "123456", 6, "My test data", 12, - (unsigned char *)"bab53058ae861a7f191abe2d0145cbb123776a6369ee3f9d79ce455667e411dd" - }, - { - "12345", 5, "My test data again", 12, - (unsigned char *)"7dbe8c764c068e3bcd6e6b0fbcd5e6fc197b15bb" - } -}; -# endif - -static char *pt(unsigned char *md, unsigned int len); - -int main(int argc, char *argv[]) -{ -# ifndef OPENSSL_NO_MD5 - int i; - char *p; -# endif - int err = 0; - HMAC_CTX ctx, ctx2; - unsigned char buf[EVP_MAX_MD_SIZE]; - unsigned int len; - -# ifdef OPENSSL_NO_MD5 - printf("test skipped: MD5 disabled\n"); -# else - -# ifdef CHARSET_EBCDIC - ebcdic2ascii(test[0].data, test[0].data, test[0].data_len); - ebcdic2ascii(test[1].data, test[1].data, test[1].data_len); - ebcdic2ascii(test[2].key, test[2].key, test[2].key_len); - ebcdic2ascii(test[2].data, test[2].data, test[2].data_len); -# endif - - for (i = 0; i < 4; i++) { - p = pt(HMAC(EVP_md5(), - test[i].key, test[i].key_len, - test[i].data, test[i].data_len, NULL, NULL), - MD5_DIGEST_LENGTH); - - if (strcmp(p, (char *)test[i].digest) != 0) { - printf("Error calculating HMAC on %d entry'\n", i); - printf("got %s instead of %s\n", p, test[i].digest); - err++; - } else - printf("test %d ok\n", i); - } -# endif /* OPENSSL_NO_MD5 */ - -/* test4 */ - HMAC_CTX_init(&ctx); - if (HMAC_Init_ex(&ctx, NULL, 0, NULL, NULL)) { - printf("Should fail to initialise HMAC with empty MD and key (test 4)\n"); - err++; - goto test5; - } - if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { - printf("Should fail HMAC_Update with ctx not set up (test 4)\n"); - err++; - goto test5; - } - if (HMAC_Init_ex(&ctx, NULL, 0, EVP_sha1(), NULL)) { - printf("Should fail to initialise HMAC with empty key (test 4)\n"); - err++; - goto test5; - } - if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { - printf("Should fail HMAC_Update with ctx not set up (test 4)\n"); - err++; - goto test5; - } - printf("test 4 ok\n"); -test5: - HMAC_CTX_init(&ctx); - if (HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, NULL, NULL)) { - printf("Should fail to initialise HMAC with empty MD (test 5)\n"); - err++; - goto test6; - } - if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { - printf("Should fail HMAC_Update with ctx not set up (test 5)\n"); - err++; - goto test6; - } - if (HMAC_Init_ex(&ctx, test[4].key, -1, EVP_sha1(), NULL)) { - printf("Should fail to initialise HMAC with invalid key len(test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, EVP_sha1(), NULL)) { - printf("Failed to initialise HMAC (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Update(&ctx, test[4].data, test[4].data_len)) { - printf("Error updating HMAC with data (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Final(&ctx, buf, &len)) { - printf("Error finalising data (test 5)\n"); - err++; - goto test6; - } - p = pt(buf, len); - if (strcmp(p, (char *)test[4].digest) != 0) { - printf("Error calculating interim HMAC on test 5\n"); - printf("got %s instead of %s\n", p, test[4].digest); - err++; - goto test6; - } - if (HMAC_Init_ex(&ctx, NULL, 0, EVP_sha256(), NULL)) { - printf("Should disallow changing MD without a new key (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, EVP_sha256(), NULL)) { - printf("Failed to reinitialise HMAC (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Update(&ctx, test[5].data, test[5].data_len)) { - printf("Error updating HMAC with data (sha256) (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Final(&ctx, buf, &len)) { - printf("Error finalising data (sha256) (test 5)\n"); - err++; - goto test6; - } - p = pt(buf, len); - if (strcmp(p, (char *)test[5].digest) != 0) { - printf("Error calculating 2nd interim HMAC on test 5\n"); - printf("got %s instead of %s\n", p, test[5].digest); - err++; - goto test6; - } - if (!HMAC_Init_ex(&ctx, test[6].key, test[6].key_len, NULL, NULL)) { - printf("Failed to reinitialise HMAC with key (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Update(&ctx, test[6].data, test[6].data_len)) { - printf("Error updating HMAC with data (new key) (test 5)\n"); - err++; - goto test6; - } - if (!HMAC_Final(&ctx, buf, &len)) { - printf("Error finalising data (new key) (test 5)\n"); - err++; - goto test6; - } - p = pt(buf, len); - if (strcmp(p, (char *)test[6].digest) != 0) { - printf("error calculating HMAC on test 5\n"); - printf("got %s instead of %s\n", p, test[6].digest); - err++; - } else { - printf("test 5 ok\n"); - } -test6: - HMAC_CTX_init(&ctx); - if (!HMAC_Init_ex(&ctx, test[7].key, test[7].key_len, EVP_sha1(), NULL)) { - printf("Failed to initialise HMAC (test 6)\n"); - err++; - goto end; - } - if (!HMAC_Update(&ctx, test[7].data, test[7].data_len)) { - printf("Error updating HMAC with data (test 6)\n"); - err++; - goto end; - } - if (!HMAC_CTX_copy(&ctx2, &ctx)) { - printf("Failed to copy HMAC_CTX (test 6)\n"); - err++; - goto end; - } - if (!HMAC_Final(&ctx2, buf, &len)) { - printf("Error finalising data (test 6)\n"); - err++; - goto end; - } - p = pt(buf, len); - if (strcmp(p, (char *)test[7].digest) != 0) { - printf("Error calculating HMAC on test 6\n"); - printf("got %s instead of %s\n", p, test[7].digest); - err++; - } else { - printf("test 6 ok\n"); - } -end: - EXIT(err); - return (0); -} - -# ifndef OPENSSL_NO_MD5 -static char *pt(unsigned char *md, unsigned int len) -{ - unsigned int i; - static char buf[80]; - - for (i = 0; i < len; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -# endif -#endif diff --git a/drivers/builtin_openssl2/crypto/ia64cpuid.S b/drivers/builtin_openssl2/crypto/ia64cpuid.S deleted file mode 100644 index 7832b9b640..0000000000 --- a/drivers/builtin_openssl2/crypto/ia64cpuid.S +++ /dev/null @@ -1,167 +0,0 @@ -// Works on all IA-64 platforms: Linux, HP-UX, Win64i... -// On Win64i compile with ias.exe. -.text - -.global OPENSSL_cpuid_setup# -.proc OPENSSL_cpuid_setup# -OPENSSL_cpuid_setup: -{ .mib; br.ret.sptk.many b0 };; -.endp OPENSSL_cpuid_setup# - -.global OPENSSL_rdtsc# -.proc OPENSSL_rdtsc# -OPENSSL_rdtsc: -{ .mib; mov r8=ar.itc - br.ret.sptk.many b0 };; -.endp OPENSSL_rdtsc# - -.global OPENSSL_atomic_add# -.proc OPENSSL_atomic_add# -.align 32 -OPENSSL_atomic_add: -{ .mii; ld4 r2=[r32] - nop.i 0 - nop.i 0 };; -.Lspin: -{ .mii; mov ar.ccv=r2 - add r8=r2,r33 - mov r3=r2 };; -{ .mmi; mf;; - cmpxchg4.acq r2=[r32],r8,ar.ccv - nop.i 0 };; -{ .mib; cmp.ne p6,p0=r2,r3 - nop.i 0 -(p6) br.dpnt .Lspin };; -{ .mib; nop.m 0 - sxt4 r8=r8 - br.ret.sptk.many b0 };; -.endp OPENSSL_atomic_add# - -// Returns a structure comprising pointer to the top of stack of -// the caller and pointer beyond backing storage for the current -// register frame. The latter is required, because it might be -// insufficient to wipe backing storage for the current frame -// (as this procedure does), one might have to go further, toward -// higher addresses to reach for whole "retroactively" saved -// context... -.global OPENSSL_wipe_cpu# -.proc OPENSSL_wipe_cpu# -.align 32 -OPENSSL_wipe_cpu: - .prologue - .fframe 0 - .save ar.pfs,r2 - .save ar.lc,r3 -{ .mib; alloc r2=ar.pfs,0,96,0,96 - mov r3=ar.lc - brp.loop.imp .L_wipe_top,.L_wipe_end-16 - };; -{ .mii; mov r9=ar.bsp - mov r8=pr - mov ar.lc=96 };; - .body -{ .mii; add r9=96*8-8,r9 - mov ar.ec=1 };; - -// One can sweep double as fast, but then we can't quarantee -// that backing storage is wiped... -.L_wipe_top: -{ .mfi; st8 [r9]=r0,-8 - mov f127=f0 - mov r127=r0 } -{ .mfb; nop.m 0 - nop.f 0 - br.ctop.sptk .L_wipe_top };; -.L_wipe_end: - -{ .mfi; mov r11=r0 - mov f6=f0 - mov r14=r0 } -{ .mfi; mov r15=r0 - mov f7=f0 - mov r16=r0 } -{ .mfi; mov r17=r0 - mov f8=f0 - mov r18=r0 } -{ .mfi; mov r19=r0 - mov f9=f0 - mov r20=r0 } -{ .mfi; mov r21=r0 - mov f10=f0 - mov r22=r0 } -{ .mfi; mov r23=r0 - mov f11=f0 - mov r24=r0 } -{ .mfi; mov r25=r0 - mov f12=f0 - mov r26=r0 } -{ .mfi; mov r27=r0 - mov f13=f0 - mov r28=r0 } -{ .mfi; mov r29=r0 - mov f14=f0 - mov r30=r0 } -{ .mfi; mov r31=r0 - mov f15=f0 - nop.i 0 } -{ .mfi; mov f16=f0 } -{ .mfi; mov f17=f0 } -{ .mfi; mov f18=f0 } -{ .mfi; mov f19=f0 } -{ .mfi; mov f20=f0 } -{ .mfi; mov f21=f0 } -{ .mfi; mov f22=f0 } -{ .mfi; mov f23=f0 } -{ .mfi; mov f24=f0 } -{ .mfi; mov f25=f0 } -{ .mfi; mov f26=f0 } -{ .mfi; mov f27=f0 } -{ .mfi; mov f28=f0 } -{ .mfi; mov f29=f0 } -{ .mfi; mov f30=f0 } -{ .mfi; add r9=96*8+8,r9 - mov f31=f0 - mov pr=r8,0x1ffff } -{ .mib; mov r8=sp - mov ar.lc=r3 - br.ret.sptk b0 };; -.endp OPENSSL_wipe_cpu# - -.global OPENSSL_cleanse# -.proc OPENSSL_cleanse# -OPENSSL_cleanse: -{ .mib; cmp.eq p6,p0=0,r33 // len==0 -#if defined(_HPUX_SOURCE) && !defined(_LP64) - addp4 r32=0,r32 -#endif -(p6) br.ret.spnt b0 };; -{ .mib; and r2=7,r32 - cmp.leu p6,p0=15,r33 // len>=15 -(p6) br.cond.dptk .Lot };; - -.Little: -{ .mib; st1 [r32]=r0,1 - cmp.ltu p6,p7=1,r33 } // len>1 -{ .mbb; add r33=-1,r33 // len-- -(p6) br.cond.dptk .Little -(p7) br.ret.sptk.many b0 };; - -.Lot: -{ .mib; cmp.eq p6,p0=0,r2 -(p6) br.cond.dptk .Laligned };; -{ .mmi; st1 [r32]=r0,1;; - and r2=7,r32 } -{ .mib; add r33=-1,r33 - br .Lot };; - -.Laligned: -{ .mmi; st8 [r32]=r0,8 - and r2=-8,r33 // len&~7 - add r33=-8,r33 };; // len-=8 -{ .mib; cmp.ltu p6,p0=8,r2 // ((len+8)&~7)>8 -(p6) br.cond.dptk .Laligned };; - -{ .mbb; cmp.eq p6,p7=r0,r33 -(p7) br.cond.dpnt .Little -(p6) br.ret.sptk.many b0 };; -.endp OPENSSL_cleanse# diff --git a/drivers/builtin_openssl2/crypto/idea/ideatest.c b/drivers/builtin_openssl2/crypto/idea/ideatest.c deleted file mode 100644 index a967dd58a7..0000000000 --- a/drivers/builtin_openssl2/crypto/idea/ideatest.c +++ /dev/null @@ -1,232 +0,0 @@ -/* crypto/idea/ideatest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_IDEA -int main(int argc, char *argv[]) -{ - printf("No IDEA support\n"); - return (0); -} -#else -# include - -unsigned char k[16] = { - 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, - 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 -}; - -unsigned char in[8] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 }; -unsigned char c[8] = { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 }; - -unsigned char out[80]; - -char *text = "Hello to all people out there"; - -static unsigned char cfb_key[16] = { - 0xe1, 0xf0, 0xc3, 0xd2, 0xa5, 0xb4, 0x87, 0x96, - 0x69, 0x78, 0x4b, 0x5a, 0x2d, 0x3c, 0x0f, 0x1e, -}; -static unsigned char cfb_iv[80] = - { 0x34, 0x12, 0x78, 0x56, 0xab, 0x90, 0xef, 0xcd }; -static unsigned char cfb_buf1[40], cfb_buf2[40], cfb_tmp[8]; -# define CFB_TEST_SIZE 24 -static unsigned char plain[CFB_TEST_SIZE] = { - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, - 0x69, 0x6d, 0x65, 0x20, 0x66, 0x6f, - 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 -}; - -static unsigned char cfb_cipher64[CFB_TEST_SIZE] = { - 0x59, 0xD8, 0xE2, 0x65, 0x00, 0x58, 0x6C, 0x3F, - 0x2C, 0x17, 0x25, 0xD0, 0x1A, 0x38, 0xB7, 0x2A, - 0x39, 0x61, 0x37, 0xDC, 0x79, 0xFB, 0x9F, 0x45 -/*- 0xF9,0x78,0x32,0xB5,0x42,0x1A,0x6B,0x38, - 0x9A,0x44,0xD6,0x04,0x19,0x43,0xC4,0xD9, - 0x3D,0x1E,0xAE,0x47,0xFC,0xCF,0x29,0x0B,*/ -}; - -static int cfb64_test(unsigned char *cfb_cipher); -static char *pt(unsigned char *p); -int main(int argc, char *argv[]) -{ - int i, err = 0; - IDEA_KEY_SCHEDULE key, dkey; - unsigned char iv[8]; - - idea_set_encrypt_key(k, &key); - idea_ecb_encrypt(in, out, &key); - if (memcmp(out, c, 8) != 0) { - printf("ecb idea error encrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", c[i]); - err = 20; - printf("\n"); - } - - idea_set_decrypt_key(&key, &dkey); - idea_ecb_encrypt(c, out, &dkey); - if (memcmp(out, in, 8) != 0) { - printf("ecb idea error decrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", out[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", in[i]); - printf("\n"); - err = 3; - } - - if (err == 0) - printf("ecb idea ok\n"); - - memcpy(iv, k, 8); - idea_cbc_encrypt((unsigned char *)text, out, strlen(text) + 1, &key, iv, - 1); - memcpy(iv, k, 8); - idea_cbc_encrypt(out, out, 8, &dkey, iv, 0); - idea_cbc_encrypt(&(out[8]), &(out[8]), strlen(text) + 1 - 8, &dkey, iv, - 0); - if (memcmp(text, out, strlen(text) + 1) != 0) { - printf("cbc idea bad\n"); - err = 4; - } else - printf("cbc idea ok\n"); - - printf("cfb64 idea "); - if (cfb64_test(cfb_cipher64)) { - printf("bad\n"); - err = 5; - } else - printf("ok\n"); - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EXIT(err); - return (err); -} - -static int cfb64_test(unsigned char *cfb_cipher) -{ - IDEA_KEY_SCHEDULE eks, dks; - int err = 0, i, n; - - idea_set_encrypt_key(cfb_key, &eks); - idea_set_decrypt_key(&eks, &dks); - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(plain, cfb_buf1, (long)12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - idea_cfb64_encrypt(&(plain[12]), &(cfb_buf1[12]), - (long)CFB_TEST_SIZE - 12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb64_encrypt encrypt error\n"); - for (i = 0; i < CFB_TEST_SIZE; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(cfb_buf1, cfb_buf2, (long)13, &eks, - cfb_tmp, &n, IDEA_DECRYPT); - idea_cfb64_encrypt(&(cfb_buf1[13]), &(cfb_buf2[13]), - (long)CFB_TEST_SIZE - 13, &eks, - cfb_tmp, &n, IDEA_DECRYPT); - if (memcmp(plain, cfb_buf2, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf2[i]))); - } - return (err); -} - -static char *pt(unsigned char *p) -{ - static char bufs[10][20]; - static int bnum = 0; - char *ret; - int i; - static char *f = "0123456789ABCDEF"; - - ret = &(bufs[bnum++][0]); - bnum %= 10; - for (i = 0; i < 8; i++) { - ret[i * 2] = f[(p[i] >> 4) & 0xf]; - ret[i * 2 + 1] = f[p[i] & 0xf]; - } - ret[16] = '\0'; - return (ret); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/install-crypto.com b/drivers/builtin_openssl2/crypto/install-crypto.com index d19081d4df..af1d75b526 100755 --- a/drivers/builtin_openssl2/crypto/install-crypto.com +++ b/drivers/builtin_openssl2/crypto/install-crypto.com @@ -81,7 +81,7 @@ $ sdirs := , - buffer, bio, stack, lhash, rand, err, - evp, asn1, pem, x509, x509v3, conf, txt_db, pkcs7, pkcs12, comp, ocsp, - ui, krb5, - - cms, pqueue, ts, jpake, srp, store, cmac + store, cms, pqueue, ts, jpake $! $ exheader_ := crypto.h, opensslv.h, ebcdic.h, symhacks.h, ossl_typ.h $ exheader_'archd' := opensslconf.h @@ -139,9 +139,6 @@ $ exheader_cms := cms.h $ exheader_pqueue := pqueue.h $ exheader_ts := ts.h $ exheader_jpake := jpake.h -$ exheader_srp := srp.h -$ exheader_store := store.h -$ exheader_cmac := cmac.h $ libs := ssl_libcrypto $! $ exe_dir := [-.'archd'.exe.crypto] diff --git a/drivers/builtin_openssl2/crypto/jpake/jpake.c b/drivers/builtin_openssl2/crypto/jpake/jpake.c index ac853d4914..ebc0975575 100644 --- a/drivers/builtin_openssl2/crypto/jpake/jpake.c +++ b/drivers/builtin_openssl2/crypto/jpake/jpake.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * In the definition, (xa, xb, xc, xd) are Alice's (x1, x2, x3, x4) or diff --git a/drivers/builtin_openssl2/crypto/jpake/jpaketest.c b/drivers/builtin_openssl2/crypto/jpake/jpaketest.c deleted file mode 100644 index ef9e54bdb3..0000000000 --- a/drivers/builtin_openssl2/crypto/jpake/jpaketest.c +++ /dev/null @@ -1,185 +0,0 @@ -#include - -#ifdef OPENSSL_NO_JPAKE - -# include - -int main(int argc, char *argv[]) -{ - printf("No J-PAKE support\n"); - return (0); -} - -#else - -# include -# include - -static void showbn(const char *name, const BIGNUM *bn) -{ - fputs(name, stdout); - fputs(" = ", stdout); - BN_print_fp(stdout, bn); - putc('\n', stdout); -} - -static int run_jpake(JPAKE_CTX *alice, JPAKE_CTX *bob) -{ - JPAKE_STEP1 alice_s1; - JPAKE_STEP1 bob_s1; - JPAKE_STEP2 alice_s2; - JPAKE_STEP2 bob_s2; - JPAKE_STEP3A alice_s3a; - JPAKE_STEP3B bob_s3b; - - /* Alice -> Bob: step 1 */ - puts("A->B s1"); - JPAKE_STEP1_init(&alice_s1); - JPAKE_STEP1_generate(&alice_s1, alice); - if (!JPAKE_STEP1_process(bob, &alice_s1)) { - printf("Bob fails to process Alice's step 1\n"); - ERR_print_errors_fp(stdout); - return 1; - } - JPAKE_STEP1_release(&alice_s1); - - /* Bob -> Alice: step 1 */ - puts("B->A s1"); - JPAKE_STEP1_init(&bob_s1); - JPAKE_STEP1_generate(&bob_s1, bob); - if (!JPAKE_STEP1_process(alice, &bob_s1)) { - printf("Alice fails to process Bob's step 1\n"); - ERR_print_errors_fp(stdout); - return 2; - } - JPAKE_STEP1_release(&bob_s1); - - /* Alice -> Bob: step 2 */ - puts("A->B s2"); - JPAKE_STEP2_init(&alice_s2); - JPAKE_STEP2_generate(&alice_s2, alice); - if (!JPAKE_STEP2_process(bob, &alice_s2)) { - printf("Bob fails to process Alice's step 2\n"); - ERR_print_errors_fp(stdout); - return 3; - } - JPAKE_STEP2_release(&alice_s2); - - /* Bob -> Alice: step 2 */ - puts("B->A s2"); - JPAKE_STEP2_init(&bob_s2); - JPAKE_STEP2_generate(&bob_s2, bob); - if (!JPAKE_STEP2_process(alice, &bob_s2)) { - printf("Alice fails to process Bob's step 2\n"); - ERR_print_errors_fp(stdout); - return 4; - } - JPAKE_STEP2_release(&bob_s2); - - showbn("Alice's key", JPAKE_get_shared_key(alice)); - showbn("Bob's key ", JPAKE_get_shared_key(bob)); - - /* Alice -> Bob: step 3a */ - puts("A->B s3a"); - JPAKE_STEP3A_init(&alice_s3a); - JPAKE_STEP3A_generate(&alice_s3a, alice); - if (!JPAKE_STEP3A_process(bob, &alice_s3a)) { - printf("Bob fails to process Alice's step 3a\n"); - ERR_print_errors_fp(stdout); - return 5; - } - JPAKE_STEP3A_release(&alice_s3a); - - /* Bob -> Alice: step 3b */ - puts("B->A s3b"); - JPAKE_STEP3B_init(&bob_s3b); - JPAKE_STEP3B_generate(&bob_s3b, bob); - if (!JPAKE_STEP3B_process(alice, &bob_s3b)) { - printf("Alice fails to process Bob's step 3b\n"); - ERR_print_errors_fp(stdout); - return 6; - } - JPAKE_STEP3B_release(&bob_s3b); - - return 0; -} - -int main(int argc, char **argv) -{ - JPAKE_CTX *alice; - JPAKE_CTX *bob; - BIGNUM *p = NULL; - BIGNUM *g = NULL; - BIGNUM *q = NULL; - BIGNUM *secret = BN_new(); - BIO *bio_err; - - bio_err = BIO_new_fp(stderr, BIO_NOCLOSE); - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - ERR_load_crypto_strings(); - - /*- - BN_hex2bn(&p, "fd7f53811d75122952df4a9c2eece4e7f611b7523cef4400c31e3f80b6512669455d402251fb593d8d58fabfc5f5ba30f6cb9b556cd7813b801d346ff26660b76b9950a5a49f9fe8047b1022c24fbba9d7feb7c61bf83b57e7c6a8a6150f04fb83f6d3c51ec3023554135a169132f675f3ae2b61d72aeff22203199dd14801c7"); - BN_hex2bn(&g, "f7e1a085d69b3ddecbbcab5c36b857b97994afbbfa3aea82f9574c0b3d0782675159578ebad4594fe67107108180b449167123e84c281613b7cf09328cc8a6e13c167a8b547c8d28e0a3ae1e2bb3a675916ea37f0bfa213562f1fb627a01243bcca4f1bea8519089a883dfe15ae59f06928b665e807b552564014c3bfecf492a"); - BN_hex2bn(&q, "9760508f15230bccb292b982a2eb840bf0581cf5"); - */ - /*- - p = BN_new(); - BN_generate_prime(p, 1024, 1, NULL, NULL, NULL, NULL); - */ - /* Use a safe prime for p (that we found earlier) */ - BN_hex2bn(&p, - "F9E5B365665EA7A05A9C534502780FEE6F1AB5BD4F49947FD036DBD7E905269AF46EF28B0FC07487EE4F5D20FB3C0AF8E700F3A2FA3414970CBED44FEDFF80CE78D800F184BB82435D137AADA2C6C16523247930A63B85661D1FC817A51ACD96168E95898A1F83A79FFB529368AA7833ABD1B0C3AEDDB14D2E1A2F71D99F763F"); - showbn("p", p); - g = BN_new(); - BN_set_word(g, 2); - showbn("g", g); - q = BN_new(); - BN_rshift1(q, p); - showbn("q", q); - - BN_rand(secret, 32, -1, 0); - - /* A normal run, expect this to work... */ - alice = JPAKE_CTX_new("Alice", "Bob", p, g, q, secret); - bob = JPAKE_CTX_new("Bob", "Alice", p, g, q, secret); - - if (run_jpake(alice, bob) != 0) { - fprintf(stderr, "Plain JPAKE run failed\n"); - return 1; - } - - JPAKE_CTX_free(bob); - JPAKE_CTX_free(alice); - - /* Now give Alice and Bob different secrets */ - alice = JPAKE_CTX_new("Alice", "Bob", p, g, q, secret); - BN_add_word(secret, 1); - bob = JPAKE_CTX_new("Bob", "Alice", p, g, q, secret); - - if (run_jpake(alice, bob) != 5) { - fprintf(stderr, "Mismatched secret JPAKE run failed\n"); - return 1; - } - - JPAKE_CTX_free(bob); - JPAKE_CTX_free(alice); - - BN_free(secret); - BN_free(q); - BN_free(g); - BN_free(p); - - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - ERR_free_strings(); - CRYPTO_mem_leaks(bio_err); - - return 0; -} - -#endif diff --git a/drivers/builtin_openssl2/crypto/lhash/lh_test.c b/drivers/builtin_openssl2/crypto/lhash/lh_test.c deleted file mode 100644 index d9db83f7ae..0000000000 --- a/drivers/builtin_openssl2/crypto/lhash/lh_test.c +++ /dev/null @@ -1,88 +0,0 @@ -/* crypto/lhash/lh_test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#include - -main() -{ - LHASH *conf; - char buf[256]; - int i; - - conf = lh_new(lh_strhash, strcmp); - for (;;) { - char *p; - - buf[0] = '\0'; - fgets(buf, 256, stdin); - if (buf[0] == '\0') - break; - i = strlen(buf); - p = OPENSSL_malloc(i + 1); - memcpy(p, buf, i + 1); - lh_insert(conf, p); - } - - lh_node_stats(conf, stdout); - lh_stats(conf, stdout); - lh_node_usage_stats(conf, stdout); - exit(0); -} diff --git a/drivers/builtin_openssl2/crypto/lhash/num.pl b/drivers/builtin_openssl2/crypto/lhash/num.pl deleted file mode 100644 index 30fedf9cd5..0000000000 --- a/drivers/builtin_openssl2/crypto/lhash/num.pl +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/local/bin/perl - -#node 10 -> 4 - -while (<>) - { - next unless /^node/; - chop; - @a=split; - $num{$a[3]}++; - } - -@a=sort {$a <=> $b } keys %num; -foreach (0 .. $a[$#a]) - { - printf "%4d:%4d\n",$_,$num{$_}; - } diff --git a/drivers/builtin_openssl2/crypto/md2/md2test.c b/drivers/builtin_openssl2/crypto/md2/md2test.c deleted file mode 100644 index 49a8a9bc78..0000000000 --- a/drivers/builtin_openssl2/crypto/md2/md2test.c +++ /dev/null @@ -1,142 +0,0 @@ -/* crypto/md2/md2test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_MD2 -int main(int argc, char *argv[]) -{ - printf("No MD2 support\n"); - return (0); -} -#else -# include -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -static char *test[] = { - "", - "a", - "abc", - "message digest", - "abcdefghijklmnopqrstuvwxyz", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - "12345678901234567890123456789012345678901234567890123456789012345678901234567890", - NULL, -}; - -static char *ret[] = { - "8350e5a3e24c153df2275c9f80692773", - "32ec01ec4a6dac72c0ab96fb34c0b5d1", - "da853b0d3f88d99b30283a69e6ded6bb", - "ab4f496bfb2a530b219ff33031fe06b0", - "4e8ddff3650292ab5a4108c3aa47940b", - "da33def2a42df13975352846c30338cd", - "d5976f79d83d3a0dc9806c3c66f3efd8", -}; - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - char *p; - unsigned char md[MD2_DIGEST_LENGTH]; - - P = test; - R = ret; - i = 1; - while (*P != NULL) { - EVP_Digest((unsigned char *)*P, strlen(*P), md, NULL, EVP_md2(), - NULL); - p = pt(md); - if (strcmp(p, *R) != 0) { - printf("error calculating MD2 on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EXIT(err); - return err; -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < MD2_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/md32_common.h b/drivers/builtin_openssl2/crypto/md32_common.h index 1823833419..96828d2693 100644 --- a/drivers/builtin_openssl2/crypto/md32_common.h +++ b/drivers/builtin_openssl2/crypto/md32_common.h @@ -215,12 +215,30 @@ asm ("bswapl %0":"=r"(r):"0"(r)); \ *((unsigned int *)(c))=r; (c)+=4; r; }) # endif +# elif defined(__aarch64__) +# if defined(__BYTE_ORDER__) +# if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define HOST_c2l(c,l) ({ unsigned int r; \ + asm ("rev %w0,%w1" \ + :"=r"(r) \ + :"r"(*((const unsigned int *)(c))));\ + (c)+=4; (l)=r; }) +# define HOST_l2c(l,c) ({ unsigned int r; \ + asm ("rev %w0,%w1" \ + :"=r"(r) \ + :"r"((unsigned int)(l)));\ + *((unsigned int *)(c))=r; (c)+=4; r; }) +# elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# endif +# endif # endif # endif -# endif -# if defined(__s390__) || defined(__s390x__) -# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) -# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# if defined(__s390__) || defined(__s390x__) +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# endif # endif # ifndef HOST_c2l @@ -250,12 +268,12 @@ (c)+=4; (l); }) # endif # endif -# endif -# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) -# ifndef B_ENDIAN - /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */ -# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l) -# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l) +# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) +# ifndef B_ENDIAN + /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */ +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l) +# endif # endif # endif diff --git a/drivers/builtin_openssl2/crypto/md4/md4test.c b/drivers/builtin_openssl2/crypto/md4/md4test.c deleted file mode 100644 index 59f23bb5d6..0000000000 --- a/drivers/builtin_openssl2/crypto/md4/md4test.c +++ /dev/null @@ -1,133 +0,0 @@ -/* crypto/md4/md4test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_MD4 -int main(int argc, char *argv[]) -{ - printf("No MD4 support\n"); - return (0); -} -#else -# include -# include - -static char *test[] = { - "", - "a", - "abc", - "message digest", - "abcdefghijklmnopqrstuvwxyz", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - "12345678901234567890123456789012345678901234567890123456789012345678901234567890", - NULL, -}; - -static char *ret[] = { - "31d6cfe0d16ae931b73c59d7e0c089c0", - "bde52cb31de33e46245e05fbdbd6fb24", - "a448017aaf21d8525fc10ae87aa6729d", - "d9130a8164549fe818874806e1c7014b", - "d79e1c308aa5bbcdeea8ed63df412da9", - "043f8582f241db351ce627e153e7f0e4", - "e33b4ddc9c38f2199c3e7b164fcc0536", -}; - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - char *p; - unsigned char md[MD4_DIGEST_LENGTH]; - - P = test; - R = ret; - i = 1; - while (*P != NULL) { - EVP_Digest(&(P[0][0]), strlen((char *)*P), md, NULL, EVP_md4(), NULL); - p = pt(md); - if (strcmp(p, (char *)*R) != 0) { - printf("error calculating MD4 on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } - EXIT(err); - return (0); -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < MD4_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/md5/asm/md5-586.pl b/drivers/builtin_openssl2/crypto/md5/asm/md5-586.pl deleted file mode 100644 index 6cb66bb499..0000000000 --- a/drivers/builtin_openssl2/crypto/md5/asm/md5-586.pl +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/local/bin/perl - -# Normal is the -# md5_block_x86(MD5_CTX *c, ULONG *X); -# version, non-normal is the -# md5_block_x86(MD5_CTX *c, ULONG *X,int blocks); - -$normal=0; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$A="eax"; -$B="ebx"; -$C="ecx"; -$D="edx"; -$tmp1="edi"; -$tmp2="ebp"; -$X="esi"; - -# What we need to load into $tmp for the next round -%Ltmp1=("R0",&Np($C), "R1",&Np($C), "R2",&Np($C), "R3",&Np($D)); -@xo=( - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, # R0 - 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, # R1 - 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, # R2 - 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 - ); - -&md5_block("md5_block_asm_data_order"); -&asm_finish(); - -sub Np - { - local($p)=@_; - local(%n)=($A,$D,$B,$A,$C,$B,$D,$C); - return($n{$p}); - } - -sub R0 - { - local($pos,$a,$b,$c,$d,$K,$ki,$s,$t)=@_; - - &mov($tmp1,$C) if $pos < 0; - &mov($tmp2,&DWP($xo[$ki]*4,$K,"",0)) if $pos < 0; # very first one - - # body proper - - &comment("R0 $ki"); - &xor($tmp1,$d); # F function - part 2 - - &and($tmp1,$b); # F function - part 3 - &lea($a,&DWP($t,$a,$tmp2,1)); - - &xor($tmp1,$d); # F function - part 4 - - &add($a,$tmp1); - &mov($tmp1,&Np($c)) if $pos < 1; # next tmp1 for R0 - &mov($tmp1,&Np($c)) if $pos == 1; # next tmp1 for R1 - - &rotl($a,$s); - - &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); - - &add($a,$b); - } - -sub R1 - { - local($pos,$a,$b,$c,$d,$K,$ki,$s,$t)=@_; - - &comment("R1 $ki"); - - &lea($a,&DWP($t,$a,$tmp2,1)); - - &xor($tmp1,$b); # G function - part 2 - &and($tmp1,$d); # G function - part 3 - - &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); - &xor($tmp1,$c); # G function - part 4 - - &add($a,$tmp1); - &mov($tmp1,&Np($c)) if $pos < 1; # G function - part 1 - &mov($tmp1,&Np($c)) if $pos == 1; # G function - part 1 - - &rotl($a,$s); - - &add($a,$b); - } - -sub R2 - { - local($n,$pos,$a,$b,$c,$d,$K,$ki,$s,$t)=@_; - # This one is different, only 3 logical operations - -if (($n & 1) == 0) - { - &comment("R2 $ki"); - # make sure to do 'D' first, not 'B', else we clash with - # the last add from the previous round. - - &xor($tmp1,$d); # H function - part 2 - - &xor($tmp1,$b); # H function - part 3 - &lea($a,&DWP($t,$a,$tmp2,1)); - - &add($a,$tmp1); - - &rotl($a,$s); - - &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)); - &mov($tmp1,&Np($c)); - } -else - { - &comment("R2 $ki"); - # make sure to do 'D' first, not 'B', else we clash with - # the last add from the previous round. - - &lea($a,&DWP($t,$a,$tmp2,1)); - - &add($b,$c); # MOVED FORWARD - &xor($tmp1,$d); # H function - part 2 - - &xor($tmp1,$b); # H function - part 3 - &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if ($pos != 2); - - &add($a,$tmp1); - &mov($tmp1,&Np($c)) if $pos < 1; # H function - part 1 - &mov($tmp1,-1) if $pos == 1; # I function - part 1 - - &rotl($a,$s); - - &add($a,$b); - } - } - -sub R3 - { - local($pos,$a,$b,$c,$d,$K,$ki,$s,$t)=@_; - - &comment("R3 $ki"); - - # ¬($tmp1) - &xor($tmp1,$d) if $pos < 0; # I function - part 2 - - &or($tmp1,$b); # I function - part 3 - &lea($a,&DWP($t,$a,$tmp2,1)); - - &xor($tmp1,$c); # I function - part 4 - &mov($tmp2,&DWP($xo[$ki+1]*4,$K,"",0)) if $pos != 2; # load X/k value - &mov($tmp2,&wparam(0)) if $pos == 2; - - &add($a,$tmp1); - &mov($tmp1,-1) if $pos < 1; # H function - part 1 - &add($K,64) if $pos >=1 && !$normal; - - &rotl($a,$s); - - &xor($tmp1,&Np($d)) if $pos <= 0; # I function - part = first time - &mov($tmp1,&DWP( 0,$tmp2,"",0)) if $pos > 0; - &add($a,$b); - } - - -sub md5_block - { - local($name)=@_; - - &function_begin_B($name,"",3); - - # parameter 1 is the MD5_CTX structure. - # A 0 - # B 4 - # C 8 - # D 12 - - &push("esi"); - &push("edi"); - &mov($tmp1, &wparam(0)); # edi - &mov($X, &wparam(1)); # esi - &mov($C, &wparam(2)); - &push("ebp"); - &shl($C, 6); - &push("ebx"); - &add($C, $X); # offset we end at - &sub($C, 64); - &mov($A, &DWP( 0,$tmp1,"",0)); - &push($C); # Put on the TOS - &mov($B, &DWP( 4,$tmp1,"",0)); - &mov($C, &DWP( 8,$tmp1,"",0)); - &mov($D, &DWP(12,$tmp1,"",0)); - - &set_label("start") unless $normal; - &comment(""); - &comment("R0 section"); - - &R0(-2,$A,$B,$C,$D,$X, 0, 7,0xd76aa478); - &R0( 0,$D,$A,$B,$C,$X, 1,12,0xe8c7b756); - &R0( 0,$C,$D,$A,$B,$X, 2,17,0x242070db); - &R0( 0,$B,$C,$D,$A,$X, 3,22,0xc1bdceee); - &R0( 0,$A,$B,$C,$D,$X, 4, 7,0xf57c0faf); - &R0( 0,$D,$A,$B,$C,$X, 5,12,0x4787c62a); - &R0( 0,$C,$D,$A,$B,$X, 6,17,0xa8304613); - &R0( 0,$B,$C,$D,$A,$X, 7,22,0xfd469501); - &R0( 0,$A,$B,$C,$D,$X, 8, 7,0x698098d8); - &R0( 0,$D,$A,$B,$C,$X, 9,12,0x8b44f7af); - &R0( 0,$C,$D,$A,$B,$X,10,17,0xffff5bb1); - &R0( 0,$B,$C,$D,$A,$X,11,22,0x895cd7be); - &R0( 0,$A,$B,$C,$D,$X,12, 7,0x6b901122); - &R0( 0,$D,$A,$B,$C,$X,13,12,0xfd987193); - &R0( 0,$C,$D,$A,$B,$X,14,17,0xa679438e); - &R0( 1,$B,$C,$D,$A,$X,15,22,0x49b40821); - - &comment(""); - &comment("R1 section"); - &R1(-1,$A,$B,$C,$D,$X,16, 5,0xf61e2562); - &R1( 0,$D,$A,$B,$C,$X,17, 9,0xc040b340); - &R1( 0,$C,$D,$A,$B,$X,18,14,0x265e5a51); - &R1( 0,$B,$C,$D,$A,$X,19,20,0xe9b6c7aa); - &R1( 0,$A,$B,$C,$D,$X,20, 5,0xd62f105d); - &R1( 0,$D,$A,$B,$C,$X,21, 9,0x02441453); - &R1( 0,$C,$D,$A,$B,$X,22,14,0xd8a1e681); - &R1( 0,$B,$C,$D,$A,$X,23,20,0xe7d3fbc8); - &R1( 0,$A,$B,$C,$D,$X,24, 5,0x21e1cde6); - &R1( 0,$D,$A,$B,$C,$X,25, 9,0xc33707d6); - &R1( 0,$C,$D,$A,$B,$X,26,14,0xf4d50d87); - &R1( 0,$B,$C,$D,$A,$X,27,20,0x455a14ed); - &R1( 0,$A,$B,$C,$D,$X,28, 5,0xa9e3e905); - &R1( 0,$D,$A,$B,$C,$X,29, 9,0xfcefa3f8); - &R1( 0,$C,$D,$A,$B,$X,30,14,0x676f02d9); - &R1( 1,$B,$C,$D,$A,$X,31,20,0x8d2a4c8a); - - &comment(""); - &comment("R2 section"); - &R2( 0,-1,$A,$B,$C,$D,$X,32, 4,0xfffa3942); - &R2( 1, 0,$D,$A,$B,$C,$X,33,11,0x8771f681); - &R2( 2, 0,$C,$D,$A,$B,$X,34,16,0x6d9d6122); - &R2( 3, 0,$B,$C,$D,$A,$X,35,23,0xfde5380c); - &R2( 4, 0,$A,$B,$C,$D,$X,36, 4,0xa4beea44); - &R2( 5, 0,$D,$A,$B,$C,$X,37,11,0x4bdecfa9); - &R2( 6, 0,$C,$D,$A,$B,$X,38,16,0xf6bb4b60); - &R2( 7, 0,$B,$C,$D,$A,$X,39,23,0xbebfbc70); - &R2( 8, 0,$A,$B,$C,$D,$X,40, 4,0x289b7ec6); - &R2( 9, 0,$D,$A,$B,$C,$X,41,11,0xeaa127fa); - &R2(10, 0,$C,$D,$A,$B,$X,42,16,0xd4ef3085); - &R2(11, 0,$B,$C,$D,$A,$X,43,23,0x04881d05); - &R2(12, 0,$A,$B,$C,$D,$X,44, 4,0xd9d4d039); - &R2(13, 0,$D,$A,$B,$C,$X,45,11,0xe6db99e5); - &R2(14, 0,$C,$D,$A,$B,$X,46,16,0x1fa27cf8); - &R2(15, 1,$B,$C,$D,$A,$X,47,23,0xc4ac5665); - - &comment(""); - &comment("R3 section"); - &R3(-1,$A,$B,$C,$D,$X,48, 6,0xf4292244); - &R3( 0,$D,$A,$B,$C,$X,49,10,0x432aff97); - &R3( 0,$C,$D,$A,$B,$X,50,15,0xab9423a7); - &R3( 0,$B,$C,$D,$A,$X,51,21,0xfc93a039); - &R3( 0,$A,$B,$C,$D,$X,52, 6,0x655b59c3); - &R3( 0,$D,$A,$B,$C,$X,53,10,0x8f0ccc92); - &R3( 0,$C,$D,$A,$B,$X,54,15,0xffeff47d); - &R3( 0,$B,$C,$D,$A,$X,55,21,0x85845dd1); - &R3( 0,$A,$B,$C,$D,$X,56, 6,0x6fa87e4f); - &R3( 0,$D,$A,$B,$C,$X,57,10,0xfe2ce6e0); - &R3( 0,$C,$D,$A,$B,$X,58,15,0xa3014314); - &R3( 0,$B,$C,$D,$A,$X,59,21,0x4e0811a1); - &R3( 0,$A,$B,$C,$D,$X,60, 6,0xf7537e82); - &R3( 0,$D,$A,$B,$C,$X,61,10,0xbd3af235); - &R3( 0,$C,$D,$A,$B,$X,62,15,0x2ad7d2bb); - &R3( 2,$B,$C,$D,$A,$X,63,21,0xeb86d391); - - # &mov($tmp2,&wparam(0)); # done in the last R3 - # &mov($tmp1, &DWP( 0,$tmp2,"",0)); # done is the last R3 - - &add($A,$tmp1); - &mov($tmp1, &DWP( 4,$tmp2,"",0)); - - &add($B,$tmp1); - &mov($tmp1, &DWP( 8,$tmp2,"",0)); - - &add($C,$tmp1); - &mov($tmp1, &DWP(12,$tmp2,"",0)); - - &add($D,$tmp1); - &mov(&DWP( 0,$tmp2,"",0),$A); - - &mov(&DWP( 4,$tmp2,"",0),$B); - &mov($tmp1,&swtmp(0)) unless $normal; - - &mov(&DWP( 8,$tmp2,"",0),$C); - &mov(&DWP(12,$tmp2,"",0),$D); - - &cmp($tmp1,$X) unless $normal; # check count - &jae(&label("start")) unless $normal; - - &pop("eax"); # pop the temp variable off the stack - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - diff --git a/drivers/builtin_openssl2/crypto/md5/asm/md5-ia64.S b/drivers/builtin_openssl2/crypto/md5/asm/md5-ia64.S deleted file mode 100644 index e7de08d46a..0000000000 --- a/drivers/builtin_openssl2/crypto/md5/asm/md5-ia64.S +++ /dev/null @@ -1,992 +0,0 @@ -/* Copyright (c) 2005 Hewlett-Packard Development Company, L.P. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -// Common registers are assigned as follows: -// -// COMMON -// -// t0 Const Tbl Ptr TPtr -// t1 Round Constant TRound -// t4 Block residual LenResid -// t5 Residual Data DTmp -// -// {in,out}0 Block 0 Cycle RotateM0 -// {in,out}1 Block Value 12 M12 -// {in,out}2 Block Value 8 M8 -// {in,out}3 Block Value 4 M4 -// {in,out}4 Block Value 0 M0 -// {in,out}5 Block 1 Cycle RotateM1 -// {in,out}6 Block Value 13 M13 -// {in,out}7 Block Value 9 M9 -// {in,out}8 Block Value 5 M5 -// {in,out}9 Block Value 1 M1 -// {in,out}10 Block 2 Cycle RotateM2 -// {in,out}11 Block Value 14 M14 -// {in,out}12 Block Value 10 M10 -// {in,out}13 Block Value 6 M6 -// {in,out}14 Block Value 2 M2 -// {in,out}15 Block 3 Cycle RotateM3 -// {in,out}16 Block Value 15 M15 -// {in,out}17 Block Value 11 M11 -// {in,out}18 Block Value 7 M7 -// {in,out}19 Block Value 3 M3 -// {in,out}20 Scratch Z -// {in,out}21 Scratch Y -// {in,out}22 Scratch X -// {in,out}23 Scratch W -// {in,out}24 Digest A A -// {in,out}25 Digest B B -// {in,out}26 Digest C C -// {in,out}27 Digest D D -// {in,out}28 Active Data Ptr DPtr -// in28 Dummy Value - -// out28 Dummy Value - -// bt0 Coroutine Link QUICK_RTN -// -/// These predicates are used for computing the padding block(s) and -/// are shared between the driver and digest co-routines -// -// pt0 Extra Pad Block pExtra -// pt1 Load next word pLoad -// pt2 Skip next word pSkip -// pt3 Search for Pad pNoPad -// pt4 Pad Word 0 pPad0 -// pt5 Pad Word 1 pPad1 -// pt6 Pad Word 2 pPad2 -// pt7 Pad Word 3 pPad3 - -#define DTmp r19 -#define LenResid r18 -#define QUICK_RTN b6 -#define TPtr r14 -#define TRound r15 -#define pExtra p6 -#define pLoad p7 -#define pNoPad p9 -#define pPad0 p10 -#define pPad1 p11 -#define pPad2 p12 -#define pPad3 p13 -#define pSkip p8 - -#define A_ out24 -#define B_ out25 -#define C_ out26 -#define D_ out27 -#define DPtr_ out28 -#define M0_ out4 -#define M1_ out9 -#define M10_ out12 -#define M11_ out17 -#define M12_ out1 -#define M13_ out6 -#define M14_ out11 -#define M15_ out16 -#define M2_ out14 -#define M3_ out19 -#define M4_ out3 -#define M5_ out8 -#define M6_ out13 -#define M7_ out18 -#define M8_ out2 -#define M9_ out7 -#define RotateM0_ out0 -#define RotateM1_ out5 -#define RotateM2_ out10 -#define RotateM3_ out15 -#define W_ out23 -#define X_ out22 -#define Y_ out21 -#define Z_ out20 - -#define A in24 -#define B in25 -#define C in26 -#define D in27 -#define DPtr in28 -#define M0 in4 -#define M1 in9 -#define M10 in12 -#define M11 in17 -#define M12 in1 -#define M13 in6 -#define M14 in11 -#define M15 in16 -#define M2 in14 -#define M3 in19 -#define M4 in3 -#define M5 in8 -#define M6 in13 -#define M7 in18 -#define M8 in2 -#define M9 in7 -#define RotateM0 in0 -#define RotateM1 in5 -#define RotateM2 in10 -#define RotateM3 in15 -#define W in23 -#define X in22 -#define Y in21 -#define Z in20 - -/* register stack configuration for md5_block_asm_data_order(): */ -#define MD5_NINP 3 -#define MD5_NLOC 0 -#define MD5_NOUT 29 -#define MD5_NROT 0 - -/* register stack configuration for helpers: */ -#define _NINPUTS MD5_NOUT -#define _NLOCALS 0 -#define _NOUTPUT 0 -#define _NROTATE 24 /* this must be <= _NINPUTS */ - -#if defined(_HPUX_SOURCE) && !defined(_LP64) -#define ADDP addp4 -#else -#define ADDP add -#endif - -#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) -#define HOST_IS_BIG_ENDIAN -#endif - -// Macros for getting the left and right portions of little-endian words - -#define GETLW(dst, src, align) dep.z dst = src, 32 - 8 * align, 8 * align -#define GETRW(dst, src, align) extr.u dst = src, 8 * align, 32 - 8 * align - -// MD5 driver -// -// Reads an input block, then calls the digest block -// subroutine and adds the results to the accumulated -// digest. It allocates 32 outs which the subroutine -// uses as it's inputs and rotating -// registers. Initializes the round constant pointer and -// takes care of saving/restoring ar.lc -// -/// INPUT -// -// in0 Context Ptr CtxPtr0 -// in1 Input Data Ptr DPtrIn -// in2 Integral Blocks BlockCount -// rp Return Address - -// -/// CODE -// -// v2 Input Align InAlign -// t0 Shared w/digest - -// t1 Shared w/digest - -// t2 Shared w/digest - -// t3 Shared w/digest - -// t4 Shared w/digest - -// t5 Shared w/digest - -// t6 PFS Save PFSSave -// t7 ar.lc Save LCSave -// t8 Saved PR PRSave -// t9 2nd CtxPtr CtxPtr1 -// t10 Table Base CTable -// t11 Table[0] CTable0 -// t13 Accumulator A AccumA -// t14 Accumulator B AccumB -// t15 Accumulator C AccumC -// t16 Accumulator D AccumD -// pt0 Shared w/digest - -// pt1 Shared w/digest - -// pt2 Shared w/digest - -// pt3 Shared w/digest - -// pt4 Shared w/digest - -// pt5 Shared w/digest - -// pt6 Shared w/digest - -// pt7 Shared w/digest - -// pt8 Not Aligned pOff -// pt8 Blocks Left pAgain - -#define AccumA r27 -#define AccumB r28 -#define AccumC r29 -#define AccumD r30 -#define CTable r24 -#define CTable0 r25 -#define CtxPtr0 in0 -#define CtxPtr1 r23 -#define DPtrIn in1 -#define BlockCount in2 -#define InAlign r10 -#define LCSave r21 -#define PFSSave r20 -#define PRSave r22 -#define pAgain p63 -#define pOff p63 - - .text - -/* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num) - - where: - c: a pointer to a structure of this type: - - typedef struct MD5state_st - { - MD5_LONG A,B,C,D; - MD5_LONG Nl,Nh; - MD5_LONG data[MD5_LBLOCK]; - unsigned int num; - } - MD5_CTX; - - data: a pointer to the input data (may be misaligned) - num: the number of 16-byte blocks to hash (i.e., the length - of DATA is 16*NUM. - - */ - - .type md5_block_asm_data_order, @function - .global md5_block_asm_data_order - .align 32 - .proc md5_block_asm_data_order -md5_block_asm_data_order: -.md5_block: - .prologue -{ .mmi - .save ar.pfs, PFSSave - alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT - ADDP CtxPtr1 = 8, CtxPtr0 - mov CTable = ip -} -{ .mmi - ADDP DPtrIn = 0, DPtrIn - ADDP CtxPtr0 = 0, CtxPtr0 - .save ar.lc, LCSave - mov LCSave = ar.lc -} -;; -{ .mmi - add CTable = .md5_tbl_data_order#-.md5_block#, CTable - and InAlign = 0x3, DPtrIn -} - -{ .mmi - ld4 AccumA = [CtxPtr0], 4 - ld4 AccumC = [CtxPtr1], 4 - .save pr, PRSave - mov PRSave = pr - .body -} -;; -{ .mmi - ld4 AccumB = [CtxPtr0] - ld4 AccumD = [CtxPtr1] - dep DPtr_ = 0, DPtrIn, 0, 2 -} ;; -#ifdef HOST_IS_BIG_ENDIAN - rum psr.be;; // switch to little-endian -#endif -{ .mmb - ld4 CTable0 = [CTable], 4 - cmp.ne pOff, p0 = 0, InAlign -(pOff) br.cond.spnt.many .md5_unaligned -} ;; - -// The FF load/compute loop rotates values three times, so that -// loading into M12 here produces the M0 value, M13 -> M1, etc. - -.md5_block_loop0: -{ .mmi - ld4 M12_ = [DPtr_], 4 - mov TPtr = CTable - mov TRound = CTable0 -} ;; -{ .mmi - ld4 M13_ = [DPtr_], 4 - mov A_ = AccumA - mov B_ = AccumB -} ;; -{ .mmi - ld4 M14_ = [DPtr_], 4 - mov C_ = AccumC - mov D_ = AccumD -} ;; -{ .mmb - ld4 M15_ = [DPtr_], 4 - add BlockCount = -1, BlockCount - br.call.sptk.many QUICK_RTN = md5_digest_block0 -} ;; - -// Now, we add the new digest values and do some clean-up -// before checking if there's another full block to process - -{ .mmi - add AccumA = AccumA, A_ - add AccumB = AccumB, B_ - cmp.ne pAgain, p0 = 0, BlockCount -} -{ .mib - add AccumC = AccumC, C_ - add AccumD = AccumD, D_ -(pAgain) br.cond.dptk.many .md5_block_loop0 -} ;; - -.md5_exit: -#ifdef HOST_IS_BIG_ENDIAN - sum psr.be;; // switch back to big-endian mode -#endif -{ .mmi - st4 [CtxPtr0] = AccumB, -4 - st4 [CtxPtr1] = AccumD, -4 - mov pr = PRSave, 0x1ffff ;; -} -{ .mmi - st4 [CtxPtr0] = AccumA - st4 [CtxPtr1] = AccumC - mov ar.lc = LCSave -} ;; -{ .mib - mov ar.pfs = PFSSave - br.ret.sptk.few rp -} ;; - -#define MD5UNALIGNED(offset) \ -.md5_process##offset: \ -{ .mib ; \ - nop 0x0 ; \ - GETRW(DTmp, DTmp, offset) ; \ -} ;; \ -.md5_block_loop##offset: \ -{ .mmi ; \ - ld4 Y_ = [DPtr_], 4 ; \ - mov TPtr = CTable ; \ - mov TRound = CTable0 ; \ -} ;; \ -{ .mmi ; \ - ld4 M13_ = [DPtr_], 4 ; \ - mov A_ = AccumA ; \ - mov B_ = AccumB ; \ -} ;; \ -{ .mii ; \ - ld4 M14_ = [DPtr_], 4 ; \ - GETLW(W_, Y_, offset) ; \ - mov C_ = AccumC ; \ -} \ -{ .mmi ; \ - mov D_ = AccumD ;; \ - or M12_ = W_, DTmp ; \ - GETRW(DTmp, Y_, offset) ; \ -} \ -{ .mib ; \ - ld4 M15_ = [DPtr_], 4 ; \ - add BlockCount = -1, BlockCount ; \ - br.call.sptk.many QUICK_RTN = md5_digest_block##offset; \ -} ;; \ -{ .mmi ; \ - add AccumA = AccumA, A_ ; \ - add AccumB = AccumB, B_ ; \ - cmp.ne pAgain, p0 = 0, BlockCount ; \ -} \ -{ .mib ; \ - add AccumC = AccumC, C_ ; \ - add AccumD = AccumD, D_ ; \ -(pAgain) br.cond.dptk.many .md5_block_loop##offset ; \ -} ;; \ -{ .mib ; \ - nop 0x0 ; \ - nop 0x0 ; \ - br.cond.sptk.many .md5_exit ; \ -} ;; - - .align 32 -.md5_unaligned: -// -// Because variable shifts are expensive, we special case each of -// the four alignements. In practice, this won't hurt too much -// since only one working set of code will be loaded. -// -{ .mib - ld4 DTmp = [DPtr_], 4 - cmp.eq pOff, p0 = 1, InAlign -(pOff) br.cond.dpnt.many .md5_process1 -} ;; -{ .mib - cmp.eq pOff, p0 = 2, InAlign - nop 0x0 -(pOff) br.cond.dpnt.many .md5_process2 -} ;; - MD5UNALIGNED(3) - MD5UNALIGNED(1) - MD5UNALIGNED(2) - - .endp md5_block_asm_data_order - - -// MD5 Perform the F function and load -// -// Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values, -// computes the FF() round of functions, then branches to the common -// digest code to finish up with GG(), HH, and II(). -// -// INPUT -// -// rp Return Address - -// -// CODE -// -// v0 PFS bit bucket PFS -// v1 Loop Trip Count LTrip -// pt0 Load next word pMore - -/* For F round: */ -#define LTrip r9 -#define PFS r8 -#define pMore p6 - -/* For GHI rounds: */ -#define T r9 -#define U r10 -#define V r11 - -#define COMPUTE(a, b, s, M, R) \ -{ \ - .mii ; \ - ld4 TRound = [TPtr], 4 ; \ - dep.z Y = Z, 32, 32 ;; \ - shrp Z = Z, Y, 64 - s ; \ -} ;; \ -{ \ - .mmi ; \ - add a = Z, b ; \ - mov R = M ; \ - nop 0x0 ; \ -} ;; - -#define LOOP(a, b, s, M, R, label) \ -{ .mii ; \ - ld4 TRound = [TPtr], 4 ; \ - dep.z Y = Z, 32, 32 ;; \ - shrp Z = Z, Y, 64 - s ; \ -} ;; \ -{ .mib ; \ - add a = Z, b ; \ - mov R = M ; \ - br.ctop.sptk.many label ; \ -} ;; - -// G(B, C, D) = (B & D) | (C & ~D) - -#define G(a, b, c, d, M) \ -{ .mmi ; \ - add Z = M, TRound ; \ - and Y = b, d ; \ - andcm X = c, d ; \ -} ;; \ -{ .mii ; \ - add Z = Z, a ; \ - or Y = Y, X ;; \ - add Z = Z, Y ; \ -} ;; - -// H(B, C, D) = B ^ C ^ D - -#define H(a, b, c, d, M) \ -{ .mmi ; \ - add Z = M, TRound ; \ - xor Y = b, c ; \ - nop 0x0 ; \ -} ;; \ -{ .mii ; \ - add Z = Z, a ; \ - xor Y = Y, d ;; \ - add Z = Z, Y ; \ -} ;; - -// I(B, C, D) = C ^ (B | ~D) -// -// However, since we have an andcm operator, we use the fact that -// -// Y ^ Z == ~Y ^ ~Z -// -// to rewrite the expression as -// -// I(B, C, D) = ~C ^ (~B & D) - -#define I(a, b, c, d, M) \ -{ .mmi ; \ - add Z = M, TRound ; \ - andcm Y = d, b ; \ - andcm X = -1, c ; \ -} ;; \ -{ .mii ; \ - add Z = Z, a ; \ - xor Y = Y, X ;; \ - add Z = Z, Y ; \ -} ;; - -#define GG4(label) \ - G(A, B, C, D, M0) \ - COMPUTE(A, B, 5, M0, RotateM0) \ - G(D, A, B, C, M1) \ - COMPUTE(D, A, 9, M1, RotateM1) \ - G(C, D, A, B, M2) \ - COMPUTE(C, D, 14, M2, RotateM2) \ - G(B, C, D, A, M3) \ - LOOP(B, C, 20, M3, RotateM3, label) - -#define HH4(label) \ - H(A, B, C, D, M0) \ - COMPUTE(A, B, 4, M0, RotateM0) \ - H(D, A, B, C, M1) \ - COMPUTE(D, A, 11, M1, RotateM1) \ - H(C, D, A, B, M2) \ - COMPUTE(C, D, 16, M2, RotateM2) \ - H(B, C, D, A, M3) \ - LOOP(B, C, 23, M3, RotateM3, label) - -#define II4(label) \ - I(A, B, C, D, M0) \ - COMPUTE(A, B, 6, M0, RotateM0) \ - I(D, A, B, C, M1) \ - COMPUTE(D, A, 10, M1, RotateM1) \ - I(C, D, A, B, M2) \ - COMPUTE(C, D, 15, M2, RotateM2) \ - I(B, C, D, A, M3) \ - LOOP(B, C, 21, M3, RotateM3, label) - -#define FFLOAD(a, b, c, d, M, N, s) \ -{ .mii ; \ -(pMore) ld4 N = [DPtr], 4 ; \ - add Z = M, TRound ; \ - and Y = c, b ; \ -} \ -{ .mmi ; \ - andcm X = d, b ;; \ - add Z = Z, a ; \ - or Y = Y, X ; \ -} ;; \ -{ .mii ; \ - ld4 TRound = [TPtr], 4 ; \ - add Z = Z, Y ;; \ - dep.z Y = Z, 32, 32 ; \ -} ;; \ -{ .mii ; \ - nop 0x0 ; \ - shrp Z = Z, Y, 64 - s ;; \ - add a = Z, b ; \ -} ;; - -#define FFLOOP(a, b, c, d, M, N, s, dest) \ -{ .mii ; \ -(pMore) ld4 N = [DPtr], 4 ; \ - add Z = M, TRound ; \ - and Y = c, b ; \ -} \ -{ .mmi ; \ - andcm X = d, b ;; \ - add Z = Z, a ; \ - or Y = Y, X ; \ -} ;; \ -{ .mii ; \ - ld4 TRound = [TPtr], 4 ; \ - add Z = Z, Y ;; \ - dep.z Y = Z, 32, 32 ; \ -} ;; \ -{ .mii ; \ - nop 0x0 ; \ - shrp Z = Z, Y, 64 - s ;; \ - add a = Z, b ; \ -} \ -{ .mib ; \ - cmp.ne pMore, p0 = 0, LTrip ; \ - add LTrip = -1, LTrip ; \ - br.ctop.dptk.many dest ; \ -} ;; - - .type md5_digest_block0, @function - .align 32 - - .proc md5_digest_block0 - .prologue -md5_digest_block0: - .altrp QUICK_RTN - .body -{ .mmi - alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE - mov LTrip = 2 - mov ar.lc = 3 -} ;; -{ .mii - cmp.eq pMore, p0 = r0, r0 - mov ar.ec = 0 - nop 0x0 -} ;; - -.md5_FF_round0: - FFLOAD(A, B, C, D, M12, RotateM0, 7) - FFLOAD(D, A, B, C, M13, RotateM1, 12) - FFLOAD(C, D, A, B, M14, RotateM2, 17) - FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0) - // - // !!! Fall through to md5_digest_GHI - // - .endp md5_digest_block0 - - .type md5_digest_GHI, @function - .align 32 - - .proc md5_digest_GHI - .prologue - .regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE -md5_digest_GHI: - .altrp QUICK_RTN - .body -// -// The following sequence shuffles the block counstants round for the -// next round: -// -// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -// 1 6 11 0 5 10 14 4 9 14 3 8 13 2 7 12 -// -{ .mmi - mov Z = M0 - mov Y = M15 - mov ar.lc = 3 -} -{ .mmi - mov X = M2 - mov W = M9 - mov V = M4 -} ;; - -{ .mmi - mov M0 = M1 - mov M15 = M12 - mov ar.ec = 1 -} -{ .mmi - mov M2 = M11 - mov M9 = M14 - mov M4 = M5 -} ;; - -{ .mmi - mov M1 = M6 - mov M12 = M13 - mov U = M3 -} -{ .mmi - mov M11 = M8 - mov M14 = M7 - mov M5 = M10 -} ;; - -{ .mmi - mov M6 = Y - mov M13 = X - mov M3 = Z -} -{ .mmi - mov M8 = W - mov M7 = V - mov M10 = U -} ;; - -.md5_GG_round: - GG4(.md5_GG_round) - -// The following sequence shuffles the block constants round for the -// next round: -// -// 1 6 11 0 5 10 14 4 9 14 3 8 13 2 7 12 -// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2 - -{ .mmi - mov Z = M0 - mov Y = M1 - mov ar.lc = 3 -} -{ .mmi - mov X = M3 - mov W = M5 - mov V = M6 -} ;; - -{ .mmi - mov M0 = M4 - mov M1 = M11 - mov ar.ec = 1 -} -{ .mmi - mov M3 = M9 - mov U = M8 - mov T = M13 -} ;; - -{ .mmi - mov M4 = Z - mov M11 = Y - mov M5 = M7 -} -{ .mmi - mov M6 = M14 - mov M8 = M12 - mov M13 = M15 -} ;; - -{ .mmi - mov M7 = W - mov M14 = V - nop 0x0 -} -{ .mmi - mov M9 = X - mov M12 = U - mov M15 = T -} ;; - -.md5_HH_round: - HH4(.md5_HH_round) - -// The following sequence shuffles the block constants round for the -// next round: -// -// 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2 -// 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9 - -{ .mmi - mov Z = M0 - mov Y = M15 - mov ar.lc = 3 -} -{ .mmi - mov X = M10 - mov W = M1 - mov V = M4 -} ;; - -{ .mmi - mov M0 = M9 - mov M15 = M12 - mov ar.ec = 1 -} -{ .mmi - mov M10 = M11 - mov M1 = M6 - mov M4 = M13 -} ;; - -{ .mmi - mov M9 = M14 - mov M12 = M5 - mov U = M3 -} -{ .mmi - mov M11 = M8 - mov M6 = M7 - mov M13 = M2 -} ;; - -{ .mmi - mov M14 = Y - mov M5 = X - mov M3 = Z -} -{ .mmi - mov M8 = W - mov M7 = V - mov M2 = U -} ;; - -.md5_II_round: - II4(.md5_II_round) - -{ .mib - nop 0x0 - nop 0x0 - br.ret.sptk.many QUICK_RTN -} ;; - - .endp md5_digest_GHI - -#define FFLOADU(a, b, c, d, M, P, N, s, offset) \ -{ .mii ; \ -(pMore) ld4 N = [DPtr], 4 ; \ - add Z = M, TRound ; \ - and Y = c, b ; \ -} \ -{ .mmi ; \ - andcm X = d, b ;; \ - add Z = Z, a ; \ - or Y = Y, X ; \ -} ;; \ -{ .mii ; \ - ld4 TRound = [TPtr], 4 ; \ - GETLW(W, P, offset) ; \ - add Z = Z, Y ; \ -} ;; \ -{ .mii ; \ - or W = W, DTmp ; \ - dep.z Y = Z, 32, 32 ;; \ - shrp Z = Z, Y, 64 - s ; \ -} ;; \ -{ .mii ; \ - add a = Z, b ; \ - GETRW(DTmp, P, offset) ; \ - mov P = W ; \ -} ;; - -#define FFLOOPU(a, b, c, d, M, P, N, s, offset) \ -{ .mii ; \ -(pMore) ld4 N = [DPtr], 4 ; \ - add Z = M, TRound ; \ - and Y = c, b ; \ -} \ -{ .mmi ; \ - andcm X = d, b ;; \ - add Z = Z, a ; \ - or Y = Y, X ; \ -} ;; \ -{ .mii ; \ - ld4 TRound = [TPtr], 4 ; \ -(pMore) GETLW(W, P, offset) ; \ - add Z = Z, Y ; \ -} ;; \ -{ .mii ; \ -(pMore) or W = W, DTmp ; \ - dep.z Y = Z, 32, 32 ;; \ - shrp Z = Z, Y, 64 - s ; \ -} ;; \ -{ .mii ; \ - add a = Z, b ; \ -(pMore) GETRW(DTmp, P, offset) ; \ -(pMore) mov P = W ; \ -} \ -{ .mib ; \ - cmp.ne pMore, p0 = 0, LTrip ; \ - add LTrip = -1, LTrip ; \ - br.ctop.sptk.many .md5_FF_round##offset ; \ -} ;; - -#define MD5FBLOCK(offset) \ - .type md5_digest_block##offset, @function ; \ - \ - .align 32 ; \ - .proc md5_digest_block##offset ; \ - .prologue ; \ - .altrp QUICK_RTN ; \ - .body ; \ -md5_digest_block##offset: \ -{ .mmi ; \ - alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ; \ - mov LTrip = 2 ; \ - mov ar.lc = 3 ; \ -} ;; \ -{ .mii ; \ - cmp.eq pMore, p0 = r0, r0 ; \ - mov ar.ec = 0 ; \ - nop 0x0 ; \ -} ;; \ - \ - .pred.rel "mutex", pLoad, pSkip ; \ -.md5_FF_round##offset: \ - FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset) \ - FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset) \ - FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset) \ - FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset) \ - \ -{ .mib ; \ - nop 0x0 ; \ - nop 0x0 ; \ - br.cond.sptk.many md5_digest_GHI ; \ -} ;; \ - .endp md5_digest_block##offset - -MD5FBLOCK(1) -MD5FBLOCK(2) -MD5FBLOCK(3) - - .align 64 - .type md5_constants, @object -md5_constants: -.md5_tbl_data_order: // To ensure little-endian data - // order, code as bytes. - data1 0x78, 0xa4, 0x6a, 0xd7 // 0 - data1 0x56, 0xb7, 0xc7, 0xe8 // 1 - data1 0xdb, 0x70, 0x20, 0x24 // 2 - data1 0xee, 0xce, 0xbd, 0xc1 // 3 - data1 0xaf, 0x0f, 0x7c, 0xf5 // 4 - data1 0x2a, 0xc6, 0x87, 0x47 // 5 - data1 0x13, 0x46, 0x30, 0xa8 // 6 - data1 0x01, 0x95, 0x46, 0xfd // 7 - data1 0xd8, 0x98, 0x80, 0x69 // 8 - data1 0xaf, 0xf7, 0x44, 0x8b // 9 - data1 0xb1, 0x5b, 0xff, 0xff // 10 - data1 0xbe, 0xd7, 0x5c, 0x89 // 11 - data1 0x22, 0x11, 0x90, 0x6b // 12 - data1 0x93, 0x71, 0x98, 0xfd // 13 - data1 0x8e, 0x43, 0x79, 0xa6 // 14 - data1 0x21, 0x08, 0xb4, 0x49 // 15 - data1 0x62, 0x25, 0x1e, 0xf6 // 16 - data1 0x40, 0xb3, 0x40, 0xc0 // 17 - data1 0x51, 0x5a, 0x5e, 0x26 // 18 - data1 0xaa, 0xc7, 0xb6, 0xe9 // 19 - data1 0x5d, 0x10, 0x2f, 0xd6 // 20 - data1 0x53, 0x14, 0x44, 0x02 // 21 - data1 0x81, 0xe6, 0xa1, 0xd8 // 22 - data1 0xc8, 0xfb, 0xd3, 0xe7 // 23 - data1 0xe6, 0xcd, 0xe1, 0x21 // 24 - data1 0xd6, 0x07, 0x37, 0xc3 // 25 - data1 0x87, 0x0d, 0xd5, 0xf4 // 26 - data1 0xed, 0x14, 0x5a, 0x45 // 27 - data1 0x05, 0xe9, 0xe3, 0xa9 // 28 - data1 0xf8, 0xa3, 0xef, 0xfc // 29 - data1 0xd9, 0x02, 0x6f, 0x67 // 30 - data1 0x8a, 0x4c, 0x2a, 0x8d // 31 - data1 0x42, 0x39, 0xfa, 0xff // 32 - data1 0x81, 0xf6, 0x71, 0x87 // 33 - data1 0x22, 0x61, 0x9d, 0x6d // 34 - data1 0x0c, 0x38, 0xe5, 0xfd // 35 - data1 0x44, 0xea, 0xbe, 0xa4 // 36 - data1 0xa9, 0xcf, 0xde, 0x4b // 37 - data1 0x60, 0x4b, 0xbb, 0xf6 // 38 - data1 0x70, 0xbc, 0xbf, 0xbe // 39 - data1 0xc6, 0x7e, 0x9b, 0x28 // 40 - data1 0xfa, 0x27, 0xa1, 0xea // 41 - data1 0x85, 0x30, 0xef, 0xd4 // 42 - data1 0x05, 0x1d, 0x88, 0x04 // 43 - data1 0x39, 0xd0, 0xd4, 0xd9 // 44 - data1 0xe5, 0x99, 0xdb, 0xe6 // 45 - data1 0xf8, 0x7c, 0xa2, 0x1f // 46 - data1 0x65, 0x56, 0xac, 0xc4 // 47 - data1 0x44, 0x22, 0x29, 0xf4 // 48 - data1 0x97, 0xff, 0x2a, 0x43 // 49 - data1 0xa7, 0x23, 0x94, 0xab // 50 - data1 0x39, 0xa0, 0x93, 0xfc // 51 - data1 0xc3, 0x59, 0x5b, 0x65 // 52 - data1 0x92, 0xcc, 0x0c, 0x8f // 53 - data1 0x7d, 0xf4, 0xef, 0xff // 54 - data1 0xd1, 0x5d, 0x84, 0x85 // 55 - data1 0x4f, 0x7e, 0xa8, 0x6f // 56 - data1 0xe0, 0xe6, 0x2c, 0xfe // 57 - data1 0x14, 0x43, 0x01, 0xa3 // 58 - data1 0xa1, 0x11, 0x08, 0x4e // 59 - data1 0x82, 0x7e, 0x53, 0xf7 // 60 - data1 0x35, 0xf2, 0x3a, 0xbd // 61 - data1 0xbb, 0xd2, 0xd7, 0x2a // 62 - data1 0x91, 0xd3, 0x86, 0xeb // 63 -.size md5_constants#,64*4 diff --git a/drivers/builtin_openssl2/crypto/md5/asm/md5-x86_64.pl b/drivers/builtin_openssl2/crypto/md5/asm/md5-x86_64.pl deleted file mode 100755 index 381bf77e1c..0000000000 --- a/drivers/builtin_openssl2/crypto/md5/asm/md5-x86_64.pl +++ /dev/null @@ -1,370 +0,0 @@ -#!/usr/bin/perl -w -# -# MD5 optimized for AMD64. -# -# Author: Marc Bevand -# Licence: I hereby disclaim the copyright on this code and place it -# in the public domain. -# - -use strict; - -my $code; - -# round1_step() does: -# dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s) -# %r10d = X[k_next] -# %r11d = z' (copy of z for the next step) -# Each round1_step() takes about 5.3 clocks (9 instructions, 1.7 IPC) -sub round1_step -{ - my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; - $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); - $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); - $code .= <A - mov 1*4(%rbp), %ebx # ebx = ctx->B - mov 2*4(%rbp), %ecx # ecx = ctx->C - mov 3*4(%rbp), %edx # edx = ctx->D - # end is 'rdi' - # ptr is 'rsi' - # A is 'eax' - # B is 'ebx' - # C is 'ecx' - # D is 'edx' - - cmp %rdi, %rsi # cmp end with ptr - je .Lend # jmp if ptr == end - - # BEGIN of loop over 16-word blocks -.Lloop: # save old values of A, B, C, D - mov %eax, %r8d - mov %ebx, %r9d - mov %ecx, %r14d - mov %edx, %r15d -EOF -round1_step(-1,'%eax','%ebx','%ecx','%edx', '1','0xd76aa478', '7'); -round1_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xe8c7b756','12'); -round1_step( 0,'%ecx','%edx','%eax','%ebx', '3','0x242070db','17'); -round1_step( 0,'%ebx','%ecx','%edx','%eax', '4','0xc1bdceee','22'); -round1_step( 0,'%eax','%ebx','%ecx','%edx', '5','0xf57c0faf', '7'); -round1_step( 0,'%edx','%eax','%ebx','%ecx', '6','0x4787c62a','12'); -round1_step( 0,'%ecx','%edx','%eax','%ebx', '7','0xa8304613','17'); -round1_step( 0,'%ebx','%ecx','%edx','%eax', '8','0xfd469501','22'); -round1_step( 0,'%eax','%ebx','%ecx','%edx', '9','0x698098d8', '7'); -round1_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8b44f7af','12'); -round1_step( 0,'%ecx','%edx','%eax','%ebx','11','0xffff5bb1','17'); -round1_step( 0,'%ebx','%ecx','%edx','%eax','12','0x895cd7be','22'); -round1_step( 0,'%eax','%ebx','%ecx','%edx','13','0x6b901122', '7'); -round1_step( 0,'%edx','%eax','%ebx','%ecx','14','0xfd987193','12'); -round1_step( 0,'%ecx','%edx','%eax','%ebx','15','0xa679438e','17'); -round1_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x49b40821','22'); - -round2_step(-1,'%eax','%ebx','%ecx','%edx', '6','0xf61e2562', '5'); -round2_step( 0,'%edx','%eax','%ebx','%ecx','11','0xc040b340', '9'); -round2_step( 0,'%ecx','%edx','%eax','%ebx', '0','0x265e5a51','14'); -round2_step( 0,'%ebx','%ecx','%edx','%eax', '5','0xe9b6c7aa','20'); -round2_step( 0,'%eax','%ebx','%ecx','%edx','10','0xd62f105d', '5'); -round2_step( 0,'%edx','%eax','%ebx','%ecx','15', '0x2441453', '9'); -round2_step( 0,'%ecx','%edx','%eax','%ebx', '4','0xd8a1e681','14'); -round2_step( 0,'%ebx','%ecx','%edx','%eax', '9','0xe7d3fbc8','20'); -round2_step( 0,'%eax','%ebx','%ecx','%edx','14','0x21e1cde6', '5'); -round2_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xc33707d6', '9'); -round2_step( 0,'%ecx','%edx','%eax','%ebx', '8','0xf4d50d87','14'); -round2_step( 0,'%ebx','%ecx','%edx','%eax','13','0x455a14ed','20'); -round2_step( 0,'%eax','%ebx','%ecx','%edx', '2','0xa9e3e905', '5'); -round2_step( 0,'%edx','%eax','%ebx','%ecx', '7','0xfcefa3f8', '9'); -round2_step( 0,'%ecx','%edx','%eax','%ebx','12','0x676f02d9','14'); -round2_step( 1,'%ebx','%ecx','%edx','%eax', '0','0x8d2a4c8a','20'); - -round3_step(-1,'%eax','%ebx','%ecx','%edx', '8','0xfffa3942', '4'); -round3_step( 0,'%edx','%eax','%ebx','%ecx','11','0x8771f681','11'); -round3_step( 0,'%ecx','%edx','%eax','%ebx','14','0x6d9d6122','16'); -round3_step( 0,'%ebx','%ecx','%edx','%eax', '1','0xfde5380c','23'); -round3_step( 0,'%eax','%ebx','%ecx','%edx', '4','0xa4beea44', '4'); -round3_step( 0,'%edx','%eax','%ebx','%ecx', '7','0x4bdecfa9','11'); -round3_step( 0,'%ecx','%edx','%eax','%ebx','10','0xf6bb4b60','16'); -round3_step( 0,'%ebx','%ecx','%edx','%eax','13','0xbebfbc70','23'); -round3_step( 0,'%eax','%ebx','%ecx','%edx', '0','0x289b7ec6', '4'); -round3_step( 0,'%edx','%eax','%ebx','%ecx', '3','0xeaa127fa','11'); -round3_step( 0,'%ecx','%edx','%eax','%ebx', '6','0xd4ef3085','16'); -round3_step( 0,'%ebx','%ecx','%edx','%eax', '9', '0x4881d05','23'); -round3_step( 0,'%eax','%ebx','%ecx','%edx','12','0xd9d4d039', '4'); -round3_step( 0,'%edx','%eax','%ebx','%ecx','15','0xe6db99e5','11'); -round3_step( 0,'%ecx','%edx','%eax','%ebx', '2','0x1fa27cf8','16'); -round3_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xc4ac5665','23'); - -round4_step(-1,'%eax','%ebx','%ecx','%edx', '7','0xf4292244', '6'); -round4_step( 0,'%edx','%eax','%ebx','%ecx','14','0x432aff97','10'); -round4_step( 0,'%ecx','%edx','%eax','%ebx', '5','0xab9423a7','15'); -round4_step( 0,'%ebx','%ecx','%edx','%eax','12','0xfc93a039','21'); -round4_step( 0,'%eax','%ebx','%ecx','%edx', '3','0x655b59c3', '6'); -round4_step( 0,'%edx','%eax','%ebx','%ecx','10','0x8f0ccc92','10'); -round4_step( 0,'%ecx','%edx','%eax','%ebx', '1','0xffeff47d','15'); -round4_step( 0,'%ebx','%ecx','%edx','%eax', '8','0x85845dd1','21'); -round4_step( 0,'%eax','%ebx','%ecx','%edx','15','0x6fa87e4f', '6'); -round4_step( 0,'%edx','%eax','%ebx','%ecx', '6','0xfe2ce6e0','10'); -round4_step( 0,'%ecx','%edx','%eax','%ebx','13','0xa3014314','15'); -round4_step( 0,'%ebx','%ecx','%edx','%eax', '4','0x4e0811a1','21'); -round4_step( 0,'%eax','%ebx','%ecx','%edx','11','0xf7537e82', '6'); -round4_step( 0,'%edx','%eax','%ebx','%ecx', '2','0xbd3af235','10'); -round4_step( 0,'%ecx','%edx','%eax','%ebx', '9','0x2ad7d2bb','15'); -round4_step( 1,'%ebx','%ecx','%edx','%eax', '0','0xeb86d391','21'); -$code .= <A = A - mov %ebx, 1*4(%rbp) # ctx->B = B - mov %ecx, 2*4(%rbp) # ctx->C = C - mov %edx, 3*4(%rbp) # ctx->D = D - - mov (%rsp),%r15 - mov 8(%rsp),%r14 - mov 16(%rsp),%r12 - mov 24(%rsp),%rbx - mov 32(%rsp),%rbp - add \$40,%rsp -.Lepilogue: - ret -.size md5_block_asm_data_order,.-md5_block_asm_data_order -EOF - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -my $rec="%rcx"; -my $frame="%rdx"; -my $context="%r8"; -my $disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue - - lea 40(%rax),%rax - - mov -8(%rax),%rbp - mov -16(%rax),%rbx - mov -24(%rax),%r12 - mov -32(%rax),%r14 - mov -40(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_md5_block_asm_data_order - .rva .LSEH_end_md5_block_asm_data_order - .rva .LSEH_info_md5_block_asm_data_order - -.section .xdata -.align 8 -.LSEH_info_md5_block_asm_data_order: - .byte 9,0,0,0 - .rva se_handler -___ -} - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/md5/md5_locl.h b/drivers/builtin_openssl2/crypto/md5/md5_locl.h index 5f6f2fdedd..82e69218da 100644 --- a/drivers/builtin_openssl2/crypto/md5/md5_locl.h +++ b/drivers/builtin_openssl2/crypto/md5/md5_locl.h @@ -71,6 +71,8 @@ # define md5_block_data_order md5_block_asm_data_order # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) # define md5_block_data_order md5_block_asm_data_order +# elif defined(__sparc) || defined(__sparc__) +# define md5_block_data_order md5_block_asm_data_order # endif #endif diff --git a/drivers/builtin_openssl2/crypto/md5/md5test.c b/drivers/builtin_openssl2/crypto/md5/md5test.c deleted file mode 100644 index 0d0ab2d7d1..0000000000 --- a/drivers/builtin_openssl2/crypto/md5/md5test.c +++ /dev/null @@ -1,138 +0,0 @@ -/* crypto/md5/md5test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_MD5 -int main(int argc, char *argv[]) -{ - printf("No MD5 support\n"); - return (0); -} -#else -# include -# include - -static char *test[] = { - "", - "a", - "abc", - "message digest", - "abcdefghijklmnopqrstuvwxyz", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - "12345678901234567890123456789012345678901234567890123456789012345678901234567890", - NULL, -}; - -static char *ret[] = { - "d41d8cd98f00b204e9800998ecf8427e", - "0cc175b9c0f1b6a831c399e269772661", - "900150983cd24fb0d6963f7d28e17f72", - "f96b697d7cb7938d525a2f31aaf161d0", - "c3fcd3d76192e4007dfb496cca67e13b", - "d174ab98d277d9f5a5611c2c9f419d9f", - "57edf4a22be3c955ac49da2e2107b67a", -}; - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - char *p; - unsigned char md[MD5_DIGEST_LENGTH]; - - P = test; - R = ret; - i = 1; - while (*P != NULL) { - EVP_Digest(&(P[0][0]), strlen((char *)*P), md, NULL, EVP_md5(), NULL); - p = pt(md); - if (strcmp(p, (char *)*R) != 0) { - printf("error calculating MD5 on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EXIT(err); - return (0); -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < MD5_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/mdc2/mdc2test.c b/drivers/builtin_openssl2/crypto/mdc2/mdc2test.c deleted file mode 100644 index 8416252f80..0000000000 --- a/drivers/builtin_openssl2/crypto/mdc2/mdc2test.c +++ /dev/null @@ -1,146 +0,0 @@ -/* crypto/mdc2/mdc2test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#if defined(OPENSSL_NO_DES) && !defined(OPENSSL_NO_MDC2) -# define OPENSSL_NO_MDC2 -#endif - -#ifdef OPENSSL_NO_MDC2 -int main(int argc, char *argv[]) -{ - printf("No MDC2 support\n"); - return (0); -} -#else -# include -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -static unsigned char pad1[16] = { - 0x42, 0xE5, 0x0C, 0xD2, 0x24, 0xBA, 0xCE, 0xBA, - 0x76, 0x0B, 0xDD, 0x2B, 0xD4, 0x09, 0x28, 0x1A -}; - -static unsigned char pad2[16] = { - 0x2E, 0x46, 0x79, 0xB5, 0xAD, 0xD9, 0xCA, 0x75, - 0x35, 0xD8, 0x7A, 0xFE, 0xAB, 0x33, 0xBE, 0xE2 -}; - -int main(int argc, char *argv[]) -{ - int ret = 0; - unsigned char md[MDC2_DIGEST_LENGTH]; - int i; - EVP_MD_CTX c; - static char *text = "Now is the time for all "; - -# ifdef CHARSET_EBCDIC - ebcdic2ascii(text, text, strlen(text)); -# endif - - EVP_MD_CTX_init(&c); - EVP_DigestInit_ex(&c, EVP_mdc2(), NULL); - EVP_DigestUpdate(&c, (unsigned char *)text, strlen(text)); - EVP_DigestFinal_ex(&c, &(md[0]), NULL); - - if (memcmp(md, pad1, MDC2_DIGEST_LENGTH) != 0) { - for (i = 0; i < MDC2_DIGEST_LENGTH; i++) - printf("%02X", md[i]); - printf(" <- generated\n"); - for (i = 0; i < MDC2_DIGEST_LENGTH; i++) - printf("%02X", pad1[i]); - printf(" <- correct\n"); - ret = 1; - } else - printf("pad1 - ok\n"); - - EVP_DigestInit_ex(&c, EVP_mdc2(), NULL); - /* FIXME: use a ctl function? */ - ((MDC2_CTX *)c.md_data)->pad_type = 2; - EVP_DigestUpdate(&c, (unsigned char *)text, strlen(text)); - EVP_DigestFinal_ex(&c, &(md[0]), NULL); - - if (memcmp(md, pad2, MDC2_DIGEST_LENGTH) != 0) { - for (i = 0; i < MDC2_DIGEST_LENGTH; i++) - printf("%02X", md[i]); - printf(" <- generated\n"); - for (i = 0; i < MDC2_DIGEST_LENGTH; i++) - printf("%02X", pad2[i]); - printf(" <- correct\n"); - ret = 1; - } else - printf("pad2 - ok\n"); - - EVP_MD_CTX_cleanup(&c); -# ifdef OPENSSL_SYS_NETWARE - if (ret) - printf("ERROR: %d\n", ret); -# endif - EXIT(ret); - return (ret); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-alpha.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-alpha.pl deleted file mode 100644 index aa36029386..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-alpha.pl +++ /dev/null @@ -1,460 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# March 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+128 bytes shared table]. Even though -# loops are aggressively modulo-scheduled in respect to references to -# Htbl and Z.hi updates for 8 cycles per byte, measured performance is -# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic -# scheduling "glitch," because uprofile(1) indicates uniform sample -# distribution, as if all instruction bundles execute in 1.5 cycles. -# Meaning that it could have been even faster, yet 12 cycles is ~60% -# better than gcc-generated code and ~80% than code generated by vendor -# compiler. - -$cnt="v0"; # $0 -$t0="t0"; -$t1="t1"; -$t2="t2"; -$Thi0="t3"; # $4 -$Tlo0="t4"; -$Thi1="t5"; -$Tlo1="t6"; -$rem="t7"; # $8 -################# -$Xi="a0"; # $16, input argument block -$Htbl="a1"; -$inp="a2"; -$len="a3"; -$nlo="a4"; # $20 -$nhi="a5"; -$Zhi="t8"; -$Zlo="t9"; -$Xhi="t10"; # $24 -$Xlo="t11"; -$remp="t12"; -$rem_4bit="AT"; # $28 - -{ my $N; - sub loop() { - - $N++; -$code.=<<___; -.align 4 - extbl $Xlo,7,$nlo - and $nlo,0xf0,$nhi - sll $nlo,4,$nlo - and $nlo,0xf0,$nlo - - addq $nlo,$Htbl,$nlo - ldq $Zlo,8($nlo) - addq $nhi,$Htbl,$nhi - ldq $Zhi,0($nlo) - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - lda $cnt,6(zero) - extbl $Xlo,6,$nlo - - ldq $Tlo1,8($nhi) - s8addq $remp,$rem_4bit,$remp - ldq $Thi1,0($nhi) - srl $Zlo,4,$Zlo - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $t0,$Zlo,$Zlo - and $nlo,0xf0,$nhi - - xor $Tlo1,$Zlo,$Zlo - sll $nlo,4,$nlo - xor $Thi1,$Zhi,$Zhi - and $nlo,0xf0,$nlo - - addq $nlo,$Htbl,$nlo - ldq $Tlo0,8($nlo) - addq $nhi,$Htbl,$nhi - ldq $Thi0,0($nlo) - -.Looplo$N: - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - subq $cnt,1,$cnt - srl $Zlo,4,$Zlo - - ldq $Tlo1,8($nhi) - xor $rem,$Zhi,$Zhi - ldq $Thi1,0($nhi) - s8addq $remp,$rem_4bit,$remp - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $t0,$Zlo,$Zlo - extbl $Xlo,$cnt,$nlo - - and $nlo,0xf0,$nhi - xor $Thi0,$Zhi,$Zhi - xor $Tlo0,$Zlo,$Zlo - sll $nlo,4,$nlo - - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - and $nlo,0xf0,$nlo - srl $Zlo,4,$Zlo - - s8addq $remp,$rem_4bit,$remp - xor $rem,$Zhi,$Zhi - addq $nlo,$Htbl,$nlo - addq $nhi,$Htbl,$nhi - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - ldq $Tlo0,8($nlo) - xor $t0,$Zlo,$Zlo - - xor $Tlo1,$Zlo,$Zlo - xor $Thi1,$Zhi,$Zhi - ldq $Thi0,0($nlo) - bne $cnt,.Looplo$N - - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - lda $cnt,7(zero) - srl $Zlo,4,$Zlo - - ldq $Tlo1,8($nhi) - xor $rem,$Zhi,$Zhi - ldq $Thi1,0($nhi) - s8addq $remp,$rem_4bit,$remp - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $t0,$Zlo,$Zlo - extbl $Xhi,$cnt,$nlo - - and $nlo,0xf0,$nhi - xor $Thi0,$Zhi,$Zhi - xor $Tlo0,$Zlo,$Zlo - sll $nlo,4,$nlo - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - and $nlo,0xf0,$nlo - srl $Zlo,4,$Zlo - - s8addq $remp,$rem_4bit,$remp - xor $rem,$Zhi,$Zhi - addq $nlo,$Htbl,$nlo - addq $nhi,$Htbl,$nhi - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - ldq $Tlo0,8($nlo) - xor $t0,$Zlo,$Zlo - - xor $Tlo1,$Zlo,$Zlo - xor $Thi1,$Zhi,$Zhi - ldq $Thi0,0($nlo) - unop - - -.Loophi$N: - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - subq $cnt,1,$cnt - srl $Zlo,4,$Zlo - - ldq $Tlo1,8($nhi) - xor $rem,$Zhi,$Zhi - ldq $Thi1,0($nhi) - s8addq $remp,$rem_4bit,$remp - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $t0,$Zlo,$Zlo - extbl $Xhi,$cnt,$nlo - - and $nlo,0xf0,$nhi - xor $Thi0,$Zhi,$Zhi - xor $Tlo0,$Zlo,$Zlo - sll $nlo,4,$nlo - - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - and $nlo,0xf0,$nlo - srl $Zlo,4,$Zlo - - s8addq $remp,$rem_4bit,$remp - xor $rem,$Zhi,$Zhi - addq $nlo,$Htbl,$nlo - addq $nhi,$Htbl,$nhi - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - ldq $Tlo0,8($nlo) - xor $t0,$Zlo,$Zlo - - xor $Tlo1,$Zlo,$Zlo - xor $Thi1,$Zhi,$Zhi - ldq $Thi0,0($nlo) - bne $cnt,.Loophi$N - - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - srl $Zlo,4,$Zlo - - ldq $Tlo1,8($nhi) - xor $rem,$Zhi,$Zhi - ldq $Thi1,0($nhi) - s8addq $remp,$rem_4bit,$remp - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $t0,$Zlo,$Zlo - - xor $Tlo0,$Zlo,$Zlo - xor $Thi0,$Zhi,$Zhi - - and $Zlo,0x0f,$remp - sll $Zhi,60,$t0 - srl $Zlo,4,$Zlo - - s8addq $remp,$rem_4bit,$remp - xor $rem,$Zhi,$Zhi - - ldq $rem,0($remp) - srl $Zhi,4,$Zhi - xor $Tlo1,$Zlo,$Zlo - xor $Thi1,$Zhi,$Zhi - xor $t0,$Zlo,$Zlo - xor $rem,$Zhi,$Zhi -___ -}} - -$code=<<___; -#ifdef __linux__ -#include -#else -#include -#include -#endif - -.text - -.set noat -.set noreorder -.globl gcm_gmult_4bit -.align 4 -.ent gcm_gmult_4bit -gcm_gmult_4bit: - .frame sp,0,ra - .prologue 0 - - ldq $Xlo,8($Xi) - ldq $Xhi,0($Xi) - - bsr $t0,picmeup - nop -___ - - &loop(); - -$code.=<<___; - srl $Zlo,24,$t0 # byte swap - srl $Zlo,8,$t1 - - sll $Zlo,8,$t2 - sll $Zlo,24,$Zlo - zapnot $t0,0x11,$t0 - zapnot $t1,0x22,$t1 - - zapnot $Zlo,0x88,$Zlo - or $t0,$t1,$t0 - zapnot $t2,0x44,$t2 - - or $Zlo,$t0,$Zlo - srl $Zhi,24,$t0 - srl $Zhi,8,$t1 - - or $Zlo,$t2,$Zlo - sll $Zhi,8,$t2 - sll $Zhi,24,$Zhi - - srl $Zlo,32,$Xlo - sll $Zlo,32,$Zlo - - zapnot $t0,0x11,$t0 - zapnot $t1,0x22,$t1 - or $Zlo,$Xlo,$Xlo - - zapnot $Zhi,0x88,$Zhi - or $t0,$t1,$t0 - zapnot $t2,0x44,$t2 - - or $Zhi,$t0,$Zhi - or $Zhi,$t2,$Zhi - - srl $Zhi,32,$Xhi - sll $Zhi,32,$Zhi - - or $Zhi,$Xhi,$Xhi - stq $Xlo,8($Xi) - stq $Xhi,0($Xi) - - ret (ra) -.end gcm_gmult_4bit -___ - -$inhi="s0"; -$inlo="s1"; - -$code.=<<___; -.globl gcm_ghash_4bit -.align 4 -.ent gcm_ghash_4bit -gcm_ghash_4bit: - lda sp,-32(sp) - stq ra,0(sp) - stq s0,8(sp) - stq s1,16(sp) - .mask 0x04000600,-32 - .frame sp,32,ra - .prologue 0 - - ldq_u $inhi,0($inp) - ldq_u $Thi0,7($inp) - ldq_u $inlo,8($inp) - ldq_u $Tlo0,15($inp) - ldq $Xhi,0($Xi) - ldq $Xlo,8($Xi) - - bsr $t0,picmeup - nop - -.Louter: - extql $inhi,$inp,$inhi - extqh $Thi0,$inp,$Thi0 - or $inhi,$Thi0,$inhi - lda $inp,16($inp) - - extql $inlo,$inp,$inlo - extqh $Tlo0,$inp,$Tlo0 - or $inlo,$Tlo0,$inlo - subq $len,16,$len - - xor $Xlo,$inlo,$Xlo - xor $Xhi,$inhi,$Xhi -___ - - &loop(); - -$code.=<<___; - srl $Zlo,24,$t0 # byte swap - srl $Zlo,8,$t1 - - sll $Zlo,8,$t2 - sll $Zlo,24,$Zlo - zapnot $t0,0x11,$t0 - zapnot $t1,0x22,$t1 - - zapnot $Zlo,0x88,$Zlo - or $t0,$t1,$t0 - zapnot $t2,0x44,$t2 - - or $Zlo,$t0,$Zlo - srl $Zhi,24,$t0 - srl $Zhi,8,$t1 - - or $Zlo,$t2,$Zlo - sll $Zhi,8,$t2 - sll $Zhi,24,$Zhi - - srl $Zlo,32,$Xlo - sll $Zlo,32,$Zlo - beq $len,.Ldone - - zapnot $t0,0x11,$t0 - zapnot $t1,0x22,$t1 - or $Zlo,$Xlo,$Xlo - ldq_u $inhi,0($inp) - - zapnot $Zhi,0x88,$Zhi - or $t0,$t1,$t0 - zapnot $t2,0x44,$t2 - ldq_u $Thi0,7($inp) - - or $Zhi,$t0,$Zhi - or $Zhi,$t2,$Zhi - ldq_u $inlo,8($inp) - ldq_u $Tlo0,15($inp) - - srl $Zhi,32,$Xhi - sll $Zhi,32,$Zhi - - or $Zhi,$Xhi,$Xhi - br zero,.Louter - -.Ldone: - zapnot $t0,0x11,$t0 - zapnot $t1,0x22,$t1 - or $Zlo,$Xlo,$Xlo - - zapnot $Zhi,0x88,$Zhi - or $t0,$t1,$t0 - zapnot $t2,0x44,$t2 - - or $Zhi,$t0,$Zhi - or $Zhi,$t2,$Zhi - - srl $Zhi,32,$Xhi - sll $Zhi,32,$Zhi - - or $Zhi,$Xhi,$Xhi - - stq $Xlo,8($Xi) - stq $Xhi,0($Xi) - - .set noreorder - /*ldq ra,0(sp)*/ - ldq s0,8(sp) - ldq s1,16(sp) - lda sp,32(sp) - ret (ra) -.end gcm_ghash_4bit - -.align 4 -.ent picmeup -picmeup: - .frame sp,0,$t0 - .prologue 0 - br $rem_4bit,.Lpic -.Lpic: lda $rem_4bit,12($rem_4bit) - ret ($t0) -.end picmeup - nop -rem_4bit: - .long 0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16 - .long 0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16 - .long 0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16 - .long 0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16 -.ascii "GHASH for Alpha, CRYPTOGAMS by " -.align 4 - -___ -$output=shift and open STDOUT,">$output"; -print $code; -close STDOUT; - diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-armv4.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-armv4.pl deleted file mode 100644 index e46f8e34da..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-armv4.pl +++ /dev/null @@ -1,429 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# April 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+32 bytes shared table]. There is no -# experimental performance data available yet. The only approximation -# that can be made at this point is based on code size. Inner loop is -# 32 instructions long and on single-issue core should execute in <40 -# cycles. Having verified that gcc 3.4 didn't unroll corresponding -# loop, this assembler loop body was found to be ~3x smaller than -# compiler-generated one... -# -# July 2010 -# -# Rescheduling for dual-issue pipeline resulted in 8.5% improvement on -# Cortex A8 core and ~25 cycles per processed byte (which was observed -# to be ~3 times faster than gcc-generated code:-) -# -# February 2011 -# -# Profiler-assisted and platform-specific optimization resulted in 7% -# improvement on Cortex A8 core and ~23.5 cycles per byte. -# -# March 2011 -# -# Add NEON implementation featuring polynomial multiplication, i.e. no -# lookup tables involved. On Cortex A8 it was measured to process one -# byte in 15 cycles or 55% faster than integer-only code. - -# ==================================================================== -# Note about "528B" variant. In ARM case it makes lesser sense to -# implement it for following reasons: -# -# - performance improvement won't be anywhere near 50%, because 128- -# bit shift operation is neatly fused with 128-bit xor here, and -# "538B" variant would eliminate only 4-5 instructions out of 32 -# in the inner loop (meaning that estimated improvement is ~15%); -# - ARM-based systems are often embedded ones and extra memory -# consumption might be unappreciated (for so little improvement); -# -# Byte order [in]dependence. ========================================= -# -# Caller is expected to maintain specific *dword* order in Htable, -# namely with *least* significant dword of 128-bit value at *lower* -# address. This differs completely from C code and has everything to -# do with ldm instruction and order in which dwords are "consumed" by -# algorithm. *Byte* order within these dwords in turn is whatever -# *native* byte order on current platform. See gcm128.c for working -# example... - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$Xi="r0"; # argument block -$Htbl="r1"; -$inp="r2"; -$len="r3"; - -$Zll="r4"; # variables -$Zlh="r5"; -$Zhl="r6"; -$Zhh="r7"; -$Tll="r8"; -$Tlh="r9"; -$Thl="r10"; -$Thh="r11"; -$nlo="r12"; -################# r13 is stack pointer -$nhi="r14"; -################# r15 is program counter - -$rem_4bit=$inp; # used in gcm_gmult_4bit -$cnt=$len; - -sub Zsmash() { - my $i=12; - my @args=@_; - for ($Zll,$Zlh,$Zhl,$Zhh) { - $code.=<<___; -#if __ARM_ARCH__>=7 && defined(__ARMEL__) - rev $_,$_ - str $_,[$Xi,#$i] -#elif defined(__ARMEB__) - str $_,[$Xi,#$i] -#else - mov $Tlh,$_,lsr#8 - strb $_,[$Xi,#$i+3] - mov $Thl,$_,lsr#16 - strb $Tlh,[$Xi,#$i+2] - mov $Thh,$_,lsr#24 - strb $Thl,[$Xi,#$i+1] - strb $Thh,[$Xi,#$i] -#endif -___ - $code.="\t".shift(@args)."\n"; - $i-=4; - } -} - -$code=<<___; -#include "arm_arch.h" - -.text -.code 32 - -.type rem_4bit,%object -.align 5 -rem_4bit: -.short 0x0000,0x1C20,0x3840,0x2460 -.short 0x7080,0x6CA0,0x48C0,0x54E0 -.short 0xE100,0xFD20,0xD940,0xC560 -.short 0x9180,0x8DA0,0xA9C0,0xB5E0 -.size rem_4bit,.-rem_4bit - -.type rem_4bit_get,%function -rem_4bit_get: - sub $rem_4bit,pc,#8 - sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit - b .Lrem_4bit_got - nop -.size rem_4bit_get,.-rem_4bit_get - -.global gcm_ghash_4bit -.type gcm_ghash_4bit,%function -gcm_ghash_4bit: - sub r12,pc,#8 - add $len,$inp,$len @ $len to point at the end - stmdb sp!,{r3-r11,lr} @ save $len/end too - sub r12,r12,#48 @ &rem_4bit - - ldmia r12,{r4-r11} @ copy rem_4bit ... - stmdb sp!,{r4-r11} @ ... to stack - - ldrb $nlo,[$inp,#15] - ldrb $nhi,[$Xi,#15] -.Louter: - eor $nlo,$nlo,$nhi - and $nhi,$nlo,#0xf0 - and $nlo,$nlo,#0x0f - mov $cnt,#14 - - add $Zhh,$Htbl,$nlo,lsl#4 - ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo] - add $Thh,$Htbl,$nhi - ldrb $nlo,[$inp,#14] - - and $nhi,$Zll,#0xf @ rem - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] - add $nhi,$nhi,$nhi - eor $Zll,$Tll,$Zll,lsr#4 - ldrh $Tll,[sp,$nhi] @ rem_4bit[rem] - eor $Zll,$Zll,$Zlh,lsl#28 - ldrb $nhi,[$Xi,#14] - eor $Zlh,$Tlh,$Zlh,lsr#4 - eor $Zlh,$Zlh,$Zhl,lsl#28 - eor $Zhl,$Thl,$Zhl,lsr#4 - eor $Zhl,$Zhl,$Zhh,lsl#28 - eor $Zhh,$Thh,$Zhh,lsr#4 - eor $nlo,$nlo,$nhi - and $nhi,$nlo,#0xf0 - and $nlo,$nlo,#0x0f - eor $Zhh,$Zhh,$Tll,lsl#16 - -.Linner: - add $Thh,$Htbl,$nlo,lsl#4 - and $nlo,$Zll,#0xf @ rem - subs $cnt,$cnt,#1 - add $nlo,$nlo,$nlo - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo] - eor $Zll,$Tll,$Zll,lsr#4 - eor $Zll,$Zll,$Zlh,lsl#28 - eor $Zlh,$Tlh,$Zlh,lsr#4 - eor $Zlh,$Zlh,$Zhl,lsl#28 - ldrh $Tll,[sp,$nlo] @ rem_4bit[rem] - eor $Zhl,$Thl,$Zhl,lsr#4 - ldrplb $nlo,[$inp,$cnt] - eor $Zhl,$Zhl,$Zhh,lsl#28 - eor $Zhh,$Thh,$Zhh,lsr#4 - - add $Thh,$Htbl,$nhi - and $nhi,$Zll,#0xf @ rem - eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] - add $nhi,$nhi,$nhi - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] - eor $Zll,$Tll,$Zll,lsr#4 - ldrplb $Tll,[$Xi,$cnt] - eor $Zll,$Zll,$Zlh,lsl#28 - eor $Zlh,$Tlh,$Zlh,lsr#4 - ldrh $Tlh,[sp,$nhi] - eor $Zlh,$Zlh,$Zhl,lsl#28 - eor $Zhl,$Thl,$Zhl,lsr#4 - eor $Zhl,$Zhl,$Zhh,lsl#28 - eorpl $nlo,$nlo,$Tll - eor $Zhh,$Thh,$Zhh,lsr#4 - andpl $nhi,$nlo,#0xf0 - andpl $nlo,$nlo,#0x0f - eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem] - bpl .Linner - - ldr $len,[sp,#32] @ re-load $len/end - add $inp,$inp,#16 - mov $nhi,$Zll -___ - &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]"); -$code.=<<___; - bne .Louter - - add sp,sp,#36 -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r11,pc} -#else - ldmia sp!,{r4-r11,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size gcm_ghash_4bit,.-gcm_ghash_4bit - -.global gcm_gmult_4bit -.type gcm_gmult_4bit,%function -gcm_gmult_4bit: - stmdb sp!,{r4-r11,lr} - ldrb $nlo,[$Xi,#15] - b rem_4bit_get -.Lrem_4bit_got: - and $nhi,$nlo,#0xf0 - and $nlo,$nlo,#0x0f - mov $cnt,#14 - - add $Zhh,$Htbl,$nlo,lsl#4 - ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo] - ldrb $nlo,[$Xi,#14] - - add $Thh,$Htbl,$nhi - and $nhi,$Zll,#0xf @ rem - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] - add $nhi,$nhi,$nhi - eor $Zll,$Tll,$Zll,lsr#4 - ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem] - eor $Zll,$Zll,$Zlh,lsl#28 - eor $Zlh,$Tlh,$Zlh,lsr#4 - eor $Zlh,$Zlh,$Zhl,lsl#28 - eor $Zhl,$Thl,$Zhl,lsr#4 - eor $Zhl,$Zhl,$Zhh,lsl#28 - eor $Zhh,$Thh,$Zhh,lsr#4 - and $nhi,$nlo,#0xf0 - eor $Zhh,$Zhh,$Tll,lsl#16 - and $nlo,$nlo,#0x0f - -.Loop: - add $Thh,$Htbl,$nlo,lsl#4 - and $nlo,$Zll,#0xf @ rem - subs $cnt,$cnt,#1 - add $nlo,$nlo,$nlo - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo] - eor $Zll,$Tll,$Zll,lsr#4 - eor $Zll,$Zll,$Zlh,lsl#28 - eor $Zlh,$Tlh,$Zlh,lsr#4 - eor $Zlh,$Zlh,$Zhl,lsl#28 - ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem] - eor $Zhl,$Thl,$Zhl,lsr#4 - ldrplb $nlo,[$Xi,$cnt] - eor $Zhl,$Zhl,$Zhh,lsl#28 - eor $Zhh,$Thh,$Zhh,lsr#4 - - add $Thh,$Htbl,$nhi - and $nhi,$Zll,#0xf @ rem - eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] - add $nhi,$nhi,$nhi - ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] - eor $Zll,$Tll,$Zll,lsr#4 - eor $Zll,$Zll,$Zlh,lsl#28 - eor $Zlh,$Tlh,$Zlh,lsr#4 - ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem] - eor $Zlh,$Zlh,$Zhl,lsl#28 - eor $Zhl,$Thl,$Zhl,lsr#4 - eor $Zhl,$Zhl,$Zhh,lsl#28 - eor $Zhh,$Thh,$Zhh,lsr#4 - andpl $nhi,$nlo,#0xf0 - andpl $nlo,$nlo,#0x0f - eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] - bpl .Loop -___ - &Zsmash(); -$code.=<<___; -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r11,pc} -#else - ldmia sp!,{r4-r11,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size gcm_gmult_4bit,.-gcm_gmult_4bit -___ -{ -my $cnt=$Htbl; # $Htbl is used once in the very beginning - -my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); -my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); - -# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit -# in Zo. Or should I say "top bit", because GHASH is specified in -# reverse bit order? Otherwise straightforward 128-bt H by one input -# byte multiplication and modulo-reduction, times 16. - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu neon - -.global gcm_gmult_neon -.type gcm_gmult_neon,%function -.align 4 -gcm_gmult_neon: - sub $Htbl,#16 @ point at H in GCM128_CTX - vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$IN")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Htbl,{$Hhi-$Hlo} @ load H - veor $zero,$zero -#ifdef __ARMEL__ - vrev64.8 $IN,$IN -#endif - veor $Qpost,$Qpost - veor $R,$R - mov $cnt,#16 - veor $Z,$Z - mov $len,#16 - veor $Zo,$Zo - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - b .Linner_neon -.size gcm_gmult_neon,.-gcm_gmult_neon - -.global gcm_ghash_neon -.type gcm_ghash_neon,%function -.align 4 -gcm_ghash_neon: - vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$Z")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Xi,{$Hhi-$Hlo} @ load H - veor $zero,$zero - nop -#ifdef __ARMEL__ - vrev64.8 $Z,$Z -#endif -.Louter_neon: - vld1.64 `&Dhi($IN)`,[$inp]! @ load inp - veor $Qpost,$Qpost - vld1.64 `&Dlo($IN)`,[$inp]! - veor $R,$R - mov $cnt,#16 -#ifdef __ARMEL__ - vrev64.8 $IN,$IN -#endif - veor $Zo,$Zo - veor $IN,$Z @ inp^=Xi - veor $Z,$Z - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte -.Linner_neon: - subs $cnt,$cnt,#1 - vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i] - vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i] - vext.8 $IN,$zero,#1 @ IN>>=8 - - veor $Z,$Qpost @ modulo-scheduled part - vshl.i64 `&Dlo("$R")`,#48 - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")` - - veor `&Dhi("$Z")`,`&Dlo("$R")` - vuzp.8 $Qlo,$Qhi - vsli.8 $Zo,$T,#1 @ compose the "carry" byte - vext.8 $Z,$zero,#1 @ Z>>=8 - - vmull.p8 $R,$Zo,$mod @ "carry"·0xe1 - vshr.u8 $Zo,$T,#7 @ save Z's bottom bit - vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8 - veor $Z,$Qhi - bne .Linner_neon - - veor $Z,$Qpost @ modulo-scheduled artefact - vshl.i64 `&Dlo("$R")`,#48 - veor `&Dhi("$Z")`,`&Dlo("$R")` - - @ finalization, normalize Z:Zo - vand $Zo,$mod @ suffices to mask the bit - vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 - vshl.i64 $Z,#1 - subs $len,#16 - vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1 - bne .Louter_neon - -#ifdef __ARMEL__ - vrev64.8 $Z,$Z -#endif - sub $Xi,#16 - vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi - vst1.64 `&Dlo("$Z")`,[$Xi,:64] - - bx lr -.size gcm_ghash_neon,.-gcm_ghash_neon -#endif -___ -} -$code.=<<___; -.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " -.align 2 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-ia64.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-ia64.pl deleted file mode 100755 index 0354c95444..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-ia64.pl +++ /dev/null @@ -1,463 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# March 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+128 bytes shared table]. Streamed -# GHASH performance was measured to be 6.67 cycles per processed byte -# on Itanium 2, which is >90% better than Microsoft compiler generated -# code. To anchor to something else sha1-ia64.pl module processes one -# byte in 5.7 cycles. On Itanium GHASH should run at ~8.5 cycles per -# byte. - -# September 2010 -# -# It was originally thought that it makes lesser sense to implement -# "528B" variant on Itanium 2 for following reason. Because number of -# functional units is naturally limited, it appeared impossible to -# implement "528B" loop in 4 cycles, only in 5. This would mean that -# theoretically performance improvement couldn't be more than 20%. -# But occasionally you prove yourself wrong:-) I figured out a way to -# fold couple of instructions and having freed yet another instruction -# slot by unrolling the loop... Resulting performance is 4.45 cycles -# per processed byte and 50% better than "256B" version. On original -# Itanium performance should remain the same as the "256B" version, -# i.e. ~8.5 cycles. - -$output=shift and (open STDOUT,">$output" or die "can't open $output: $!"); - -if ($^O eq "hpux") { - $ADDP="addp4"; - for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } -} else { $ADDP="add"; } -for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); - $big_endian=0 if (/\-DL_ENDIAN/); } -if (!defined($big_endian)) - { $big_endian=(unpack('L',pack('N',1))==1); } - -sub loop() { -my $label=shift; -my ($p16,$p17)=(shift)?("p63","p63"):("p16","p17"); # mask references to inp - -# Loop is scheduled for 6 ticks on Itanium 2 and 8 on Itanium, i.e. -# in scalable manner;-) Naturally assuming data in L1 cache... -# Special note about 'dep' instruction, which is used to construct -# &rem_4bit[Zlo&0xf]. It works, because rem_4bit is aligned at 128 -# bytes boundary and lower 7 bits of its address are guaranteed to -# be zero. -$code.=<<___; -$label: -{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8 - (p19) dep rem=Zlo,rem_4bitp,3,4 } -{ .mfi; (p19) xor Zhi=Zhi,Hhi - ($p17) xor xi[1]=xi[1],in[1] };; -{ .mfi; (p18) ld8 Hhi=[Hi[1]] - (p19) shrp Zlo=Zhi,Zlo,4 } -{ .mfi; (p19) ld8 rem=[rem] - (p18) and Hi[1]=mask0xf0,xi[2] };; -{ .mmi; ($p16) ld1 in[0]=[inp],-1 - (p18) xor Zlo=Zlo,Hlo - (p19) shr.u Zhi=Zhi,4 } -{ .mib; (p19) xor Hhi=Hhi,rem - (p18) add Hi[1]=Htbl,Hi[1] };; - -{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8 - (p18) dep rem=Zlo,rem_4bitp,3,4 } -{ .mfi; (p17) shladd Hi[0]=xi[1],4,r0 - (p18) xor Zhi=Zhi,Hhi };; -{ .mfi; (p18) ld8 Hhi=[Hi[1]] - (p18) shrp Zlo=Zhi,Zlo,4 } -{ .mfi; (p18) ld8 rem=[rem] - (p17) and Hi[0]=mask0xf0,Hi[0] };; -{ .mmi; (p16) ld1 xi[0]=[Xi],-1 - (p18) xor Zlo=Zlo,Hlo - (p18) shr.u Zhi=Zhi,4 } -{ .mib; (p18) xor Hhi=Hhi,rem - (p17) add Hi[0]=Htbl,Hi[0] - br.ctop.sptk $label };; -___ -} - -$code=<<___; -.explicit -.text - -prevfs=r2; prevlc=r3; prevpr=r8; -mask0xf0=r21; -rem=r22; rem_4bitp=r23; -Xi=r24; Htbl=r25; -inp=r26; end=r27; -Hhi=r28; Hlo=r29; -Zhi=r30; Zlo=r31; - -.align 128 -.skip 16 // aligns loop body -.global gcm_gmult_4bit# -.proc gcm_gmult_4bit# -gcm_gmult_4bit: - .prologue -{ .mmi; .save ar.pfs,prevfs - alloc prevfs=ar.pfs,2,6,0,8 - $ADDP Xi=15,in0 // &Xi[15] - mov rem_4bitp=ip } -{ .mii; $ADDP Htbl=8,in1 // &Htbl[0].lo - .save ar.lc,prevlc - mov prevlc=ar.lc - .save pr,prevpr - mov prevpr=pr };; - - .body - .rotr in[3],xi[3],Hi[2] - -{ .mib; ld1 xi[2]=[Xi],-1 // Xi[15] - mov mask0xf0=0xf0 - brp.loop.imp .Loop1,.Lend1-16};; -{ .mmi; ld1 xi[1]=[Xi],-1 // Xi[14] - };; -{ .mii; shladd Hi[1]=xi[2],4,r0 - mov pr.rot=0x7<<16 - mov ar.lc=13 };; -{ .mii; and Hi[1]=mask0xf0,Hi[1] - mov ar.ec=3 - xor Zlo=Zlo,Zlo };; -{ .mii; add Hi[1]=Htbl,Hi[1] // &Htbl[nlo].lo - add rem_4bitp=rem_4bit#-gcm_gmult_4bit#,rem_4bitp - xor Zhi=Zhi,Zhi };; -___ - &loop (".Loop1",1); -$code.=<<___; -.Lend1: -{ .mib; xor Zhi=Zhi,Hhi };; // modulo-scheduling artefact -{ .mib; mux1 Zlo=Zlo,\@rev };; -{ .mib; mux1 Zhi=Zhi,\@rev };; -{ .mmi; add Hlo=9,Xi;; // ;; is here to prevent - add Hhi=1,Xi };; // pipeline flush on Itanium -{ .mib; st8 [Hlo]=Zlo - mov pr=prevpr,0x1ffff };; -{ .mib; st8 [Hhi]=Zhi - mov ar.lc=prevlc - br.ret.sptk.many b0 };; -.endp gcm_gmult_4bit# -___ - -###################################################################### -# "528B" (well, "512B" actualy) streamed GHASH -# -$Xip="in0"; -$Htbl="in1"; -$inp="in2"; -$len="in3"; -$rem_8bit="loc0"; -$mask0xff="loc1"; -($sum,$rum) = $big_endian ? ("nop.m","nop.m") : ("sum","rum"); - -sub load_htable() { - for (my $i=0;$i<8;$i++) { - $code.=<<___; -{ .mmi; ld8 r`16+2*$i+1`=[r8],16 // Htable[$i].hi - ld8 r`16+2*$i`=[r9],16 } // Htable[$i].lo -{ .mmi; ldf8 f`32+2*$i+1`=[r10],16 // Htable[`8+$i`].hi - ldf8 f`32+2*$i`=[r11],16 // Htable[`8+$i`].lo -___ - $code.=shift if (($i+$#_)==7); - $code.="\t};;\n" - } -} - -$code.=<<___; -prevsp=r3; - -.align 32 -.skip 16 // aligns loop body -.global gcm_ghash_4bit# -.proc gcm_ghash_4bit# -gcm_ghash_4bit: - .prologue -{ .mmi; .save ar.pfs,prevfs - alloc prevfs=ar.pfs,4,2,0,0 - .vframe prevsp - mov prevsp=sp - mov $rem_8bit=ip };; - .body -{ .mfi; $ADDP r8=0+0,$Htbl - $ADDP r9=0+8,$Htbl } -{ .mfi; $ADDP r10=128+0,$Htbl - $ADDP r11=128+8,$Htbl };; -___ - &load_htable( - " $ADDP $Xip=15,$Xip", # &Xi[15] - " $ADDP $len=$len,$inp", # &inp[len] - " $ADDP $inp=15,$inp", # &inp[15] - " mov $mask0xff=0xff", - " add sp=-512,sp", - " andcm sp=sp,$mask0xff", # align stack frame - " add r14=0,sp", - " add r15=8,sp"); -$code.=<<___; -{ .mmi; $sum 1<<1 // go big-endian - add r8=256+0,sp - add r9=256+8,sp } -{ .mmi; add r10=256+128+0,sp - add r11=256+128+8,sp - add $len=-17,$len };; -___ -for($i=0;$i<8;$i++) { # generate first half of Hshr4[] -my ($rlo,$rhi)=("r".eval(16+2*$i),"r".eval(16+2*$i+1)); -$code.=<<___; -{ .mmi; st8 [r8]=$rlo,16 // Htable[$i].lo - st8 [r9]=$rhi,16 // Htable[$i].hi - shrp $rlo=$rhi,$rlo,4 }//;; -{ .mmi; stf8 [r10]=f`32+2*$i`,16 // Htable[`8+$i`].lo - stf8 [r11]=f`32+2*$i+1`,16 // Htable[`8+$i`].hi - shr.u $rhi=$rhi,4 };; -{ .mmi; st8 [r14]=$rlo,16 // Htable[$i].lo>>4 - st8 [r15]=$rhi,16 }//;; // Htable[$i].hi>>4 -___ -} -$code.=<<___; -{ .mmi; ld8 r16=[r8],16 // Htable[8].lo - ld8 r17=[r9],16 };; // Htable[8].hi -{ .mmi; ld8 r18=[r8],16 // Htable[9].lo - ld8 r19=[r9],16 } // Htable[9].hi -{ .mmi; rum 1<<5 // clear um.mfh - shrp r16=r17,r16,4 };; -___ -for($i=0;$i<6;$i++) { # generate second half of Hshr4[] -$code.=<<___; -{ .mmi; ld8 r`20+2*$i`=[r8],16 // Htable[`10+$i`].lo - ld8 r`20+2*$i+1`=[r9],16 // Htable[`10+$i`].hi - shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };; -{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4 - st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4 - shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 } -___ -} -$code.=<<___; -{ .mmi; shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };; -{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4 - st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4 - shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 } -{ .mmi; add $Htbl=256,sp // &Htable[0] - add $rem_8bit=rem_8bit#-gcm_ghash_4bit#,$rem_8bit - shr.u r`18+2*$i+1`=r`18+2*$i+1`,4 };; -{ .mmi; st8 [r14]=r`18+2*$i` // Htable[`8+$i`].lo>>4 - st8 [r15]=r`18+2*$i+1` } // Htable[`8+$i`].hi>>4 -___ - -$in="r15"; -@xi=("r16","r17"); -@rem=("r18","r19"); -($Alo,$Ahi,$Blo,$Bhi,$Zlo,$Zhi)=("r20","r21","r22","r23","r24","r25"); -($Atbl,$Btbl)=("r26","r27"); - -$code.=<<___; # (p16) -{ .mmi; ld1 $in=[$inp],-1 //(p16) *inp-- - ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- - cmp.eq p0,p6=r0,r0 };; // clear p6 -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers - -$code.=<<___; # (p16),(p17) -{ .mmi; ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- - xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] -{ .mii; ld1 $in=[$inp],-1 //(p16) *inp-- - dep $Atbl=$xi[1],$Htbl,4,4 //(p17) &Htable[nlo].lo - and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 -.align 32 -.LOOP: -{ .mmi; -(p6) st8 [$Xip]=$Zhi,13 - xor $Zlo=$Zlo,$Zlo - add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi].lo -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers - -$code.=<<___; # (p16),(p17),(p18) -{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi - ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo - xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] -{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi - dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo -{ .mfi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 - xor $Zlo=$Zlo,$Alo };; //(p18) Z.lo^=Htable[nlo].lo -{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi - ld1 $in=[$inp],-1 } //(p16) *inp-- -{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) - mov $Zhi=$Ahi //(p18) Z.hi^=Htable[nlo].hi - and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 -{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi - ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- - shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) -{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff - add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers - -for ($i=1;$i<14;$i++) { -# Above and below fragments are derived from this one by removing -# unsuitable (p??) instructions. -$code.=<<___; # (p16),(p17),(p18),(p19) -{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi - ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo - shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 -{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] - xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo - xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] -{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi - ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] - dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo -{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 - xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo - xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi -{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi - ld1 $in=[$inp],-1 //(p16) *inp-- - shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 -{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) - xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi - and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 -{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi - ld1 $xi[0]=[$Xip],-1 //(p16) *Xi-- - shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) -{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff - xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 - add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers -} - -$code.=<<___; # (p17),(p18),(p19) -{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi - ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo - shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 -{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] - xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo - xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i] -{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi - ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] - dep $Atbl=$xi[1],$Htbl,4,4 };; //(p17) &Htable[nlo].lo -{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4 - xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo - xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi -{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi - shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 -{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4) - xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi - and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0 -{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi - shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8) -{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff - xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 - add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi] -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers - -$code.=<<___; # (p18),(p19) -{ .mfi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi - shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8 -{ .mfi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] - xor $Zlo=$Zlo,$Blo };; //(p19) Z.lo^=Hshr4[nhi].lo -{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi - xor $Zlo=$Zlo,$Alo } //(p18) Z.lo^=Htable[nlo].lo -{ .mfi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] - xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi -{ .mfi; ld8 $Blo=[$Btbl],8 //(p18) Htable[nhi].lo,&Htable[nhi].hi - shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48 -{ .mfi; shladd $rem[0]=$Zlo,4,r0 //(p18) Z.lo<<4 - xor $Zhi=$Zhi,$Ahi };; //(p18) Z.hi^=Htable[nlo].hi -{ .mfi; ld8 $Bhi=[$Btbl] //(p18) Htable[nhi].hi - shrp $Zlo=$Zhi,$Zlo,4 } //(p18) Z.lo=(Z.hi<<60)|(Z.lo>>4) -{ .mfi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff - xor $Zhi=$Zhi,$rem[1] };; //(p19) Z.hi^=rem_8bit[rem]<<48 -___ -push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers - -$code.=<<___; # (p19) -{ .mmi; cmp.ltu p6,p0=$inp,$len - add $inp=32,$inp - shr.u $Zhi=$Zhi,4 } //(p19) Z.hi>>=4 -{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem] - xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo - add $Xip=9,$Xip };; // &Xi.lo -{ .mmi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem] -(p6) ld1 $in=[$inp],-1 //[p16] *inp-- -(p6) extr.u $xi[1]=$Zlo,8,8 } //[p17] Xi[14] -{ .mmi; xor $Zhi=$Zhi,$Bhi //(p19) Z.hi^=Hshr4[nhi].hi -(p6) and $xi[0]=$Zlo,$mask0xff };; //[p16] Xi[15] -{ .mmi; st8 [$Xip]=$Zlo,-8 -(p6) xor $xi[0]=$xi[0],$in //[p17] xi=$xi[i]^inp[i] - shl $rem[1]=$rem[1],48 };; //(p19) rem_8bit[rem]<<48 -{ .mmi; -(p6) ld1 $in=[$inp],-1 //[p16] *inp-- - xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48 -(p6) dep $Atbl=$xi[0],$Htbl,4,4 } //[p17] &Htable[nlo].lo -{ .mib; -(p6) and $xi[0]=-16,$xi[0] //[p17] nhi=xi&0xf0 -(p6) br.cond.dptk.many .LOOP };; - -{ .mib; st8 [$Xip]=$Zhi };; -{ .mib; $rum 1<<1 // return to little-endian - .restore sp - mov sp=prevsp - br.ret.sptk.many b0 };; -.endp gcm_ghash_4bit# -___ -$code.=<<___; -.align 128 -.type rem_4bit#,\@object -rem_4bit: - data8 0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48 - data8 0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48 - data8 0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48 - data8 0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48 -.size rem_4bit#,128 -.type rem_8bit#,\@object -rem_8bit: - data1 0x00,0x00, 0x01,0xC2, 0x03,0x84, 0x02,0x46, 0x07,0x08, 0x06,0xCA, 0x04,0x8C, 0x05,0x4E - data1 0x0E,0x10, 0x0F,0xD2, 0x0D,0x94, 0x0C,0x56, 0x09,0x18, 0x08,0xDA, 0x0A,0x9C, 0x0B,0x5E - data1 0x1C,0x20, 0x1D,0xE2, 0x1F,0xA4, 0x1E,0x66, 0x1B,0x28, 0x1A,0xEA, 0x18,0xAC, 0x19,0x6E - data1 0x12,0x30, 0x13,0xF2, 0x11,0xB4, 0x10,0x76, 0x15,0x38, 0x14,0xFA, 0x16,0xBC, 0x17,0x7E - data1 0x38,0x40, 0x39,0x82, 0x3B,0xC4, 0x3A,0x06, 0x3F,0x48, 0x3E,0x8A, 0x3C,0xCC, 0x3D,0x0E - data1 0x36,0x50, 0x37,0x92, 0x35,0xD4, 0x34,0x16, 0x31,0x58, 0x30,0x9A, 0x32,0xDC, 0x33,0x1E - data1 0x24,0x60, 0x25,0xA2, 0x27,0xE4, 0x26,0x26, 0x23,0x68, 0x22,0xAA, 0x20,0xEC, 0x21,0x2E - data1 0x2A,0x70, 0x2B,0xB2, 0x29,0xF4, 0x28,0x36, 0x2D,0x78, 0x2C,0xBA, 0x2E,0xFC, 0x2F,0x3E - data1 0x70,0x80, 0x71,0x42, 0x73,0x04, 0x72,0xC6, 0x77,0x88, 0x76,0x4A, 0x74,0x0C, 0x75,0xCE - data1 0x7E,0x90, 0x7F,0x52, 0x7D,0x14, 0x7C,0xD6, 0x79,0x98, 0x78,0x5A, 0x7A,0x1C, 0x7B,0xDE - data1 0x6C,0xA0, 0x6D,0x62, 0x6F,0x24, 0x6E,0xE6, 0x6B,0xA8, 0x6A,0x6A, 0x68,0x2C, 0x69,0xEE - data1 0x62,0xB0, 0x63,0x72, 0x61,0x34, 0x60,0xF6, 0x65,0xB8, 0x64,0x7A, 0x66,0x3C, 0x67,0xFE - data1 0x48,0xC0, 0x49,0x02, 0x4B,0x44, 0x4A,0x86, 0x4F,0xC8, 0x4E,0x0A, 0x4C,0x4C, 0x4D,0x8E - data1 0x46,0xD0, 0x47,0x12, 0x45,0x54, 0x44,0x96, 0x41,0xD8, 0x40,0x1A, 0x42,0x5C, 0x43,0x9E - data1 0x54,0xE0, 0x55,0x22, 0x57,0x64, 0x56,0xA6, 0x53,0xE8, 0x52,0x2A, 0x50,0x6C, 0x51,0xAE - data1 0x5A,0xF0, 0x5B,0x32, 0x59,0x74, 0x58,0xB6, 0x5D,0xF8, 0x5C,0x3A, 0x5E,0x7C, 0x5F,0xBE - data1 0xE1,0x00, 0xE0,0xC2, 0xE2,0x84, 0xE3,0x46, 0xE6,0x08, 0xE7,0xCA, 0xE5,0x8C, 0xE4,0x4E - data1 0xEF,0x10, 0xEE,0xD2, 0xEC,0x94, 0xED,0x56, 0xE8,0x18, 0xE9,0xDA, 0xEB,0x9C, 0xEA,0x5E - data1 0xFD,0x20, 0xFC,0xE2, 0xFE,0xA4, 0xFF,0x66, 0xFA,0x28, 0xFB,0xEA, 0xF9,0xAC, 0xF8,0x6E - data1 0xF3,0x30, 0xF2,0xF2, 0xF0,0xB4, 0xF1,0x76, 0xF4,0x38, 0xF5,0xFA, 0xF7,0xBC, 0xF6,0x7E - data1 0xD9,0x40, 0xD8,0x82, 0xDA,0xC4, 0xDB,0x06, 0xDE,0x48, 0xDF,0x8A, 0xDD,0xCC, 0xDC,0x0E - data1 0xD7,0x50, 0xD6,0x92, 0xD4,0xD4, 0xD5,0x16, 0xD0,0x58, 0xD1,0x9A, 0xD3,0xDC, 0xD2,0x1E - data1 0xC5,0x60, 0xC4,0xA2, 0xC6,0xE4, 0xC7,0x26, 0xC2,0x68, 0xC3,0xAA, 0xC1,0xEC, 0xC0,0x2E - data1 0xCB,0x70, 0xCA,0xB2, 0xC8,0xF4, 0xC9,0x36, 0xCC,0x78, 0xCD,0xBA, 0xCF,0xFC, 0xCE,0x3E - data1 0x91,0x80, 0x90,0x42, 0x92,0x04, 0x93,0xC6, 0x96,0x88, 0x97,0x4A, 0x95,0x0C, 0x94,0xCE - data1 0x9F,0x90, 0x9E,0x52, 0x9C,0x14, 0x9D,0xD6, 0x98,0x98, 0x99,0x5A, 0x9B,0x1C, 0x9A,0xDE - data1 0x8D,0xA0, 0x8C,0x62, 0x8E,0x24, 0x8F,0xE6, 0x8A,0xA8, 0x8B,0x6A, 0x89,0x2C, 0x88,0xEE - data1 0x83,0xB0, 0x82,0x72, 0x80,0x34, 0x81,0xF6, 0x84,0xB8, 0x85,0x7A, 0x87,0x3C, 0x86,0xFE - data1 0xA9,0xC0, 0xA8,0x02, 0xAA,0x44, 0xAB,0x86, 0xAE,0xC8, 0xAF,0x0A, 0xAD,0x4C, 0xAC,0x8E - data1 0xA7,0xD0, 0xA6,0x12, 0xA4,0x54, 0xA5,0x96, 0xA0,0xD8, 0xA1,0x1A, 0xA3,0x5C, 0xA2,0x9E - data1 0xB5,0xE0, 0xB4,0x22, 0xB6,0x64, 0xB7,0xA6, 0xB2,0xE8, 0xB3,0x2A, 0xB1,0x6C, 0xB0,0xAE - data1 0xBB,0xF0, 0xBA,0x32, 0xB8,0x74, 0xB9,0xB6, 0xBC,0xF8, 0xBD,0x3A, 0xBF,0x7C, 0xBE,0xBE -.size rem_8bit#,512 -stringz "GHASH for IA64, CRYPTOGAMS by " -___ - -$code =~ s/mux1(\s+)\S+\@rev/nop.i$1 0x0/gm if ($big_endian); -$code =~ s/\`([^\`]*)\`/eval $1/gem; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-parisc.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-parisc.pl deleted file mode 100644 index d5ad96b403..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-parisc.pl +++ /dev/null @@ -1,731 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# April 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC -# it processes one byte in 19.6 cycles, which is more than twice as -# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for -# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per -# processed byte. This is ~2.2x faster than 64-bit code generated by -# vendor compiler (which used to be very hard to beat:-). -# -# Special thanks to polarhome.com for providing HP-UX account. - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; - $NREGS =6; -} else { - $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; - $NREGS =11; -} - -$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker - # [+ argument transfer] - -################# volatile registers -$Xi="%r26"; # argument block -$Htbl="%r25"; -$inp="%r24"; -$len="%r23"; -$Hhh=$Htbl; # variables -$Hll="%r22"; -$Zhh="%r21"; -$Zll="%r20"; -$cnt="%r19"; -$rem_4bit="%r28"; -$rem="%r29"; -$mask0xf0="%r31"; - -################# preserved registers -$Thh="%r1"; -$Tll="%r2"; -$nlo="%r3"; -$nhi="%r4"; -$byte="%r5"; -if ($SIZE_T==4) { - $Zhl="%r6"; - $Zlh="%r7"; - $Hhl="%r8"; - $Hlh="%r9"; - $Thl="%r10"; - $Tlh="%r11"; -} -$rem2="%r6"; # used in PA-RISC 2.0 code - -$code.=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR - .ALIGN 64 -gcm_gmult_4bit - .PROC - .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) -___ -$code.=<<___ if ($SIZE_T==4); - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) -___ -$code.=<<___; - blr %r0,$rem_4bit - ldi 3,$rem -L\$pic_gmult - andcm $rem_4bit,$rem,$rem_4bit - addl $inp,$len,$len - ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit - ldi 0xf0,$mask0xf0 -___ -$code.=<<___ if ($SIZE_T==4); - ldi 31,$rem - mtctl $rem,%cr11 - extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 - b L\$parisc1_gmult - nop -___ - -$code.=<<___; - ldb 15($Xi),$nlo - ldo 8($Htbl),$Hll - - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - - ldd $nlo($Hll),$Zll - ldd $nlo($Hhh),$Zhh - - depd,z $Zll,60,4,$rem - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldb 14($Xi),$nlo - - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - b L\$oop_gmult_pa2 - ldi 13,$cnt - - .ALIGN 8 -L\$oop_gmult_pa2 - xor $rem,$Zhh,$Zhh ; moved here to work around gas bug - depd,z $Zll,60,4,$rem - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nlo($Hll),$Tll - ldd $nlo($Hhh),$Thh - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - - xor $rem,$Zhh,$Zhh - depd,z $Zll,60,4,$rem - ldbx $cnt($Xi),$nlo - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - ldd $rem($rem_4bit),$rem - - xor $Tll,$Zll,$Zll - addib,uv -1,$cnt,L\$oop_gmult_pa2 - xor $Thh,$Zhh,$Zhh - - xor $rem,$Zhh,$Zhh - depd,z $Zll,60,4,$rem - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nlo($Hll),$Tll - ldd $nlo($Hhh),$Thh - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - - xor $rem,$Zhh,$Zhh - depd,z $Zll,60,4,$rem - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - - xor $rem,$Zhh,$Zhh - std $Zll,8($Xi) - std $Zhh,0($Xi) -___ - -$code.=<<___ if ($SIZE_T==4); - b L\$done_gmult - nop - -L\$parisc1_gmult - ldb 15($Xi),$nlo - ldo 12($Htbl),$Hll - ldo 8($Htbl),$Hlh - ldo 4($Htbl),$Hhl - - and $mask0xf0,$nlo,$nhi - zdep $nlo,27,4,$nlo - - ldwx $nlo($Hll),$Zll - ldwx $nlo($Hlh),$Zlh - ldwx $nlo($Hhl),$Zhl - ldwx $nlo($Hhh),$Zhh - zdep $Zll,28,4,$rem - ldb 14($Xi),$nlo - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhl,$Zlh,4,$Zlh - ldwx $nhi($Hlh),$Tlh - shrpw $Zhh,$Zhl,4,$Zhl - ldwx $nhi($Hhl),$Thl - extru $Zhh,27,28,$Zhh - ldwx $nhi($Hhh),$Thh - xor $rem,$Zhh,$Zhh - and $mask0xf0,$nlo,$nhi - zdep $nlo,27,4,$nlo - - xor $Tll,$Zll,$Zll - ldwx $nlo($Hll),$Tll - xor $Tlh,$Zlh,$Zlh - ldwx $nlo($Hlh),$Tlh - xor $Thl,$Zhl,$Zhl - b L\$oop_gmult_pa1 - ldi 13,$cnt - - .ALIGN 8 -L\$oop_gmult_pa1 - zdep $Zll,28,4,$rem - ldwx $nlo($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nlo($Hhh),$Thh - shrpw $Zhl,$Zlh,4,$Zlh - ldbx $cnt($Xi),$nlo - xor $Tll,$Zll,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhh,$Zhl,4,$Zhl - xor $Tlh,$Zlh,$Zlh - ldwx $nhi($Hlh),$Tlh - extru $Zhh,27,28,$Zhh - xor $Thl,$Zhl,$Zhl - ldwx $nhi($Hhl),$Thl - xor $rem,$Zhh,$Zhh - zdep $Zll,28,4,$rem - xor $Thh,$Zhh,$Zhh - ldwx $nhi($Hhh),$Thh - shrpw $Zlh,$Zll,4,$Zll - ldwx $rem($rem_4bit),$rem - shrpw $Zhl,$Zlh,4,$Zlh - shrpw $Zhh,$Zhl,4,$Zhl - and $mask0xf0,$nlo,$nhi - extru $Zhh,27,28,$Zhh - zdep $nlo,27,4,$nlo - xor $Tll,$Zll,$Zll - ldwx $nlo($Hll),$Tll - xor $Tlh,$Zlh,$Zlh - ldwx $nlo($Hlh),$Tlh - xor $rem,$Zhh,$Zhh - addib,uv -1,$cnt,L\$oop_gmult_pa1 - xor $Thl,$Zhl,$Zhl - - zdep $Zll,28,4,$rem - ldwx $nlo($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nlo($Hhh),$Thh - shrpw $Zhl,$Zlh,4,$Zlh - xor $Tll,$Zll,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhh,$Zhl,4,$Zhl - xor $Tlh,$Zlh,$Zlh - ldwx $nhi($Hlh),$Tlh - extru $Zhh,27,28,$Zhh - xor $rem,$Zhh,$Zhh - xor $Thl,$Zhl,$Zhl - ldwx $nhi($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $nhi($Hhh),$Thh - zdep $Zll,28,4,$rem - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - shrpw $Zhl,$Zlh,4,$Zlh - shrpw $Zhh,$Zhl,4,$Zhl - extru $Zhh,27,28,$Zhh - xor $Tll,$Zll,$Zll - xor $Tlh,$Zlh,$Zlh - xor $rem,$Zhh,$Zhh - stw $Zll,12($Xi) - xor $Thl,$Zhl,$Zhl - stw $Zlh,8($Xi) - xor $Thh,$Zhh,$Zhh - stw $Zhl,4($Xi) - stw $Zhh,0($Xi) -___ -$code.=<<___; -L\$done_gmult - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 -___ -$code.=<<___ if ($SIZE_T==4); - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 -___ -$code.=<<___; - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - - .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR - .ALIGN 64 -gcm_ghash_4bit - .PROC - .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) -___ -$code.=<<___ if ($SIZE_T==4); - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) -___ -$code.=<<___; - blr %r0,$rem_4bit - ldi 3,$rem -L\$pic_ghash - andcm $rem_4bit,$rem,$rem_4bit - addl $inp,$len,$len - ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit - ldi 0xf0,$mask0xf0 -___ -$code.=<<___ if ($SIZE_T==4); - ldi 31,$rem - mtctl $rem,%cr11 - extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0 - b L\$parisc1_ghash - nop -___ - -$code.=<<___; - ldb 15($Xi),$nlo - ldo 8($Htbl),$Hll - -L\$outer_ghash_pa2 - ldb 15($inp),$nhi - xor $nhi,$nlo,$nlo - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - - ldd $nlo($Hll),$Zll - ldd $nlo($Hhh),$Zhh - - depd,z $Zll,60,4,$rem - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldb 14($Xi),$nlo - ldb 14($inp),$byte - - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - xor $byte,$nlo,$nlo - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - b L\$oop_ghash_pa2 - ldi 13,$cnt - - .ALIGN 8 -L\$oop_ghash_pa2 - xor $rem,$Zhh,$Zhh ; moved here to work around gas bug - depd,z $Zll,60,4,$rem2 - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nlo($Hll),$Tll - ldd $nlo($Hhh),$Thh - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldbx $cnt($Xi),$nlo - ldbx $cnt($inp),$byte - - depd,z $Zll,60,4,$rem - shrpd $Zhh,$Zll,4,$Zll - ldd $rem2($rem_4bit),$rem2 - - xor $rem2,$Zhh,$Zhh - xor $byte,$nlo,$nlo - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - - and $mask0xf0,$nlo,$nhi - depd,z $nlo,59,4,$nlo - - extrd,u $Zhh,59,60,$Zhh - xor $Tll,$Zll,$Zll - - ldd $rem($rem_4bit),$rem - addib,uv -1,$cnt,L\$oop_ghash_pa2 - xor $Thh,$Zhh,$Zhh - - xor $rem,$Zhh,$Zhh - depd,z $Zll,60,4,$rem2 - - shrpd $Zhh,$Zll,4,$Zll - extrd,u $Zhh,59,60,$Zhh - ldd $nlo($Hll),$Tll - ldd $nlo($Hhh),$Thh - - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - - depd,z $Zll,60,4,$rem - shrpd $Zhh,$Zll,4,$Zll - ldd $rem2($rem_4bit),$rem2 - - xor $rem2,$Zhh,$Zhh - ldd $nhi($Hll),$Tll - ldd $nhi($Hhh),$Thh - - extrd,u $Zhh,59,60,$Zhh - xor $Tll,$Zll,$Zll - xor $Thh,$Zhh,$Zhh - ldd $rem($rem_4bit),$rem - - xor $rem,$Zhh,$Zhh - std $Zll,8($Xi) - ldo 16($inp),$inp - std $Zhh,0($Xi) - cmpb,*<> $inp,$len,L\$outer_ghash_pa2 - copy $Zll,$nlo -___ - -$code.=<<___ if ($SIZE_T==4); - b L\$done_ghash - nop - -L\$parisc1_ghash - ldb 15($Xi),$nlo - ldo 12($Htbl),$Hll - ldo 8($Htbl),$Hlh - ldo 4($Htbl),$Hhl - -L\$outer_ghash_pa1 - ldb 15($inp),$byte - xor $byte,$nlo,$nlo - and $mask0xf0,$nlo,$nhi - zdep $nlo,27,4,$nlo - - ldwx $nlo($Hll),$Zll - ldwx $nlo($Hlh),$Zlh - ldwx $nlo($Hhl),$Zhl - ldwx $nlo($Hhh),$Zhh - zdep $Zll,28,4,$rem - ldb 14($Xi),$nlo - ldb 14($inp),$byte - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhl,$Zlh,4,$Zlh - ldwx $nhi($Hlh),$Tlh - shrpw $Zhh,$Zhl,4,$Zhl - ldwx $nhi($Hhl),$Thl - extru $Zhh,27,28,$Zhh - ldwx $nhi($Hhh),$Thh - xor $byte,$nlo,$nlo - xor $rem,$Zhh,$Zhh - and $mask0xf0,$nlo,$nhi - zdep $nlo,27,4,$nlo - - xor $Tll,$Zll,$Zll - ldwx $nlo($Hll),$Tll - xor $Tlh,$Zlh,$Zlh - ldwx $nlo($Hlh),$Tlh - xor $Thl,$Zhl,$Zhl - b L\$oop_ghash_pa1 - ldi 13,$cnt - - .ALIGN 8 -L\$oop_ghash_pa1 - zdep $Zll,28,4,$rem - ldwx $nlo($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nlo($Hhh),$Thh - shrpw $Zhl,$Zlh,4,$Zlh - ldbx $cnt($Xi),$nlo - xor $Tll,$Zll,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhh,$Zhl,4,$Zhl - ldbx $cnt($inp),$byte - xor $Tlh,$Zlh,$Zlh - ldwx $nhi($Hlh),$Tlh - extru $Zhh,27,28,$Zhh - xor $Thl,$Zhl,$Zhl - ldwx $nhi($Hhl),$Thl - xor $rem,$Zhh,$Zhh - zdep $Zll,28,4,$rem - xor $Thh,$Zhh,$Zhh - ldwx $nhi($Hhh),$Thh - shrpw $Zlh,$Zll,4,$Zll - ldwx $rem($rem_4bit),$rem - shrpw $Zhl,$Zlh,4,$Zlh - xor $byte,$nlo,$nlo - shrpw $Zhh,$Zhl,4,$Zhl - and $mask0xf0,$nlo,$nhi - extru $Zhh,27,28,$Zhh - zdep $nlo,27,4,$nlo - xor $Tll,$Zll,$Zll - ldwx $nlo($Hll),$Tll - xor $Tlh,$Zlh,$Zlh - ldwx $nlo($Hlh),$Tlh - xor $rem,$Zhh,$Zhh - addib,uv -1,$cnt,L\$oop_ghash_pa1 - xor $Thl,$Zhl,$Zhl - - zdep $Zll,28,4,$rem - ldwx $nlo($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - ldwx $nlo($Hhh),$Thh - shrpw $Zhl,$Zlh,4,$Zlh - xor $Tll,$Zll,$Zll - ldwx $nhi($Hll),$Tll - shrpw $Zhh,$Zhl,4,$Zhl - xor $Tlh,$Zlh,$Zlh - ldwx $nhi($Hlh),$Tlh - extru $Zhh,27,28,$Zhh - xor $rem,$Zhh,$Zhh - xor $Thl,$Zhl,$Zhl - ldwx $nhi($Hhl),$Thl - xor $Thh,$Zhh,$Zhh - ldwx $nhi($Hhh),$Thh - zdep $Zll,28,4,$rem - ldwx $rem($rem_4bit),$rem - shrpw $Zlh,$Zll,4,$Zll - shrpw $Zhl,$Zlh,4,$Zlh - shrpw $Zhh,$Zhl,4,$Zhl - extru $Zhh,27,28,$Zhh - xor $Tll,$Zll,$Zll - xor $Tlh,$Zlh,$Zlh - xor $rem,$Zhh,$Zhh - stw $Zll,12($Xi) - xor $Thl,$Zhl,$Zhl - stw $Zlh,8($Xi) - xor $Thh,$Zhh,$Zhh - stw $Zhl,4($Xi) - ldo 16($inp),$inp - stw $Zhh,0($Xi) - comb,<> $inp,$len,L\$outer_ghash_pa1 - copy $Zll,$nlo -___ -$code.=<<___; -L\$done_ghash - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 -___ -$code.=<<___ if ($SIZE_T==4); - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 -___ -$code.=<<___; - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - - .ALIGN 64 -L\$rem_4bit - .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 - .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 - .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 - .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 - .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by " - .ALIGN 64 -___ - -# Explicitly encode PA-RISC 2.0 instructions used in this module, so -# that it can be compiled with .LEVEL 1.0. It should be noted that I -# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 -# directive... - -my $ldd = sub { - my ($mod,$args) = @_; - my $orig = "ldd$mod\t$args"; - - if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4 - { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3; - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5 - { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3; - $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset - $opcode|=(1<<5) if ($mod =~ /^,m/); - $opcode|=(1<<13) if ($mod =~ /^,mb/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $std = sub { - my ($mod,$args) = @_; - my $orig = "std$mod\t$args"; - - if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices - { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $extrd = sub { - my ($mod,$args) = @_; - my $orig = "extrd$mod\t$args"; - - # I only have ",u" completer, it's implicitly encoded... - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 - { my $opcode=(0x36<<26)|($1<<21)|($4<<16); - my $len=32-$3; - $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 - { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); - my $len=32-$2; - $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len - $opcode |= (1<<13) if ($mod =~ /,\**=/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $shrpd = sub { - my ($mod,$args) = @_; - my $orig = "shrpd$mod\t$args"; - - if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 - { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; - my $cpos=63-$3; - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11 - { sprintf "\t.WORD\t0x%08x\t; %s", - (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; - } - else { "\t".$orig; } -}; - -my $depd = sub { - my ($mod,$args) = @_; - my $orig = "depd$mod\t$args"; - - # I only have ",z" completer, it's impicitly encoded... - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16 - { my $opcode=(0x3c<<26)|($4<<21)|($1<<16); - my $cpos=63-$2; - my $len=32-$3; - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -sub assemble { - my ($mnemonic,$mod,$args)=@_; - my $opcode = eval("\$$mnemonic"); - - ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; -} - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - if ($SIZE_T==4) { - s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e; - s/cmpb,\*/comb,/; - s/,\*/,/; - } - s/\bbv\b/bve/ if ($SIZE_T==8); - print $_,"\n"; -} - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-s390x.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-s390x.pl deleted file mode 100644 index 6a40d5d89c..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-s390x.pl +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# September 2010. -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+128 bytes shared table]. Performance -# was measured to be ~18 cycles per processed byte on z10, which is -# almost 40% better than gcc-generated code. It should be noted that -# 18 cycles is worse result than expected: loop is scheduled for 12 -# and the result should be close to 12. In the lack of instruction- -# level profiling data it's impossible to tell why... - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. On z990 it was measured to perform -# 2.8x better than 32-bit code generated by gcc 4.3. - -# March 2011. -# -# Support for hardware KIMD-GHASH is verified to produce correct -# result and therefore is engaged. On z196 it was measured to process -# 8KB buffer ~7 faster than software implementation. It's not as -# impressive for smaller buffer sizes and for smallest 16-bytes buffer -# it's actually almost 2 times slower. Which is the reason why -# KIMD-GHASH is not used in gcm_gmult_4bit. - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$softonly=0; - -$Zhi="%r0"; -$Zlo="%r1"; - -$Xi="%r2"; # argument block -$Htbl="%r3"; -$inp="%r4"; -$len="%r5"; - -$rem0="%r6"; # variables -$rem1="%r7"; -$nlo="%r8"; -$nhi="%r9"; -$xi="%r10"; -$cnt="%r11"; -$tmp="%r12"; -$x78="%r13"; -$rem_4bit="%r14"; - -$sp="%r15"; - -$code.=<<___; -.text - -.globl gcm_gmult_4bit -.align 32 -gcm_gmult_4bit: -___ -$code.=<<___ if(!$softonly && 0); # hardware is slow for single block... - larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security-assist - jz .Lsoft_gmult - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0004 # kimd %r0,%r4 - lg %r1,24($sp) - tmhh %r1,0x4000 # check for function 65 - jz .Lsoft_gmult - stg %r0,16($sp) # arrange 16 bytes of zero input - stg %r0,24($sp) - lghi %r0,65 # function 65 - la %r1,0($Xi) # H lies right after Xi in gcm128_context - la $inp,16($sp) - lghi $len,16 - .long 0xb93e0004 # kimd %r0,$inp - brc 1,.-4 # pay attention to "partial completion" - br %r14 -.align 32 -.Lsoft_gmult: -___ -$code.=<<___; - stm${g} %r6,%r14,6*$SIZE_T($sp) - - aghi $Xi,-1 - lghi $len,1 - lghi $x78,`0xf<<3` - larl $rem_4bit,rem_4bit - - lg $Zlo,8+1($Xi) # Xi - j .Lgmult_shortcut -.type gcm_gmult_4bit,\@function -.size gcm_gmult_4bit,(.-gcm_gmult_4bit) - -.globl gcm_ghash_4bit -.align 32 -gcm_ghash_4bit: -___ -$code.=<<___ if(!$softonly); - larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security-assist - jz .Lsoft_ghash - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0004 # kimd %r0,%r4 - lg %r1,24($sp) - tmhh %r1,0x4000 # check for function 65 - jz .Lsoft_ghash - lghi %r0,65 # function 65 - la %r1,0($Xi) # H lies right after Xi in gcm128_context - .long 0xb93e0004 # kimd %r0,$inp - brc 1,.-4 # pay attention to "partial completion" - br %r14 -.align 32 -.Lsoft_ghash: -___ -$code.=<<___ if ($flavour =~ /3[12]/); - llgfr $len,$len -___ -$code.=<<___; - stm${g} %r6,%r14,6*$SIZE_T($sp) - - aghi $Xi,-1 - srlg $len,$len,4 - lghi $x78,`0xf<<3` - larl $rem_4bit,rem_4bit - - lg $Zlo,8+1($Xi) # Xi - lg $Zhi,0+1($Xi) - lghi $tmp,0 -.Louter: - xg $Zhi,0($inp) # Xi ^= inp - xg $Zlo,8($inp) - xgr $Zhi,$tmp - stg $Zlo,8+1($Xi) - stg $Zhi,0+1($Xi) - -.Lgmult_shortcut: - lghi $tmp,0xf0 - sllg $nlo,$Zlo,4 - srlg $xi,$Zlo,8 # extract second byte - ngr $nlo,$tmp - lgr $nhi,$Zlo - lghi $cnt,14 - ngr $nhi,$tmp - - lg $Zlo,8($nlo,$Htbl) - lg $Zhi,0($nlo,$Htbl) - - sllg $nlo,$xi,4 - sllg $rem0,$Zlo,3 - ngr $nlo,$tmp - ngr $rem0,$x78 - ngr $xi,$tmp - - sllg $tmp,$Zhi,60 - srlg $Zlo,$Zlo,4 - srlg $Zhi,$Zhi,4 - xg $Zlo,8($nhi,$Htbl) - xg $Zhi,0($nhi,$Htbl) - lgr $nhi,$xi - sllg $rem1,$Zlo,3 - xgr $Zlo,$tmp - ngr $rem1,$x78 - j .Lghash_inner -.align 16 -.Lghash_inner: - srlg $Zlo,$Zlo,4 - sllg $tmp,$Zhi,60 - xg $Zlo,8($nlo,$Htbl) - srlg $Zhi,$Zhi,4 - llgc $xi,0($cnt,$Xi) - xg $Zhi,0($nlo,$Htbl) - sllg $nlo,$xi,4 - xg $Zhi,0($rem0,$rem_4bit) - nill $nlo,0xf0 - sllg $rem0,$Zlo,3 - xgr $Zlo,$tmp - ngr $rem0,$x78 - nill $xi,0xf0 - - sllg $tmp,$Zhi,60 - srlg $Zlo,$Zlo,4 - srlg $Zhi,$Zhi,4 - xg $Zlo,8($nhi,$Htbl) - xg $Zhi,0($nhi,$Htbl) - lgr $nhi,$xi - xg $Zhi,0($rem1,$rem_4bit) - sllg $rem1,$Zlo,3 - xgr $Zlo,$tmp - ngr $rem1,$x78 - brct $cnt,.Lghash_inner - - sllg $tmp,$Zhi,60 - srlg $Zlo,$Zlo,4 - srlg $Zhi,$Zhi,4 - xg $Zlo,8($nlo,$Htbl) - xg $Zhi,0($nlo,$Htbl) - sllg $xi,$Zlo,3 - xg $Zhi,0($rem0,$rem_4bit) - xgr $Zlo,$tmp - ngr $xi,$x78 - - sllg $tmp,$Zhi,60 - srlg $Zlo,$Zlo,4 - srlg $Zhi,$Zhi,4 - xg $Zlo,8($nhi,$Htbl) - xg $Zhi,0($nhi,$Htbl) - xgr $Zlo,$tmp - xg $Zhi,0($rem1,$rem_4bit) - - lg $tmp,0($xi,$rem_4bit) - la $inp,16($inp) - sllg $tmp,$tmp,4 # correct last rem_4bit[rem] - brctg $len,.Louter - - xgr $Zhi,$tmp - stg $Zlo,8+1($Xi) - stg $Zhi,0+1($Xi) - lm${g} %r6,%r14,6*$SIZE_T($sp) - br %r14 -.type gcm_ghash_4bit,\@function -.size gcm_ghash_4bit,(.-gcm_ghash_4bit) - -.align 64 -rem_4bit: - .long `0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0 - .long `0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0 - .long `0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0 - .long `0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0 -.type rem_4bit,\@object -.size rem_4bit,(.-rem_4bit) -.string "GHASH for s390x, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-sparcv9.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-sparcv9.pl deleted file mode 100644 index 70e7b044a3..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-sparcv9.pl +++ /dev/null @@ -1,330 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# March 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+128 bytes shared table]. Performance -# results are for streamed GHASH subroutine on UltraSPARC pre-Tx CPU -# and are expressed in cycles per processed byte, less is better: -# -# gcc 3.3.x cc 5.2 this assembler -# -# 32-bit build 81.4 43.3 12.6 (+546%/+244%) -# 64-bit build 20.2 21.2 12.6 (+60%/+68%) -# -# Here is data collected on UltraSPARC T1 system running Linux: -# -# gcc 4.4.1 this assembler -# -# 32-bit build 566 50 (+1000%) -# 64-bit build 56 50 (+12%) -# -# I don't quite understand why difference between 32-bit and 64-bit -# compiler-generated code is so big. Compilers *were* instructed to -# generate code for UltraSPARC and should have used 64-bit registers -# for Z vector (see C code) even in 32-bit build... Oh well, it only -# means more impressive improvement coefficients for this assembler -# module;-) Loops are aggressively modulo-scheduled in respect to -# references to input data and Z.hi updates to achieve 12 cycles -# timing. To anchor to something else, sha1-sparcv9.pl spends 11.6 -# cycles to process one byte on UltraSPARC pre-Tx CPU and ~24 on T1. - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } - -$output=shift; -open STDOUT,">$output"; - -$Zhi="%o0"; # 64-bit values -$Zlo="%o1"; -$Thi="%o2"; -$Tlo="%o3"; -$rem="%o4"; -$tmp="%o5"; - -$nhi="%l0"; # small values and pointers -$nlo="%l1"; -$xi0="%l2"; -$xi1="%l3"; -$rem_4bit="%l4"; -$remi="%l5"; -$Htblo="%l6"; -$cnt="%l7"; - -$Xi="%i0"; # input argument block -$Htbl="%i1"; -$inp="%i2"; -$len="%i3"; - -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 64 -rem_4bit: - .long `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0 - .long `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0 - .long `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0 - .long `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0 -.type rem_4bit,#object -.size rem_4bit,(.-rem_4bit) - -.globl gcm_ghash_4bit -.align 32 -gcm_ghash_4bit: - save %sp,-$frame,%sp - ldub [$inp+15],$nlo - ldub [$Xi+15],$xi0 - ldub [$Xi+14],$xi1 - add $len,$inp,$len - add $Htbl,8,$Htblo - -1: call .+8 - add %o7,rem_4bit-1b,$rem_4bit - -.Louter: - xor $xi0,$nlo,$nlo - and $nlo,0xf0,$nhi - and $nlo,0x0f,$nlo - sll $nlo,4,$nlo - ldx [$Htblo+$nlo],$Zlo - ldx [$Htbl+$nlo],$Zhi - - ldub [$inp+14],$nlo - - ldx [$Htblo+$nhi],$Tlo - and $Zlo,0xf,$remi - ldx [$Htbl+$nhi],$Thi - sll $remi,3,$remi - ldx [$rem_4bit+$remi],$rem - srlx $Zlo,4,$Zlo - mov 13,$cnt - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - - xor $xi1,$nlo,$nlo - and $Zlo,0xf,$remi - and $nlo,0xf0,$nhi - and $nlo,0x0f,$nlo - ba .Lghash_inner - sll $nlo,4,$nlo -.align 32 -.Lghash_inner: - ldx [$Htblo+$nlo],$Tlo - sll $remi,3,$remi - xor $Thi,$Zhi,$Zhi - ldx [$Htbl+$nlo],$Thi - srlx $Zlo,4,$Zlo - xor $rem,$Zhi,$Zhi - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - ldub [$inp+$cnt],$nlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - ldub [$Xi+$cnt],$xi1 - xor $Thi,$Zhi,$Zhi - and $Zlo,0xf,$remi - - ldx [$Htblo+$nhi],$Tlo - sll $remi,3,$remi - xor $rem,$Zhi,$Zhi - ldx [$Htbl+$nhi],$Thi - srlx $Zlo,4,$Zlo - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $xi1,$nlo,$nlo - srlx $Zhi,4,$Zhi - and $nlo,0xf0,$nhi - addcc $cnt,-1,$cnt - xor $Zlo,$tmp,$Zlo - and $nlo,0x0f,$nlo - xor $Tlo,$Zlo,$Zlo - sll $nlo,4,$nlo - blu .Lghash_inner - and $Zlo,0xf,$remi - - ldx [$Htblo+$nlo],$Tlo - sll $remi,3,$remi - xor $Thi,$Zhi,$Zhi - ldx [$Htbl+$nlo],$Thi - srlx $Zlo,4,$Zlo - xor $rem,$Zhi,$Zhi - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - - add $inp,16,$inp - cmp $inp,$len - be,pn `$bits==64?"%xcc":"%icc"`,.Ldone - and $Zlo,0xf,$remi - - ldx [$Htblo+$nhi],$Tlo - sll $remi,3,$remi - xor $rem,$Zhi,$Zhi - ldx [$Htbl+$nhi],$Thi - srlx $Zlo,4,$Zlo - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - ldub [$inp+15],$nlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - stx $Zlo,[$Xi+8] - xor $rem,$Zhi,$Zhi - stx $Zhi,[$Xi] - srl $Zlo,8,$xi1 - and $Zlo,0xff,$xi0 - ba .Louter - and $xi1,0xff,$xi1 -.align 32 -.Ldone: - ldx [$Htblo+$nhi],$Tlo - sll $remi,3,$remi - xor $rem,$Zhi,$Zhi - ldx [$Htbl+$nhi],$Thi - srlx $Zlo,4,$Zlo - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - stx $Zlo,[$Xi+8] - xor $rem,$Zhi,$Zhi - stx $Zhi,[$Xi] - - ret - restore -.type gcm_ghash_4bit,#function -.size gcm_ghash_4bit,(.-gcm_ghash_4bit) -___ - -undef $inp; -undef $len; - -$code.=<<___; -.globl gcm_gmult_4bit -.align 32 -gcm_gmult_4bit: - save %sp,-$frame,%sp - ldub [$Xi+15],$nlo - add $Htbl,8,$Htblo - -1: call .+8 - add %o7,rem_4bit-1b,$rem_4bit - - and $nlo,0xf0,$nhi - and $nlo,0x0f,$nlo - sll $nlo,4,$nlo - ldx [$Htblo+$nlo],$Zlo - ldx [$Htbl+$nlo],$Zhi - - ldub [$Xi+14],$nlo - - ldx [$Htblo+$nhi],$Tlo - and $Zlo,0xf,$remi - ldx [$Htbl+$nhi],$Thi - sll $remi,3,$remi - ldx [$rem_4bit+$remi],$rem - srlx $Zlo,4,$Zlo - mov 13,$cnt - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - - and $Zlo,0xf,$remi - and $nlo,0xf0,$nhi - and $nlo,0x0f,$nlo - ba .Lgmult_inner - sll $nlo,4,$nlo -.align 32 -.Lgmult_inner: - ldx [$Htblo+$nlo],$Tlo - sll $remi,3,$remi - xor $Thi,$Zhi,$Zhi - ldx [$Htbl+$nlo],$Thi - srlx $Zlo,4,$Zlo - xor $rem,$Zhi,$Zhi - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - ldub [$Xi+$cnt],$nlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - and $Zlo,0xf,$remi - - ldx [$Htblo+$nhi],$Tlo - sll $remi,3,$remi - xor $rem,$Zhi,$Zhi - ldx [$Htbl+$nhi],$Thi - srlx $Zlo,4,$Zlo - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - srlx $Zhi,4,$Zhi - and $nlo,0xf0,$nhi - addcc $cnt,-1,$cnt - xor $Zlo,$tmp,$Zlo - and $nlo,0x0f,$nlo - xor $Tlo,$Zlo,$Zlo - sll $nlo,4,$nlo - blu .Lgmult_inner - and $Zlo,0xf,$remi - - ldx [$Htblo+$nlo],$Tlo - sll $remi,3,$remi - xor $Thi,$Zhi,$Zhi - ldx [$Htbl+$nlo],$Thi - srlx $Zlo,4,$Zlo - xor $rem,$Zhi,$Zhi - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - and $Zlo,0xf,$remi - - ldx [$Htblo+$nhi],$Tlo - sll $remi,3,$remi - xor $rem,$Zhi,$Zhi - ldx [$Htbl+$nhi],$Thi - srlx $Zlo,4,$Zlo - ldx [$rem_4bit+$remi],$rem - sllx $Zhi,60,$tmp - xor $Tlo,$Zlo,$Zlo - srlx $Zhi,4,$Zhi - xor $Zlo,$tmp,$Zlo - xor $Thi,$Zhi,$Zhi - stx $Zlo,[$Xi+8] - xor $rem,$Zhi,$Zhi - stx $Zhi,[$Xi] - - ret - restore -.type gcm_gmult_4bit,#function -.size gcm_gmult_4bit,(.-gcm_gmult_4bit) -.asciz "GHASH for SPARCv9, CRYPTOGAMS by " -.align 4 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86.pl deleted file mode 100644 index 2426cd0c8a..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86.pl +++ /dev/null @@ -1,1342 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# March, May, June 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that it -# uses 256 bytes per-key table [+64/128 bytes fixed table]. It has two -# code paths: vanilla x86 and vanilla MMX. Former will be executed on -# 486 and Pentium, latter on all others. MMX GHASH features so called -# "528B" variant of "4-bit" method utilizing additional 256+16 bytes -# of per-key storage [+512 bytes shared table]. Performance results -# are for streamed GHASH subroutine and are expressed in cycles per -# processed byte, less is better: -# -# gcc 2.95.3(*) MMX assembler x86 assembler -# -# Pentium 105/111(**) - 50 -# PIII 68 /75 12.2 24 -# P4 125/125 17.8 84(***) -# Opteron 66 /70 10.1 30 -# Core2 54 /67 8.4 18 -# -# (*) gcc 3.4.x was observed to generate few percent slower code, -# which is one of reasons why 2.95.3 results were chosen, -# another reason is lack of 3.4.x results for older CPUs; -# comparison with MMX results is not completely fair, because C -# results are for vanilla "256B" implementation, while -# assembler results are for "528B";-) -# (**) second number is result for code compiled with -fPIC flag, -# which is actually more relevant, because assembler code is -# position-independent; -# (***) see comment in non-MMX routine for further details; -# -# To summarize, it's >2-5 times faster than gcc-generated code. To -# anchor it to something else SHA1 assembler processes one byte in -# 11-13 cycles on contemporary x86 cores. As for choice of MMX in -# particular, see comment at the end of the file... - -# May 2010 -# -# Add PCLMULQDQ version performing at 2.10 cycles per processed byte. -# The question is how close is it to theoretical limit? The pclmulqdq -# instruction latency appears to be 14 cycles and there can't be more -# than 2 of them executing at any given time. This means that single -# Karatsuba multiplication would take 28 cycles *plus* few cycles for -# pre- and post-processing. Then multiplication has to be followed by -# modulo-reduction. Given that aggregated reduction method [see -# "Carry-less Multiplication and Its Usage for Computing the GCM Mode" -# white paper by Intel] allows you to perform reduction only once in -# a while we can assume that asymptotic performance can be estimated -# as (28+Tmod/Naggr)/16, where Tmod is time to perform reduction -# and Naggr is the aggregation factor. -# -# Before we proceed to this implementation let's have closer look at -# the best-performing code suggested by Intel in their white paper. -# By tracing inter-register dependencies Tmod is estimated as ~19 -# cycles and Naggr chosen by Intel is 4, resulting in 2.05 cycles per -# processed byte. As implied, this is quite optimistic estimate, -# because it does not account for Karatsuba pre- and post-processing, -# which for a single multiplication is ~5 cycles. Unfortunately Intel -# does not provide performance data for GHASH alone. But benchmarking -# AES_GCM_encrypt ripped out of Fig. 15 of the white paper with aadt -# alone resulted in 2.46 cycles per byte of out 16KB buffer. Note that -# the result accounts even for pre-computing of degrees of the hash -# key H, but its portion is negligible at 16KB buffer size. -# -# Moving on to the implementation in question. Tmod is estimated as -# ~13 cycles and Naggr is 2, giving asymptotic performance of ... -# 2.16. How is it possible that measured performance is better than -# optimistic theoretical estimate? There is one thing Intel failed -# to recognize. By serializing GHASH with CTR in same subroutine -# former's performance is really limited to above (Tmul + Tmod/Naggr) -# equation. But if GHASH procedure is detached, the modulo-reduction -# can be interleaved with Naggr-1 multiplications at instruction level -# and under ideal conditions even disappear from the equation. So that -# optimistic theoretical estimate for this implementation is ... -# 28/16=1.75, and not 2.16. Well, it's probably way too optimistic, -# at least for such small Naggr. I'd argue that (28+Tproc/Naggr), -# where Tproc is time required for Karatsuba pre- and post-processing, -# is more realistic estimate. In this case it gives ... 1.91 cycles. -# Or in other words, depending on how well we can interleave reduction -# and one of the two multiplications the performance should be betwen -# 1.91 and 2.16. As already mentioned, this implementation processes -# one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart -# - in 2.02. x86_64 performance is better, because larger register -# bank allows to interleave reduction and multiplication better. -# -# Does it make sense to increase Naggr? To start with it's virtually -# impossible in 32-bit mode, because of limited register bank -# capacity. Otherwise improvement has to be weighed agiainst slower -# setup, as well as code size and complexity increase. As even -# optimistic estimate doesn't promise 30% performance improvement, -# there are currently no plans to increase Naggr. -# -# Special thanks to David Woodhouse for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. - -# January 2010 -# -# Tweaked to optimize transitions between integer and FP operations -# on same XMM register, PCLMULQDQ subroutine was measured to process -# one byte in 2.07 cycles on Sandy Bridge, and in 2.12 - on Westmere. -# The minor regression on Westmere is outweighed by ~15% improvement -# on Sandy Bridge. Strangely enough attempt to modify 64-bit code in -# similar manner resulted in almost 20% degradation on Sandy Bridge, -# where original 64-bit code processes one byte in 1.95 cycles. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); -$inp = "edi"; -$Htbl = "esi"; - -$unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse - # than unrolled, which has to be weighted against - # 2.5x x86-specific code size reduction. - -sub x86_loop { - my $off = shift; - my $rem = "eax"; - - &mov ($Zhh,&DWP(4,$Htbl,$Zll)); - &mov ($Zhl,&DWP(0,$Htbl,$Zll)); - &mov ($Zlh,&DWP(12,$Htbl,$Zll)); - &mov ($Zll,&DWP(8,$Htbl,$Zll)); - &xor ($rem,$rem); # avoid partial register stalls on PIII - - # shrd practically kills P4, 2.5x deterioration, but P4 has - # MMX code-path to execute. shrd runs tad faster [than twice - # the shifts, move's and or's] on pre-MMX Pentium (as well as - # PIII and Core2), *but* minimizes code size, spares register - # and thus allows to fold the loop... - if (!$unroll) { - my $cnt = $inp; - &mov ($cnt,15); - &jmp (&label("x86_loop")); - &set_label("x86_loop",16); - for($i=1;$i<=2;$i++) { - &mov (&LB($rem),&LB($Zll)); - &shrd ($Zll,$Zlh,4); - &and (&LB($rem),0xf); - &shrd ($Zlh,$Zhl,4); - &shrd ($Zhl,$Zhh,4); - &shr ($Zhh,4); - &xor ($Zhh,&DWP($off+16,"esp",$rem,4)); - - &mov (&LB($rem),&BP($off,"esp",$cnt)); - if ($i&1) { - &and (&LB($rem),0xf0); - } else { - &shl (&LB($rem),4); - } - - &xor ($Zll,&DWP(8,$Htbl,$rem)); - &xor ($Zlh,&DWP(12,$Htbl,$rem)); - &xor ($Zhl,&DWP(0,$Htbl,$rem)); - &xor ($Zhh,&DWP(4,$Htbl,$rem)); - - if ($i&1) { - &dec ($cnt); - &js (&label("x86_break")); - } else { - &jmp (&label("x86_loop")); - } - } - &set_label("x86_break",16); - } else { - for($i=1;$i<32;$i++) { - &comment($i); - &mov (&LB($rem),&LB($Zll)); - &shrd ($Zll,$Zlh,4); - &and (&LB($rem),0xf); - &shrd ($Zlh,$Zhl,4); - &shrd ($Zhl,$Zhh,4); - &shr ($Zhh,4); - &xor ($Zhh,&DWP($off+16,"esp",$rem,4)); - - if ($i&1) { - &mov (&LB($rem),&BP($off+15-($i>>1),"esp")); - &and (&LB($rem),0xf0); - } else { - &mov (&LB($rem),&BP($off+15-($i>>1),"esp")); - &shl (&LB($rem),4); - } - - &xor ($Zll,&DWP(8,$Htbl,$rem)); - &xor ($Zlh,&DWP(12,$Htbl,$rem)); - &xor ($Zhl,&DWP(0,$Htbl,$rem)); - &xor ($Zhh,&DWP(4,$Htbl,$rem)); - } - } - &bswap ($Zll); - &bswap ($Zlh); - &bswap ($Zhl); - if (!$x86only) { - &bswap ($Zhh); - } else { - &mov ("eax",$Zhh); - &bswap ("eax"); - &mov ($Zhh,"eax"); - } -} - -if ($unroll) { - &function_begin_B("_x86_gmult_4bit_inner"); - &x86_loop(4); - &ret (); - &function_end_B("_x86_gmult_4bit_inner"); -} - -sub deposit_rem_4bit { - my $bias = shift; - - &mov (&DWP($bias+0, "esp"),0x0000<<16); - &mov (&DWP($bias+4, "esp"),0x1C20<<16); - &mov (&DWP($bias+8, "esp"),0x3840<<16); - &mov (&DWP($bias+12,"esp"),0x2460<<16); - &mov (&DWP($bias+16,"esp"),0x7080<<16); - &mov (&DWP($bias+20,"esp"),0x6CA0<<16); - &mov (&DWP($bias+24,"esp"),0x48C0<<16); - &mov (&DWP($bias+28,"esp"),0x54E0<<16); - &mov (&DWP($bias+32,"esp"),0xE100<<16); - &mov (&DWP($bias+36,"esp"),0xFD20<<16); - &mov (&DWP($bias+40,"esp"),0xD940<<16); - &mov (&DWP($bias+44,"esp"),0xC560<<16); - &mov (&DWP($bias+48,"esp"),0x9180<<16); - &mov (&DWP($bias+52,"esp"),0x8DA0<<16); - &mov (&DWP($bias+56,"esp"),0xA9C0<<16); - &mov (&DWP($bias+60,"esp"),0xB5E0<<16); -} - -$suffix = $x86only ? "" : "_x86"; - -&function_begin("gcm_gmult_4bit".$suffix); - &stack_push(16+4+1); # +1 for stack alignment - &mov ($inp,&wparam(0)); # load Xi - &mov ($Htbl,&wparam(1)); # load Htable - - &mov ($Zhh,&DWP(0,$inp)); # load Xi[16] - &mov ($Zhl,&DWP(4,$inp)); - &mov ($Zlh,&DWP(8,$inp)); - &mov ($Zll,&DWP(12,$inp)); - - &deposit_rem_4bit(16); - - &mov (&DWP(0,"esp"),$Zhh); # copy Xi[16] on stack - &mov (&DWP(4,"esp"),$Zhl); - &mov (&DWP(8,"esp"),$Zlh); - &mov (&DWP(12,"esp"),$Zll); - &shr ($Zll,20); - &and ($Zll,0xf0); - - if ($unroll) { - &call ("_x86_gmult_4bit_inner"); - } else { - &x86_loop(0); - &mov ($inp,&wparam(0)); - } - - &mov (&DWP(12,$inp),$Zll); - &mov (&DWP(8,$inp),$Zlh); - &mov (&DWP(4,$inp),$Zhl); - &mov (&DWP(0,$inp),$Zhh); - &stack_pop(16+4+1); -&function_end("gcm_gmult_4bit".$suffix); - -&function_begin("gcm_ghash_4bit".$suffix); - &stack_push(16+4+1); # +1 for 64-bit alignment - &mov ($Zll,&wparam(0)); # load Xi - &mov ($Htbl,&wparam(1)); # load Htable - &mov ($inp,&wparam(2)); # load in - &mov ("ecx",&wparam(3)); # load len - &add ("ecx",$inp); - &mov (&wparam(3),"ecx"); - - &mov ($Zhh,&DWP(0,$Zll)); # load Xi[16] - &mov ($Zhl,&DWP(4,$Zll)); - &mov ($Zlh,&DWP(8,$Zll)); - &mov ($Zll,&DWP(12,$Zll)); - - &deposit_rem_4bit(16); - - &set_label("x86_outer_loop",16); - &xor ($Zll,&DWP(12,$inp)); # xor with input - &xor ($Zlh,&DWP(8,$inp)); - &xor ($Zhl,&DWP(4,$inp)); - &xor ($Zhh,&DWP(0,$inp)); - &mov (&DWP(12,"esp"),$Zll); # dump it on stack - &mov (&DWP(8,"esp"),$Zlh); - &mov (&DWP(4,"esp"),$Zhl); - &mov (&DWP(0,"esp"),$Zhh); - - &shr ($Zll,20); - &and ($Zll,0xf0); - - if ($unroll) { - &call ("_x86_gmult_4bit_inner"); - } else { - &x86_loop(0); - &mov ($inp,&wparam(2)); - } - &lea ($inp,&DWP(16,$inp)); - &cmp ($inp,&wparam(3)); - &mov (&wparam(2),$inp) if (!$unroll); - &jb (&label("x86_outer_loop")); - - &mov ($inp,&wparam(0)); # load Xi - &mov (&DWP(12,$inp),$Zll); - &mov (&DWP(8,$inp),$Zlh); - &mov (&DWP(4,$inp),$Zhl); - &mov (&DWP(0,$inp),$Zhh); - &stack_pop(16+4+1); -&function_end("gcm_ghash_4bit".$suffix); - -if (!$x86only) {{{ - -&static_label("rem_4bit"); - -if (!$sse2) {{ # pure-MMX "May" version... - -$S=12; # shift factor for rem_4bit - -&function_begin_B("_mmx_gmult_4bit_inner"); -# MMX version performs 3.5 times better on P4 (see comment in non-MMX -# routine for further details), 100% better on Opteron, ~70% better -# on Core2 and PIII... In other words effort is considered to be well -# spent... Since initial release the loop was unrolled in order to -# "liberate" register previously used as loop counter. Instead it's -# used to optimize critical path in 'Z.hi ^= rem_4bit[Z.lo&0xf]'. -# The path involves move of Z.lo from MMX to integer register, -# effective address calculation and finally merge of value to Z.hi. -# Reference to rem_4bit is scheduled so late that I had to >>4 -# rem_4bit elements. This resulted in 20-45% procent improvement -# on contemporary µ-archs. -{ - my $cnt; - my $rem_4bit = "eax"; - my @rem = ($Zhh,$Zll); - my $nhi = $Zhl; - my $nlo = $Zlh; - - my ($Zlo,$Zhi) = ("mm0","mm1"); - my $tmp = "mm2"; - - &xor ($nlo,$nlo); # avoid partial register stalls on PIII - &mov ($nhi,$Zll); - &mov (&LB($nlo),&LB($nhi)); - &shl (&LB($nlo),4); - &and ($nhi,0xf0); - &movq ($Zlo,&QWP(8,$Htbl,$nlo)); - &movq ($Zhi,&QWP(0,$Htbl,$nlo)); - &movd ($rem[0],$Zlo); - - for ($cnt=28;$cnt>=-2;$cnt--) { - my $odd = $cnt&1; - my $nix = $odd ? $nlo : $nhi; - - &shl (&LB($nlo),4) if ($odd); - &psrlq ($Zlo,4); - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &pxor ($Zlo,&QWP(8,$Htbl,$nix)); - &mov (&LB($nlo),&BP($cnt/2,$inp)) if (!$odd && $cnt>=0); - &psllq ($tmp,60); - &and ($nhi,0xf0) if ($odd); - &pxor ($Zhi,&QWP(0,$rem_4bit,$rem[1],8)) if ($cnt<28); - &and ($rem[0],0xf); - &pxor ($Zhi,&QWP(0,$Htbl,$nix)); - &mov ($nhi,$nlo) if (!$odd && $cnt>=0); - &movd ($rem[1],$Zlo); - &pxor ($Zlo,$tmp); - - push (@rem,shift(@rem)); # "rotate" registers - } - - &mov ($inp,&DWP(4,$rem_4bit,$rem[1],8)); # last rem_4bit[rem] - - &psrlq ($Zlo,32); # lower part of Zlo is already there - &movd ($Zhl,$Zhi); - &psrlq ($Zhi,32); - &movd ($Zlh,$Zlo); - &movd ($Zhh,$Zhi); - &shl ($inp,4); # compensate for rem_4bit[i] being >>4 - - &bswap ($Zll); - &bswap ($Zhl); - &bswap ($Zlh); - &xor ($Zhh,$inp); - &bswap ($Zhh); - - &ret (); -} -&function_end_B("_mmx_gmult_4bit_inner"); - -&function_begin("gcm_gmult_4bit_mmx"); - &mov ($inp,&wparam(0)); # load Xi - &mov ($Htbl,&wparam(1)); # load Htable - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); - - &movz ($Zll,&BP(15,$inp)); - - &call ("_mmx_gmult_4bit_inner"); - - &mov ($inp,&wparam(0)); # load Xi - &emms (); - &mov (&DWP(12,$inp),$Zll); - &mov (&DWP(4,$inp),$Zhl); - &mov (&DWP(8,$inp),$Zlh); - &mov (&DWP(0,$inp),$Zhh); -&function_end("gcm_gmult_4bit_mmx"); - -# Streamed version performs 20% better on P4, 7% on Opteron, -# 10% on Core2 and PIII... -&function_begin("gcm_ghash_4bit_mmx"); - &mov ($Zhh,&wparam(0)); # load Xi - &mov ($Htbl,&wparam(1)); # load Htable - &mov ($inp,&wparam(2)); # load in - &mov ($Zlh,&wparam(3)); # load len - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); - - &add ($Zlh,$inp); - &mov (&wparam(3),$Zlh); # len to point at the end of input - &stack_push(4+1); # +1 for stack alignment - - &mov ($Zll,&DWP(12,$Zhh)); # load Xi[16] - &mov ($Zhl,&DWP(4,$Zhh)); - &mov ($Zlh,&DWP(8,$Zhh)); - &mov ($Zhh,&DWP(0,$Zhh)); - &jmp (&label("mmx_outer_loop")); - - &set_label("mmx_outer_loop",16); - &xor ($Zll,&DWP(12,$inp)); - &xor ($Zhl,&DWP(4,$inp)); - &xor ($Zlh,&DWP(8,$inp)); - &xor ($Zhh,&DWP(0,$inp)); - &mov (&wparam(2),$inp); - &mov (&DWP(12,"esp"),$Zll); - &mov (&DWP(4,"esp"),$Zhl); - &mov (&DWP(8,"esp"),$Zlh); - &mov (&DWP(0,"esp"),$Zhh); - - &mov ($inp,"esp"); - &shr ($Zll,24); - - &call ("_mmx_gmult_4bit_inner"); - - &mov ($inp,&wparam(2)); - &lea ($inp,&DWP(16,$inp)); - &cmp ($inp,&wparam(3)); - &jb (&label("mmx_outer_loop")); - - &mov ($inp,&wparam(0)); # load Xi - &emms (); - &mov (&DWP(12,$inp),$Zll); - &mov (&DWP(4,$inp),$Zhl); - &mov (&DWP(8,$inp),$Zlh); - &mov (&DWP(0,$inp),$Zhh); - - &stack_pop(4+1); -&function_end("gcm_ghash_4bit_mmx"); - -}} else {{ # "June" MMX version... - # ... has slower "April" gcm_gmult_4bit_mmx with folded - # loop. This is done to conserve code size... -$S=16; # shift factor for rem_4bit - -sub mmx_loop() { -# MMX version performs 2.8 times better on P4 (see comment in non-MMX -# routine for further details), 40% better on Opteron and Core2, 50% -# better on PIII... In other words effort is considered to be well -# spent... - my $inp = shift; - my $rem_4bit = shift; - my $cnt = $Zhh; - my $nhi = $Zhl; - my $nlo = $Zlh; - my $rem = $Zll; - - my ($Zlo,$Zhi) = ("mm0","mm1"); - my $tmp = "mm2"; - - &xor ($nlo,$nlo); # avoid partial register stalls on PIII - &mov ($nhi,$Zll); - &mov (&LB($nlo),&LB($nhi)); - &mov ($cnt,14); - &shl (&LB($nlo),4); - &and ($nhi,0xf0); - &movq ($Zlo,&QWP(8,$Htbl,$nlo)); - &movq ($Zhi,&QWP(0,$Htbl,$nlo)); - &movd ($rem,$Zlo); - &jmp (&label("mmx_loop")); - - &set_label("mmx_loop",16); - &psrlq ($Zlo,4); - &and ($rem,0xf); - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &pxor ($Zlo,&QWP(8,$Htbl,$nhi)); - &mov (&LB($nlo),&BP(0,$inp,$cnt)); - &psllq ($tmp,60); - &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); - &dec ($cnt); - &movd ($rem,$Zlo); - &pxor ($Zhi,&QWP(0,$Htbl,$nhi)); - &mov ($nhi,$nlo); - &pxor ($Zlo,$tmp); - &js (&label("mmx_break")); - - &shl (&LB($nlo),4); - &and ($rem,0xf); - &psrlq ($Zlo,4); - &and ($nhi,0xf0); - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &pxor ($Zlo,&QWP(8,$Htbl,$nlo)); - &psllq ($tmp,60); - &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); - &movd ($rem,$Zlo); - &pxor ($Zhi,&QWP(0,$Htbl,$nlo)); - &pxor ($Zlo,$tmp); - &jmp (&label("mmx_loop")); - - &set_label("mmx_break",16); - &shl (&LB($nlo),4); - &and ($rem,0xf); - &psrlq ($Zlo,4); - &and ($nhi,0xf0); - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &pxor ($Zlo,&QWP(8,$Htbl,$nlo)); - &psllq ($tmp,60); - &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); - &movd ($rem,$Zlo); - &pxor ($Zhi,&QWP(0,$Htbl,$nlo)); - &pxor ($Zlo,$tmp); - - &psrlq ($Zlo,4); - &and ($rem,0xf); - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &pxor ($Zlo,&QWP(8,$Htbl,$nhi)); - &psllq ($tmp,60); - &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8)); - &movd ($rem,$Zlo); - &pxor ($Zhi,&QWP(0,$Htbl,$nhi)); - &pxor ($Zlo,$tmp); - - &psrlq ($Zlo,32); # lower part of Zlo is already there - &movd ($Zhl,$Zhi); - &psrlq ($Zhi,32); - &movd ($Zlh,$Zlo); - &movd ($Zhh,$Zhi); - - &bswap ($Zll); - &bswap ($Zhl); - &bswap ($Zlh); - &bswap ($Zhh); -} - -&function_begin("gcm_gmult_4bit_mmx"); - &mov ($inp,&wparam(0)); # load Xi - &mov ($Htbl,&wparam(1)); # load Htable - - &call (&label("pic_point")); - &set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); - - &movz ($Zll,&BP(15,$inp)); - - &mmx_loop($inp,"eax"); - - &emms (); - &mov (&DWP(12,$inp),$Zll); - &mov (&DWP(4,$inp),$Zhl); - &mov (&DWP(8,$inp),$Zlh); - &mov (&DWP(0,$inp),$Zhh); -&function_end("gcm_gmult_4bit_mmx"); - -###################################################################### -# Below subroutine is "528B" variant of "4-bit" GCM GHASH function -# (see gcm128.c for details). It provides further 20-40% performance -# improvement over above mentioned "May" version. - -&static_label("rem_8bit"); - -&function_begin("gcm_ghash_4bit_mmx"); -{ my ($Zlo,$Zhi) = ("mm7","mm6"); - my $rem_8bit = "esi"; - my $Htbl = "ebx"; - - # parameter block - &mov ("eax",&wparam(0)); # Xi - &mov ("ebx",&wparam(1)); # Htable - &mov ("ecx",&wparam(2)); # inp - &mov ("edx",&wparam(3)); # len - &mov ("ebp","esp"); # original %esp - &call (&label("pic_point")); - &set_label ("pic_point"); - &blindpop ($rem_8bit); - &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit)); - - &sub ("esp",512+16+16); # allocate stack frame... - &and ("esp",-64); # ...and align it - &sub ("esp",16); # place for (u8)(H[]<<4) - - &add ("edx","ecx"); # pointer to the end of input - &mov (&DWP(528+16+0,"esp"),"eax"); # save Xi - &mov (&DWP(528+16+8,"esp"),"edx"); # save inp+len - &mov (&DWP(528+16+12,"esp"),"ebp"); # save original %esp - - { my @lo = ("mm0","mm1","mm2"); - my @hi = ("mm3","mm4","mm5"); - my @tmp = ("mm6","mm7"); - my ($off1,$off2,$i) = (0,0,); - - &add ($Htbl,128); # optimize for size - &lea ("edi",&DWP(16+128,"esp")); - &lea ("ebp",&DWP(16+256+128,"esp")); - - # decompose Htable (low and high parts are kept separately), - # generate Htable[]>>4, (u8)(Htable[]<<4), save to stack... - for ($i=0;$i<18;$i++) { - - &mov ("edx",&DWP(16*$i+8-128,$Htbl)) if ($i<16); - &movq ($lo[0],&QWP(16*$i+8-128,$Htbl)) if ($i<16); - &psllq ($tmp[1],60) if ($i>1); - &movq ($hi[0],&QWP(16*$i+0-128,$Htbl)) if ($i<16); - &por ($lo[2],$tmp[1]) if ($i>1); - &movq (&QWP($off1-128,"edi"),$lo[1]) if ($i>0 && $i<17); - &psrlq ($lo[1],4) if ($i>0 && $i<17); - &movq (&QWP($off1,"edi"),$hi[1]) if ($i>0 && $i<17); - &movq ($tmp[0],$hi[1]) if ($i>0 && $i<17); - &movq (&QWP($off2-128,"ebp"),$lo[2]) if ($i>1); - &psrlq ($hi[1],4) if ($i>0 && $i<17); - &movq (&QWP($off2,"ebp"),$hi[2]) if ($i>1); - &shl ("edx",4) if ($i<16); - &mov (&BP($i,"esp"),&LB("edx")) if ($i<16); - - unshift (@lo,pop(@lo)); # "rotate" registers - unshift (@hi,pop(@hi)); - unshift (@tmp,pop(@tmp)); - $off1 += 8 if ($i>0); - $off2 += 8 if ($i>1); - } - } - - &movq ($Zhi,&QWP(0,"eax")); - &mov ("ebx",&DWP(8,"eax")); - &mov ("edx",&DWP(12,"eax")); # load Xi - -&set_label("outer",16); - { my $nlo = "eax"; - my $dat = "edx"; - my @nhi = ("edi","ebp"); - my @rem = ("ebx","ecx"); - my @red = ("mm0","mm1","mm2"); - my $tmp = "mm3"; - - &xor ($dat,&DWP(12,"ecx")); # merge input data - &xor ("ebx",&DWP(8,"ecx")); - &pxor ($Zhi,&QWP(0,"ecx")); - &lea ("ecx",&DWP(16,"ecx")); # inp+=16 - #&mov (&DWP(528+12,"esp"),$dat); # save inp^Xi - &mov (&DWP(528+8,"esp"),"ebx"); - &movq (&QWP(528+0,"esp"),$Zhi); - &mov (&DWP(528+16+4,"esp"),"ecx"); # save inp - - &xor ($nlo,$nlo); - &rol ($dat,8); - &mov (&LB($nlo),&LB($dat)); - &mov ($nhi[1],$nlo); - &and (&LB($nlo),0x0f); - &shr ($nhi[1],4); - &pxor ($red[0],$red[0]); - &rol ($dat,8); # next byte - &pxor ($red[1],$red[1]); - &pxor ($red[2],$red[2]); - - # Just like in "May" verson modulo-schedule for critical path in - # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor' - # is scheduled so late that rem_8bit[] has to be shifted *right* - # by 16, which is why last argument to pinsrw is 2, which - # corresponds to <<32=<<48>>16... - for ($j=11,$i=0;$i<15;$i++) { - - if ($i>0) { - &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo] - &rol ($dat,8); # next byte - &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8)); - - &pxor ($Zlo,$tmp); - &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8)); - &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4) - } else { - &movq ($Zlo,&QWP(16,"esp",$nlo,8)); - &movq ($Zhi,&QWP(16+128,"esp",$nlo,8)); - } - - &mov (&LB($nlo),&LB($dat)); - &mov ($dat,&DWP(528+$j,"esp")) if (--$j%4==0); - - &movd ($rem[0],$Zlo); - &movz ($rem[1],&LB($rem[1])) if ($i>0); - &psrlq ($Zlo,8); # Z>>=8 - - &movq ($tmp,$Zhi); - &mov ($nhi[0],$nlo); - &psrlq ($Zhi,8); - - &pxor ($Zlo,&QWP(16+256+0,"esp",$nhi[1],8)); # Z^=H[nhi]>>4 - &and (&LB($nlo),0x0f); - &psllq ($tmp,56); - - &pxor ($Zhi,$red[1]) if ($i>1); - &shr ($nhi[0],4); - &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2) if ($i>0); - - unshift (@red,pop(@red)); # "rotate" registers - unshift (@rem,pop(@rem)); - unshift (@nhi,pop(@nhi)); - } - - &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo] - &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8)); - &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4) - - &pxor ($Zlo,$tmp); - &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8)); - &movz ($rem[1],&LB($rem[1])); - - &pxor ($red[2],$red[2]); # clear 2nd word - &psllq ($red[1],4); - - &movd ($rem[0],$Zlo); - &psrlq ($Zlo,4); # Z>>=4 - - &movq ($tmp,$Zhi); - &psrlq ($Zhi,4); - &shl ($rem[0],4); # rem<<4 - - &pxor ($Zlo,&QWP(16,"esp",$nhi[1],8)); # Z^=H[nhi] - &psllq ($tmp,60); - &movz ($rem[0],&LB($rem[0])); - - &pxor ($Zlo,$tmp); - &pxor ($Zhi,&QWP(16+128,"esp",$nhi[1],8)); - - &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2); - &pxor ($Zhi,$red[1]); - - &movd ($dat,$Zlo); - &pinsrw ($red[2],&WP(0,$rem_8bit,$rem[0],2),3); # last is <<48 - - &psllq ($red[0],12); # correct by <<16>>4 - &pxor ($Zhi,$red[0]); - &psrlq ($Zlo,32); - &pxor ($Zhi,$red[2]); - - &mov ("ecx",&DWP(528+16+4,"esp")); # restore inp - &movd ("ebx",$Zlo); - &movq ($tmp,$Zhi); # 01234567 - &psllw ($Zhi,8); # 1.3.5.7. - &psrlw ($tmp,8); # .0.2.4.6 - &por ($Zhi,$tmp); # 10325476 - &bswap ($dat); - &pshufw ($Zhi,$Zhi,0b00011011); # 76543210 - &bswap ("ebx"); - - &cmp ("ecx",&DWP(528+16+8,"esp")); # are we done? - &jne (&label("outer")); - } - - &mov ("eax",&DWP(528+16+0,"esp")); # restore Xi - &mov (&DWP(12,"eax"),"edx"); - &mov (&DWP(8,"eax"),"ebx"); - &movq (&QWP(0,"eax"),$Zhi); - - &mov ("esp",&DWP(528+16+12,"esp")); # restore original %esp - &emms (); -} -&function_end("gcm_ghash_4bit_mmx"); -}} - -if ($sse2) {{ -###################################################################### -# PCLMULQDQ version. - -$Xip="eax"; -$Htbl="edx"; -$const="ecx"; -$inp="esi"; -$len="ebx"; - -($Xi,$Xhi)=("xmm0","xmm1"); $Hkey="xmm2"; -($T1,$T2,$T3)=("xmm3","xmm4","xmm5"); -($Xn,$Xhn)=("xmm6","xmm7"); - -&static_label("bswap"); - -sub clmul64x64_T2 { # minimal "register" pressure -my ($Xhi,$Xi,$Hkey)=@_; - - &movdqa ($Xhi,$Xi); # - &pshufd ($T1,$Xi,0b01001110); - &pshufd ($T2,$Hkey,0b01001110); - &pxor ($T1,$Xi); # - &pxor ($T2,$Hkey); - - &pclmulqdq ($Xi,$Hkey,0x00); ####### - &pclmulqdq ($Xhi,$Hkey,0x11); ####### - &pclmulqdq ($T1,$T2,0x00); ####### - &xorps ($T1,$Xi); # - &xorps ($T1,$Xhi); # - - &movdqa ($T2,$T1); # - &psrldq ($T1,8); - &pslldq ($T2,8); # - &pxor ($Xhi,$T1); - &pxor ($Xi,$T2); # -} - -sub clmul64x64_T3 { -# Even though this subroutine offers visually better ILP, it -# was empirically found to be a tad slower than above version. -# At least in gcm_ghash_clmul context. But it's just as well, -# because loop modulo-scheduling is possible only thanks to -# minimized "register" pressure... -my ($Xhi,$Xi,$Hkey)=@_; - - &movdqa ($T1,$Xi); # - &movdqa ($Xhi,$Xi); - &pclmulqdq ($Xi,$Hkey,0x00); ####### - &pclmulqdq ($Xhi,$Hkey,0x11); ####### - &pshufd ($T2,$T1,0b01001110); # - &pshufd ($T3,$Hkey,0b01001110); - &pxor ($T2,$T1); # - &pxor ($T3,$Hkey); - &pclmulqdq ($T2,$T3,0x00); ####### - &pxor ($T2,$Xi); # - &pxor ($T2,$Xhi); # - - &movdqa ($T3,$T2); # - &psrldq ($T2,8); - &pslldq ($T3,8); # - &pxor ($Xhi,$T2); - &pxor ($Xi,$T3); # -} - -if (1) { # Algorithm 9 with <<1 twist. - # Reduction is shorter and uses only two - # temporary registers, which makes it better - # candidate for interleaving with 64x64 - # multiplication. Pre-modulo-scheduled loop - # was found to be ~20% faster than Algorithm 5 - # below. Algorithm 9 was therefore chosen for - # further optimization... - -sub reduction_alg9 { # 17/13 times faster than Intel version -my ($Xhi,$Xi) = @_; - - # 1st phase - &movdqa ($T1,$Xi); # - &psllq ($Xi,1); - &pxor ($Xi,$T1); # - &psllq ($Xi,5); # - &pxor ($Xi,$T1); # - &psllq ($Xi,57); # - &movdqa ($T2,$Xi); # - &pslldq ($Xi,8); - &psrldq ($T2,8); # - &pxor ($Xi,$T1); - &pxor ($Xhi,$T2); # - - # 2nd phase - &movdqa ($T2,$Xi); - &psrlq ($Xi,5); - &pxor ($Xi,$T2); # - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - &pxor ($T2,$Xhi); - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # -} - -&function_begin_B("gcm_init_clmul"); - &mov ($Htbl,&wparam(0)); - &mov ($Xip,&wparam(1)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Hkey,&QWP(0,$Xip)); - &pshufd ($Hkey,$Hkey,0b01001110);# dword swap - - # <<1 twist - &pshufd ($T2,$Hkey,0b11111111); # broadcast uppermost dword - &movdqa ($T1,$Hkey); - &psllq ($Hkey,1); - &pxor ($T3,$T3); # - &psrlq ($T1,63); - &pcmpgtd ($T3,$T2); # broadcast carry bit - &pslldq ($T1,8); - &por ($Hkey,$T1); # H<<=1 - - # magic reduction - &pand ($T3,&QWP(16,$const)); # 0x1c2_polynomial - &pxor ($Hkey,$T3); # if(carry) H^=0x1c2_polynomial - - # calculate H^2 - &movdqa ($Xi,$Hkey); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); - &reduction_alg9 ($Xhi,$Xi); - - &movdqu (&QWP(0,$Htbl),$Hkey); # save H - &movdqu (&QWP(16,$Htbl),$Xi); # save H^2 - - &ret (); -&function_end_B("gcm_init_clmul"); - -&function_begin_B("gcm_gmult_clmul"); - &mov ($Xip,&wparam(0)); - &mov ($Htbl,&wparam(1)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Xi,&QWP(0,$Xip)); - &movdqa ($T3,&QWP(0,$const)); - &movups ($Hkey,&QWP(0,$Htbl)); - &pshufb ($Xi,$T3); - - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); - &reduction_alg9 ($Xhi,$Xi); - - &pshufb ($Xi,$T3); - &movdqu (&QWP(0,$Xip),$Xi); - - &ret (); -&function_end_B("gcm_gmult_clmul"); - -&function_begin("gcm_ghash_clmul"); - &mov ($Xip,&wparam(0)); - &mov ($Htbl,&wparam(1)); - &mov ($inp,&wparam(2)); - &mov ($len,&wparam(3)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Xi,&QWP(0,$Xip)); - &movdqa ($T3,&QWP(0,$const)); - &movdqu ($Hkey,&QWP(0,$Htbl)); - &pshufb ($Xi,$T3); - - &sub ($len,0x10); - &jz (&label("odd_tail")); - - ####### - # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = - # [(H*Ii+1) + (H*Xi+1)] mod P = - # [(H*Ii+1) + H^2*(Ii+Xi)] mod P - # - &movdqu ($T1,&QWP(0,$inp)); # Ii - &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 - &pshufb ($T1,$T3); - &pshufb ($Xn,$T3); - &pxor ($Xi,$T1); # Ii+Xi - - &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 - &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 - - &lea ($inp,&DWP(32,$inp)); # i+=2 - &sub ($len,0x20); - &jbe (&label("even_tail")); - -&set_label("mod_loop"); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) - &movdqu ($T1,&QWP(0,$inp)); # Ii - &movups ($Hkey,&QWP(0,$Htbl)); # load H - - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); - - &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 - &pshufb ($T1,$T3); - &pshufb ($Xn,$T3); - - &movdqa ($T3,$Xn); #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1 - &movdqa ($Xhn,$Xn); - &pxor ($Xhi,$T1); # "Ii+Xi", consume early - - &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase - &psllq ($Xi,1); - &pxor ($Xi,$T1); # - &psllq ($Xi,5); # - &pxor ($Xi,$T1); # - &pclmulqdq ($Xn,$Hkey,0x00); ####### - &psllq ($Xi,57); # - &movdqa ($T2,$Xi); # - &pslldq ($Xi,8); - &psrldq ($T2,8); # - &pxor ($Xi,$T1); - &pshufd ($T1,$T3,0b01001110); - &pxor ($Xhi,$T2); # - &pxor ($T1,$T3); - &pshufd ($T3,$Hkey,0b01001110); - &pxor ($T3,$Hkey); # - - &pclmulqdq ($Xhn,$Hkey,0x11); ####### - &movdqa ($T2,$Xi); # 2nd phase - &psrlq ($Xi,5); - &pxor ($Xi,$T2); # - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - &pxor ($T2,$Xhi); - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - - &pclmulqdq ($T1,$T3,0x00); ####### - &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 - &xorps ($T1,$Xn); # - &xorps ($T1,$Xhn); # - - &movdqa ($T3,$T1); # - &psrldq ($T1,8); - &pslldq ($T3,8); # - &pxor ($Xhn,$T1); - &pxor ($Xn,$T3); # - &movdqa ($T3,&QWP(0,$const)); - - &lea ($inp,&DWP(32,$inp)); - &sub ($len,0x20); - &ja (&label("mod_loop")); - -&set_label("even_tail"); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) - - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); - - &reduction_alg9 ($Xhi,$Xi); - - &test ($len,$len); - &jnz (&label("done")); - - &movups ($Hkey,&QWP(0,$Htbl)); # load H -&set_label("odd_tail"); - &movdqu ($T1,&QWP(0,$inp)); # Ii - &pshufb ($T1,$T3); - &pxor ($Xi,$T1); # Ii+Xi - - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) - &reduction_alg9 ($Xhi,$Xi); - -&set_label("done"); - &pshufb ($Xi,$T3); - &movdqu (&QWP(0,$Xip),$Xi); -&function_end("gcm_ghash_clmul"); - -} else { # Algorith 5. Kept for reference purposes. - -sub reduction_alg5 { # 19/16 times faster than Intel version -my ($Xhi,$Xi)=@_; - - # <<1 - &movdqa ($T1,$Xi); # - &movdqa ($T2,$Xhi); - &pslld ($Xi,1); - &pslld ($Xhi,1); # - &psrld ($T1,31); - &psrld ($T2,31); # - &movdqa ($T3,$T1); - &pslldq ($T1,4); - &psrldq ($T3,12); # - &pslldq ($T2,4); - &por ($Xhi,$T3); # - &por ($Xi,$T1); - &por ($Xhi,$T2); # - - # 1st phase - &movdqa ($T1,$Xi); - &movdqa ($T2,$Xi); - &movdqa ($T3,$Xi); # - &pslld ($T1,31); - &pslld ($T2,30); - &pslld ($Xi,25); # - &pxor ($T1,$T2); - &pxor ($T1,$Xi); # - &movdqa ($T2,$T1); # - &pslldq ($T1,12); - &psrldq ($T2,4); # - &pxor ($T3,$T1); - - # 2nd phase - &pxor ($Xhi,$T3); # - &movdqa ($Xi,$T3); - &movdqa ($T1,$T3); - &psrld ($Xi,1); # - &psrld ($T1,2); - &psrld ($T3,7); # - &pxor ($Xi,$T1); - &pxor ($Xhi,$T2); - &pxor ($Xi,$T3); # - &pxor ($Xi,$Xhi); # -} - -&function_begin_B("gcm_init_clmul"); - &mov ($Htbl,&wparam(0)); - &mov ($Xip,&wparam(1)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Hkey,&QWP(0,$Xip)); - &pshufd ($Hkey,$Hkey,0b01001110);# dword swap - - # calculate H^2 - &movdqa ($Xi,$Hkey); - &clmul64x64_T3 ($Xhi,$Xi,$Hkey); - &reduction_alg5 ($Xhi,$Xi); - - &movdqu (&QWP(0,$Htbl),$Hkey); # save H - &movdqu (&QWP(16,$Htbl),$Xi); # save H^2 - - &ret (); -&function_end_B("gcm_init_clmul"); - -&function_begin_B("gcm_gmult_clmul"); - &mov ($Xip,&wparam(0)); - &mov ($Htbl,&wparam(1)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Xi,&QWP(0,$Xip)); - &movdqa ($Xn,&QWP(0,$const)); - &movdqu ($Hkey,&QWP(0,$Htbl)); - &pshufb ($Xi,$Xn); - - &clmul64x64_T3 ($Xhi,$Xi,$Hkey); - &reduction_alg5 ($Xhi,$Xi); - - &pshufb ($Xi,$Xn); - &movdqu (&QWP(0,$Xip),$Xi); - - &ret (); -&function_end_B("gcm_gmult_clmul"); - -&function_begin("gcm_ghash_clmul"); - &mov ($Xip,&wparam(0)); - &mov ($Htbl,&wparam(1)); - &mov ($inp,&wparam(2)); - &mov ($len,&wparam(3)); - - &call (&label("pic")); -&set_label("pic"); - &blindpop ($const); - &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); - - &movdqu ($Xi,&QWP(0,$Xip)); - &movdqa ($T3,&QWP(0,$const)); - &movdqu ($Hkey,&QWP(0,$Htbl)); - &pshufb ($Xi,$T3); - - &sub ($len,0x10); - &jz (&label("odd_tail")); - - ####### - # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = - # [(H*Ii+1) + (H*Xi+1)] mod P = - # [(H*Ii+1) + H^2*(Ii+Xi)] mod P - # - &movdqu ($T1,&QWP(0,$inp)); # Ii - &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 - &pshufb ($T1,$T3); - &pshufb ($Xn,$T3); - &pxor ($Xi,$T1); # Ii+Xi - - &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1 - &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2 - - &sub ($len,0x20); - &lea ($inp,&DWP(32,$inp)); # i+=2 - &jbe (&label("even_tail")); - -&set_label("mod_loop"); - &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) - &movdqu ($Hkey,&QWP(0,$Htbl)); # load H - - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); - - &reduction_alg5 ($Xhi,$Xi); - - ####### - &movdqa ($T3,&QWP(0,$const)); - &movdqu ($T1,&QWP(0,$inp)); # Ii - &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 - &pshufb ($T1,$T3); - &pshufb ($Xn,$T3); - &pxor ($Xi,$T1); # Ii+Xi - - &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1 - &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2 - - &sub ($len,0x20); - &lea ($inp,&DWP(32,$inp)); - &ja (&label("mod_loop")); - -&set_label("even_tail"); - &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) - - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); - - &reduction_alg5 ($Xhi,$Xi); - - &movdqa ($T3,&QWP(0,$const)); - &test ($len,$len); - &jnz (&label("done")); - - &movdqu ($Hkey,&QWP(0,$Htbl)); # load H -&set_label("odd_tail"); - &movdqu ($T1,&QWP(0,$inp)); # Ii - &pshufb ($T1,$T3); - &pxor ($Xi,$T1); # Ii+Xi - - &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) - &reduction_alg5 ($Xhi,$Xi); - - &movdqa ($T3,&QWP(0,$const)); -&set_label("done"); - &pshufb ($Xi,$T3); - &movdqu (&QWP(0,$Xip),$Xi); -&function_end("gcm_ghash_clmul"); - -} - -&set_label("bswap",64); - &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); - &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial -}} # $sse2 - -&set_label("rem_4bit",64); - &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); - &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); - &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S); - &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S); -&set_label("rem_8bit",64); - &data_short(0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E); - &data_short(0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E); - &data_short(0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E); - &data_short(0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E); - &data_short(0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E); - &data_short(0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E); - &data_short(0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E); - &data_short(0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E); - &data_short(0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE); - &data_short(0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE); - &data_short(0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE); - &data_short(0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE); - &data_short(0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E); - &data_short(0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E); - &data_short(0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE); - &data_short(0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE); - &data_short(0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E); - &data_short(0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E); - &data_short(0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E); - &data_short(0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E); - &data_short(0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E); - &data_short(0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E); - &data_short(0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E); - &data_short(0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E); - &data_short(0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE); - &data_short(0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE); - &data_short(0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE); - &data_short(0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE); - &data_short(0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E); - &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); - &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); - &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); -}}} # !$x86only - -&asciz("GHASH for x86, CRYPTOGAMS by "); -&asm_finish(); - -# A question was risen about choice of vanilla MMX. Or rather why wasn't -# SSE2 chosen instead? In addition to the fact that MMX runs on legacy -# CPUs such as PIII, "4-bit" MMX version was observed to provide better -# performance than *corresponding* SSE2 one even on contemporary CPUs. -# SSE2 results were provided by Peter-Michael Hager. He maintains SSE2 -# implementation featuring full range of lookup-table sizes, but with -# per-invocation lookup table setup. Latter means that table size is -# chosen depending on how much data is to be hashed in every given call, -# more data - larger table. Best reported result for Core2 is ~4 cycles -# per processed byte out of 64KB block. This number accounts even for -# 64KB table setup overhead. As discussed in gcm128.c we choose to be -# more conservative in respect to lookup table sizes, but how do the -# results compare? Minimalistic "256B" MMX version delivers ~11 cycles -# on same platform. As also discussed in gcm128.c, next in line "8-bit -# Shoup's" or "4KB" method should deliver twice the performance of -# "256B" one, in other words not worse than ~6 cycles per byte. It -# should be also be noted that in SSE2 case improvement can be "super- -# linear," i.e. more than twice, mostly because >>8 maps to single -# instruction on SSE2 register. This is unlike "4-bit" case when >>4 -# maps to same amount of instructions in both MMX and SSE2 cases. -# Bottom line is that switch to SSE2 is considered to be justifiable -# only in case we choose to implement "8-bit" method... diff --git a/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86_64.pl b/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86_64.pl deleted file mode 100644 index 38d779edbc..0000000000 --- a/drivers/builtin_openssl2/crypto/modes/asm/ghash-x86_64.pl +++ /dev/null @@ -1,806 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# March, June 2010 -# -# The module implements "4-bit" GCM GHASH function and underlying -# single multiplication operation in GF(2^128). "4-bit" means that -# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH -# function features so called "528B" variant utilizing additional -# 256+16 bytes of per-key storage [+512 bytes shared table]. -# Performance results are for this streamed GHASH subroutine and are -# expressed in cycles per processed byte, less is better: -# -# gcc 3.4.x(*) assembler -# -# P4 28.6 14.0 +100% -# Opteron 19.3 7.7 +150% -# Core2 17.8 8.1(**) +120% -# -# (*) comparison is not completely fair, because C results are -# for vanilla "256B" implementation, while assembler results -# are for "528B";-) -# (**) it's mystery [to me] why Core2 result is not same as for -# Opteron; - -# May 2010 -# -# Add PCLMULQDQ version performing at 2.02 cycles per processed byte. -# See ghash-x86.pl for background information and details about coding -# techniques. -# -# Special thanks to David Woodhouse for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -# common register layout -$nlo="%rax"; -$nhi="%rbx"; -$Zlo="%r8"; -$Zhi="%r9"; -$tmp="%r10"; -$rem_4bit = "%r11"; - -$Xi="%rdi"; -$Htbl="%rsi"; - -# per-function register layout -$cnt="%rcx"; -$rem="%rdx"; - -sub LB() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/ or - $r =~ s/%[er]([sd]i)/%\1l/ or - $r =~ s/%[er](bp)/%\1l/ or - $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } - -sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; - my $arg = pop; - $arg = "\$$arg" if ($arg*1 eq $arg); - $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; -} - -{ my $N; - sub loop() { - my $inp = shift; - - $N++; -$code.=<<___; - xor $nlo,$nlo - xor $nhi,$nhi - mov `&LB("$Zlo")`,`&LB("$nlo")` - mov `&LB("$Zlo")`,`&LB("$nhi")` - shl \$4,`&LB("$nlo")` - mov \$14,$cnt - mov 8($Htbl,$nlo),$Zlo - mov ($Htbl,$nlo),$Zhi - and \$0xf0,`&LB("$nhi")` - mov $Zlo,$rem - jmp .Loop$N - -.align 16 -.Loop$N: - shr \$4,$Zlo - and \$0xf,$rem - mov $Zhi,$tmp - mov ($inp,$cnt),`&LB("$nlo")` - shr \$4,$Zhi - xor 8($Htbl,$nhi),$Zlo - shl \$60,$tmp - xor ($Htbl,$nhi),$Zhi - mov `&LB("$nlo")`,`&LB("$nhi")` - xor ($rem_4bit,$rem,8),$Zhi - mov $Zlo,$rem - shl \$4,`&LB("$nlo")` - xor $tmp,$Zlo - dec $cnt - js .Lbreak$N - - shr \$4,$Zlo - and \$0xf,$rem - mov $Zhi,$tmp - shr \$4,$Zhi - xor 8($Htbl,$nlo),$Zlo - shl \$60,$tmp - xor ($Htbl,$nlo),$Zhi - and \$0xf0,`&LB("$nhi")` - xor ($rem_4bit,$rem,8),$Zhi - mov $Zlo,$rem - xor $tmp,$Zlo - jmp .Loop$N - -.align 16 -.Lbreak$N: - shr \$4,$Zlo - and \$0xf,$rem - mov $Zhi,$tmp - shr \$4,$Zhi - xor 8($Htbl,$nlo),$Zlo - shl \$60,$tmp - xor ($Htbl,$nlo),$Zhi - and \$0xf0,`&LB("$nhi")` - xor ($rem_4bit,$rem,8),$Zhi - mov $Zlo,$rem - xor $tmp,$Zlo - - shr \$4,$Zlo - and \$0xf,$rem - mov $Zhi,$tmp - shr \$4,$Zhi - xor 8($Htbl,$nhi),$Zlo - shl \$60,$tmp - xor ($Htbl,$nhi),$Zhi - xor $tmp,$Zlo - xor ($rem_4bit,$rem,8),$Zhi - - bswap $Zlo - bswap $Zhi -___ -}} - -$code=<<___; -.text - -.globl gcm_gmult_4bit -.type gcm_gmult_4bit,\@function,2 -.align 16 -gcm_gmult_4bit: - push %rbx - push %rbp # %rbp and %r12 are pushed exclusively in - push %r12 # order to reuse Win64 exception handler... -.Lgmult_prologue: - - movzb 15($Xi),$Zlo - lea .Lrem_4bit(%rip),$rem_4bit -___ - &loop ($Xi); -$code.=<<___; - mov $Zlo,8($Xi) - mov $Zhi,($Xi) - - mov 16(%rsp),%rbx - lea 24(%rsp),%rsp -.Lgmult_epilogue: - ret -.size gcm_gmult_4bit,.-gcm_gmult_4bit -___ - -# per-function register layout -$inp="%rdx"; -$len="%rcx"; -$rem_8bit=$rem_4bit; - -$code.=<<___; -.globl gcm_ghash_4bit -.type gcm_ghash_4bit,\@function,4 -.align 16 -gcm_ghash_4bit: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - sub \$280,%rsp -.Lghash_prologue: - mov $inp,%r14 # reassign couple of args - mov $len,%r15 -___ -{ my $inp="%r14"; - my $dat="%edx"; - my $len="%r15"; - my @nhi=("%ebx","%ecx"); - my @rem=("%r12","%r13"); - my $Hshr4="%rbp"; - - &sub ($Htbl,-128); # size optimization - &lea ($Hshr4,"16+128(%rsp)"); - { my @lo =($nlo,$nhi); - my @hi =($Zlo,$Zhi); - - &xor ($dat,$dat); - for ($i=0,$j=-2;$i<18;$i++,$j++) { - &mov ("$j(%rsp)",&LB($dat)) if ($i>1); - &or ($lo[0],$tmp) if ($i>1); - &mov (&LB($dat),&LB($lo[1])) if ($i>0 && $i<17); - &shr ($lo[1],4) if ($i>0 && $i<17); - &mov ($tmp,$hi[1]) if ($i>0 && $i<17); - &shr ($hi[1],4) if ($i>0 && $i<17); - &mov ("8*$j($Hshr4)",$hi[0]) if ($i>1); - &mov ($hi[0],"16*$i+0-128($Htbl)") if ($i<16); - &shl (&LB($dat),4) if ($i>0 && $i<17); - &mov ("8*$j-128($Hshr4)",$lo[0]) if ($i>1); - &mov ($lo[0],"16*$i+8-128($Htbl)") if ($i<16); - &shl ($tmp,60) if ($i>0 && $i<17); - - push (@lo,shift(@lo)); - push (@hi,shift(@hi)); - } - } - &add ($Htbl,-128); - &mov ($Zlo,"8($Xi)"); - &mov ($Zhi,"0($Xi)"); - &add ($len,$inp); # pointer to the end of data - &lea ($rem_8bit,".Lrem_8bit(%rip)"); - &jmp (".Louter_loop"); - -$code.=".align 16\n.Louter_loop:\n"; - &xor ($Zhi,"($inp)"); - &mov ("%rdx","8($inp)"); - &lea ($inp,"16($inp)"); - &xor ("%rdx",$Zlo); - &mov ("($Xi)",$Zhi); - &mov ("8($Xi)","%rdx"); - &shr ("%rdx",32); - - &xor ($nlo,$nlo); - &rol ($dat,8); - &mov (&LB($nlo),&LB($dat)); - &movz ($nhi[0],&LB($dat)); - &shl (&LB($nlo),4); - &shr ($nhi[0],4); - - for ($j=11,$i=0;$i<15;$i++) { - &rol ($dat,8); - &xor ($Zlo,"8($Htbl,$nlo)") if ($i>0); - &xor ($Zhi,"($Htbl,$nlo)") if ($i>0); - &mov ($Zlo,"8($Htbl,$nlo)") if ($i==0); - &mov ($Zhi,"($Htbl,$nlo)") if ($i==0); - - &mov (&LB($nlo),&LB($dat)); - &xor ($Zlo,$tmp) if ($i>0); - &movzw ($rem[1],"($rem_8bit,$rem[1],2)") if ($i>0); - - &movz ($nhi[1],&LB($dat)); - &shl (&LB($nlo),4); - &movzb ($rem[0],"(%rsp,$nhi[0])"); - - &shr ($nhi[1],4) if ($i<14); - &and ($nhi[1],0xf0) if ($i==14); - &shl ($rem[1],48) if ($i>0); - &xor ($rem[0],$Zlo); - - &mov ($tmp,$Zhi); - &xor ($Zhi,$rem[1]) if ($i>0); - &shr ($Zlo,8); - - &movz ($rem[0],&LB($rem[0])); - &mov ($dat,"$j($Xi)") if (--$j%4==0); - &shr ($Zhi,8); - - &xor ($Zlo,"-128($Hshr4,$nhi[0],8)"); - &shl ($tmp,56); - &xor ($Zhi,"($Hshr4,$nhi[0],8)"); - - unshift (@nhi,pop(@nhi)); # "rotate" registers - unshift (@rem,pop(@rem)); - } - &movzw ($rem[1],"($rem_8bit,$rem[1],2)"); - &xor ($Zlo,"8($Htbl,$nlo)"); - &xor ($Zhi,"($Htbl,$nlo)"); - - &shl ($rem[1],48); - &xor ($Zlo,$tmp); - - &xor ($Zhi,$rem[1]); - &movz ($rem[0],&LB($Zlo)); - &shr ($Zlo,4); - - &mov ($tmp,$Zhi); - &shl (&LB($rem[0]),4); - &shr ($Zhi,4); - - &xor ($Zlo,"8($Htbl,$nhi[0])"); - &movzw ($rem[0],"($rem_8bit,$rem[0],2)"); - &shl ($tmp,60); - - &xor ($Zhi,"($Htbl,$nhi[0])"); - &xor ($Zlo,$tmp); - &shl ($rem[0],48); - - &bswap ($Zlo); - &xor ($Zhi,$rem[0]); - - &bswap ($Zhi); - &cmp ($inp,$len); - &jb (".Louter_loop"); -} -$code.=<<___; - mov $Zlo,8($Xi) - mov $Zhi,($Xi) - - lea 280(%rsp),%rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lghash_epilogue: - ret -.size gcm_ghash_4bit,.-gcm_ghash_4bit -___ - -###################################################################### -# PCLMULQDQ version. - -@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order - -($Xi,$Xhi)=("%xmm0","%xmm1"); $Hkey="%xmm2"; -($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5"); - -sub clmul64x64_T2 { # minimal register pressure -my ($Xhi,$Xi,$Hkey,$modulo)=@_; - -$code.=<<___ if (!defined($modulo)); - movdqa $Xi,$Xhi # - pshufd \$0b01001110,$Xi,$T1 - pshufd \$0b01001110,$Hkey,$T2 - pxor $Xi,$T1 # - pxor $Hkey,$T2 -___ -$code.=<<___; - pclmulqdq \$0x00,$Hkey,$Xi ####### - pclmulqdq \$0x11,$Hkey,$Xhi ####### - pclmulqdq \$0x00,$T2,$T1 ####### - pxor $Xi,$T1 # - pxor $Xhi,$T1 # - - movdqa $T1,$T2 # - psrldq \$8,$T1 - pslldq \$8,$T2 # - pxor $T1,$Xhi - pxor $T2,$Xi # -___ -} - -sub reduction_alg9 { # 17/13 times faster than Intel version -my ($Xhi,$Xi) = @_; - -$code.=<<___; - # 1st phase - movdqa $Xi,$T1 # - psllq \$1,$Xi - pxor $T1,$Xi # - psllq \$5,$Xi # - pxor $T1,$Xi # - psllq \$57,$Xi # - movdqa $Xi,$T2 # - pslldq \$8,$Xi - psrldq \$8,$T2 # - pxor $T1,$Xi - pxor $T2,$Xhi # - - # 2nd phase - movdqa $Xi,$T2 - psrlq \$5,$Xi - pxor $T2,$Xi # - psrlq \$1,$Xi # - pxor $T2,$Xi # - pxor $Xhi,$T2 - psrlq \$1,$Xi # - pxor $T2,$Xi # -___ -} - -{ my ($Htbl,$Xip)=@_4args; - -$code.=<<___; -.globl gcm_init_clmul -.type gcm_init_clmul,\@abi-omnipotent -.align 16 -gcm_init_clmul: - movdqu ($Xip),$Hkey - pshufd \$0b01001110,$Hkey,$Hkey # dword swap - - # <<1 twist - pshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword - movdqa $Hkey,$T1 - psllq \$1,$Hkey - pxor $T3,$T3 # - psrlq \$63,$T1 - pcmpgtd $T2,$T3 # broadcast carry bit - pslldq \$8,$T1 - por $T1,$Hkey # H<<=1 - - # magic reduction - pand .L0x1c2_polynomial(%rip),$T3 - pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial - - # calculate H^2 - movdqa $Hkey,$Xi -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); - &reduction_alg9 ($Xhi,$Xi); -$code.=<<___; - movdqu $Hkey,($Htbl) # save H - movdqu $Xi,16($Htbl) # save H^2 - ret -.size gcm_init_clmul,.-gcm_init_clmul -___ -} - -{ my ($Xip,$Htbl)=@_4args; - -$code.=<<___; -.globl gcm_gmult_clmul -.type gcm_gmult_clmul,\@abi-omnipotent -.align 16 -gcm_gmult_clmul: - movdqu ($Xip),$Xi - movdqa .Lbswap_mask(%rip),$T3 - movdqu ($Htbl),$Hkey - pshufb $T3,$Xi -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); - &reduction_alg9 ($Xhi,$Xi); -$code.=<<___; - pshufb $T3,$Xi - movdqu $Xi,($Xip) - ret -.size gcm_gmult_clmul,.-gcm_gmult_clmul -___ -} - -{ my ($Xip,$Htbl,$inp,$len)=@_4args; - my $Xn="%xmm6"; - my $Xhn="%xmm7"; - my $Hkey2="%xmm8"; - my $T1n="%xmm9"; - my $T2n="%xmm10"; - -$code.=<<___; -.globl gcm_ghash_clmul -.type gcm_ghash_clmul,\@abi-omnipotent -.align 16 -gcm_ghash_clmul: -___ -$code.=<<___ if ($win64); -.LSEH_begin_gcm_ghash_clmul: - # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x83,0xec,0x58 #sub \$0x58,%rsp - .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) - .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) - .byte 0x44,0x0f,0x29,0x44,0x24,0x20 #movaps %xmm8,0x20(%rsp) - .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 #movaps %xmm9,0x30(%rsp) - .byte 0x44,0x0f,0x29,0x54,0x24,0x40 #movaps %xmm10,0x40(%rsp) -___ -$code.=<<___; - movdqa .Lbswap_mask(%rip),$T3 - - movdqu ($Xip),$Xi - movdqu ($Htbl),$Hkey - pshufb $T3,$Xi - - sub \$0x10,$len - jz .Lodd_tail - - movdqu 16($Htbl),$Hkey2 - ####### - # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = - # [(H*Ii+1) + (H*Xi+1)] mod P = - # [(H*Ii+1) + H^2*(Ii+Xi)] mod P - # - movdqu ($inp),$T1 # Ii - movdqu 16($inp),$Xn # Ii+1 - pshufb $T3,$T1 - pshufb $T3,$Xn - pxor $T1,$Xi # Ii+Xi -___ - &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 -$code.=<<___; - movdqa $Xi,$Xhi # - pshufd \$0b01001110,$Xi,$T1 - pshufd \$0b01001110,$Hkey2,$T2 - pxor $Xi,$T1 # - pxor $Hkey2,$T2 - - lea 32($inp),$inp # i+=2 - sub \$0x20,$len - jbe .Leven_tail - -.Lmod_loop: -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) -$code.=<<___; - movdqu ($inp),$T1 # Ii - pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) - pxor $Xhn,$Xhi - - movdqu 16($inp),$Xn # Ii+1 - pshufb $T3,$T1 - pshufb $T3,$Xn - - movdqa $Xn,$Xhn # - pshufd \$0b01001110,$Xn,$T1n - pshufd \$0b01001110,$Hkey,$T2n - pxor $Xn,$T1n # - pxor $Hkey,$T2n - pxor $T1,$Xhi # "Ii+Xi", consume early - - movdqa $Xi,$T1 # 1st phase - psllq \$1,$Xi - pxor $T1,$Xi # - psllq \$5,$Xi # - pxor $T1,$Xi # - pclmulqdq \$0x00,$Hkey,$Xn ####### - psllq \$57,$Xi # - movdqa $Xi,$T2 # - pslldq \$8,$Xi - psrldq \$8,$T2 # - pxor $T1,$Xi - pxor $T2,$Xhi # - - pclmulqdq \$0x11,$Hkey,$Xhn ####### - movdqa $Xi,$T2 # 2nd phase - psrlq \$5,$Xi - pxor $T2,$Xi # - psrlq \$1,$Xi # - pxor $T2,$Xi # - pxor $Xhi,$T2 - psrlq \$1,$Xi # - pxor $T2,$Xi # - - pclmulqdq \$0x00,$T2n,$T1n ####### - movdqa $Xi,$Xhi # - pshufd \$0b01001110,$Xi,$T1 - pshufd \$0b01001110,$Hkey2,$T2 - pxor $Xi,$T1 # - pxor $Hkey2,$T2 - - pxor $Xn,$T1n # - pxor $Xhn,$T1n # - movdqa $T1n,$T2n # - psrldq \$8,$T1n - pslldq \$8,$T2n # - pxor $T1n,$Xhn - pxor $T2n,$Xn # - - lea 32($inp),$inp - sub \$0x20,$len - ja .Lmod_loop - -.Leven_tail: -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) -$code.=<<___; - pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) - pxor $Xhn,$Xhi -___ - &reduction_alg9 ($Xhi,$Xi); -$code.=<<___; - test $len,$len - jnz .Ldone - -.Lodd_tail: - movdqu ($inp),$T1 # Ii - pshufb $T3,$T1 - pxor $T1,$Xi # Ii+Xi -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) - &reduction_alg9 ($Xhi,$Xi); -$code.=<<___; -.Ldone: - pshufb $T3,$Xi - movdqu $Xi,($Xip) -___ -$code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - movaps 0x40(%rsp),%xmm10 - add \$0x58,%rsp -___ -$code.=<<___; - ret -.LSEH_end_gcm_ghash_clmul: -.size gcm_ghash_clmul,.-gcm_ghash_clmul -___ -} - -$code.=<<___; -.align 64 -.Lbswap_mask: - .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 -.L0x1c2_polynomial: - .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -.align 64 -.type .Lrem_4bit,\@object -.Lrem_4bit: - .long 0,`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16` - .long 0,`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16` - .long 0,`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16` - .long 0,`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16` -.type .Lrem_8bit,\@object -.Lrem_8bit: - .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E - .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E - .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E - .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E - .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E - .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E - .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E - .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E - .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE - .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE - .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE - .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE - .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E - .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E - .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE - .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE - .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E - .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E - .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E - .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E - .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E - .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E - .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E - .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E - .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE - .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE - .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE - .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE - .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E - .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E - .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE - .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE - -.asciz "GHASH for x86_64, CRYPTOGAMS by " -.align 64 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - lea 24(%rax),%rax # adjust "rsp" - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$`1232/8`,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_gcm_gmult_4bit - .rva .LSEH_end_gcm_gmult_4bit - .rva .LSEH_info_gcm_gmult_4bit - - .rva .LSEH_begin_gcm_ghash_4bit - .rva .LSEH_end_gcm_ghash_4bit - .rva .LSEH_info_gcm_ghash_4bit - - .rva .LSEH_begin_gcm_ghash_clmul - .rva .LSEH_end_gcm_ghash_clmul - .rva .LSEH_info_gcm_ghash_clmul - -.section .xdata -.align 8 -.LSEH_info_gcm_gmult_4bit: - .byte 9,0,0,0 - .rva se_handler - .rva .Lgmult_prologue,.Lgmult_epilogue # HandlerData -.LSEH_info_gcm_ghash_4bit: - .byte 9,0,0,0 - .rva se_handler - .rva .Lghash_prologue,.Lghash_epilogue # HandlerData -.LSEH_info_gcm_ghash_clmul: - .byte 0x01,0x1f,0x0b,0x00 - .byte 0x1f,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10 - .byte 0x19,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9 - .byte 0x13,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8 - .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 - .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 - .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58 -___ -} - -$code =~ s/\`([^\`]*)\`/eval($1)/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/modes/cbc128.c b/drivers/builtin_openssl2/crypto/modes/cbc128.c index 1ed7967274..c13caea535 100644 --- a/drivers/builtin_openssl2/crypto/modes/cbc128.c +++ b/drivers/builtin_openssl2/crypto/modes/cbc128.c @@ -59,7 +59,7 @@ #endif #include -#ifndef STRICT_ALIGNMENT +#if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC) # define STRICT_ALIGNMENT 0 #endif diff --git a/drivers/builtin_openssl2/crypto/modes/gcm128.c b/drivers/builtin_openssl2/crypto/modes/gcm128.c index 0ee569fb7a..e299131c13 100644 --- a/drivers/builtin_openssl2/crypto/modes/gcm128.c +++ b/drivers/builtin_openssl2/crypto/modes/gcm128.c @@ -687,20 +687,31 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) #endif -#if TABLE_BITS==4 && defined(GHASH_ASM) +#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) # if !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) # define GHASH_ASM_X86_OR_64 # define GCM_FUNCREF_4BIT -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[]; void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); +# if defined(__i386) || defined(__i386__) || defined(_M_IX86) +# define gcm_init_avx gcm_init_clmul +# define gcm_gmult_avx gcm_gmult_clmul +# define gcm_ghash_avx gcm_ghash_clmul +# else +void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); +# endif + # if defined(__i386) || defined(__i386__) || defined(_M_IX86) # define GHASH_ASM_X86 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); @@ -711,15 +722,41 @@ void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); # endif -# elif defined(__arm__) || defined(__arm) +# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # include "arm_arch.h" -# if __ARM_ARCH__>=7 +# if __ARM_MAX_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT +# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) +# if defined(__arm__) || defined(__arm) +# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); +void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); # endif +# elif defined(__sparc__) || defined(__sparc) +# include "sparc_arch.h" +# define GHASH_ASM_SPARC +# define GCM_FUNCREF_4BIT +extern unsigned int OPENSSL_sparcv9cap_P[]; +void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); +# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# define GHASH_ASM_PPC +# define GCM_FUNCREF_4BIT +void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); # endif #endif @@ -768,9 +805,15 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */ OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ - gcm_init_clmul(ctx->Htable, ctx->H.u); - ctx->gmult = gcm_gmult_clmul; - ctx->ghash = gcm_ghash_clmul; + if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ + gcm_init_avx(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_avx; + ctx->ghash = gcm_ghash_avx; + } else { + gcm_init_clmul(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_clmul; + ctx->ghash = gcm_ghash_clmul; + } return; } # endif @@ -792,13 +835,52 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) - if (OPENSSL_armcap_P & ARMV7_NEON) { +# ifdef PMULL_CAPABLE + if (PMULL_CAPABLE) { + gcm_init_v8(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_v8; + ctx->ghash = gcm_ghash_v8; + } else +# endif +# ifdef NEON_CAPABLE + if (NEON_CAPABLE) { + gcm_init_neon(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; + } else +# endif + { + gcm_init_4bit(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_4bit; +# if defined(GHASH) + ctx->ghash = gcm_ghash_4bit; +# else + ctx->ghash = NULL; +# endif + } +# elif defined(GHASH_ASM_SPARC) + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { + gcm_init_vis3(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_vis3; + ctx->ghash = gcm_ghash_vis3; + } else { + gcm_init_4bit(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } +# elif defined(GHASH_ASM_PPC) + if (OPENSSL_ppccap_P & PPC_CRYPTO207) { + gcm_init_p8(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_p8; + ctx->ghash = gcm_ghash_p8; } else { gcm_init_4bit(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_4bit; +# if defined(GHASH) ctx->ghash = gcm_ghash_4bit; +# else + ctx->ghash = NULL; +# endif } # else gcm_init_4bit(ctx->Htable, ctx->H.u); diff --git a/drivers/builtin_openssl2/crypto/modes/modes_lcl.h b/drivers/builtin_openssl2/crypto/modes/modes_lcl.h index 296849b867..fe14ec7002 100644 --- a/drivers/builtin_openssl2/crypto/modes/modes_lcl.h +++ b/drivers/builtin_openssl2/crypto/modes/modes_lcl.h @@ -25,39 +25,49 @@ typedef unsigned int u32; typedef unsigned char u8; #define STRICT_ALIGNMENT 1 -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT +#ifndef PEDANTIC +# if defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ + defined(__aarch64__) || \ + defined(__s390__) || defined(__s390x__) +# undef STRICT_ALIGNMENT +# endif #endif #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) # if defined(__GNUC__) && __GNUC__>=2 # if defined(__x86_64) || defined(__x86_64__) -# define BSWAP8(x) ({ u64 ret=(x); \ +# define BSWAP8(x) ({ u64 ret_=(x); \ asm ("bswapq %0" \ - : "+r"(ret)); ret; }) -# define BSWAP4(x) ({ u32 ret=(x); \ + : "+r"(ret_)); ret_; }) +# define BSWAP4(x) ({ u32 ret_=(x); \ asm ("bswapl %0" \ - : "+r"(ret)); ret; }) + : "+r"(ret_)); ret_; }) # elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ +# define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ asm ("bswapl %0; bswapl %1" \ - : "+r"(hi),"+r"(lo)); \ - (u64)hi<<32|lo; }) -# define BSWAP4(x) ({ u32 ret=(x); \ + : "+r"(hi_),"+r"(lo_)); \ + (u64)hi_<<32|lo_; }) +# define BSWAP4(x) ({ u32 ret_=(x); \ asm ("bswapl %0" \ - : "+r"(ret)); ret; }) + : "+r"(ret_)); ret_; }) +# elif defined(__aarch64__) +# define BSWAP8(x) ({ u64 ret_; \ + asm ("rev %0,%1" \ + : "=r"(ret_) : "r"(x)); ret_; }) +# define BSWAP4(x) ({ u32 ret_; \ + asm ("rev %w0,%w1" \ + : "=r"(ret_) : "r"(x)); ret_; }) # elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ +# define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ asm ("rev %0,%0; rev %1,%1" \ - : "+r"(hi),"+r"(lo)); \ - (u64)hi<<32|lo; }) -# define BSWAP4(x) ({ u32 ret; \ + : "+r"(hi_),"+r"(lo_)); \ + (u64)hi_<<32|lo_; }) +# define BSWAP4(x) ({ u32 ret_; \ asm ("rev %0,%1" \ - : "=r"(ret) : "r"((u32)(x))); \ - ret; }) + : "=r"(ret_) : "r"((u32)(x))); \ + ret_; }) # endif # elif defined(_MSC_VER) # if _MSC_VER>=1300 diff --git a/drivers/builtin_openssl2/crypto/modes/wrap128.c b/drivers/builtin_openssl2/crypto/modes/wrap128.c new file mode 100644 index 0000000000..384978371a --- /dev/null +++ b/drivers/builtin_openssl2/crypto/modes/wrap128.c @@ -0,0 +1,138 @@ +/* crypto/modes/wrap128.c */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include + +static const unsigned char default_iv[] = { + 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, +}; + +/* + * Input size limit: lower than maximum of standards but far larger than + * anything that will be used in practice. + */ +#define CRYPTO128_WRAP_MAX (1UL << 31) + +size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block) +{ + unsigned char *A, B[16], *R; + size_t i, j, t; + if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX)) + return 0; + A = B; + t = 1; + memmove(out + 8, in, inlen); + if (!iv) + iv = default_iv; + + memcpy(A, iv, 8); + + for (j = 0; j < 6; j++) { + R = out + 8; + for (i = 0; i < inlen; i += 8, t++, R += 8) { + memcpy(B + 8, R, 8); + block(B, B, key); + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(R, B + 8, 8); + } + } + memcpy(out, A, 8); + return inlen + 8; +} + +size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block) +{ + unsigned char *A, B[16], *R; + size_t i, j, t; + inlen -= 8; + if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX)) + return 0; + A = B; + t = 6 * (inlen >> 3); + memcpy(A, in, 8); + memmove(out, in + 8, inlen); + for (j = 0; j < 6; j++) { + R = out + inlen - 8; + for (i = 0; i < inlen; i += 8, t--, R -= 8) { + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(B + 8, R, 8); + block(B, B, key); + memcpy(R, B + 8, 8); + } + } + if (!iv) + iv = default_iv; + if (memcmp(A, iv, 8)) { + OPENSSL_cleanse(out, inlen); + return 0; + } + return inlen; +} diff --git a/drivers/builtin_openssl2/crypto/o_dir_test.c b/drivers/builtin_openssl2/crypto/o_dir_test.c deleted file mode 100644 index 60436b72ce..0000000000 --- a/drivers/builtin_openssl2/crypto/o_dir_test.c +++ /dev/null @@ -1,68 +0,0 @@ -/* crypto/o_dir.h */ -/* - * Copied from Richard Levitte's (richard@levitte.org) LP library. All - * symbol names have been changed, with permission from the author. - */ - -/* $LP: LPlib/test/test_dir.c,v 1.1 2004/06/16 22:59:47 _cvs_levitte Exp $ */ -/* - * Copyright (c) 2004, Richard Levitte - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include "e_os2.h" -#include "o_dir.h" - -#if defined OPENSSL_SYS_UNIX || defined OPENSSL_SYS_WIN32 || defined OPENSSL_SYS_WINCE -# define CURRDIR "." -#elif defined OPENSSL_SYS_VMS -# define CURRDIR "SYS$DISK:[]" -#else -# error "No supported platform defined!" -#endif - -int main() -{ - OPENSSL_DIR_CTX *ctx = NULL; - const char *result; - - while ((result = OPENSSL_DIR_read(&ctx, CURRDIR)) != NULL) { - printf("%s\n", result); - } - - if (errno) { - perror("test_dir"); - exit(1); - } - - if (!OPENSSL_DIR_end(&ctx)) { - perror("test_dir"); - exit(2); - } - exit(0); -} diff --git a/drivers/builtin_openssl2/crypto/o_str.c b/drivers/builtin_openssl2/crypto/o_str.c index 72de37eaf5..c10842300d 100644 --- a/drivers/builtin_openssl2/crypto/o_str.c +++ b/drivers/builtin_openssl2/crypto/o_str.c @@ -62,7 +62,7 @@ #include "o_str.h" #if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \ - !defined(OPENSSL_SYSNAME_WIN32) && \ + !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && \ !defined(NETWARE_CLIB) #ifdef _WIN32 #include diff --git a/drivers/builtin_openssl2/crypto/o_time.c b/drivers/builtin_openssl2/crypto/o_time.c index 297bfafaf9..635dae184d 100644 --- a/drivers/builtin_openssl2/crypto/o_time.c +++ b/drivers/builtin_openssl2/crypto/o_time.c @@ -246,8 +246,72 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) static long date_to_julian(int y, int m, int d); static void julian_to_date(long jd, int *y, int *m, int *d); +static int julian_adj(const struct tm *tm, int off_day, long offset_sec, + long *pday, int *psec); int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec) +{ + int time_sec, time_year, time_month, time_day; + long time_jd; + + /* Convert time and offset into julian day and seconds */ + if (!julian_adj(tm, off_day, offset_sec, &time_jd, &time_sec)) + return 0; + + /* Convert Julian day back to date */ + + julian_to_date(time_jd, &time_year, &time_month, &time_day); + + if (time_year < 1900 || time_year > 9999) + return 0; + + /* Update tm structure */ + + tm->tm_year = time_year - 1900; + tm->tm_mon = time_month - 1; + tm->tm_mday = time_day; + + tm->tm_hour = time_sec / 3600; + tm->tm_min = (time_sec / 60) % 60; + tm->tm_sec = time_sec % 60; + + return 1; + +} + +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to) +{ + int from_sec, to_sec, diff_sec; + long from_jd, to_jd, diff_day; + if (!julian_adj(from, 0, 0, &from_jd, &from_sec)) + return 0; + if (!julian_adj(to, 0, 0, &to_jd, &to_sec)) + return 0; + diff_day = to_jd - from_jd; + diff_sec = to_sec - from_sec; + /* Adjust differences so both positive or both negative */ + if (diff_day > 0 && diff_sec < 0) { + diff_day--; + diff_sec += SECS_PER_DAY; + } + if (diff_day < 0 && diff_sec > 0) { + diff_day++; + diff_sec -= SECS_PER_DAY; + } + + if (pday) + *pday = (int)diff_day; + if (psec) + *psec = diff_sec; + + return 1; + +} + +/* Convert tm structure and offset into julian day and seconds */ +static int julian_adj(const struct tm *tm, int off_day, long offset_sec, + long *pday, int *psec) { int offset_hms, offset_day; long time_jd; @@ -284,25 +348,9 @@ int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec) if (time_jd < 0) return 0; - /* Convert Julian day back to date */ - - julian_to_date(time_jd, &time_year, &time_month, &time_day); - - if (time_year < 1900 || time_year > 9999) - return 0; - - /* Update tm structure */ - - tm->tm_year = time_year - 1900; - tm->tm_mon = time_month - 1; - tm->tm_mday = time_day; - - tm->tm_hour = offset_hms / 3600; - tm->tm_min = (offset_hms / 60) % 60; - tm->tm_sec = offset_hms % 60; - + *pday = time_jd; + *psec = offset_hms; return 1; - } /* @@ -354,27 +402,39 @@ int main(int argc, char **argv) int check_time(long offset) { - struct tm tm1, tm2; + struct tm tm1, tm2, o1; + int off_day, off_sec; + long toffset; time_t t1, t2; time(&t1); t2 = t1 + offset; OPENSSL_gmtime(&t2, &tm2); OPENSSL_gmtime(&t1, &tm1); + o1 = tm1; OPENSSL_gmtime_adj(&tm1, 0, offset); - if ((tm1.tm_year == tm2.tm_year) && - (tm1.tm_mon == tm2.tm_mon) && - (tm1.tm_mday == tm2.tm_mday) && - (tm1.tm_hour == tm2.tm_hour) && - (tm1.tm_min == tm2.tm_min) && (tm1.tm_sec == tm2.tm_sec)) - return 1; - fprintf(stderr, "TIME ERROR!!\n"); - fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n", - tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900, - tm2.tm_hour, tm2.tm_min, tm2.tm_sec); - fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n", - tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900, - tm1.tm_hour, tm1.tm_min, tm1.tm_sec); - return 0; + if ((tm1.tm_year != tm2.tm_year) || + (tm1.tm_mon != tm2.tm_mon) || + (tm1.tm_mday != tm2.tm_mday) || + (tm1.tm_hour != tm2.tm_hour) || + (tm1.tm_min != tm2.tm_min) || (tm1.tm_sec != tm2.tm_sec)) { + fprintf(stderr, "TIME ERROR!!\n"); + fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n", + tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900, + tm2.tm_hour, tm2.tm_min, tm2.tm_sec); + fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n", + tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900, + tm1.tm_hour, tm1.tm_min, tm1.tm_sec); + return 0; + } + OPENSSL_gmtime_diff(&o1, &tm1, &off_day, &off_sec); + toffset = (long)off_day *SECS_PER_DAY + off_sec; + if (offset != toffset) { + fprintf(stderr, "TIME OFFSET ERROR!!\n"); + fprintf(stderr, "Expected %ld, Got %ld (%d:%d)\n", + offset, toffset, off_day, off_sec); + return 0; + } + return 1; } #endif diff --git a/drivers/builtin_openssl2/crypto/o_time.h b/drivers/builtin_openssl2/crypto/o_time.h index 8e49e66b91..f192c6dccf 100644 --- a/drivers/builtin_openssl2/crypto/o_time.h +++ b/drivers/builtin_openssl2/crypto/o_time.h @@ -64,5 +64,7 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result); int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec); +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to); #endif diff --git a/drivers/builtin_openssl2/crypto/objects/obj_dat.h b/drivers/builtin_openssl2/crypto/objects/obj_dat.h index bc69665bc5..b7e3cf280e 100644 --- a/drivers/builtin_openssl2/crypto/objects/obj_dat.h +++ b/drivers/builtin_openssl2/crypto/objects/obj_dat.h @@ -62,12 +62,12 @@ * [including the GNU Public Licence.] */ -#define NUM_NID 920 -#define NUM_SN 913 -#define NUM_LN 913 -#define NUM_OBJ 857 +#define NUM_NID 958 +#define NUM_SN 951 +#define NUM_LN 951 +#define NUM_OBJ 890 -static const unsigned char lvalues[5974]={ +static const unsigned char lvalues[6255]={ 0x2A,0x86,0x48,0x86,0xF7,0x0D, /* [ 0] OBJ_rsadsi */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01, /* [ 6] OBJ_pkcs */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x02, /* [ 13] OBJ_md2 */ @@ -919,6 +919,39 @@ static const unsigned char lvalues[5974]={ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x08,/* [5946] OBJ_mgf1 */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x0A,/* [5955] OBJ_rsassaPss */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x07,/* [5964] OBJ_rsaesOaep */ +0x2A,0x86,0x48,0xCE,0x3E,0x02,0x01, /* [5973] OBJ_dhpublicnumber */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01,/* [5980] OBJ_brainpoolP160r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x02,/* [5989] OBJ_brainpoolP160t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03,/* [5998] OBJ_brainpoolP192r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x04,/* [6007] OBJ_brainpoolP192t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05,/* [6016] OBJ_brainpoolP224r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x06,/* [6025] OBJ_brainpoolP224t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07,/* [6034] OBJ_brainpoolP256r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x08,/* [6043] OBJ_brainpoolP256t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09,/* [6052] OBJ_brainpoolP320r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0A,/* [6061] OBJ_brainpoolP320t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B,/* [6070] OBJ_brainpoolP384r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0C,/* [6079] OBJ_brainpoolP384t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D,/* [6088] OBJ_brainpoolP512r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0E,/* [6097] OBJ_brainpoolP512t1 */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x09,/* [6106] OBJ_pSpecified */ +0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x02,/* [6115] OBJ_dhSinglePass_stdDH_sha1kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x00, /* [6124] OBJ_dhSinglePass_stdDH_sha224kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x01, /* [6130] OBJ_dhSinglePass_stdDH_sha256kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x02, /* [6136] OBJ_dhSinglePass_stdDH_sha384kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x03, /* [6142] OBJ_dhSinglePass_stdDH_sha512kdf_scheme */ +0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x03,/* [6148] OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x00, /* [6157] OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x01, /* [6163] OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x02, /* [6169] OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x03, /* [6175] OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x02,/* [6181] OBJ_ct_precert_scts */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x03,/* [6191] OBJ_ct_precert_poison */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x04,/* [6201] OBJ_ct_precert_signer */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x05,/* [6211] OBJ_ct_cert_scts */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x01,/* [6221] OBJ_jurisdictionLocalityName */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x02,/* [6232] OBJ_jurisdictionStateOrProvinceName */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x03,/* [6243] OBJ_jurisdictionCountryName */ }; static const ASN1_OBJECT nid_objs[NUM_NID]={ @@ -2399,12 +2432,95 @@ static const ASN1_OBJECT nid_objs[NUM_NID]={ {"AES-256-CBC-HMAC-SHA1","aes-256-cbc-hmac-sha1", NID_aes_256_cbc_hmac_sha1,0,NULL,0}, {"RSAES-OAEP","rsaesOaep",NID_rsaesOaep,9,&(lvalues[5964]),0}, +{"dhpublicnumber","X9.42 DH",NID_dhpublicnumber,7,&(lvalues[5973]),0}, +{"brainpoolP160r1","brainpoolP160r1",NID_brainpoolP160r1,9, + &(lvalues[5980]),0}, +{"brainpoolP160t1","brainpoolP160t1",NID_brainpoolP160t1,9, + &(lvalues[5989]),0}, +{"brainpoolP192r1","brainpoolP192r1",NID_brainpoolP192r1,9, + &(lvalues[5998]),0}, +{"brainpoolP192t1","brainpoolP192t1",NID_brainpoolP192t1,9, + &(lvalues[6007]),0}, +{"brainpoolP224r1","brainpoolP224r1",NID_brainpoolP224r1,9, + &(lvalues[6016]),0}, +{"brainpoolP224t1","brainpoolP224t1",NID_brainpoolP224t1,9, + &(lvalues[6025]),0}, +{"brainpoolP256r1","brainpoolP256r1",NID_brainpoolP256r1,9, + &(lvalues[6034]),0}, +{"brainpoolP256t1","brainpoolP256t1",NID_brainpoolP256t1,9, + &(lvalues[6043]),0}, +{"brainpoolP320r1","brainpoolP320r1",NID_brainpoolP320r1,9, + &(lvalues[6052]),0}, +{"brainpoolP320t1","brainpoolP320t1",NID_brainpoolP320t1,9, + &(lvalues[6061]),0}, +{"brainpoolP384r1","brainpoolP384r1",NID_brainpoolP384r1,9, + &(lvalues[6070]),0}, +{"brainpoolP384t1","brainpoolP384t1",NID_brainpoolP384t1,9, + &(lvalues[6079]),0}, +{"brainpoolP512r1","brainpoolP512r1",NID_brainpoolP512r1,9, + &(lvalues[6088]),0}, +{"brainpoolP512t1","brainpoolP512t1",NID_brainpoolP512t1,9, + &(lvalues[6097]),0}, +{"PSPECIFIED","pSpecified",NID_pSpecified,9,&(lvalues[6106]),0}, +{"dhSinglePass-stdDH-sha1kdf-scheme", + "dhSinglePass-stdDH-sha1kdf-scheme", + NID_dhSinglePass_stdDH_sha1kdf_scheme,9,&(lvalues[6115]),0}, +{"dhSinglePass-stdDH-sha224kdf-scheme", + "dhSinglePass-stdDH-sha224kdf-scheme", + NID_dhSinglePass_stdDH_sha224kdf_scheme,6,&(lvalues[6124]),0}, +{"dhSinglePass-stdDH-sha256kdf-scheme", + "dhSinglePass-stdDH-sha256kdf-scheme", + NID_dhSinglePass_stdDH_sha256kdf_scheme,6,&(lvalues[6130]),0}, +{"dhSinglePass-stdDH-sha384kdf-scheme", + "dhSinglePass-stdDH-sha384kdf-scheme", + NID_dhSinglePass_stdDH_sha384kdf_scheme,6,&(lvalues[6136]),0}, +{"dhSinglePass-stdDH-sha512kdf-scheme", + "dhSinglePass-stdDH-sha512kdf-scheme", + NID_dhSinglePass_stdDH_sha512kdf_scheme,6,&(lvalues[6142]),0}, +{"dhSinglePass-cofactorDH-sha1kdf-scheme", + "dhSinglePass-cofactorDH-sha1kdf-scheme", + NID_dhSinglePass_cofactorDH_sha1kdf_scheme,9,&(lvalues[6148]),0}, +{"dhSinglePass-cofactorDH-sha224kdf-scheme", + "dhSinglePass-cofactorDH-sha224kdf-scheme", + NID_dhSinglePass_cofactorDH_sha224kdf_scheme,6,&(lvalues[6157]),0}, +{"dhSinglePass-cofactorDH-sha256kdf-scheme", + "dhSinglePass-cofactorDH-sha256kdf-scheme", + NID_dhSinglePass_cofactorDH_sha256kdf_scheme,6,&(lvalues[6163]),0}, +{"dhSinglePass-cofactorDH-sha384kdf-scheme", + "dhSinglePass-cofactorDH-sha384kdf-scheme", + NID_dhSinglePass_cofactorDH_sha384kdf_scheme,6,&(lvalues[6169]),0}, +{"dhSinglePass-cofactorDH-sha512kdf-scheme", + "dhSinglePass-cofactorDH-sha512kdf-scheme", + NID_dhSinglePass_cofactorDH_sha512kdf_scheme,6,&(lvalues[6175]),0}, +{"dh-std-kdf","dh-std-kdf",NID_dh_std_kdf,0,NULL,0}, +{"dh-cofactor-kdf","dh-cofactor-kdf",NID_dh_cofactor_kdf,0,NULL,0}, +{"AES-128-CBC-HMAC-SHA256","aes-128-cbc-hmac-sha256", + NID_aes_128_cbc_hmac_sha256,0,NULL,0}, +{"AES-192-CBC-HMAC-SHA256","aes-192-cbc-hmac-sha256", + NID_aes_192_cbc_hmac_sha256,0,NULL,0}, +{"AES-256-CBC-HMAC-SHA256","aes-256-cbc-hmac-sha256", + NID_aes_256_cbc_hmac_sha256,0,NULL,0}, +{"ct_precert_scts","CT Precertificate SCTs",NID_ct_precert_scts,10, + &(lvalues[6181]),0}, +{"ct_precert_poison","CT Precertificate Poison",NID_ct_precert_poison, + 10,&(lvalues[6191]),0}, +{"ct_precert_signer","CT Precertificate Signer",NID_ct_precert_signer, + 10,&(lvalues[6201]),0}, +{"ct_cert_scts","CT Certificate SCTs",NID_ct_cert_scts,10, + &(lvalues[6211]),0}, +{"jurisdictionL","jurisdictionLocalityName", + NID_jurisdictionLocalityName,11,&(lvalues[6221]),0}, +{"jurisdictionST","jurisdictionStateOrProvinceName", + NID_jurisdictionStateOrProvinceName,11,&(lvalues[6232]),0}, +{"jurisdictionC","jurisdictionCountryName", + NID_jurisdictionCountryName,11,&(lvalues[6243]),0}, }; static const unsigned int sn_objs[NUM_SN]={ 364, /* "AD_DVCS" */ 419, /* "AES-128-CBC" */ 916, /* "AES-128-CBC-HMAC-SHA1" */ +948, /* "AES-128-CBC-HMAC-SHA256" */ 421, /* "AES-128-CFB" */ 650, /* "AES-128-CFB1" */ 653, /* "AES-128-CFB8" */ @@ -2414,6 +2530,7 @@ static const unsigned int sn_objs[NUM_SN]={ 913, /* "AES-128-XTS" */ 423, /* "AES-192-CBC" */ 917, /* "AES-192-CBC-HMAC-SHA1" */ +949, /* "AES-192-CBC-HMAC-SHA256" */ 425, /* "AES-192-CFB" */ 651, /* "AES-192-CFB1" */ 654, /* "AES-192-CFB8" */ @@ -2422,6 +2539,7 @@ static const unsigned int sn_objs[NUM_SN]={ 424, /* "AES-192-OFB" */ 427, /* "AES-256-CBC" */ 918, /* "AES-256-CBC-HMAC-SHA1" */ +950, /* "AES-256-CBC-HMAC-SHA256" */ 429, /* "AES-256-CFB" */ 652, /* "AES-256-CFB1" */ 655, /* "AES-256-CFB8" */ @@ -2537,6 +2655,7 @@ static const unsigned int sn_objs[NUM_SN]={ 69, /* "PBKDF2" */ 162, /* "PBMAC1" */ 127, /* "PKIX" */ +935, /* "PSPECIFIED" */ 98, /* "RC2-40-CBC" */ 166, /* "RC2-64-CBC" */ 37, /* "RC2-CBC" */ @@ -2613,6 +2732,20 @@ static const unsigned int sn_objs[NUM_SN]={ 87, /* "basicConstraints" */ 365, /* "basicOCSPResponse" */ 285, /* "biometricInfo" */ +921, /* "brainpoolP160r1" */ +922, /* "brainpoolP160t1" */ +923, /* "brainpoolP192r1" */ +924, /* "brainpoolP192t1" */ +925, /* "brainpoolP224r1" */ +926, /* "brainpoolP224t1" */ +927, /* "brainpoolP256r1" */ +928, /* "brainpoolP256t1" */ +929, /* "brainpoolP320r1" */ +930, /* "brainpoolP320t1" */ +931, /* "brainpoolP384r1" */ +932, /* "brainpoolP384t1" */ +933, /* "brainpoolP512r1" */ +934, /* "brainpoolP512t1" */ 494, /* "buildingName" */ 860, /* "businessCategory" */ 691, /* "c2onb191v4" */ @@ -2658,6 +2791,10 @@ static const unsigned int sn_objs[NUM_SN]={ 884, /* "crossCertificatePair" */ 806, /* "cryptocom" */ 805, /* "cryptopro" */ +954, /* "ct_cert_scts" */ +952, /* "ct_precert_poison" */ +951, /* "ct_precert_scts" */ +953, /* "ct_precert_signer" */ 500, /* "dITRedirect" */ 451, /* "dNSDomain" */ 495, /* "dSAQuality" */ @@ -2667,7 +2804,20 @@ static const unsigned int sn_objs[NUM_SN]={ 891, /* "deltaRevocationList" */ 107, /* "description" */ 871, /* "destinationIndicator" */ +947, /* "dh-cofactor-kdf" */ +946, /* "dh-std-kdf" */ 28, /* "dhKeyAgreement" */ +941, /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */ +942, /* "dhSinglePass-cofactorDH-sha224kdf-scheme" */ +943, /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */ +944, /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */ +945, /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */ +936, /* "dhSinglePass-stdDH-sha1kdf-scheme" */ +937, /* "dhSinglePass-stdDH-sha224kdf-scheme" */ +938, /* "dhSinglePass-stdDH-sha256kdf-scheme" */ +939, /* "dhSinglePass-stdDH-sha384kdf-scheme" */ +940, /* "dhSinglePass-stdDH-sha512kdf-scheme" */ +920, /* "dhpublicnumber" */ 382, /* "directory" */ 887, /* "distinguishedName" */ 892, /* "dmdName" */ @@ -2978,6 +3128,9 @@ static const unsigned int sn_objs[NUM_SN]={ 86, /* "issuerAltName" */ 770, /* "issuingDistributionPoint" */ 492, /* "janetMailbox" */ +957, /* "jurisdictionC" */ +955, /* "jurisdictionL" */ +956, /* "jurisdictionST" */ 150, /* "keyBag" */ 83, /* "keyUsage" */ 477, /* "lastModifiedBy" */ @@ -3328,6 +3481,10 @@ static const unsigned int ln_objs[NUM_LN]={ 285, /* "Biometric Info" */ 179, /* "CA Issuers" */ 785, /* "CA Repository" */ +954, /* "CT Certificate SCTs" */ +952, /* "CT Precertificate Poison" */ +951, /* "CT Precertificate SCTs" */ +953, /* "CT Precertificate Signer" */ 131, /* "Code Signing" */ 783, /* "Diffie-Hellman based MAC" */ 382, /* "Directory" */ @@ -3451,6 +3608,7 @@ static const unsigned int ln_objs[NUM_LN]={ 85, /* "X509v3 Subject Alternative Name" */ 769, /* "X509v3 Subject Directory Attributes" */ 82, /* "X509v3 Subject Key Identifier" */ +920, /* "X9.42 DH" */ 184, /* "X9.57" */ 185, /* "X9.57 CM ?" */ 478, /* "aRecord" */ @@ -3463,6 +3621,7 @@ static const unsigned int ln_objs[NUM_LN]={ 606, /* "additional verification" */ 419, /* "aes-128-cbc" */ 916, /* "aes-128-cbc-hmac-sha1" */ +948, /* "aes-128-cbc-hmac-sha256" */ 896, /* "aes-128-ccm" */ 421, /* "aes-128-cfb" */ 650, /* "aes-128-cfb1" */ @@ -3474,6 +3633,7 @@ static const unsigned int ln_objs[NUM_LN]={ 913, /* "aes-128-xts" */ 423, /* "aes-192-cbc" */ 917, /* "aes-192-cbc-hmac-sha1" */ +949, /* "aes-192-cbc-hmac-sha256" */ 899, /* "aes-192-ccm" */ 425, /* "aes-192-cfb" */ 651, /* "aes-192-cfb1" */ @@ -3484,6 +3644,7 @@ static const unsigned int ln_objs[NUM_LN]={ 424, /* "aes-192-ofb" */ 427, /* "aes-256-cbc" */ 918, /* "aes-256-cbc-hmac-sha1" */ +950, /* "aes-256-cbc-hmac-sha256" */ 902, /* "aes-256-ccm" */ 429, /* "aes-256-cfb" */ 652, /* "aes-256-cfb1" */ @@ -3502,6 +3663,20 @@ static const unsigned int ln_objs[NUM_LN]={ 93, /* "bf-cfb" */ 92, /* "bf-ecb" */ 94, /* "bf-ofb" */ +921, /* "brainpoolP160r1" */ +922, /* "brainpoolP160t1" */ +923, /* "brainpoolP192r1" */ +924, /* "brainpoolP192t1" */ +925, /* "brainpoolP224r1" */ +926, /* "brainpoolP224t1" */ +927, /* "brainpoolP256r1" */ +928, /* "brainpoolP256t1" */ +929, /* "brainpoolP320r1" */ +930, /* "brainpoolP320t1" */ +931, /* "brainpoolP384r1" */ +932, /* "brainpoolP384t1" */ +933, /* "brainpoolP512r1" */ +934, /* "brainpoolP512t1" */ 494, /* "buildingName" */ 860, /* "businessCategory" */ 691, /* "c2onb191v4" */ @@ -3593,7 +3768,19 @@ static const unsigned int ln_objs[NUM_LN]={ 107, /* "description" */ 871, /* "destinationIndicator" */ 80, /* "desx-cbc" */ +947, /* "dh-cofactor-kdf" */ +946, /* "dh-std-kdf" */ 28, /* "dhKeyAgreement" */ +941, /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */ +942, /* "dhSinglePass-cofactorDH-sha224kdf-scheme" */ +943, /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */ +944, /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */ +945, /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */ +936, /* "dhSinglePass-stdDH-sha1kdf-scheme" */ +937, /* "dhSinglePass-stdDH-sha224kdf-scheme" */ +938, /* "dhSinglePass-stdDH-sha256kdf-scheme" */ +939, /* "dhSinglePass-stdDH-sha384kdf-scheme" */ +940, /* "dhSinglePass-stdDH-sha512kdf-scheme" */ 11, /* "directory services (X.500)" */ 378, /* "directory services - algorithms" */ 887, /* "distinguishedName" */ @@ -3881,6 +4068,9 @@ static const unsigned int ln_objs[NUM_LN]={ 645, /* "itu-t" */ 492, /* "janetMailbox" */ 646, /* "joint-iso-itu-t" */ +957, /* "jurisdictionCountryName" */ +955, /* "jurisdictionLocalityName" */ +956, /* "jurisdictionStateOrProvinceName" */ 150, /* "keyBag" */ 773, /* "kisa" */ 477, /* "lastModifiedBy" */ @@ -3917,6 +4107,7 @@ static const unsigned int ln_objs[NUM_LN]={ 18, /* "organizationalUnitName" */ 475, /* "otherMailbox" */ 876, /* "owner" */ +935, /* "pSpecified" */ 489, /* "pagerTelephoneNumber" */ 782, /* "password based MAC" */ 374, /* "path" */ @@ -4560,6 +4751,14 @@ static const unsigned int obj_objs[NUM_OBJ]={ 505, /* OBJ_mime_mhs_headings 1 3 6 1 7 1 1 */ 506, /* OBJ_mime_mhs_bodies 1 3 6 1 7 1 2 */ 119, /* OBJ_ripemd160WithRSA 1 3 36 3 3 1 2 */ +937, /* OBJ_dhSinglePass_stdDH_sha224kdf_scheme 1 3 132 1 11 0 */ +938, /* OBJ_dhSinglePass_stdDH_sha256kdf_scheme 1 3 132 1 11 1 */ +939, /* OBJ_dhSinglePass_stdDH_sha384kdf_scheme 1 3 132 1 11 2 */ +940, /* OBJ_dhSinglePass_stdDH_sha512kdf_scheme 1 3 132 1 11 3 */ +942, /* OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme 1 3 132 1 14 0 */ +943, /* OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme 1 3 132 1 14 1 */ +944, /* OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme 1 3 132 1 14 2 */ +945, /* OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme 1 3 132 1 14 3 */ 631, /* OBJ_setAttr_GenCryptgrm 2 23 42 3 3 3 1 */ 632, /* OBJ_setAttr_T2Enc 2 23 42 3 3 4 1 */ 633, /* OBJ_setAttr_T2cleartxt 2 23 42 3 3 4 2 */ @@ -4608,6 +4807,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 416, /* OBJ_ecdsa_with_SHA1 1 2 840 10045 4 1 */ 791, /* OBJ_ecdsa_with_Recommended 1 2 840 10045 4 2 */ 792, /* OBJ_ecdsa_with_Specified 1 2 840 10045 4 3 */ +920, /* OBJ_dhpublicnumber 1 2 840 10046 2 1 */ 258, /* OBJ_id_pkix_mod 1 3 6 1 5 5 7 0 */ 175, /* OBJ_id_pe 1 3 6 1 5 5 7 1 */ 259, /* OBJ_id_qt 1 3 6 1 5 5 7 2 */ @@ -4825,6 +5025,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 644, /* OBJ_rsaOAEPEncryptionSET 1 2 840 113549 1 1 6 */ 919, /* OBJ_rsaesOaep 1 2 840 113549 1 1 7 */ 911, /* OBJ_mgf1 1 2 840 113549 1 1 8 */ +935, /* OBJ_pSpecified 1 2 840 113549 1 1 9 */ 912, /* OBJ_rsassaPss 1 2 840 113549 1 1 10 */ 668, /* OBJ_sha256WithRSAEncryption 1 2 840 113549 1 1 11 */ 669, /* OBJ_sha384WithRSAEncryption 1 2 840 113549 1 1 12 */ @@ -4886,6 +5087,22 @@ static const unsigned int obj_objs[NUM_OBJ]={ 373, /* OBJ_id_pkix_OCSP_valid 1 3 6 1 5 5 7 48 1 9 */ 374, /* OBJ_id_pkix_OCSP_path 1 3 6 1 5 5 7 48 1 10 */ 375, /* OBJ_id_pkix_OCSP_trustRoot 1 3 6 1 5 5 7 48 1 11 */ +921, /* OBJ_brainpoolP160r1 1 3 36 3 3 2 8 1 1 1 */ +922, /* OBJ_brainpoolP160t1 1 3 36 3 3 2 8 1 1 2 */ +923, /* OBJ_brainpoolP192r1 1 3 36 3 3 2 8 1 1 3 */ +924, /* OBJ_brainpoolP192t1 1 3 36 3 3 2 8 1 1 4 */ +925, /* OBJ_brainpoolP224r1 1 3 36 3 3 2 8 1 1 5 */ +926, /* OBJ_brainpoolP224t1 1 3 36 3 3 2 8 1 1 6 */ +927, /* OBJ_brainpoolP256r1 1 3 36 3 3 2 8 1 1 7 */ +928, /* OBJ_brainpoolP256t1 1 3 36 3 3 2 8 1 1 8 */ +929, /* OBJ_brainpoolP320r1 1 3 36 3 3 2 8 1 1 9 */ +930, /* OBJ_brainpoolP320t1 1 3 36 3 3 2 8 1 1 10 */ +931, /* OBJ_brainpoolP384r1 1 3 36 3 3 2 8 1 1 11 */ +932, /* OBJ_brainpoolP384t1 1 3 36 3 3 2 8 1 1 12 */ +933, /* OBJ_brainpoolP512r1 1 3 36 3 3 2 8 1 1 13 */ +934, /* OBJ_brainpoolP512t1 1 3 36 3 3 2 8 1 1 14 */ +936, /* OBJ_dhSinglePass_stdDH_sha1kdf_scheme 1 3 133 16 840 63 0 2 */ +941, /* OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme 1 3 133 16 840 63 0 3 */ 418, /* OBJ_aes_128_ecb 2 16 840 1 101 3 4 1 1 */ 419, /* OBJ_aes_128_cbc 2 16 840 1 101 3 4 1 2 */ 420, /* OBJ_aes_128_ofb128 2 16 840 1 101 3 4 1 3 */ @@ -5013,6 +5230,10 @@ static const unsigned int obj_objs[NUM_OBJ]={ 138, /* OBJ_ms_efs 1 3 6 1 4 1 311 10 3 4 */ 648, /* OBJ_ms_smartcard_login 1 3 6 1 4 1 311 20 2 2 */ 649, /* OBJ_ms_upn 1 3 6 1 4 1 311 20 2 3 */ +951, /* OBJ_ct_precert_scts 1 3 6 1 4 1 11129 2 4 2 */ +952, /* OBJ_ct_precert_poison 1 3 6 1 4 1 11129 2 4 3 */ +953, /* OBJ_ct_precert_signer 1 3 6 1 4 1 11129 2 4 4 */ +954, /* OBJ_ct_cert_scts 1 3 6 1 4 1 11129 2 4 5 */ 751, /* OBJ_camellia_128_cbc 1 2 392 200011 61 1 1 1 2 */ 752, /* OBJ_camellia_192_cbc 1 2 392 200011 61 1 1 1 3 */ 753, /* OBJ_camellia_256_cbc 1 2 392 200011 61 1 1 1 4 */ @@ -5091,5 +5312,8 @@ static const unsigned int obj_objs[NUM_OBJ]={ 154, /* OBJ_secretBag 1 2 840 113549 1 12 10 1 5 */ 155, /* OBJ_safeContentsBag 1 2 840 113549 1 12 10 1 6 */ 34, /* OBJ_idea_cbc 1 3 6 1 4 1 188 7 1 1 2 */ +955, /* OBJ_jurisdictionLocalityName 1 3 6 1 4 1 311 60 2 1 1 */ +956, /* OBJ_jurisdictionStateOrProvinceName 1 3 6 1 4 1 311 60 2 1 2 */ +957, /* OBJ_jurisdictionCountryName 1 3 6 1 4 1 311 60 2 1 3 */ }; diff --git a/drivers/builtin_openssl2/crypto/objects/obj_dat.pl b/drivers/builtin_openssl2/crypto/objects/obj_dat.pl deleted file mode 100644 index 86bcefb97a..0000000000 --- a/drivers/builtin_openssl2/crypto/objects/obj_dat.pl +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/local/bin/perl - -# fixes bug in floating point emulation on sparc64 when -# this script produces off-by-one output on sparc64 -use integer; - -sub obj_cmp - { - local(@a,@b,$_,$r); - - $A=$obj_len{$obj{$nid{$a}}}; - $B=$obj_len{$obj{$nid{$b}}}; - - $r=($A-$B); - return($r) if $r != 0; - - $A=$obj_der{$obj{$nid{$a}}}; - $B=$obj_der{$obj{$nid{$b}}}; - - return($A cmp $B); - } - -sub expand_obj - { - local(*v)=@_; - local($k,$d); - local($i); - - do { - $i=0; - foreach $k (keys %v) - { - if (($v{$k} =~ s/(OBJ_[^,]+),/$v{$1},/)) - { $i++; } - } - } while($i); - foreach $k (keys %v) - { - @a=split(/,/,$v{$k}); - $objn{$k}=$#a+1; - } - return(%objn); - } - -open (IN,"$ARGV[0]") || die "Can't open input file $ARGV[0]"; -open (OUT,">$ARGV[1]") || die "Can't open output file $ARGV[1]"; - -while () - { - next unless /^\#define\s+(\S+)\s+(.*)$/; - $v=$1; - $d=$2; - $d =~ s/^\"//; - $d =~ s/\"$//; - if ($v =~ /^SN_(.*)$/) - { - if(defined $snames{$d}) - { - print "WARNING: Duplicate short name \"$d\"\n"; - } - else - { $snames{$d} = "X"; } - $sn{$1}=$d; - } - elsif ($v =~ /^LN_(.*)$/) - { - if(defined $lnames{$d}) - { - print "WARNING: Duplicate long name \"$d\"\n"; - } - else - { $lnames{$d} = "X"; } - $ln{$1}=$d; - } - elsif ($v =~ /^NID_(.*)$/) - { $nid{$d}=$1; } - elsif ($v =~ /^OBJ_(.*)$/) - { - $obj{$1}=$v; - $objd{$v}=$d; - } - } -close IN; - -%ob=&expand_obj(*objd); - -@a=sort { $a <=> $b } keys %nid; -$n=$a[$#a]+1; - -@lvalues=(); -$lvalues=0; - -for ($i=0; $i<$n; $i++) - { - if (!defined($nid{$i})) - { - push(@out,"{NULL,NULL,NID_undef,0,NULL,0},\n"); - } - else - { - $sn=defined($sn{$nid{$i}})?"$sn{$nid{$i}}":"NULL"; - $ln=defined($ln{$nid{$i}})?"$ln{$nid{$i}}":"NULL"; - - if ($sn eq "NULL") { - $sn=$ln; - $sn{$nid{$i}} = $ln; - } - - if ($ln eq "NULL") { - $ln=$sn; - $ln{$nid{$i}} = $sn; - } - - $out ="{"; - $out.="\"$sn\""; - $out.=","."\"$ln\""; - $out.=",NID_$nid{$i},"; - if (defined($obj{$nid{$i}}) && $objd{$obj{$nid{$i}}} =~ /,/) - { - $v=$objd{$obj{$nid{$i}}}; - $v =~ s/L//g; - $v =~ s/,/ /g; - $r=&der_it($v); - $z=""; - $length=0; - foreach (unpack("C*",$r)) - { - $z.=sprintf("0x%02X,",$_); - $length++; - } - $obj_der{$obj{$nid{$i}}}=$z; - $obj_len{$obj{$nid{$i}}}=$length; - - push(@lvalues,sprintf("%-45s/* [%3d] %s */\n", - $z,$lvalues,$obj{$nid{$i}})); - $out.="$length,&(lvalues[$lvalues]),0"; - $lvalues+=$length; - } - else - { - $out.="0,NULL,0"; - } - $out.="},\n"; - push(@out,$out); - } - } - -@a=grep(defined($sn{$nid{$_}}),0 .. $n); -foreach (sort { $sn{$nid{$a}} cmp $sn{$nid{$b}} } @a) - { - push(@sn,sprintf("%2d,\t/* \"$sn{$nid{$_}}\" */\n",$_)); - } - -@a=grep(defined($ln{$nid{$_}}),0 .. $n); -foreach (sort { $ln{$nid{$a}} cmp $ln{$nid{$b}} } @a) - { - push(@ln,sprintf("%2d,\t/* \"$ln{$nid{$_}}\" */\n",$_)); - } - -@a=grep(defined($obj{$nid{$_}}),0 .. $n); -foreach (sort obj_cmp @a) - { - $m=$obj{$nid{$_}}; - $v=$objd{$m}; - $v =~ s/L//g; - $v =~ s/,/ /g; - push(@ob,sprintf("%2d,\t/* %-32s %s */\n",$_,$m,$v)); - } - -print OUT <<'EOF'; -/* crypto/objects/obj_dat.h */ - -/* THIS FILE IS GENERATED FROM objects.h by obj_dat.pl via the - * following command: - * perl obj_dat.pl obj_mac.h obj_dat.h - */ - -/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -EOF - -printf OUT "#define NUM_NID %d\n",$n; -printf OUT "#define NUM_SN %d\n",$#sn+1; -printf OUT "#define NUM_LN %d\n",$#ln+1; -printf OUT "#define NUM_OBJ %d\n\n",$#ob+1; - -printf OUT "static const unsigned char lvalues[%d]={\n",$lvalues+1; -print OUT @lvalues; -print OUT "};\n\n"; - -printf OUT "static const ASN1_OBJECT nid_objs[NUM_NID]={\n"; -foreach (@out) - { - if (length($_) > 75) - { - $out=""; - foreach (split(/,/)) - { - $t=$out.$_.","; - if (length($t) > 70) - { - print OUT "$out\n"; - $t="\t$_,"; - } - $out=$t; - } - chop $out; - print OUT "$out"; - } - else - { print OUT $_; } - } -print OUT "};\n\n"; - -printf OUT "static const unsigned int sn_objs[NUM_SN]={\n"; -print OUT @sn; -print OUT "};\n\n"; - -printf OUT "static const unsigned int ln_objs[NUM_LN]={\n"; -print OUT @ln; -print OUT "};\n\n"; - -printf OUT "static const unsigned int obj_objs[NUM_OBJ]={\n"; -print OUT @ob; -print OUT "};\n\n"; - -close OUT; - -sub der_it - { - local($v)=@_; - local(@a,$i,$ret,@r); - - @a=split(/\s+/,$v); - $ret.=pack("C*",$a[0]*40+$a[1]); - shift @a; - shift @a; - foreach (@a) - { - @r=(); - $t=0; - while ($_ >= 128) - { - $x=$_%128; - $_/=128; - push(@r,((($t++)?0x80:0)|$x)); - } - push(@r,((($t++)?0x80:0)|$_)); - $ret.=pack("C*",reverse(@r)); - } - return($ret); - } diff --git a/drivers/builtin_openssl2/crypto/objects/obj_mac.num b/drivers/builtin_openssl2/crypto/objects/obj_mac.num index 1d0a7c802d..8e5ea83363 100644 --- a/drivers/builtin_openssl2/crypto/objects/obj_mac.num +++ b/drivers/builtin_openssl2/crypto/objects/obj_mac.num @@ -917,3 +917,41 @@ aes_128_cbc_hmac_sha1 916 aes_192_cbc_hmac_sha1 917 aes_256_cbc_hmac_sha1 918 rsaesOaep 919 +dhpublicnumber 920 +brainpoolP160r1 921 +brainpoolP160t1 922 +brainpoolP192r1 923 +brainpoolP192t1 924 +brainpoolP224r1 925 +brainpoolP224t1 926 +brainpoolP256r1 927 +brainpoolP256t1 928 +brainpoolP320r1 929 +brainpoolP320t1 930 +brainpoolP384r1 931 +brainpoolP384t1 932 +brainpoolP512r1 933 +brainpoolP512t1 934 +pSpecified 935 +dhSinglePass_stdDH_sha1kdf_scheme 936 +dhSinglePass_stdDH_sha224kdf_scheme 937 +dhSinglePass_stdDH_sha256kdf_scheme 938 +dhSinglePass_stdDH_sha384kdf_scheme 939 +dhSinglePass_stdDH_sha512kdf_scheme 940 +dhSinglePass_cofactorDH_sha1kdf_scheme 941 +dhSinglePass_cofactorDH_sha224kdf_scheme 942 +dhSinglePass_cofactorDH_sha256kdf_scheme 943 +dhSinglePass_cofactorDH_sha384kdf_scheme 944 +dhSinglePass_cofactorDH_sha512kdf_scheme 945 +dh_std_kdf 946 +dh_cofactor_kdf 947 +aes_128_cbc_hmac_sha256 948 +aes_192_cbc_hmac_sha256 949 +aes_256_cbc_hmac_sha256 950 +ct_precert_scts 951 +ct_precert_poison 952 +ct_precert_signer 953 +ct_cert_scts 954 +jurisdictionLocalityName 955 +jurisdictionStateOrProvinceName 956 +jurisdictionCountryName 957 diff --git a/drivers/builtin_openssl2/crypto/objects/obj_xref.h b/drivers/builtin_openssl2/crypto/objects/obj_xref.h index b8f7d341f7..e453e99f83 100644 --- a/drivers/builtin_openssl2/crypto/objects/obj_xref.h +++ b/drivers/builtin_openssl2/crypto/objects/obj_xref.h @@ -41,6 +41,21 @@ static const nid_triple sigoid_srt[] = { {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94, NID_id_GostR3410_2001_cc}, {NID_rsassaPss, NID_undef, NID_rsaEncryption}, + {NID_dhSinglePass_stdDH_sha1kdf_scheme, NID_sha1, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha224kdf_scheme, NID_sha224, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha256kdf_scheme, NID_sha256, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha384kdf_scheme, NID_sha384, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha512kdf_scheme, NID_sha512, NID_dh_std_kdf}, + {NID_dhSinglePass_cofactorDH_sha1kdf_scheme, NID_sha1, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha224kdf_scheme, NID_sha224, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha256kdf_scheme, NID_sha256, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha384kdf_scheme, NID_sha384, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha512kdf_scheme, NID_sha512, + NID_dh_cofactor_kdf}, }; static const nid_triple *const sigoid_srt_xref[] = { @@ -54,19 +69,29 @@ static const nid_triple *const sigoid_srt_xref[] = { &sigoid_srt[5], &sigoid_srt[8], &sigoid_srt[12], + &sigoid_srt[30], + &sigoid_srt[35], &sigoid_srt[6], &sigoid_srt[10], &sigoid_srt[11], &sigoid_srt[13], &sigoid_srt[24], &sigoid_srt[20], + &sigoid_srt[32], + &sigoid_srt[37], &sigoid_srt[14], &sigoid_srt[21], + &sigoid_srt[33], + &sigoid_srt[38], &sigoid_srt[15], &sigoid_srt[22], + &sigoid_srt[34], + &sigoid_srt[39], &sigoid_srt[16], &sigoid_srt[23], &sigoid_srt[19], + &sigoid_srt[31], + &sigoid_srt[36], &sigoid_srt[25], &sigoid_srt[26], &sigoid_srt[27], diff --git a/drivers/builtin_openssl2/crypto/objects/obj_xref.txt b/drivers/builtin_openssl2/crypto/objects/obj_xref.txt index cb917182ee..19c94226b2 100644 --- a/drivers/builtin_openssl2/crypto/objects/obj_xref.txt +++ b/drivers/builtin_openssl2/crypto/objects/obj_xref.txt @@ -44,3 +44,15 @@ id_GostR3411_94_with_GostR3410_2001 id_GostR3411_94 id_GostR3410_2001 id_GostR3411_94_with_GostR3410_94 id_GostR3411_94 id_GostR3410_94 id_GostR3411_94_with_GostR3410_94_cc id_GostR3411_94 id_GostR3410_94_cc id_GostR3411_94_with_GostR3410_2001_cc id_GostR3411_94 id_GostR3410_2001_cc +# ECDH KDFs and their corresponding message digests and schemes +dhSinglePass_stdDH_sha1kdf_scheme sha1 dh_std_kdf +dhSinglePass_stdDH_sha224kdf_scheme sha224 dh_std_kdf +dhSinglePass_stdDH_sha256kdf_scheme sha256 dh_std_kdf +dhSinglePass_stdDH_sha384kdf_scheme sha384 dh_std_kdf +dhSinglePass_stdDH_sha512kdf_scheme sha512 dh_std_kdf + +dhSinglePass_cofactorDH_sha1kdf_scheme sha1 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha224kdf_scheme sha224 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha256kdf_scheme sha256 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha384kdf_scheme sha384 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha512kdf_scheme sha512 dh_cofactor_kdf diff --git a/drivers/builtin_openssl2/crypto/objects/objects.pl b/drivers/builtin_openssl2/crypto/objects/objects.pl deleted file mode 100644 index 389dc34837..0000000000 --- a/drivers/builtin_openssl2/crypto/objects/objects.pl +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/local/bin/perl - -open (NUMIN,"$ARGV[1]") || die "Can't open number file $ARGV[1]"; -$max_nid=0; -$o=0; -while() - { - chop; - $o++; - s/#.*$//; - next if /^\s*$/; - $_ = 'X'.$_; - ($Cname,$mynum) = split; - $Cname =~ s/^X//; - if (defined($nidn{$mynum})) - { die "$ARGV[1]:$o:There's already an object with NID ",$mynum," on line ",$order{$mynum},"\n"; } - if (defined($nid{$Cname})) - { die "$ARGV[1]:$o:There's already an object with name ",$Cname," on line ",$order{$nid{$Cname}},"\n"; } - $nid{$Cname} = $mynum; - $nidn{$mynum} = $Cname; - $order{$mynum} = $o; - $max_nid = $mynum if $mynum > $max_nid; - } -close NUMIN; - -open (IN,"$ARGV[0]") || die "Can't open input file $ARGV[0]"; -$Cname=""; -$o=0; -while () - { - chop; - $o++; - if (/^!module\s+(.*)$/) - { - $module = $1."-"; - $module =~ s/\./_/g; - $module =~ s/-/_/g; - } - if (/^!global$/) - { $module = ""; } - if (/^!Cname\s+(.*)$/) - { $Cname = $1; } - if (/^!Alias\s+(.+?)\s+(.*)$/) - { - $Cname = $module.$1; - $myoid = $2; - $myoid = &process_oid($myoid); - $Cname =~ s/-/_/g; - $ordern{$o} = $Cname; - $order{$Cname} = $o; - $obj{$Cname} = $myoid; - $_ = ""; - $Cname = ""; - } - s/!.*$//; - s/#.*$//; - next if /^\s*$/; - ($myoid,$mysn,$myln) = split ':'; - $mysn =~ s/^\s*//; - $mysn =~ s/\s*$//; - $myln =~ s/^\s*//; - $myln =~ s/\s*$//; - $myoid =~ s/^\s*//; - $myoid =~ s/\s*$//; - if ($myoid ne "") - { - $myoid = &process_oid($myoid); - } - - if ($Cname eq "" && ($myln =~ /^[_A-Za-z][\w.-]*$/ )) - { - $Cname = $myln; - $Cname =~ s/\./_/g; - $Cname =~ s/-/_/g; - if ($Cname ne "" && defined($ln{$module.$Cname})) - { die "objects.txt:$o:There's already an object with long name ",$ln{$module.$Cname}," on line ",$order{$module.$Cname},"\n"; } - } - if ($Cname eq "") - { - $Cname = $mysn; - $Cname =~ s/-/_/g; - if ($Cname ne "" && defined($sn{$module.$Cname})) - { die "objects.txt:$o:There's already an object with short name ",$sn{$module.$Cname}," on line ",$order{$module.$Cname},"\n"; } - } - if ($Cname eq "") - { - $Cname = $myln; - $Cname =~ s/-/_/g; - $Cname =~ s/\./_/g; - $Cname =~ s/ /_/g; - if ($Cname ne "" && defined($ln{$module.$Cname})) - { die "objects.txt:$o:There's already an object with long name ",$ln{$module.$Cname}," on line ",$order{$module.$Cname},"\n"; } - } - $Cname =~ s/\./_/g; - $Cname =~ s/-/_/g; - $Cname = $module.$Cname; - $ordern{$o} = $Cname; - $order{$Cname} = $o; - $sn{$Cname} = $mysn; - $ln{$Cname} = $myln; - $obj{$Cname} = $myoid; - if (!defined($nid{$Cname})) - { - $max_nid++; - $nid{$Cname} = $max_nid; - $nidn{$max_nid} = $Cname; -print STDERR "Added OID $Cname\n"; - } - $Cname=""; - } -close IN; - -open (NUMOUT,">$ARGV[1]") || die "Can't open output file $ARGV[1]"; -foreach (sort { $a <=> $b } keys %nidn) - { - print NUMOUT $nidn{$_},"\t\t",$_,"\n"; - } -close NUMOUT; - -open (OUT,">$ARGV[2]") || die "Can't open output file $ARGV[2]"; -print OUT <<'EOF'; -/* crypto/objects/obj_mac.h */ - -/* - * THIS FILE IS GENERATED FROM objects.txt by objects.pl via the following - * command: perl objects.pl objects.txt obj_mac.num obj_mac.h - */ - -/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#define SN_undef "UNDEF" -#define LN_undef "undefined" -#define NID_undef 0 -#define OBJ_undef 0L -EOF - -sub expand - { - my $string = shift; - - 1 while $string =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; - - return $string; - } - -foreach (sort { $a <=> $b } keys %ordern) - { - $Cname=$ordern{$_}; - print OUT "\n"; - print OUT expand("#define SN_$Cname\t\t\"$sn{$Cname}\"\n") if $sn{$Cname} ne ""; - print OUT expand("#define LN_$Cname\t\t\"$ln{$Cname}\"\n") if $ln{$Cname} ne ""; - print OUT expand("#define NID_$Cname\t\t$nid{$Cname}\n") if $nid{$Cname} ne ""; - print OUT expand("#define OBJ_$Cname\t\t$obj{$Cname}\n") if $obj{$Cname} ne ""; - } - -close OUT; - -sub process_oid - { - local($oid)=@_; - local(@a,$oid_pref); - - @a = split(/\s+/,$myoid); - $pref_oid = ""; - $pref_sep = ""; - if (!($a[0] =~ /^[0-9]+$/)) - { - $a[0] =~ s/-/_/g; - if (!defined($obj{$a[0]})) - { die "$ARGV[0]:$o:Undefined identifier ",$a[0],"\n"; } - $pref_oid = "OBJ_" . $a[0]; - $pref_sep = ","; - shift @a; - } - $oids = join('L,',@a) . "L"; - if ($oids ne "L") - { - $oids = $pref_oid . $pref_sep . $oids; - } - else - { - $oids = $pref_oid; - } - return($oids); - } diff --git a/drivers/builtin_openssl2/crypto/objects/objects.txt b/drivers/builtin_openssl2/crypto/objects/objects.txt index d3bfad72a2..b57aabb226 100644 --- a/drivers/builtin_openssl2/crypto/objects/objects.txt +++ b/drivers/builtin_openssl2/crypto/objects/objects.txt @@ -168,6 +168,7 @@ pkcs1 5 : RSA-SHA1 : sha1WithRSAEncryption # According to PKCS #1 version 2.1 pkcs1 7 : RSAES-OAEP : rsaesOaep pkcs1 8 : MGF1 : mgf1 +pkcs1 9 : PSPECIFIED : pSpecified pkcs1 10 : RSASSA-PSS : rsassaPss pkcs1 11 : RSA-SHA256 : sha256WithRSAEncryption @@ -1290,3 +1291,60 @@ kisa 1 6 : SEED-OFB : seed-ofb : AES-128-CBC-HMAC-SHA1 : aes-128-cbc-hmac-sha1 : AES-192-CBC-HMAC-SHA1 : aes-192-cbc-hmac-sha1 : AES-256-CBC-HMAC-SHA1 : aes-256-cbc-hmac-sha1 + : AES-128-CBC-HMAC-SHA256 : aes-128-cbc-hmac-sha256 + : AES-192-CBC-HMAC-SHA256 : aes-192-cbc-hmac-sha256 + : AES-256-CBC-HMAC-SHA256 : aes-256-cbc-hmac-sha256 + +ISO-US 10046 2 1 : dhpublicnumber : X9.42 DH + +# RFC 5639 curve OIDs (see http://www.ietf.org/rfc/rfc5639.txt) +# versionOne OBJECT IDENTIFIER ::= { +# iso(1) identifified-organization(3) teletrust(36) algorithm(3) +# signature-algorithm(3) ecSign(2) ecStdCurvesAndGeneration(8) +# ellipticCurve(1) 1 } +1 3 36 3 3 2 8 1 1 1 : brainpoolP160r1 +1 3 36 3 3 2 8 1 1 2 : brainpoolP160t1 +1 3 36 3 3 2 8 1 1 3 : brainpoolP192r1 +1 3 36 3 3 2 8 1 1 4 : brainpoolP192t1 +1 3 36 3 3 2 8 1 1 5 : brainpoolP224r1 +1 3 36 3 3 2 8 1 1 6 : brainpoolP224t1 +1 3 36 3 3 2 8 1 1 7 : brainpoolP256r1 +1 3 36 3 3 2 8 1 1 8 : brainpoolP256t1 +1 3 36 3 3 2 8 1 1 9 : brainpoolP320r1 +1 3 36 3 3 2 8 1 1 10 : brainpoolP320t1 +1 3 36 3 3 2 8 1 1 11 : brainpoolP384r1 +1 3 36 3 3 2 8 1 1 12 : brainpoolP384t1 +1 3 36 3 3 2 8 1 1 13 : brainpoolP512r1 +1 3 36 3 3 2 8 1 1 14 : brainpoolP512t1 + +# ECDH schemes from RFC5753 +!Alias x9-63-scheme 1 3 133 16 840 63 0 +!Alias secg-scheme certicom-arc 1 + +x9-63-scheme 2 : dhSinglePass-stdDH-sha1kdf-scheme +secg-scheme 11 0 : dhSinglePass-stdDH-sha224kdf-scheme +secg-scheme 11 1 : dhSinglePass-stdDH-sha256kdf-scheme +secg-scheme 11 2 : dhSinglePass-stdDH-sha384kdf-scheme +secg-scheme 11 3 : dhSinglePass-stdDH-sha512kdf-scheme + +x9-63-scheme 3 : dhSinglePass-cofactorDH-sha1kdf-scheme +secg-scheme 14 0 : dhSinglePass-cofactorDH-sha224kdf-scheme +secg-scheme 14 1 : dhSinglePass-cofactorDH-sha256kdf-scheme +secg-scheme 14 2 : dhSinglePass-cofactorDH-sha384kdf-scheme +secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme +# NIDs for use with lookup tables. + : dh-std-kdf + : dh-cofactor-kdf + +# RFC 6962 Extension OIDs (see http://www.ietf.org/rfc/rfc6962.txt) +1 3 6 1 4 1 11129 2 4 2 : ct_precert_scts : CT Precertificate SCTs +1 3 6 1 4 1 11129 2 4 3 : ct_precert_poison : CT Precertificate Poison +1 3 6 1 4 1 11129 2 4 4 : ct_precert_signer : CT Precertificate Signer +1 3 6 1 4 1 11129 2 4 5 : ct_cert_scts : CT Certificate SCTs + +# CABForum EV SSL Certificate Guidelines +# (see https://cabforum.org/extended-validation/) +# OIDs for Subject Jurisdiction of Incorporation or Registration +1 3 6 1 4 1 311 60 2 1 1 : jurisdictionL : jurisdictionLocalityName +1 3 6 1 4 1 311 60 2 1 2 : jurisdictionST : jurisdictionStateOrProvinceName +1 3 6 1 4 1 311 60 2 1 3 : jurisdictionC : jurisdictionCountryName diff --git a/drivers/builtin_openssl2/crypto/objects/objxref.pl b/drivers/builtin_openssl2/crypto/objects/objxref.pl deleted file mode 100644 index 35c06514b9..0000000000 --- a/drivers/builtin_openssl2/crypto/objects/objxref.pl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/local/bin/perl - -use strict; - -my %xref_tbl; -my %oid_tbl; - -my ($mac_file, $xref_file) = @ARGV; - -open(IN, $mac_file) || die "Can't open $mac_file"; - -# Read in OID nid values for a lookup table. - -while () - { - chomp; - my ($name, $num) = /^(\S+)\s+(\S+)$/; - $oid_tbl{$name} = $num; - } -close IN; - -open(IN, $xref_file) || die "Can't open $xref_file"; - -my $ln = 1; - -while () - { - chomp; - s/#.*$//; - next if (/^\S*$/); - my ($xr, $p1, $p2) = /^(\S+)\s+(\S+)\s+(\S+)/; - check_oid($xr); - check_oid($p1); - check_oid($p2); - $xref_tbl{$xr} = [$p1, $p2, $ln]; - } - -my @xrkeys = keys %xref_tbl; - -my @srt1 = sort { $oid_tbl{$a} <=> $oid_tbl{$b}} @xrkeys; - -for(my $i = 0; $i <= $#srt1; $i++) - { - $xref_tbl{$srt1[$i]}[2] = $i; - } - -my @srt2 = sort - { - my$ap1 = $oid_tbl{$xref_tbl{$a}[0]}; - my$bp1 = $oid_tbl{$xref_tbl{$b}[0]}; - return $ap1 - $bp1 if ($ap1 != $bp1); - my$ap2 = $oid_tbl{$xref_tbl{$a}[1]}; - my$bp2 = $oid_tbl{$xref_tbl{$b}[1]}; - - return $ap2 - $bp2; - } @xrkeys; - -my $pname = $0; - -$pname =~ s|^.[^/]/||; - -print <state = OHS_ERROR; + rctx->max_resp_len = OCSP_MAX_RESP_LENGTH; + rctx->mem = BIO_new(BIO_s_mem()); + rctx->io = io; + rctx->asn1_len = 0; + if (maxline > 0) + rctx->iobuflen = maxline; + else + rctx->iobuflen = OCSP_MAX_LINE_LEN; + rctx->iobuf = OPENSSL_malloc(rctx->iobuflen); + if (!rctx->iobuf || !rctx->mem) { + OCSP_REQ_CTX_free(rctx); + return NULL; + } + return rctx; +} + void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx) { if (rctx->mem) @@ -118,20 +146,71 @@ void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx) OPENSSL_free(rctx); } -int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req) +BIO *OCSP_REQ_CTX_get0_mem_bio(OCSP_REQ_CTX *rctx) +{ + return rctx->mem; +} + +void OCSP_set_max_response_length(OCSP_REQ_CTX *rctx, unsigned long len) +{ + if (len == 0) + rctx->max_resp_len = OCSP_MAX_RESP_LENGTH; + else + rctx->max_resp_len = len; +} + +int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it, ASN1_VALUE *val) { static const char req_hdr[] = "Content-Type: application/ocsp-request\r\n" "Content-Length: %d\r\n\r\n"; - if (BIO_printf(rctx->mem, req_hdr, i2d_OCSP_REQUEST(req, NULL)) <= 0) + int reqlen = ASN1_item_i2d(val, NULL, it); + if (BIO_printf(rctx->mem, req_hdr, reqlen) <= 0) + return 0; + if (ASN1_item_i2d_bio(it, rctx->mem, val) <= 0) + return 0; + rctx->state = OHS_ASN1_WRITE_INIT; + return 1; +} + +int OCSP_REQ_CTX_nbio_d2i(OCSP_REQ_CTX *rctx, + ASN1_VALUE **pval, const ASN1_ITEM *it) +{ + int rv, len; + const unsigned char *p; + + rv = OCSP_REQ_CTX_nbio(rctx); + if (rv != 1) + return rv; + + len = BIO_get_mem_data(rctx->mem, &p); + *pval = ASN1_item_d2i(NULL, &p, len, it); + if (*pval == NULL) { + rctx->state = OHS_ERROR; return 0; - if (i2d_OCSP_REQUEST_bio(rctx->mem, req) <= 0) + } + return 1; +} + +int OCSP_REQ_CTX_http(OCSP_REQ_CTX *rctx, const char *op, const char *path) +{ + static const char http_hdr[] = "%s %s HTTP/1.0\r\n"; + + if (!path) + path = "/"; + + if (BIO_printf(rctx->mem, http_hdr, op, path) <= 0) return 0; - rctx->state = OHS_ASN1_WRITE; - rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL); + rctx->state = OHS_HTTP_HEADER; return 1; } +int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req) +{ + return OCSP_REQ_CTX_i2d(rctx, ASN1_ITEM_rptr(OCSP_REQUEST), + (ASN1_VALUE *)req); +} + int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx, const char *name, const char *value) { @@ -147,39 +226,27 @@ int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx, } if (BIO_write(rctx->mem, "\r\n", 2) != 2) return 0; + rctx->state = OHS_HTTP_HEADER; return 1; } -OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, char *path, OCSP_REQUEST *req, +OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, OCSP_REQUEST *req, int maxline) { - static const char post_hdr[] = "POST %s HTTP/1.0\r\n"; - OCSP_REQ_CTX *rctx; - rctx = OPENSSL_malloc(sizeof(OCSP_REQ_CTX)); + OCSP_REQ_CTX *rctx = NULL; + rctx = OCSP_REQ_CTX_new(io, maxline); if (!rctx) return NULL; - rctx->state = OHS_ERROR; - rctx->mem = BIO_new(BIO_s_mem()); - rctx->io = io; - rctx->asn1_len = 0; - if (maxline > 0) - rctx->iobuflen = maxline; - else - rctx->iobuflen = OCSP_MAX_LINE_LEN; - rctx->iobuf = OPENSSL_malloc(rctx->iobuflen); - if (!rctx->mem || !rctx->iobuf) - goto err; - if (!path) - path = "/"; - if (BIO_printf(rctx->mem, post_hdr, path) <= 0) + if (!OCSP_REQ_CTX_http(rctx, "POST", path)) goto err; if (req && !OCSP_REQ_CTX_set1_req(rctx, req)) goto err; return rctx; + err: OCSP_REQ_CTX_free(rctx); return NULL; @@ -256,7 +323,7 @@ static int parse_http_line1(char *line) } -int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) +int OCSP_REQ_CTX_nbio(OCSP_REQ_CTX *rctx) { int i, n; const unsigned char *p; @@ -277,6 +344,17 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) } switch (rctx->state) { + case OHS_HTTP_HEADER: + /* Last operation was adding headers: need a final \r\n */ + if (BIO_write(rctx->mem, "\r\n", 2) != 2) { + rctx->state = OHS_ERROR; + return 0; + } + rctx->state = OHS_ASN1_WRITE_INIT; + + case OHS_ASN1_WRITE_INIT: + rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL); + rctx->state = OHS_ASN1_WRITE; case OHS_ASN1_WRITE: n = BIO_get_mem_data(rctx->mem, &p); @@ -412,7 +490,7 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) rctx->asn1_len |= *p++; } - if (rctx->asn1_len > OCSP_MAX_REQUEST_LENGTH) { + if (rctx->asn1_len > rctx->max_resp_len) { rctx->state = OHS_ERROR; return 0; } @@ -426,18 +504,12 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) /* Fall thru */ case OHS_ASN1_CONTENT: - n = BIO_get_mem_data(rctx->mem, &p); + n = BIO_get_mem_data(rctx->mem, NULL); if (n < (int)rctx->asn1_len) goto next_io; - *presp = d2i_OCSP_RESPONSE(NULL, &p, rctx->asn1_len); - if (*presp) { - rctx->state = OHS_DONE; - return 1; - } - - rctx->state = OHS_ERROR; - return 0; + rctx->state = OHS_DONE; + return 1; break; @@ -450,9 +522,16 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) } +int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)presp, + ASN1_ITEM_rptr(OCSP_RESPONSE)); +} + /* Blocking OCSP request handler: now a special case of non-blocking I/O */ -OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, char *path, OCSP_REQUEST *req) +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req) { OCSP_RESPONSE *resp = NULL; OCSP_REQ_CTX *ctx; diff --git a/drivers/builtin_openssl2/crypto/ocsp/ocsp_lib.c b/drivers/builtin_openssl2/crypto/ocsp/ocsp_lib.c index 8db62ba765..cabf53933a 100644 --- a/drivers/builtin_openssl2/crypto/ocsp/ocsp_lib.c +++ b/drivers/builtin_openssl2/crypto/ocsp/ocsp_lib.c @@ -175,7 +175,7 @@ int OCSP_id_cmp(OCSP_CERTID *a, OCSP_CERTID *b) * whether it is SSL. */ -int OCSP_parse_url(char *url, char **phost, char **pport, char **ppath, +int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath, int *pssl) { char *p, *buf; diff --git a/drivers/builtin_openssl2/crypto/opensslconf.h.bak b/drivers/builtin_openssl2/crypto/opensslconf.h.bak deleted file mode 100644 index b18f4da496..0000000000 --- a/drivers/builtin_openssl2/crypto/opensslconf.h.bak +++ /dev/null @@ -1,229 +0,0 @@ -/* opensslconf.h */ -/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ - -/* OpenSSL was configured with the following options: */ -#ifndef OPENSSL_DOING_MAKEDEPEND - - -#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 -# define OPENSSL_NO_EC_NISTP_64_GCC_128 -#endif -#ifndef OPENSSL_NO_GMP -# define OPENSSL_NO_GMP -#endif -#ifndef OPENSSL_NO_JPAKE -# define OPENSSL_NO_JPAKE -#endif -#ifndef OPENSSL_NO_KRB5 -# define OPENSSL_NO_KRB5 -#endif -#ifndef OPENSSL_NO_MD2 -# define OPENSSL_NO_MD2 -#endif -#ifndef OPENSSL_NO_RC5 -# define OPENSSL_NO_RC5 -#endif -#ifndef OPENSSL_NO_RFC3779 -# define OPENSSL_NO_RFC3779 -#endif -#ifndef OPENSSL_NO_SCTP -# define OPENSSL_NO_SCTP -#endif -#ifndef OPENSSL_NO_STORE -# define OPENSSL_NO_STORE -#endif - -#endif /* OPENSSL_DOING_MAKEDEPEND */ - -#ifndef OPENSSL_NO_DYNAMIC_ENGINE -# define OPENSSL_NO_DYNAMIC_ENGINE -#endif - -/* The OPENSSL_NO_* macros are also defined as NO_* if the application - asks for it. This is a transient feature that is provided for those - who haven't had the time to do the appropriate changes in their - applications. */ -#ifdef OPENSSL_ALGORITHM_DEFINES -# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) -# define NO_EC_NISTP_64_GCC_128 -# endif -# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) -# define NO_GMP -# endif -# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) -# define NO_JPAKE -# endif -# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) -# define NO_KRB5 -# endif -# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) -# define NO_MD2 -# endif -# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) -# define NO_RC5 -# endif -# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) -# define NO_RFC3779 -# endif -# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) -# define NO_SCTP -# endif -# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) -# define NO_STORE -# endif -#endif - -/* crypto/opensslconf.h.in */ - -/* Generate 80386 code? */ -#undef I386_ONLY - -#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ -#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) -#define ENGINESDIR "/usr/local/ssl/lib/engines" -#define OPENSSLDIR "/usr/local/ssl" -#endif -#endif - -#undef OPENSSL_UNISTD -#define OPENSSL_UNISTD - -#undef OPENSSL_EXPORT_VAR_AS_FUNCTION - -#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) -#define IDEA_INT unsigned int -#endif - -#if defined(HEADER_MD2_H) && !defined(MD2_INT) -#define MD2_INT unsigned int -#endif - -#if defined(HEADER_RC2_H) && !defined(RC2_INT) -/* I need to put in a mod for the alpha - eay */ -#define RC2_INT unsigned int -#endif - -#if defined(HEADER_RC4_H) -#if !defined(RC4_INT) -/* using int types make the structure larger but make the code faster - * on most boxes I have tested - up to %20 faster. */ -/* - * I don't know what does "most" mean, but declaring "int" is a must on: - * - Intel P6 because partial register stalls are very expensive; - * - elder Alpha because it lacks byte load/store instructions; - */ -#define RC4_INT unsigned int -#endif -#if !defined(RC4_CHUNK) -/* - * This enables code handling data aligned at natural CPU word - * boundary. See crypto/rc4/rc4_enc.c for further details. - */ -#undef RC4_CHUNK -#endif -#endif - -#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) -/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a - * %20 speed up (longs are 8 bytes, int's are 4). */ -#ifndef DES_LONG -#define DES_LONG unsigned long -#endif -#endif - -#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) -#define CONFIG_HEADER_BN_H -#undef BN_LLONG - -/* Should we define BN_DIV2W here? */ - -/* Only one for the following should be defined */ -#undef SIXTY_FOUR_BIT_LONG -#undef SIXTY_FOUR_BIT -#define THIRTY_TWO_BIT -#endif - -#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) -#define CONFIG_HEADER_RC4_LOCL_H -/* if this is defined data[i] is used instead of *data, this is a %20 - * speedup on x86 */ -#undef RC4_INDEX -#endif - -#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) -#define CONFIG_HEADER_BF_LOCL_H -#undef BF_PTR -#endif /* HEADER_BF_LOCL_H */ - -#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) -#define CONFIG_HEADER_DES_LOCL_H -#ifndef DES_DEFAULT_OPTIONS -/* the following is tweaked from a config script, that is why it is a - * protected undef/define */ -#ifndef DES_PTR -#undef DES_PTR -#endif - -/* This helps C compiler generate the correct code for multiple functional - * units. It reduces register dependancies at the expense of 2 more - * registers */ -#ifndef DES_RISC1 -#undef DES_RISC1 -#endif - -#ifndef DES_RISC2 -#undef DES_RISC2 -#endif - -#if defined(DES_RISC1) && defined(DES_RISC2) -YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! -#endif - -/* Unroll the inner loop, this sometimes helps, sometimes hinders. - * Very mucy CPU dependant */ -#ifndef DES_UNROLL -#undef DES_UNROLL -#endif - -/* These default values were supplied by - * Peter Gutman - * They are only used if nothing else has been defined */ -#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) -/* Special defines which change the way the code is built depending on the - CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find - even newer MIPS CPU's, but at the moment one size fits all for - optimization options. Older Sparc's work better with only UNROLL, but - there's no way to tell at compile time what it is you're running on */ - -#if defined( sun ) /* Newer Sparc's */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#elif defined( __ultrix ) /* Older MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined( __osf1__ ) /* Alpha */ -# define DES_PTR -# define DES_RISC2 -#elif defined ( _AIX ) /* RS6000 */ - /* Unknown */ -#elif defined( __hpux ) /* HP-PA */ - /* Unknown */ -#elif defined( __aux ) /* 68K */ - /* Unknown */ -#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ -# define DES_UNROLL -#elif defined( __sgi ) /* Newer MIPS */ -# define DES_PTR -# define DES_RISC2 -# define DES_UNROLL -#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ -# define DES_PTR -# define DES_RISC1 -# define DES_UNROLL -#endif /* Systems-specific speed defines */ -#endif - -#endif /* DES_DEFAULT_OPTIONS */ -#endif /* HEADER_DES_LOCL_H */ diff --git a/drivers/builtin_openssl2/crypto/pariscid.pl b/drivers/builtin_openssl2/crypto/pariscid.pl deleted file mode 100644 index bfc56fdc7f..0000000000 --- a/drivers/builtin_openssl2/crypto/pariscid.pl +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env perl - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $ST ="std"; -} else { - $LEVEL ="1.1"; - $SIZE_T =4; - $ST ="stw"; -} - -$rp="%r2"; -$sp="%r30"; -$rv="%r28"; - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT OPENSSL_cpuid_setup,ENTRY - .ALIGN 8 -OPENSSL_cpuid_setup - .PROC - .CALLINFO NO_CALLS - .ENTRY - bv ($rp) - .EXIT - nop - .PROCEND - - .EXPORT OPENSSL_rdtsc,ENTRY - .ALIGN 8 -OPENSSL_rdtsc - .PROC - .CALLINFO NO_CALLS - .ENTRY - mfctl %cr16,$rv - bv ($rp) - .EXIT - nop - .PROCEND - - .EXPORT OPENSSL_wipe_cpu,ENTRY - .ALIGN 8 -OPENSSL_wipe_cpu - .PROC - .CALLINFO NO_CALLS - .ENTRY - xor %r0,%r0,%r1 - fcpy,dbl %fr0,%fr4 - xor %r0,%r0,%r19 - fcpy,dbl %fr0,%fr5 - xor %r0,%r0,%r20 - fcpy,dbl %fr0,%fr6 - xor %r0,%r0,%r21 - fcpy,dbl %fr0,%fr7 - xor %r0,%r0,%r22 - fcpy,dbl %fr0,%fr8 - xor %r0,%r0,%r23 - fcpy,dbl %fr0,%fr9 - xor %r0,%r0,%r24 - fcpy,dbl %fr0,%fr10 - xor %r0,%r0,%r25 - fcpy,dbl %fr0,%fr11 - xor %r0,%r0,%r26 - fcpy,dbl %fr0,%fr22 - xor %r0,%r0,%r29 - fcpy,dbl %fr0,%fr23 - xor %r0,%r0,%r31 - fcpy,dbl %fr0,%fr24 - fcpy,dbl %fr0,%fr25 - fcpy,dbl %fr0,%fr26 - fcpy,dbl %fr0,%fr27 - fcpy,dbl %fr0,%fr28 - fcpy,dbl %fr0,%fr29 - fcpy,dbl %fr0,%fr30 - fcpy,dbl %fr0,%fr31 - bv ($rp) - .EXIT - ldo 0($sp),$rv - .PROCEND -___ -{ -my $inp="%r26"; -my $len="%r25"; - -$code.=<<___; - .EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR - .ALIGN 8 -OPENSSL_cleanse - .PROC - .CALLINFO NO_CALLS - .ENTRY - cmpib,*= 0,$len,L\$done - nop - cmpib,*>>= 15,$len,L\$ittle - ldi $SIZE_T-1,%r1 - -L\$align - and,*<> $inp,%r1,%r28 - b,n L\$aligned - stb %r0,0($inp) - ldo -1($len),$len - b L\$align - ldo 1($inp),$inp - -L\$aligned - andcm $len,%r1,%r28 -L\$ot - $ST %r0,0($inp) - addib,*<> -$SIZE_T,%r28,L\$ot - ldo $SIZE_T($inp),$inp - - and,*<> $len,%r1,$len - b,n L\$done -L\$ittle - stb %r0,0($inp) - addib,*<> -1,$len,L\$ittle - ldo 1($inp),$inp -L\$done - bv ($rp) - .EXIT - nop - .PROCEND -___ -} -{ -my ($out,$cnt,$max)=("%r26","%r25","%r24"); -my ($tick,$lasttick)=("%r23","%r22"); -my ($diff,$lastdiff)=("%r21","%r20"); - -$code.=<<___; - .EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR - .ALIGN 8 -OPENSSL_instrument_bus - .PROC - .CALLINFO NO_CALLS - .ENTRY - copy $cnt,$rv - mfctl %cr16,$tick - copy $tick,$lasttick - ldi 0,$diff - - fdc 0($out) - ldw 0($out),$tick - add $diff,$tick,$tick - stw $tick,0($out) -L\$oop - mfctl %cr16,$tick - sub $tick,$lasttick,$diff - copy $tick,$lasttick - - fdc 0($out) - ldw 0($out),$tick - add $diff,$tick,$tick - stw $tick,0($out) - - addib,<> -1,$cnt,L\$oop - addi 4,$out,$out - - bv ($rp) - .EXIT - sub $rv,$cnt,$rv - .PROCEND - - .EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR - .ALIGN 8 -OPENSSL_instrument_bus2 - .PROC - .CALLINFO NO_CALLS - .ENTRY - copy $cnt,$rv - sub %r0,$cnt,$cnt - - mfctl %cr16,$tick - copy $tick,$lasttick - ldi 0,$diff - - fdc 0($out) - ldw 0($out),$tick - add $diff,$tick,$tick - stw $tick,0($out) - - mfctl %cr16,$tick - sub $tick,$lasttick,$diff - copy $tick,$lasttick -L\$oop2 - copy $diff,$lastdiff - fdc 0($out) - ldw 0($out),$tick - add $diff,$tick,$tick - stw $tick,0($out) - - addib,= -1,$max,L\$done2 - nop - - mfctl %cr16,$tick - sub $tick,$lasttick,$diff - copy $tick,$lasttick - cmpclr,<> $lastdiff,$diff,$tick - ldi 1,$tick - - ldi 1,%r1 - xor %r1,$tick,$tick - addb,<> $tick,$cnt,L\$oop2 - shladd,l $tick,2,$out,$out -L\$done2 - bv ($rp) - .EXIT - add $rv,$cnt,$rv - .PROCEND -___ -} -$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); -$code =~ s/,\*/,/gm if ($SIZE_T==4); -$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); -print $code; -close STDOUT; - diff --git a/drivers/builtin_openssl2/crypto/pem/pem_all.c b/drivers/builtin_openssl2/crypto/pem/pem_all.c index 64b8ba7c25..0e5be63ef0 100644 --- a/drivers/builtin_openssl2/crypto/pem/pem_all.c +++ b/drivers/builtin_openssl2/crypto/pem/pem_all.c @@ -421,6 +421,7 @@ EC_KEY *PEM_read_ECPrivateKey(FILE *fp, EC_KEY **eckey, pem_password_cb *cb, #ifndef OPENSSL_NO_DH -IMPLEMENT_PEM_rw_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) +IMPLEMENT_PEM_write_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) + IMPLEMENT_PEM_write_const(DHxparams, DH, PEM_STRING_DHXPARAMS, DHxparams) #endif - IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) +IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) diff --git a/drivers/builtin_openssl2/crypto/pem/pem_err.c b/drivers/builtin_openssl2/crypto/pem/pem_err.c index 702c5adecb..e1f4fdb432 100644 --- a/drivers/builtin_openssl2/crypto/pem/pem_err.c +++ b/drivers/builtin_openssl2/crypto/pem/pem_err.c @@ -1,6 +1,6 @@ /* crypto/pem/pem_err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -99,8 +99,10 @@ static ERR_STRING_DATA PEM_str_functs[] = { {ERR_FUNC(PEM_F_PEM_PK8PKEY), "PEM_PK8PKEY"}, {ERR_FUNC(PEM_F_PEM_READ), "PEM_read"}, {ERR_FUNC(PEM_F_PEM_READ_BIO), "PEM_read_bio"}, + {ERR_FUNC(PEM_F_PEM_READ_BIO_DHPARAMS), "PEM_READ_BIO_DHPARAMS"}, {ERR_FUNC(PEM_F_PEM_READ_BIO_PARAMETERS), "PEM_read_bio_Parameters"}, {ERR_FUNC(PEM_F_PEM_READ_BIO_PRIVATEKEY), "PEM_READ_BIO_PRIVATEKEY"}, + {ERR_FUNC(PEM_F_PEM_READ_DHPARAMS), "PEM_READ_DHPARAMS"}, {ERR_FUNC(PEM_F_PEM_READ_PRIVATEKEY), "PEM_READ_PRIVATEKEY"}, {ERR_FUNC(PEM_F_PEM_SEALFINAL), "PEM_SealFinal"}, {ERR_FUNC(PEM_F_PEM_SEALINIT), "PEM_SealInit"}, diff --git a/drivers/builtin_openssl2/crypto/pem/pem_lib.c b/drivers/builtin_openssl2/crypto/pem/pem_lib.c index 55071616e2..a29821aab2 100644 --- a/drivers/builtin_openssl2/crypto/pem/pem_lib.c +++ b/drivers/builtin_openssl2/crypto/pem/pem_lib.c @@ -229,6 +229,10 @@ static int check_pem(const char *nm, const char *name) } return 0; } + /* If reading DH parameters handle X9.42 DH format too */ + if (!strcmp(nm, PEM_STRING_DHXPARAMS) && + !strcmp(name, PEM_STRING_DHPARAMS)) + return 1; /* Permit older strings */ @@ -472,8 +476,9 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, EVP_CIPHER_CTX_cleanup(&ctx); OPENSSL_cleanse((char *)buf, sizeof(buf)); OPENSSL_cleanse((char *)key, sizeof(key)); - j += i; - if (!o) { + if (o) + j += i; + else { PEMerr(PEM_F_PEM_DO_HEADER, PEM_R_BAD_DECRYPT); return (0); } @@ -574,8 +579,8 @@ static int load_iv(char **fromp, unsigned char *to, int num) } #ifndef OPENSSL_NO_FP_API -int PEM_write(FILE *fp, char *name, char *header, unsigned char *data, - long len) +int PEM_write(FILE *fp, const char *name, const char *header, + const unsigned char *data, long len) { BIO *b; int ret; @@ -591,8 +596,8 @@ int PEM_write(FILE *fp, char *name, char *header, unsigned char *data, } #endif -int PEM_write_bio(BIO *bp, const char *name, char *header, - unsigned char *data, long len) +int PEM_write_bio(BIO *bp, const char *name, const char *header, + const unsigned char *data, long len) { int nlen, n, i, j, outl; unsigned char *buf = NULL; diff --git a/drivers/builtin_openssl2/crypto/pem/pem_pkey.c b/drivers/builtin_openssl2/crypto/pem/pem_pkey.c index 0b05e63fd7..04d6319a22 100644 --- a/drivers/builtin_openssl2/crypto/pem/pem_pkey.c +++ b/drivers/builtin_openssl2/crypto/pem/pem_pkey.c @@ -68,6 +68,9 @@ #ifndef OPENSSL_NO_ENGINE # include #endif +#ifndef OPENSSL_NO_DH +# include +#endif #include "asn1_locl.h" int pem_check_suffix(const char *pem_str, const char *suffix); @@ -241,3 +244,50 @@ int PEM_write_PrivateKey(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc, } #endif + +#ifndef OPENSSL_NO_DH + +/* Transparently read in PKCS#3 or X9.42 DH parameters */ + +DH *PEM_read_bio_DHparams(BIO *bp, DH **x, pem_password_cb *cb, void *u) +{ + char *nm = NULL; + const unsigned char *p = NULL; + unsigned char *data = NULL; + long len; + DH *ret = NULL; + + if (!PEM_bytes_read_bio(&data, &len, &nm, PEM_STRING_DHPARAMS, bp, cb, u)) + return NULL; + p = data; + + if (!strcmp(nm, PEM_STRING_DHXPARAMS)) + ret = d2i_DHxparams(x, &p, len); + else + ret = d2i_DHparams(x, &p, len); + + if (ret == NULL) + PEMerr(PEM_F_PEM_READ_BIO_DHPARAMS, ERR_R_ASN1_LIB); + OPENSSL_free(nm); + OPENSSL_free(data); + return ret; +} + +# ifndef OPENSSL_NO_FP_API +DH *PEM_read_DHparams(FILE *fp, DH **x, pem_password_cb *cb, void *u) +{ + BIO *b; + DH *ret; + + if ((b = BIO_new(BIO_s_file())) == NULL) { + PEMerr(PEM_F_PEM_READ_DHPARAMS, ERR_R_BUF_LIB); + return (0); + } + BIO_set_fp(b, fp, BIO_NOCLOSE); + ret = PEM_read_bio_DHparams(b, x, cb, u); + BIO_free(b); + return (ret); +} +# endif + +#endif diff --git a/drivers/builtin_openssl2/crypto/perlasm/cbc.pl b/drivers/builtin_openssl2/crypto/perlasm/cbc.pl deleted file mode 100644 index 24561e759a..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/cbc.pl +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/local/bin/perl - -# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) -# des_cblock (*input); -# des_cblock (*output); -# long length; -# des_key_schedule schedule; -# des_cblock (*ivec); -# int enc; -# -# calls -# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); -# - -#&cbc("des_ncbc_encrypt","des_encrypt",0); -#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", -# 1,4,5,3,5,-1); -#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", -# 0,4,5,3,5,-1); -#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", -# 0,6,7,3,4,5); -# -# When doing a cipher that needs bigendian order, -# for encrypt, the iv is kept in bigendian form, -# while for decrypt, it is kept in little endian. -sub cbc - { - local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; - # name is the function name - # enc_func and dec_func and the functions to call for encrypt/decrypt - # swap is true if byte order needs to be reversed - # iv_off is parameter number for the iv - # enc_off is parameter number for the encrypt/decrypt flag - # p1,p2,p3 are the offsets for parameters to be passed to the - # underlying calls. - - &function_begin_B($name,""); - &comment(""); - - $in="esi"; - $out="edi"; - $count="ebp"; - - &push("ebp"); - &push("ebx"); - &push("esi"); - &push("edi"); - - $data_off=4; - $data_off+=4 if ($p1 > 0); - $data_off+=4 if ($p2 > 0); - $data_off+=4 if ($p3 > 0); - - &mov($count, &wparam(2)); # length - - &comment("getting iv ptr from parameter $iv_off"); - &mov("ebx", &wparam($iv_off)); # Get iv ptr - - &mov($in, &DWP(0,"ebx","",0));# iv[0] - &mov($out, &DWP(4,"ebx","",0));# iv[1] - - &push($out); - &push($in); - &push($out); # used in decrypt for iv[1] - &push($in); # used in decrypt for iv[0] - - &mov("ebx", "esp"); # This is the address of tin[2] - - &mov($in, &wparam(0)); # in - &mov($out, &wparam(1)); # out - - # We have loaded them all, how lets push things - &comment("getting encrypt flag from parameter $enc_off"); - &mov("ecx", &wparam($enc_off)); # Get enc flag - if ($p3 > 0) - { - &comment("get and push parameter $p3"); - if ($enc_off != $p3) - { &mov("eax", &wparam($p3)); &push("eax"); } - else { &push("ecx"); } - } - if ($p2 > 0) - { - &comment("get and push parameter $p2"); - if ($enc_off != $p2) - { &mov("eax", &wparam($p2)); &push("eax"); } - else { &push("ecx"); } - } - if ($p1 > 0) - { - &comment("get and push parameter $p1"); - if ($enc_off != $p1) - { &mov("eax", &wparam($p1)); &push("eax"); } - else { &push("ecx"); } - } - &push("ebx"); # push data/iv - - &cmp("ecx",0); - &jz(&label("decrypt")); - - &and($count,0xfffffff8); - &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0] - &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1] - - &jz(&label("encrypt_finish")); - - ############################################################# - - &set_label("encrypt_loop"); - # encrypt start - # "eax" and "ebx" hold iv (or the last cipher text) - - &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes - &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes - - &xor("eax", "ecx"); - &xor("ebx", "edx"); - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call - &mov(&DWP($data_off+4,"esp","",0), "ebx"); # - - &call($enc_func); - - &mov("eax", &DWP($data_off,"esp","",0)); - &mov("ebx", &DWP($data_off+4,"esp","",0)); - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP(0,$out,"",0),"eax"); - &mov(&DWP(4,$out,"",0),"ebx"); - - # eax and ebx are the next iv. - - &add($in, 8); - &add($out, 8); - - &sub($count, 8); - &jnz(&label("encrypt_loop")); - -###################################################################3 - &set_label("encrypt_finish"); - &mov($count, &wparam(2)); # length - &and($count, 7); - &jz(&label("finish")); - &call(&label("PIC_point")); -&set_label("PIC_point"); - &blindpop("edx"); - &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); - &mov($count,&DWP(0,"ecx",$count,4)); - &add($count,"edx"); - &xor("ecx","ecx"); - &xor("edx","edx"); - #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); - &jmp_ptr($count); - -&set_label("ej7"); - &movb(&HB("edx"), &BP(6,$in,"",0)); - &shl("edx",8); -&set_label("ej6"); - &movb(&HB("edx"), &BP(5,$in,"",0)); -&set_label("ej5"); - &movb(&LB("edx"), &BP(4,$in,"",0)); -&set_label("ej4"); - &mov("ecx", &DWP(0,$in,"",0)); - &jmp(&label("ejend")); -&set_label("ej3"); - &movb(&HB("ecx"), &BP(2,$in,"",0)); - &shl("ecx",8); -&set_label("ej2"); - &movb(&HB("ecx"), &BP(1,$in,"",0)); -&set_label("ej1"); - &movb(&LB("ecx"), &BP(0,$in,"",0)); -&set_label("ejend"); - - &xor("eax", "ecx"); - &xor("ebx", "edx"); - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call - &mov(&DWP($data_off+4,"esp","",0), "ebx"); # - - &call($enc_func); - - &mov("eax", &DWP($data_off,"esp","",0)); - &mov("ebx", &DWP($data_off+4,"esp","",0)); - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP(0,$out,"",0),"eax"); - &mov(&DWP(4,$out,"",0),"ebx"); - - &jmp(&label("finish")); - - ############################################################# - ############################################################# - &set_label("decrypt",1); - # decrypt start - &and($count,0xfffffff8); - # The next 2 instructions are only for if the jz is taken - &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0] - &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1] - &jz(&label("decrypt_finish")); - - &set_label("decrypt_loop"); - &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes - &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP($data_off,"esp","",0), "eax"); # put back - &mov(&DWP($data_off+4,"esp","",0), "ebx"); # - - &call($dec_func); - - &mov("eax", &DWP($data_off,"esp","",0)); # get return - &mov("ebx", &DWP($data_off+4,"esp","",0)); # - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] - &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] - - &xor("ecx", "eax"); - &xor("edx", "ebx"); - - &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, - &mov("ebx", &DWP(4,$in,"",0)); # next iv actually - - &mov(&DWP(0,$out,"",0),"ecx"); - &mov(&DWP(4,$out,"",0),"edx"); - - &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv - &mov(&DWP($data_off+12,"esp","",0), "ebx"); # - - &add($in, 8); - &add($out, 8); - - &sub($count, 8); - &jnz(&label("decrypt_loop")); -############################ ENDIT #######################3 - &set_label("decrypt_finish"); - &mov($count, &wparam(2)); # length - &and($count, 7); - &jz(&label("finish")); - - &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes - &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov(&DWP($data_off,"esp","",0), "eax"); # put back - &mov(&DWP($data_off+4,"esp","",0), "ebx"); # - - &call($dec_func); - - &mov("eax", &DWP($data_off,"esp","",0)); # get return - &mov("ebx", &DWP($data_off+4,"esp","",0)); # - - &bswap("eax") if $swap; - &bswap("ebx") if $swap; - - &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] - &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] - - &xor("ecx", "eax"); - &xor("edx", "ebx"); - - # this is for when we exit - &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, - &mov("ebx", &DWP(4,$in,"",0)); # next iv actually - -&set_label("dj7"); - &rotr("edx", 16); - &movb(&BP(6,$out,"",0), &LB("edx")); - &shr("edx",16); -&set_label("dj6"); - &movb(&BP(5,$out,"",0), &HB("edx")); -&set_label("dj5"); - &movb(&BP(4,$out,"",0), &LB("edx")); -&set_label("dj4"); - &mov(&DWP(0,$out,"",0), "ecx"); - &jmp(&label("djend")); -&set_label("dj3"); - &rotr("ecx", 16); - &movb(&BP(2,$out,"",0), &LB("ecx")); - &shl("ecx",16); -&set_label("dj2"); - &movb(&BP(1,$in,"",0), &HB("ecx")); -&set_label("dj1"); - &movb(&BP(0,$in,"",0), &LB("ecx")); -&set_label("djend"); - - # final iv is still in eax:ebx - &jmp(&label("finish")); - - -############################ FINISH #######################3 - &set_label("finish",1); - &mov("ecx", &wparam($iv_off)); # Get iv ptr - - ################################################# - $total=16+4; - $total+=4 if ($p1 > 0); - $total+=4 if ($p2 > 0); - $total+=4 if ($p3 > 0); - &add("esp",$total); - - &mov(&DWP(0,"ecx","",0), "eax"); # save iv - &mov(&DWP(4,"ecx","",0), "ebx"); # save iv - - &function_end_A($name); - - &align(64); - &set_label("cbc_enc_jmp_table"); - &data_word("0"); - &data_word(&label("ej1")."-".&label("PIC_point")); - &data_word(&label("ej2")."-".&label("PIC_point")); - &data_word(&label("ej3")."-".&label("PIC_point")); - &data_word(&label("ej4")."-".&label("PIC_point")); - &data_word(&label("ej5")."-".&label("PIC_point")); - &data_word(&label("ej6")."-".&label("PIC_point")); - &data_word(&label("ej7")."-".&label("PIC_point")); - # not used - #&set_label("cbc_dec_jmp_table",1); - #&data_word("0"); - #&data_word(&label("dj1")."-".&label("PIC_point")); - #&data_word(&label("dj2")."-".&label("PIC_point")); - #&data_word(&label("dj3")."-".&label("PIC_point")); - #&data_word(&label("dj4")."-".&label("PIC_point")); - #&data_word(&label("dj5")."-".&label("PIC_point")); - #&data_word(&label("dj6")."-".&label("PIC_point")); - #&data_word(&label("dj7")."-".&label("PIC_point")); - &align(64); - - &function_end_B($name); - - } - -1; diff --git a/drivers/builtin_openssl2/crypto/perlasm/ppc-xlate.pl b/drivers/builtin_openssl2/crypto/perlasm/ppc-xlate.pl deleted file mode 100755 index a3edd982b6..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/ppc-xlate.pl +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env perl - -# PowerPC assembler distiller by . - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my $dotinlocallabels=($flavour=~/linux/)?1:0; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $ret; - - $name =~ s|^[\.\_]||; - - SWITCH: for ($flavour) { - /aix/ && do { $name = ".$name"; - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux.*32/ && do { $ret .= ".globl $name\n"; - $ret .= ".type $name,\@function"; - last; - }; - /linux.*64/ && do { $ret .= ".globl $name\n"; - $ret .= ".type $name,\@function\n"; - $ret .= ".section \".opd\",\"aw\"\n"; - $ret .= ".align 3\n"; - $ret .= "$name:\n"; - $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; - $ret .= ".size $name,24\n"; - $ret .= ".previous\n"; - - $name = ".$name"; - last; - }; - } - - $ret = ".globl $name" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - ($flavour =~ /aix/) ? ".csect" : ".text"; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux.*32/) - { shift; - ".size " . join(",",@_); - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } - else - { ""; } -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. -my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|(^[\.\w]+)\:\s*||; - my $label = $1; - printf "%s:",($GLOBALS{$label} or $label) if ($label); - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/perlasm/x86_64-xlate.pl b/drivers/builtin_openssl2/crypto/perlasm/x86_64-xlate.pl deleted file mode 100755 index b262b8dae9..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/x86_64-xlate.pl +++ /dev/null @@ -1,1080 +0,0 @@ -#!/usr/bin/env perl - -# Ascetic x86_64 AT&T to MASM/NASM assembler translator by . -# -# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T -# format is way easier to parse. Because it's simpler to "gear" from -# Unix ABI to Windows one [see cross-reference "card" at the end of -# file]. Because Linux targets were available first... -# -# In addition the script also "distills" code suitable for GNU -# assembler, so that it can be compiled with more rigid assemblers, -# such as Solaris /usr/ccs/bin/as. -# -# This translator is not designed to convert *arbitrary* assembler -# code from AT&T format to MASM one. It's designed to convert just -# enough to provide for dual-ABI OpenSSL modules development... -# There *are* limitations and you might have to modify your assembler -# code or this script to achieve the desired result... -# -# Currently recognized limitations: -# -# - can't use multiple ops per line; -# -# Dual-ABI styling rules. -# -# 1. Adhere to Unix register and stack layout [see cross-reference -# ABI "card" at the end for explanation]. -# 2. Forget about "red zone," stick to more traditional blended -# stack frame allocation. If volatile storage is actually required -# that is. If not, just leave the stack as is. -# 3. Functions tagged with ".type name,@function" get crafted with -# unified Win64 prologue and epilogue automatically. If you want -# to take care of ABI differences yourself, tag functions as -# ".type name,@abi-omnipotent" instead. -# 4. To optimize the Win64 prologue you can specify number of input -# arguments as ".type name,@function,N." Keep in mind that if N is -# larger than 6, then you *have to* write "abi-omnipotent" code, -# because >6 cases can't be addressed with unified prologue. -# 5. Name local labels as .L*, do *not* use dynamic labels such as 1: -# (sorry about latter). -# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is -# required to identify the spots, where to inject Win64 epilogue! -# But on the pros, it's then prefixed with rep automatically:-) -# 7. Stick to explicit ip-relative addressing. If you have to use -# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??. -# Both are recognized and translated to proper Win64 addressing -# modes. To support legacy code a synthetic directive, .picmeup, -# is implemented. It puts address of the *next* instruction into -# target register, e.g.: -# -# .picmeup %rax -# lea .Label-.(%rax),%rax -# -# 8. In order to provide for structured exception handling unified -# Win64 prologue copies %rsp value to %rax. For further details -# see SEH paragraph at the end. -# 9. .init segment is allowed to contain calls to functions only. -# a. If function accepts more than 4 arguments *and* >4th argument -# is declared as non 64-bit value, do clear its upper part. - -my $flavour = shift; -my $output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -open STDOUT,">$output" || die "can't open $output: $!" - if (defined($output)); - -my $gas=1; $gas=0 if ($output =~ /\.asm$/); -my $elf=1; $elf=0 if (!$gas); -my $win64=0; -my $prefix=""; -my $decor=".L"; - -my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005 -my $masm=0; -my $PTR=" PTR"; - -my $nasmref=2.03; -my $nasm=0; - -if ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1; - $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`; - chomp($prefix); - } -elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; } -elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; } -elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; } -elsif (!$gas) -{ if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i) - { $nasm = $1 + $2*0.01; $PTR=""; } - elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) - { $masm = $1 + $2*2**-16 + $4*2**-32; } - die "no assembler found on %PATH" if (!($nasm || $masm)); - $win64=1; - $elf=0; - $decor="\$L\$"; -} - -my $current_segment; -my $current_function; -my %globals; - -{ package opcode; # pick up opcodes - sub re { - my $self = shift; # single instance in enough... - local *line = shift; - undef $ret; - - if ($line =~ /^([a-z][a-z0-9]*)/i) { - $self->{op} = $1; - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - - undef $self->{sz}; - if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain... - $self->{op} = $1; - $self->{sz} = $2; - } elsif ($self->{op} =~ /call|jmp/) { - $self->{sz} = ""; - } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn - $self->{sz} = ""; - } elsif ($self->{op} =~ /^v/) { # VEX - $self->{sz} = ""; - } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { - $self->{sz} = ""; - } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { - $self->{op} = $1; - $self->{sz} = $2; - } - } - $ret; - } - sub size { - my $self = shift; - my $sz = shift; - $self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); - $self->{sz}; - } - sub out { - my $self = shift; - if ($gas) { - if ($self->{op} eq "movz") { # movz is pain... - sprintf "%s%s%s",$self->{op},$self->{sz},shift; - } elsif ($self->{op} =~ /^set/) { - "$self->{op}"; - } elsif ($self->{op} eq "ret") { - my $epilogue = ""; - if ($win64 && $current_function->{abi} eq "svr4") { - $epilogue = "movq 8(%rsp),%rdi\n\t" . - "movq 16(%rsp),%rsi\n\t"; - } - $epilogue . ".byte 0xf3,0xc3"; - } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") { - ".p2align\t3\n\t.quad"; - } else { - "$self->{op}$self->{sz}"; - } - } else { - $self->{op} =~ s/^movz/movzx/; - if ($self->{op} eq "ret") { - $self->{op} = ""; - if ($win64 && $current_function->{abi} eq "svr4") { - $self->{op} = "mov rdi,QWORD${PTR}[8+rsp]\t;WIN64 epilogue\n\t". - "mov rsi,QWORD${PTR}[16+rsp]\n\t"; - } - $self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; - } elsif ($self->{op} =~ /^(pop|push)f/) { - $self->{op} .= $self->{sz}; - } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") { - $self->{op} = "\tDQ"; - } - $self->{op}; - } - } - sub mnemonic { - my $self=shift; - my $op=shift; - $self->{op}=$op if (defined($op)); - $self->{op}; - } -} -{ package const; # pick up constants, which start with $ - sub re { - my $self = shift; # single instance in enough... - local *line = shift; - undef $ret; - - if ($line =~ /^\$([^,]+)/) { - $self->{value} = $1; - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - } - $ret; - } - sub out { - my $self = shift; - - if ($gas) { - # Solaris /usr/ccs/bin/as can't handle multiplications - # in $self->{value} - $self->{value} =~ s/(?{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; - sprintf "\$%s",$self->{value}; - } else { - $self->{value} =~ s/(0b[0-1]+)/oct($1)/eig; - $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm); - sprintf "%s",$self->{value}; - } - } -} -{ package ea; # pick up effective addresses: expr(%reg,%reg,scale) - sub re { - my $self = shift; # single instance in enough... - local *line = shift; - undef $ret; - - # optional * ---vvv--- appears in indirect jmp/call - if ($line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) { - $self->{asterisk} = $1; - $self->{label} = $2; - ($self->{base},$self->{index},$self->{scale})=split(/,/,$3); - $self->{scale} = 1 if (!defined($self->{scale})); - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - - if ($win64 && $self->{label} =~ s/\@GOTPCREL//) { - die if (opcode->mnemonic() ne "mov"); - opcode->mnemonic("lea"); - } - $self->{base} =~ s/^%//; - $self->{index} =~ s/^%// if (defined($self->{index})); - } - $ret; - } - sub size {} - sub out { - my $self = shift; - my $sz = shift; - - $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; - $self->{label} =~ s/\.L/$decor/g; - - # Silently convert all EAs to 64-bit. This is required for - # elder GNU assembler and results in more compact code, - # *but* most importantly AES module depends on this feature! - $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; - $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; - - # Solaris /usr/ccs/bin/as can't handle multiplications - # in $self->{label}, new gas requires sign extension... - use integer; - $self->{label} =~ s/(?{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; - $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; - - if ($gas) { - $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); - - if (defined($self->{index})) { - sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk}, - $self->{label}, - $self->{base}?"%$self->{base}":"", - $self->{index},$self->{scale}; - } else { - sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; - } - } else { - %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", - q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR" ); - - $self->{label} =~ s/\./\$/g; - $self->{label} =~ s/(?{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); - $sz="q" if ($self->{asterisk} || opcode->mnemonic() eq "movq"); - $sz="l" if (opcode->mnemonic() eq "movd"); - - if (defined($self->{index})) { - sprintf "%s[%s%s*%d%s]",$szmap{$sz}, - $self->{label}?"$self->{label}+":"", - $self->{index},$self->{scale}, - $self->{base}?"+$self->{base}":""; - } elsif ($self->{base} eq "rip") { - sprintf "%s[%s]",$szmap{$sz},$self->{label}; - } else { - sprintf "%s[%s%s]",$szmap{$sz}, - $self->{label}?"$self->{label}+":"", - $self->{base}; - } - } - } -} -{ package register; # pick up registers, which start with %. - sub re { - my $class = shift; # muliple instances... - my $self = {}; - local *line = shift; - undef $ret; - - # optional * ---vvv--- appears in indirect jmp/call - if ($line =~ /^(\*?)%(\w+)/) { - bless $self,$class; - $self->{asterisk} = $1; - $self->{value} = $2; - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - } - $ret; - } - sub size { - my $self = shift; - undef $ret; - - if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; } - elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; } - elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; } - elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; } - elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } - elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; } - elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; } - elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } - - $ret; - } - sub out { - my $self = shift; - if ($gas) { sprintf "%s%%%s",$self->{asterisk},$self->{value}; } - else { $self->{value}; } - } -} -{ package label; # pick up labels, which end with : - sub re { - my $self = shift; # single instance is enough... - local *line = shift; - undef $ret; - - if ($line =~ /(^[\.\w]+)\:/) { - $self->{value} = $1; - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - - $self->{value} =~ s/^\.L/$decor/; - } - $ret; - } - sub out { - my $self = shift; - - if ($gas) { - my $func = ($globals{$self->{value}} or $self->{value}) . ":"; - if ($win64 && - $current_function->{name} eq $self->{value} && - $current_function->{abi} eq "svr4") { - $func .= "\n"; - $func .= " movq %rdi,8(%rsp)\n"; - $func .= " movq %rsi,16(%rsp)\n"; - $func .= " movq %rsp,%rax\n"; - $func .= "${decor}SEH_begin_$current_function->{name}:\n"; - my $narg = $current_function->{narg}; - $narg=6 if (!defined($narg)); - $func .= " movq %rcx,%rdi\n" if ($narg>0); - $func .= " movq %rdx,%rsi\n" if ($narg>1); - $func .= " movq %r8,%rdx\n" if ($narg>2); - $func .= " movq %r9,%rcx\n" if ($narg>3); - $func .= " movq 40(%rsp),%r8\n" if ($narg>4); - $func .= " movq 48(%rsp),%r9\n" if ($narg>5); - } - $func; - } elsif ($self->{value} ne "$current_function->{name}") { - $self->{value} .= ":" if ($masm && $ret!~m/^\$/); - $self->{value} . ":"; - } elsif ($win64 && $current_function->{abi} eq "svr4") { - my $func = "$current_function->{name}" . - ($nasm ? ":" : "\tPROC $current_function->{scope}") . - "\n"; - $func .= " mov QWORD${PTR}[8+rsp],rdi\t;WIN64 prologue\n"; - $func .= " mov QWORD${PTR}[16+rsp],rsi\n"; - $func .= " mov rax,rsp\n"; - $func .= "${decor}SEH_begin_$current_function->{name}:"; - $func .= ":" if ($masm); - $func .= "\n"; - my $narg = $current_function->{narg}; - $narg=6 if (!defined($narg)); - $func .= " mov rdi,rcx\n" if ($narg>0); - $func .= " mov rsi,rdx\n" if ($narg>1); - $func .= " mov rdx,r8\n" if ($narg>2); - $func .= " mov rcx,r9\n" if ($narg>3); - $func .= " mov r8,QWORD${PTR}[40+rsp]\n" if ($narg>4); - $func .= " mov r9,QWORD${PTR}[48+rsp]\n" if ($narg>5); - $func .= "\n"; - } else { - "$current_function->{name}". - ($nasm ? ":" : "\tPROC $current_function->{scope}"); - } - } -} -{ package expr; # pick up expressioins - sub re { - my $self = shift; # single instance is enough... - local *line = shift; - undef $ret; - - if ($line =~ /(^[^,]+)/) { - $self->{value} = $1; - $ret = $self; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - - $self->{value} =~ s/\@PLT// if (!$elf); - $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; - $self->{value} =~ s/\.L/$decor/g; - } - $ret; - } - sub out { - my $self = shift; - if ($nasm && opcode->mnemonic()=~m/^j/) { - "NEAR ".$self->{value}; - } else { - $self->{value}; - } - } -} -{ package directive; # pick up directives, which start with . - sub re { - my $self = shift; # single instance is enough... - local *line = shift; - undef $ret; - my $dir; - my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2: - ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48, - "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48, - "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48, - "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48, - "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c, - "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c, - "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c, - "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c ); - - if ($line =~ /^\s*(\.\w+)/) { - $dir = $1; - $ret = $self; - undef $self->{value}; - $line = substr($line,@+[0]); $line =~ s/^\s+//; - - SWITCH: for ($dir) { - /\.picmeup/ && do { if ($line =~ /(%r[\w]+)/i) { - $dir="\t.long"; - $line=sprintf "0x%x,0x90000000",$opcode{$1}; - } - last; - }; - /\.global|\.globl|\.extern/ - && do { $globals{$line} = $prefix . $line; - $line = $globals{$line} if ($prefix); - last; - }; - /\.type/ && do { ($sym,$type,$narg) = split(',',$line); - if ($type eq "\@function") { - undef $current_function; - $current_function->{name} = $sym; - $current_function->{abi} = "svr4"; - $current_function->{narg} = $narg; - $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; - } elsif ($type eq "\@abi-omnipotent") { - undef $current_function; - $current_function->{name} = $sym; - $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; - } - $line =~ s/\@abi\-omnipotent/\@function/; - $line =~ s/\@function.*/\@function/; - last; - }; - /\.asciz/ && do { if ($line =~ /^"(.*)"$/) { - $dir = ".byte"; - $line = join(",",unpack("C*",$1),0); - } - last; - }; - /\.rva|\.long|\.quad/ - && do { $line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; - $line =~ s/\.L/$decor/g; - last; - }; - } - - if ($gas) { - $self->{value} = $dir . "\t" . $line; - - if ($dir =~ /\.extern/) { - $self->{value} = ""; # swallow extern - } elsif (!$elf && $dir =~ /\.type/) { - $self->{value} = ""; - $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" . - (defined($globals{$1})?".scl 2;":".scl 3;") . - "\t.type 32;\t.endef" - if ($win64 && $line =~ /([^,]+),\@function/); - } elsif (!$elf && $dir =~ /\.size/) { - $self->{value} = ""; - if (defined($current_function)) { - $self->{value} .= "${decor}SEH_end_$current_function->{name}:" - if ($win64 && $current_function->{abi} eq "svr4"); - undef $current_function; - } - } elsif (!$elf && $dir =~ /\.align/) { - $self->{value} = ".p2align\t" . (log($line)/log(2)); - } elsif ($dir eq ".section") { - $current_segment=$line; - if (!$elf && $current_segment eq ".init") { - if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; } - elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; } - } - } elsif ($dir =~ /\.(text|data)/) { - $current_segment=".$1"; - } elsif ($dir =~ /\.hidden/) { - if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$line"; } - elsif ($flavour eq "mingw64") { $self->{value} = ""; } - } elsif ($dir =~ /\.comm/) { - $self->{value} = "$dir\t$prefix$line"; - $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx"); - } - $line = ""; - return $self; - } - - # non-gas case or nasm/masm - SWITCH: for ($dir) { - /\.text/ && do { my $v=undef; - if ($nasm) { - $v="section .text code align=64\n"; - } else { - $v="$current_segment\tENDS\n" if ($current_segment); - $current_segment = ".text\$"; - $v.="$current_segment\tSEGMENT "; - $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE"; - $v.=" 'CODE'"; - } - $self->{value} = $v; - last; - }; - /\.data/ && do { my $v=undef; - if ($nasm) { - $v="section .data data align=8\n"; - } else { - $v="$current_segment\tENDS\n" if ($current_segment); - $current_segment = "_DATA"; - $v.="$current_segment\tSEGMENT"; - } - $self->{value} = $v; - last; - }; - /\.section/ && do { my $v=undef; - $line =~ s/([^,]*).*/$1/; - $line = ".CRT\$XCU" if ($line eq ".init"); - if ($nasm) { - $v="section $line"; - if ($line=~/\.([px])data/) { - $v.=" rdata align="; - $v.=$1 eq "p"? 4 : 8; - } elsif ($line=~/\.CRT\$/i) { - $v.=" rdata align=8"; - } - } else { - $v="$current_segment\tENDS\n" if ($current_segment); - $v.="$line\tSEGMENT"; - if ($line=~/\.([px])data/) { - $v.=" READONLY"; - $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); - } elsif ($line=~/\.CRT\$/i) { - $v.=" READONLY "; - $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD"; - } - } - $current_segment = $line; - $self->{value} = $v; - last; - }; - /\.extern/ && do { $self->{value} = "EXTERN\t".$line; - $self->{value} .= ":NEAR" if ($masm); - last; - }; - /\.globl|.global/ - && do { $self->{value} = $masm?"PUBLIC":"global"; - $self->{value} .= "\t".$line; - last; - }; - /\.size/ && do { if (defined($current_function)) { - undef $self->{value}; - if ($current_function->{abi} eq "svr4") { - $self->{value}="${decor}SEH_end_$current_function->{name}:"; - $self->{value}.=":\n" if($masm); - } - $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name}); - undef $current_function; - } - last; - }; - /\.align/ && do { $self->{value} = "ALIGN\t".$line; last; }; - /\.(value|long|rva|quad)/ - && do { my $sz = substr($1,0,1); - my @arr = split(/,\s*/,$line); - my $last = pop(@arr); - my $conv = sub { my $var=shift; - $var=~s/^(0b[0-1]+)/oct($1)/eig; - $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm); - if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva")) - { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; } - $var; - }; - - $sz =~ tr/bvlrq/BWDDQ/; - $self->{value} = "\tD$sz\t"; - for (@arr) { $self->{value} .= &$conv($_).","; } - $self->{value} .= &$conv($last); - last; - }; - /\.byte/ && do { my @str=split(/,\s*/,$line); - map(s/(0b[0-1]+)/oct($1)/eig,@str); - map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm); - while ($#str>15) { - $self->{value}.="DB\t" - .join(",",@str[0..15])."\n"; - foreach (0..15) { shift @str; } - } - $self->{value}.="DB\t" - .join(",",@str) if (@str); - last; - }; - /\.comm/ && do { my @str=split(/,\s*/,$line); - my $v=undef; - if ($nasm) { - $v.="common $prefix@str[0] @str[1]"; - } else { - $v="$current_segment\tENDS\n" if ($current_segment); - $current_segment = "_DATA"; - $v.="$current_segment\tSEGMENT\n"; - $v.="COMM @str[0]:DWORD:".@str[1]/4; - } - $self->{value} = $v; - last; - }; - } - $line = ""; - } - - $ret; - } - sub out { - my $self = shift; - $self->{value}; - } -} - -sub rex { - local *opcode=shift; - my ($dst,$src,$rex)=@_; - - $rex|=0x04 if($dst>=8); - $rex|=0x01 if($src>=8); - push @opcode,($rex|0x40) if ($rex); -} - -# older gas and ml64 don't handle SSE>2 instructions -my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3, - "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 ); - -my $movq = sub { # elderly gas can't handle inter-register movq - my $arg = shift; - my @opcode=(0x66); - if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) { - my ($src,$dst)=($1,$2); - if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } - rex(\@opcode,$src,$dst,0x8); - push @opcode,0x0f,0x7e; - push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M - @opcode; - } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) { - my ($src,$dst)=($2,$1); - if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } - rex(\@opcode,$src,$dst,0x8); - push @opcode,0x0f,0x6e; - push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M - @opcode; - } else { - (); - } -}; - -my $pextrd = sub { - if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) { - my @opcode=(0x66); - $imm=$1; - $src=$2; - $dst=$3; - if ($dst =~ /%r([0-9]+)d/) { $dst = $1; } - elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; } - rex(\@opcode,$src,$dst); - push @opcode,0x0f,0x3a,0x16; - push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M - push @opcode,$imm; - @opcode; - } else { - (); - } -}; - -my $pinsrd = sub { - if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) { - my @opcode=(0x66); - $imm=$1; - $src=$2; - $dst=$3; - if ($src =~ /%r([0-9]+)/) { $src = $1; } - elsif ($src =~ /%e/) { $src = $regrm{$src}; } - rex(\@opcode,$dst,$src); - push @opcode,0x0f,0x3a,0x22; - push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M - push @opcode,$imm; - @opcode; - } else { - (); - } -}; - -my $pshufb = sub { - if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { - my @opcode=(0x66); - rex(\@opcode,$2,$1); - push @opcode,0x0f,0x38,0x00; - push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M - @opcode; - } else { - (); - } -}; - -my $palignr = sub { - if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { - my @opcode=(0x66); - rex(\@opcode,$3,$2); - push @opcode,0x0f,0x3a,0x0f; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M - push @opcode,$1; - @opcode; - } else { - (); - } -}; - -my $pclmulqdq = sub { - if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { - my @opcode=(0x66); - rex(\@opcode,$3,$2); - push @opcode,0x0f,0x3a,0x44; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M - my $c=$1; - push @opcode,$c=~/^0/?oct($c):$c; - @opcode; - } else { - (); - } -}; - -my $rdrand = sub { - if (shift =~ /%[er](\w+)/) { - my @opcode=(); - my $dst=$1; - if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } - rex(\@opcode,0,$1,8); - push @opcode,0x0f,0xc7,0xf0|($dst&7); - @opcode; - } else { - (); - } -}; - -if ($nasm) { - print <<___; -default rel -%define XMMWORD -___ -} elsif ($masm) { - print <<___; -OPTION DOTNAME -___ -} -while($line=<>) { - - chomp($line); - - $line =~ s|[#!].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning - - undef $label; - undef $opcode; - undef @args; - - if ($label=label->re(\$line)) { print $label->out(); } - - if (directive->re(\$line)) { - printf "%s",directive->out(); - } elsif ($opcode=opcode->re(\$line)) { - my $asm = eval("\$".$opcode->mnemonic()); - undef @bytes; - - if ((ref($asm) eq 'CODE') && scalar(@bytes=&$asm($line))) { - print $gas?".byte\t":"DB\t",join(',',@bytes),"\n"; - next; - } - - ARGUMENT: while (1) { - my $arg; - - if ($arg=register->re(\$line)) { opcode->size($arg->size()); } - elsif ($arg=const->re(\$line)) { } - elsif ($arg=ea->re(\$line)) { } - elsif ($arg=expr->re(\$line)) { } - else { last ARGUMENT; } - - push @args,$arg; - - last ARGUMENT if ($line !~ /^,/); - - $line =~ s/^,\s*//; - } # ARGUMENT: - - if ($#args>=0) { - my $insn; - my $sz=opcode->size(); - - if ($gas) { - $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); - @args = map($_->out($sz),@args); - printf "\t%s\t%s",$insn,join(",",@args); - } else { - $insn = $opcode->out(); - foreach (@args) { - my $arg = $_->out(); - # $insn.=$sz compensates for movq, pinsrw, ... - if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } - if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; } - } - @args = reverse(@args); - undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); - printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); - } - } else { - printf "\t%s",$opcode->out(); - } - } - - print $line,"\n"; -} - -print "\n$current_segment\tENDS\n" if ($current_segment && $masm); -print "END\n" if ($masm); - -close STDOUT; - - ################################################# -# Cross-reference x86_64 ABI "card" -# -# Unix Win64 -# %rax * * -# %rbx - - -# %rcx #4 #1 -# %rdx #3 #2 -# %rsi #2 - -# %rdi #1 - -# %rbp - - -# %rsp - - -# %r8 #5 #3 -# %r9 #6 #4 -# %r10 * * -# %r11 * * -# %r12 - - -# %r13 - - -# %r14 - - -# %r15 - - -# -# (*) volatile register -# (-) preserved by callee -# (#) Nth argument, volatile -# -# In Unix terms top of stack is argument transfer area for arguments -# which could not be accomodated in registers. Or in other words 7th -# [integer] argument resides at 8(%rsp) upon function entry point. -# 128 bytes above %rsp constitute a "red zone" which is not touched -# by signal handlers and can be used as temporal storage without -# allocating a frame. -# -# In Win64 terms N*8 bytes on top of stack is argument transfer area, -# which belongs to/can be overwritten by callee. N is the number of -# arguments passed to callee, *but* not less than 4! This means that -# upon function entry point 5th argument resides at 40(%rsp), as well -# as that 32 bytes from 8(%rsp) can always be used as temporal -# storage [without allocating a frame]. One can actually argue that -# one can assume a "red zone" above stack pointer under Win64 as well. -# Point is that at apparently no occasion Windows kernel would alter -# the area above user stack pointer in true asynchronous manner... -# -# All the above means that if assembler programmer adheres to Unix -# register and stack layout, but disregards the "red zone" existense, -# it's possible to use following prologue and epilogue to "gear" from -# Unix to Win64 ABI in leaf functions with not more than 6 arguments. -# -# omnipotent_function: -# ifdef WIN64 -# movq %rdi,8(%rsp) -# movq %rsi,16(%rsp) -# movq %rcx,%rdi ; if 1st argument is actually present -# movq %rdx,%rsi ; if 2nd argument is actually ... -# movq %r8,%rdx ; if 3rd argument is ... -# movq %r9,%rcx ; if 4th argument ... -# movq 40(%rsp),%r8 ; if 5th ... -# movq 48(%rsp),%r9 ; if 6th ... -# endif -# ... -# ifdef WIN64 -# movq 8(%rsp),%rdi -# movq 16(%rsp),%rsi -# endif -# ret -# - ################################################# -# Win64 SEH, Structured Exception Handling. -# -# Unlike on Unix systems(*) lack of Win64 stack unwinding information -# has undesired side-effect at run-time: if an exception is raised in -# assembler subroutine such as those in question (basically we're -# referring to segmentation violations caused by malformed input -# parameters), the application is briskly terminated without invoking -# any exception handlers, most notably without generating memory dump -# or any user notification whatsoever. This poses a problem. It's -# possible to address it by registering custom language-specific -# handler that would restore processor context to the state at -# subroutine entry point and return "exception is not handled, keep -# unwinding" code. Writing such handler can be a challenge... But it's -# doable, though requires certain coding convention. Consider following -# snippet: -# -# .type function,@function -# function: -# movq %rsp,%rax # copy rsp to volatile register -# pushq %r15 # save non-volatile registers -# pushq %rbx -# pushq %rbp -# movq %rsp,%r11 -# subq %rdi,%r11 # prepare [variable] stack frame -# andq $-64,%r11 -# movq %rax,0(%r11) # check for exceptions -# movq %r11,%rsp # allocate [variable] stack frame -# movq %rax,0(%rsp) # save original rsp value -# magic_point: -# ... -# movq 0(%rsp),%rcx # pull original rsp value -# movq -24(%rcx),%rbp # restore non-volatile registers -# movq -16(%rcx),%rbx -# movq -8(%rcx),%r15 -# movq %rcx,%rsp # restore original rsp -# ret -# .size function,.-function -# -# The key is that up to magic_point copy of original rsp value remains -# in chosen volatile register and no non-volatile register, except for -# rsp, is modified. While past magic_point rsp remains constant till -# the very end of the function. In this case custom language-specific -# exception handler would look like this: -# -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -# { ULONG64 *rsp = (ULONG64 *)context->Rax; -# if (context->Rip >= magic_point) -# { rsp = ((ULONG64 **)context->Rsp)[0]; -# context->Rbp = rsp[-3]; -# context->Rbx = rsp[-2]; -# context->R15 = rsp[-1]; -# } -# context->Rsp = (ULONG64)rsp; -# context->Rdi = rsp[1]; -# context->Rsi = rsp[2]; -# -# memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); -# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, -# dips->ControlPc,disp->FunctionEntry,disp->ContextRecord, -# &disp->HandlerData,&disp->EstablisherFrame,NULL); -# return ExceptionContinueSearch; -# } -# -# It's appropriate to implement this handler in assembler, directly in -# function's module. In order to do that one has to know members' -# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant -# values. Here they are: -# -# CONTEXT.Rax 120 -# CONTEXT.Rcx 128 -# CONTEXT.Rdx 136 -# CONTEXT.Rbx 144 -# CONTEXT.Rsp 152 -# CONTEXT.Rbp 160 -# CONTEXT.Rsi 168 -# CONTEXT.Rdi 176 -# CONTEXT.R8 184 -# CONTEXT.R9 192 -# CONTEXT.R10 200 -# CONTEXT.R11 208 -# CONTEXT.R12 216 -# CONTEXT.R13 224 -# CONTEXT.R14 232 -# CONTEXT.R15 240 -# CONTEXT.Rip 248 -# CONTEXT.Xmm6 512 -# sizeof(CONTEXT) 1232 -# DISPATCHER_CONTEXT.ControlPc 0 -# DISPATCHER_CONTEXT.ImageBase 8 -# DISPATCHER_CONTEXT.FunctionEntry 16 -# DISPATCHER_CONTEXT.EstablisherFrame 24 -# DISPATCHER_CONTEXT.TargetIp 32 -# DISPATCHER_CONTEXT.ContextRecord 40 -# DISPATCHER_CONTEXT.LanguageHandler 48 -# DISPATCHER_CONTEXT.HandlerData 56 -# UNW_FLAG_NHANDLER 0 -# ExceptionContinueSearch 1 -# -# In order to tie the handler to the function one has to compose -# couple of structures: one for .xdata segment and one for .pdata. -# -# UNWIND_INFO structure for .xdata segment would be -# -# function_unwind_info: -# .byte 9,0,0,0 -# .rva handler -# -# This structure designates exception handler for a function with -# zero-length prologue, no stack frame or frame register. -# -# To facilitate composing of .pdata structures, auto-generated "gear" -# prologue copies rsp value to rax and denotes next instruction with -# .LSEH_begin_{function_name} label. This essentially defines the SEH -# styling rule mentioned in the beginning. Position of this label is -# chosen in such manner that possible exceptions raised in the "gear" -# prologue would be accounted to caller and unwound from latter's frame. -# End of function is marked with respective .LSEH_end_{function_name} -# label. To summarize, .pdata segment would contain -# -# .rva .LSEH_begin_function -# .rva .LSEH_end_function -# .rva function_unwind_info -# -# Reference to functon_unwind_info from .xdata segment is the anchor. -# In case you wonder why references are 32-bit .rvas and not 64-bit -# .quads. References put into these two segments are required to be -# *relative* to the base address of the current binary module, a.k.a. -# image base. No Win64 module, be it .exe or .dll, can be larger than -# 2GB and thus such relative references can be and are accommodated in -# 32 bits. -# -# Having reviewed the example function code, one can argue that "movq -# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix -# rax would contain an undefined value. If this "offends" you, use -# another register and refrain from modifying rax till magic_point is -# reached, i.e. as if it was a non-volatile register. If more registers -# are required prior [variable] frame setup is completed, note that -# nobody says that you can have only one "magic point." You can -# "liberate" non-volatile registers by denoting last stack off-load -# instruction and reflecting it in finer grade unwind logic in handler. -# After all, isn't it why it's called *language-specific* handler... -# -# Attentive reader can notice that exceptions would be mishandled in -# auto-generated "gear" epilogue. Well, exception effectively can't -# occur there, because if memory area used by it was subject to -# segmentation violation, then it would be raised upon call to the -# function (and as already mentioned be accounted to caller, which is -# not a problem). If you're still not comfortable, then define tail -# "magic point" just prior ret instruction and have handler treat it... -# -# (*) Note that we're talking about run-time, not debug-time. Lack of -# unwind information makes debugging hard on both Windows and -# Unix. "Unlike" referes to the fact that on Unix signal handler -# will always be invoked, core dumped and appropriate exit code -# returned to parent (for user notification). diff --git a/drivers/builtin_openssl2/crypto/perlasm/x86asm.pl b/drivers/builtin_openssl2/crypto/perlasm/x86asm.pl deleted file mode 100644 index eb543db2f6..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/x86asm.pl +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env perl - -# require 'x86asm.pl'; -# &asm_init(,"des-586.pl"[,$i386only]); -# &function_begin("foo"); -# ... -# &function_end("foo"); -# &asm_finish - -$out=(); -$i386=0; - -# AUTOLOAD is this context has quite unpleasant side effect, namely -# that typos in function calls effectively go to assembler output, -# but on the pros side we don't have to implement one subroutine per -# each opcode... -sub ::AUTOLOAD -{ my $opcode = $AUTOLOAD; - - die "more than 4 arguments passed to $opcode" if ($#_>3); - - $opcode =~ s/.*:://; - if ($opcode =~ /^push/) { $stack+=4; } - elsif ($opcode =~ /^pop/) { $stack-=4; } - - &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD"; -} - -sub ::emit -{ my $opcode=shift; - - if ($#_==-1) { push(@out,"\t$opcode\n"); } - else { push(@out,"\t$opcode\t".join(',',@_)."\n"); } -} - -sub ::LB -{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'"; - $1."l"; -} -sub ::HB -{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'"; - $1."h"; -} -sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num); } -sub ::stack_pop { my $num=$_[0]*4; $stack-=$num; &add("esp",$num); } -sub ::blindpop { &pop($_[0]); $stack+=4; } -sub ::wparam { &DWP($stack+4*$_[0],"esp"); } -sub ::swtmp { &DWP(4*$_[0],"esp"); } - -sub ::bswap -{ if ($i386) # emulate bswap for i386 - { &comment("bswap @_"); - &xchg(&HB(@_),&LB(@_)); - &ror (@_,16); - &xchg(&HB(@_),&LB(@_)); - } - else - { &generic("bswap",@_); } -} -# These are made-up opcodes introduced over the years essentially -# by ignorance, just alias them to real ones... -sub ::movb { &mov(@_); } -sub ::xorb { &xor(@_); } -sub ::rotl { &rol(@_); } -sub ::rotr { &ror(@_); } -sub ::exch { &xchg(@_); } -sub ::halt { &hlt; } -sub ::movz { &movzx(@_); } -sub ::pushf { &pushfd; } -sub ::popf { &popfd; } - -# 3 argument instructions -sub ::movq -{ my($p1,$p2,$optimize)=@_; - - if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/) - # movq between mmx registers can sink Intel CPUs - { &::pshufw($p1,$p2,0xe4); } - else - { &::generic("movq",@_); } -} - -# SSE>2 instructions -my %regrm = ( "eax"=>0, "ecx"=>1, "edx"=>2, "ebx"=>3, - "esp"=>4, "ebp"=>5, "esi"=>6, "edi"=>7 ); -sub ::pextrd -{ my($dst,$src,$imm)=@_; - if ("$dst:$src" =~ /(e[a-dsd][ixp]):xmm([0-7])/) - { &::data_byte(0x66,0x0f,0x3a,0x16,0xc0|($2<<3)|$regrm{$1},$imm); } - else - { &::generic("pextrd",@_); } -} - -sub ::pinsrd -{ my($dst,$src,$imm)=@_; - if ("$dst:$src" =~ /xmm([0-7]):(e[a-dsd][ixp])/) - { &::data_byte(0x66,0x0f,0x3a,0x22,0xc0|($1<<3)|$regrm{$2},$imm); } - else - { &::generic("pinsrd",@_); } -} - -sub ::pshufb -{ my($dst,$src)=@_; - if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) - { &data_byte(0x66,0x0f,0x38,0x00,0xc0|($1<<3)|$2); } - else - { &::generic("pshufb",@_); } -} - -sub ::palignr -{ my($dst,$src,$imm)=@_; - if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) - { &::data_byte(0x66,0x0f,0x3a,0x0f,0xc0|($1<<3)|$2,$imm); } - else - { &::generic("palignr",@_); } -} - -sub ::pclmulqdq -{ my($dst,$src,$imm)=@_; - if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) - { &::data_byte(0x66,0x0f,0x3a,0x44,0xc0|($1<<3)|$2,$imm); } - else - { &::generic("pclmulqdq",@_); } -} - -sub ::rdrand -{ my ($dst)=@_; - if ($dst =~ /(e[a-dsd][ixp])/) - { &::data_byte(0x0f,0xc7,0xf0|$regrm{$dst}); } - else - { &::generic("rdrand",@_); } -} - -# label management -$lbdecor="L"; # local label decoration, set by package -$label="000"; - -sub ::islabel # see is argument is a known label -{ my $i; - foreach $i (values %label) { return $i if ($i eq $_[0]); } - $label{$_[0]}; # can be undef -} - -sub ::label # instantiate a function-scope label -{ if (!defined($label{$_[0]})) - { $label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++; } - $label{$_[0]}; -} - -sub ::LABEL # instantiate a file-scope label -{ $label{$_[0]}=$_[1] if (!defined($label{$_[0]})); - $label{$_[0]}; -} - -sub ::static_label { &::LABEL($_[0],$lbdecor.$_[0]); } - -sub ::set_label_B { push(@out,"@_:\n"); } -sub ::set_label -{ my $label=&::label($_[0]); - &::align($_[1]) if ($_[1]>1); - &::set_label_B($label); - $label; -} - -sub ::wipe_labels # wipes function-scope labels -{ foreach $i (keys %label) - { delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/); } -} - -# subroutine management -sub ::function_begin -{ &function_begin_B(@_); - $stack=4; - &push("ebp"); - &push("ebx"); - &push("esi"); - &push("edi"); -} - -sub ::function_end -{ &pop("edi"); - &pop("esi"); - &pop("ebx"); - &pop("ebp"); - &ret(); - &function_end_B(@_); - $stack=0; - &wipe_labels(); -} - -sub ::function_end_A -{ &pop("edi"); - &pop("esi"); - &pop("ebx"); - &pop("ebp"); - &ret(); - $stack+=16; # readjust esp as if we didn't pop anything -} - -sub ::asciz -{ my @str=unpack("C*",shift); - push @str,0; - while ($#str>15) { - &data_byte(@str[0..15]); - foreach (0..15) { shift @str; } - } - &data_byte(@str) if (@str); -} - -sub ::asm_finish -{ &file_end(); - print @out; -} - -sub ::asm_init -{ my ($type,$fn,$cpu)=@_; - - $filename=$fn; - $i386=$cpu; - - $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0; - if (($type eq "elf")) - { $elf=1; require "x86gas.pl"; } - elsif (($type eq "a\.out")) - { $aout=1; require "x86gas.pl"; } - elsif (($type eq "coff" or $type eq "gaswin")) - { $coff=1; require "x86gas.pl"; } - elsif (($type eq "win32n")) - { $win32=1; require "x86nasm.pl"; } - elsif (($type eq "nw-nasm")) - { $netware=1; require "x86nasm.pl"; } - #elsif (($type eq "nw-mwasm")) - #{ $netware=1; $mwerks=1; require "x86nasm.pl"; } - elsif (($type eq "win32")) - { $win32=1; require "x86masm.pl"; } - elsif (($type eq "macosx")) - { $aout=1; $macosx=1; require "x86gas.pl"; } - elsif (($type eq "android")) - { $elf=1; $android=1; require "x86gas.pl"; } - else - { print STDERR <<"EOF"; -Pick one target type from - elf - Linux, FreeBSD, Solaris x86, etc. - a.out - DJGPP, elder OpenBSD, etc. - coff - GAS/COFF such as Win32 targets - win32n - Windows 95/Windows NT NASM format - nw-nasm - NetWare NASM format - macosx - Mac OS X -EOF - exit(1); - } - - $pic=0; - for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } - - $filename =~ s/\.pl$//; - &file($filename); -} - -1; diff --git a/drivers/builtin_openssl2/crypto/perlasm/x86gas.pl b/drivers/builtin_openssl2/crypto/perlasm/x86gas.pl deleted file mode 100644 index 682a3a3163..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/x86gas.pl +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/env perl - -package x86gas; - -*out=\@::out; - -$::lbdecor=$::aout?"L":".L"; # local label decoration -$nmdecor=($::aout or $::coff)?"_":""; # external name decoration - -$initseg=""; - -$align=16; -$align=log($align)/log(2) if ($::aout); -$com_start="#" if ($::aout or $::coff); - -sub opsize() -{ my $reg=shift; - if ($reg =~ m/^%e/o) { "l"; } - elsif ($reg =~ m/^%[a-d][hl]$/o) { "b"; } - elsif ($reg =~ m/^%[xm]/o) { undef; } - else { "w"; } -} - -# swap arguments; -# expand opcode with size suffix; -# prefix numeric constants with $; -sub ::generic -{ my($opcode,@arg)=@_; - my($suffix,$dst,$src); - - @arg=reverse(@arg); - - for (@arg) - { s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o; # gp registers - s/^([xy]?mm[0-7])$/%$1/o; # xmm/mmx registers - s/^(\-?[0-9]+)$/\$$1/o; # constants - s/^(\-?0x[0-9a-f]+)$/\$$1/o; # constants - } - - $dst = $arg[$#arg] if ($#arg>=0); - $src = $arg[$#arg-1] if ($#arg>=1); - if ($dst =~ m/^%/o) { $suffix=&opsize($dst); } - elsif ($src =~ m/^%/o) { $suffix=&opsize($src); } - else { $suffix="l"; } - undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); - - if ($#_==0) { &::emit($opcode); } - elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o) - { &::emit($opcode,@arg); } - else { &::emit($opcode.$suffix,@arg);} - - 1; -} -# -# opcodes not covered by ::generic above, mostly inconsistent namings... -# -sub ::movzx { &::movzb(@_); } -sub ::pushfd { &::pushfl; } -sub ::popfd { &::popfl; } -sub ::cpuid { &::emit(".byte\t0x0f,0xa2"); } -sub ::rdtsc { &::emit(".byte\t0x0f,0x31"); } - -sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } -sub ::call_ptr { &::generic("call","*$_[0]"); } -sub ::jmp_ptr { &::generic("jmp","*$_[0]"); } - -*::bswap = sub { &::emit("bswap","%$_[0]"); } if (!$::i386); - -sub ::DWP -{ my($addr,$reg1,$reg2,$idx)=@_; - my $ret=""; - - $addr =~ s/^\s+//; - # prepend global references with optional underscore - $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; - - $reg1 = "%$reg1" if ($reg1); - $reg2 = "%$reg2" if ($reg2); - - $ret .= $addr if (($addr ne "") && ($addr ne 0)); - - if ($reg2) - { $idx!= 0 or $idx=1; - $ret .= "($reg1,$reg2,$idx)"; - } - elsif ($reg1) - { $ret .= "($reg1)"; } - - $ret; -} -sub ::QWP { &::DWP(@_); } -sub ::BP { &::DWP(@_); } -sub ::WP { &::DWP(@_); } -sub ::BC { @_; } -sub ::DWC { @_; } - -sub ::file -{ push(@out,".file\t\"$_[0].s\"\n.text\n"); } - -sub ::function_begin_B -{ my $func=shift; - my $global=($func !~ /^_/); - my $begin="${::lbdecor}_${func}_begin"; - - &::LABEL($func,$global?"$begin":"$nmdecor$func"); - $func=$nmdecor.$func; - - push(@out,".globl\t$func\n") if ($global); - if ($::coff) - { push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); } - elsif (($::aout and !$::pic) or $::macosx) - { } - else - { push(@out,".type $func,\@function\n"); } - push(@out,".align\t$align\n"); - push(@out,"$func:\n"); - push(@out,"$begin:\n") if ($global); - $::stack=4; -} - -sub ::function_end_B -{ my $func=shift; - push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf); - $::stack=0; - &::wipe_labels(); -} - -sub ::comment - { - if (!defined($com_start) or $::elf) - { # Regarding $::elf above... - # GNU and SVR4 as'es use different comment delimiters, - push(@out,"\n"); # so we just skip ELF comments... - return; - } - foreach (@_) - { - if (/^\s*$/) - { push(@out,"\n"); } - else - { push(@out,"\t$com_start $_ $com_end\n"); } - } - } - -sub ::external_label -{ foreach(@_) { &::LABEL($_,$nmdecor.$_); } } - -sub ::public_label -{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } - -sub ::file_end -{ if ($::macosx) - { if (%non_lazy_ptr) - { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); - foreach $i (keys %non_lazy_ptr) - { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } - } - } - if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; - if ($::macosx) { push (@out,"$tmp,2\n"); } - elsif ($::elf) { push (@out,"$tmp,4\n"); } - else { push (@out,"$tmp\n"); } - } - push(@out,$initseg) if ($initseg); -} - -sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } -sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); } -sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } - -sub ::align -{ my $val=$_[0],$p2,$i; - if ($::aout) - { for ($p2=0;$val!=0;$val>>=1) { $p2++; } - $val=$p2-1; - $val.=",0x90"; - } - push(@out,".align\t$val\n"); -} - -sub ::picmeup -{ my($dst,$sym,$base,$reflabel)=@_; - - if (($::pic && ($::elf || $::aout)) || $::macosx) - { if (!defined($base)) - { &::call(&::label("PIC_me_up")); - &::set_label("PIC_me_up"); - &::blindpop($dst); - $base=$dst; - $reflabel=&::label("PIC_me_up"); - } - if ($::macosx) - { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); - &::mov($dst,&::DWP("$indirect-$reflabel",$base)); - $non_lazy_ptr{"$nmdecor$sym"}=$indirect; - } - else - { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", - $base)); - &::mov($dst,&::DWP("$sym\@GOT",$dst)); - } - } - else - { &::lea($dst,&::DWP($sym)); } -} - -sub ::initseg -{ my $f=$nmdecor.shift; - - if ($::android) - { $initseg.=<<___; -.section .init_array -.align 4 -.long $f -___ - } - elsif ($::elf) - { $initseg.=<<___; -.section .init - call $f -___ - } - elsif ($::coff) - { $initseg.=<<___; # applies to both Cygwin and Mingw -.section .ctors -.long $f -___ - } - elsif ($::macosx) - { $initseg.=<<___; -.mod_init_func -.align 2 -.long $f -___ - } - elsif ($::aout) - { my $ctor="${nmdecor}_GLOBAL_\$I\$$f"; - $initseg.=".text\n"; - $initseg.=".type $ctor,\@function\n" if ($::pic); - $initseg.=<<___; # OpenBSD way... -.globl $ctor -.align 2 -$ctor: - jmp $f -___ - } -} - -sub ::dataseg -{ push(@out,".data\n"); } - -1; diff --git a/drivers/builtin_openssl2/crypto/perlasm/x86masm.pl b/drivers/builtin_openssl2/crypto/perlasm/x86masm.pl deleted file mode 100644 index f937d07c87..0000000000 --- a/drivers/builtin_openssl2/crypto/perlasm/x86masm.pl +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env perl - -package x86masm; - -*out=\@::out; - -$::lbdecor="\$L"; # local label decoration -$nmdecor="_"; # external name decoration - -$initseg=""; -$segment=""; - -sub ::generic -{ my ($opcode,@arg)=@_; - - # fix hexadecimal constants - for (@arg) { s/(?= 0x02030000\n"); - push(@out,"safeseh ".&::LABEL($nm,$nmdecor.$nm)."\n"); - push(@out,"%endif\n"); -} - -1; diff --git a/drivers/builtin_openssl2/crypto/pkcs12/p12_decr.c b/drivers/builtin_openssl2/crypto/pkcs12/p12_decr.c index d46eae3c66..b40ea10ccb 100644 --- a/drivers/builtin_openssl2/crypto/pkcs12/p12_decr.c +++ b/drivers/builtin_openssl2/crypto/pkcs12/p12_decr.c @@ -171,28 +171,32 @@ ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor, const char *pass, int passlen, void *obj, int zbuf) { - ASN1_OCTET_STRING *oct; + ASN1_OCTET_STRING *oct = NULL; unsigned char *in = NULL; int inlen; if (!(oct = M_ASN1_OCTET_STRING_new())) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, ERR_R_MALLOC_FAILURE); - return NULL; + goto err; } inlen = ASN1_item_i2d(obj, &in, it); if (!in) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCODE_ERROR); - return NULL; + goto err; } if (!PKCS12_pbe_crypt(algor, pass, passlen, in, inlen, &oct->data, &oct->length, 1)) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCRYPT_ERROR); OPENSSL_free(in); - return NULL; + goto err; } if (zbuf) OPENSSL_cleanse(in, inlen); OPENSSL_free(in); return oct; + err: + if (oct) + ASN1_OCTET_STRING_free(oct); + return NULL; } IMPLEMENT_PKCS12_STACK_OF(PKCS7) diff --git a/drivers/builtin_openssl2/crypto/pkcs12/p12_p8e.c b/drivers/builtin_openssl2/crypto/pkcs12/p12_p8e.c index d970f0544c..861a087f80 100644 --- a/drivers/builtin_openssl2/crypto/pkcs12/p12_p8e.c +++ b/drivers/builtin_openssl2/crypto/pkcs12/p12_p8e.c @@ -76,8 +76,12 @@ X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher, if (pbe_nid == -1) pbe = PKCS5_pbe2_set(cipher, iter, salt, saltlen); - else + else if (EVP_PBE_find(EVP_PBE_TYPE_PRF, pbe_nid, NULL, NULL, 0)) + pbe = PKCS5_pbe2_set_iv(cipher, iter, salt, saltlen, NULL, pbe_nid); + else { + ERR_clear_error(); pbe = PKCS5_pbe_set(pbe_nid, iter, salt, saltlen); + } if (!pbe) { PKCS12err(PKCS12_F_PKCS8_ENCRYPT, ERR_R_ASN1_LIB); goto err; diff --git a/drivers/builtin_openssl2/crypto/pkcs7/bio_ber.c b/drivers/builtin_openssl2/crypto/pkcs7/bio_ber.c deleted file mode 100644 index 31973fcd1f..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/bio_ber.c +++ /dev/null @@ -1,466 +0,0 @@ -/* crypto/evp/bio_ber.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include "cryptlib.h" -#include -#include - -static int ber_write(BIO *h,char *buf,int num); -static int ber_read(BIO *h,char *buf,int size); -/*static int ber_puts(BIO *h,char *str); */ -/*static int ber_gets(BIO *h,char *str,int size); */ -static long ber_ctrl(BIO *h,int cmd,long arg1,char *arg2); -static int ber_new(BIO *h); -static int ber_free(BIO *data); -static long ber_callback_ctrl(BIO *h,int cmd,void *(*fp)()); -#define BER_BUF_SIZE (32) - -/* This is used to hold the state of the BER objects being read. */ -typedef struct ber_struct - { - int tag; - int class; - long length; - int inf; - int num_left; - int depth; - } BER_CTX; - -typedef struct bio_ber_struct - { - int tag; - int class; - long length; - int inf; - - /* most of the following are used when doing non-blocking IO */ - /* reading */ - long num_left; /* number of bytes still to read/write in block */ - int depth; /* used with indefinite encoding. */ - int finished; /* No more read data */ - - /* writting */ - char *w_addr; - int w_offset; - int w_left; - - int buf_len; - int buf_off; - unsigned char buf[BER_BUF_SIZE]; - } BIO_BER_CTX; - -static BIO_METHOD methods_ber= - { - BIO_TYPE_CIPHER,"cipher", - ber_write, - ber_read, - NULL, /* ber_puts, */ - NULL, /* ber_gets, */ - ber_ctrl, - ber_new, - ber_free, - ber_callback_ctrl, - }; - -BIO_METHOD *BIO_f_ber(void) - { - return(&methods_ber); - } - -static int ber_new(BIO *bi) - { - BIO_BER_CTX *ctx; - - ctx=(BIO_BER_CTX *)OPENSSL_malloc(sizeof(BIO_BER_CTX)); - if (ctx == NULL) return(0); - - memset((char *)ctx,0,sizeof(BIO_BER_CTX)); - - bi->init=0; - bi->ptr=(char *)ctx; - bi->flags=0; - return(1); - } - -static int ber_free(BIO *a) - { - BIO_BER_CTX *b; - - if (a == NULL) return(0); - b=(BIO_BER_CTX *)a->ptr; - OPENSSL_cleanse(a->ptr,sizeof(BIO_BER_CTX)); - OPENSSL_free(a->ptr); - a->ptr=NULL; - a->init=0; - a->flags=0; - return(1); - } - -int bio_ber_get_header(BIO *bio, BIO_BER_CTX *ctx) - { - char buf[64]; - int i,j,n; - int ret; - unsigned char *p; - unsigned long length - int tag; - int class; - long max; - - BIO_clear_retry_flags(b); - - /* Pack the buffer down if there is a hole at the front */ - if (ctx->buf_off != 0) - { - p=ctx->buf; - j=ctx->buf_off; - n=ctx->buf_len-j; - for (i=0; ibuf_len-j; - ctx->buf_off=0; - } - - /* If there is more room, read some more data */ - i=BER_BUF_SIZE-ctx->buf_len; - if (i) - { - i=BIO_read(bio->next_bio,&(ctx->buf[ctx->buf_len]),i); - if (i <= 0) - { - BIO_copy_next_retry(b); - return(i); - } - else - ctx->buf_len+=i; - } - - max=ctx->buf_len; - p=ctx->buf; - ret=ASN1_get_object(&p,&length,&tag,&class,max); - - if (ret & 0x80) - { - if ((ctx->buf_len < BER_BUF_SIZE) && - (ERR_GET_REASON(ERR_peek_error()) == ASN1_R_TOO_LONG)) - { - ERR_clear_error(); /* clear the error */ - BIO_set_retry_read(b); - } - return(-1); - } - - /* We have no error, we have a header, so make use of it */ - - if ((ctx->tag >= 0) && (ctx->tag != tag)) - { - BIOerr(BIO_F_BIO_BER_GET_HEADER,BIO_R_TAG_MISMATCH); - sprintf(buf,"tag=%d, got %d",ctx->tag,tag); - ERR_add_error_data(1,buf); - return(-1); - } - if (ret & 0x01) - if (ret & V_ASN1_CONSTRUCTED) - } - -static int ber_read(BIO *b, char *out, int outl) - { - int ret=0,i,n; - BIO_BER_CTX *ctx; - - BIO_clear_retry_flags(b); - - if (out == NULL) return(0); - ctx=(BIO_BER_CTX *)b->ptr; - - if ((ctx == NULL) || (b->next_bio == NULL)) return(0); - - if (ctx->finished) return(0); - -again: - /* First see if we are half way through reading a block */ - if (ctx->num_left > 0) - { - if (ctx->num_left < outl) - n=ctx->num_left; - else - n=outl; - i=BIO_read(b->next_bio,out,n); - if (i <= 0) - { - BIO_copy_next_retry(b); - return(i); - } - ctx->num_left-=i; - outl-=i; - ret+=i; - if (ctx->num_left <= 0) - { - ctx->depth--; - if (ctx->depth <= 0) - ctx->finished=1; - } - if (outl <= 0) - return(ret); - else - goto again; - } - else /* we need to read another BER header */ - { - } - } - -static int ber_write(BIO *b, char *in, int inl) - { - int ret=0,n,i; - BIO_ENC_CTX *ctx; - - ctx=(BIO_ENC_CTX *)b->ptr; - ret=inl; - - BIO_clear_retry_flags(b); - n=ctx->buf_len-ctx->buf_off; - while (n > 0) - { - i=BIO_write(b->next_bio,&(ctx->buf[ctx->buf_off]),n); - if (i <= 0) - { - BIO_copy_next_retry(b); - return(i); - } - ctx->buf_off+=i; - n-=i; - } - /* at this point all pending data has been written */ - - if ((in == NULL) || (inl <= 0)) return(0); - - ctx->buf_off=0; - while (inl > 0) - { - n=(inl > ENC_BLOCK_SIZE)?ENC_BLOCK_SIZE:inl; - EVP_CipherUpdate(&(ctx->cipher), - (unsigned char *)ctx->buf,&ctx->buf_len, - (unsigned char *)in,n); - inl-=n; - in+=n; - - ctx->buf_off=0; - n=ctx->buf_len; - while (n > 0) - { - i=BIO_write(b->next_bio,&(ctx->buf[ctx->buf_off]),n); - if (i <= 0) - { - BIO_copy_next_retry(b); - return(i); - } - n-=i; - ctx->buf_off+=i; - } - ctx->buf_len=0; - ctx->buf_off=0; - } - BIO_copy_next_retry(b); - return(ret); - } - -static long ber_ctrl(BIO *b, int cmd, long num, char *ptr) - { - BIO *dbio; - BIO_ENC_CTX *ctx,*dctx; - long ret=1; - int i; - - ctx=(BIO_ENC_CTX *)b->ptr; - - switch (cmd) - { - case BIO_CTRL_RESET: - ctx->ok=1; - ctx->finished=0; - EVP_CipherInit_ex(&(ctx->cipher),NULL,NULL,NULL,NULL, - ctx->cipher.berrypt); - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - case BIO_CTRL_EOF: /* More to read */ - if (ctx->cont <= 0) - ret=1; - else - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - case BIO_CTRL_WPENDING: - ret=ctx->buf_len-ctx->buf_off; - if (ret <= 0) - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - case BIO_CTRL_PENDING: /* More to read in buffer */ - ret=ctx->buf_len-ctx->buf_off; - if (ret <= 0) - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - case BIO_CTRL_FLUSH: - /* do a final write */ -again: - while (ctx->buf_len != ctx->buf_off) - { - i=ber_write(b,NULL,0); - if (i < 0) - { - ret=i; - break; - } - } - - if (!ctx->finished) - { - ctx->finished=1; - ctx->buf_off=0; - ret=EVP_CipherFinal_ex(&(ctx->cipher), - (unsigned char *)ctx->buf, - &(ctx->buf_len)); - ctx->ok=(int)ret; - if (ret <= 0) break; - - /* push out the bytes */ - goto again; - } - - /* Finally flush the underlying BIO */ - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - case BIO_C_GET_CIPHER_STATUS: - ret=(long)ctx->ok; - break; - case BIO_C_DO_STATE_MACHINE: - BIO_clear_retry_flags(b); - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - BIO_copy_next_retry(b); - break; - - case BIO_CTRL_DUP: - dbio=(BIO *)ptr; - dctx=(BIO_ENC_CTX *)dbio->ptr; - memcpy(&(dctx->cipher),&(ctx->cipher),sizeof(ctx->cipher)); - dbio->init=1; - break; - default: - ret=BIO_ctrl(b->next_bio,cmd,num,ptr); - break; - } - return(ret); - } - -static long ber_callback_ctrl(BIO *b, int cmd, void *(*fp)()) - { - long ret=1; - - if (b->next_bio == NULL) return(0); - switch (cmd) - { - default: - ret=BIO_callback_ctrl(b->next_bio,cmd,fp); - break; - } - return(ret); - } - -/* -void BIO_set_cipher_ctx(b,c) -BIO *b; -EVP_CIPHER_ctx *c; - { - if (b == NULL) return; - - if ((b->callback != NULL) && - (b->callback(b,BIO_CB_CTRL,(char *)c,BIO_CTRL_SET,e,0L) <= 0)) - return; - - b->init=1; - ctx=(BIO_ENC_CTX *)b->ptr; - memcpy(ctx->cipher,c,sizeof(EVP_CIPHER_CTX)); - - if (b->callback != NULL) - b->callback(b,BIO_CB_CTRL,(char *)c,BIO_CTRL_SET,e,1L); - } -*/ - -void BIO_set_cipher(BIO *b, EVP_CIPHER *c, unsigned char *k, unsigned char *i, - int e) - { - BIO_ENC_CTX *ctx; - - if (b == NULL) return; - - if ((b->callback != NULL) && - (b->callback(b,BIO_CB_CTRL,(char *)c,BIO_CTRL_SET,e,0L) <= 0)) - return; - - b->init=1; - ctx=(BIO_ENC_CTX *)b->ptr; - EVP_CipherInit_ex(&(ctx->cipher),c,NULL,k,i,e); - - if (b->callback != NULL) - b->callback(b,BIO_CB_CTRL,(char *)c,BIO_CTRL_SET,e,1L); - } - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/dec.c b/drivers/builtin_openssl2/crypto/pkcs7/dec.c deleted file mode 100644 index 6752ec568a..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/dec.c +++ /dev/null @@ -1,248 +0,0 @@ -/* crypto/pkcs7/verify.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -#include -#include -#include -#include -#include -#include -#include -#include - -int verify_callback(int ok, X509_STORE_CTX *ctx); - -BIO *bio_err=NULL; - -int main(argc,argv) -int argc; -char *argv[]; - { - char *keyfile=NULL; - BIO *in; - EVP_PKEY *pkey; - X509 *x509; - PKCS7 *p7; - PKCS7_SIGNER_INFO *si; - X509_STORE_CTX cert_ctx; - X509_STORE *cert_store=NULL; - BIO *data,*detached=NULL,*p7bio=NULL; - char buf[1024*4]; - unsigned char *pp; - int i,printit=0; - STACK_OF(PKCS7_SIGNER_INFO) *sk; - - OpenSSL_add_all_algorithms(); - bio_err=BIO_new_fp(stderr,BIO_NOCLOSE); - - data=BIO_new(BIO_s_file()); - pp=NULL; - while (argc > 1) - { - argc--; - argv++; - if (strcmp(argv[0],"-p") == 0) - { - printit=1; - } - else if ((strcmp(argv[0],"-k") == 0) && (argc >= 2)) { - keyfile = argv[1]; - argc-=1; - argv+=1; - } else if ((strcmp(argv[0],"-d") == 0) && (argc >= 2)) - { - detached=BIO_new(BIO_s_file()); - if (!BIO_read_filename(detached,argv[1])) - goto err; - argc-=1; - argv+=1; - } - else break; - } - - if (!BIO_read_filename(data,argv[0])) goto err; - - if(!keyfile) { - fprintf(stderr, "No private key file specified\n"); - goto err; - } - - if ((in=BIO_new_file(keyfile,"r")) == NULL) goto err; - if ((x509=PEM_read_bio_X509(in,NULL,NULL,NULL)) == NULL) goto err; - BIO_reset(in); - if ((pkey=PEM_read_bio_PrivateKey(in,NULL,NULL,NULL)) == NULL) - goto err; - BIO_free(in); - - if (pp == NULL) - BIO_set_fp(data,stdin,BIO_NOCLOSE); - - - /* Load the PKCS7 object from a file */ - if ((p7=PEM_read_bio_PKCS7(data,NULL,NULL,NULL)) == NULL) goto err; - - - - /* This stuff is being setup for certificate verification. - * When using SSL, it could be replaced with a - * cert_stre=SSL_CTX_get_cert_store(ssl_ctx); */ - cert_store=X509_STORE_new(); - X509_STORE_set_default_paths(cert_store); - X509_STORE_load_locations(cert_store,NULL,"../../certs"); - X509_STORE_set_verify_cb_func(cert_store,verify_callback); - - ERR_clear_error(); - - /* We need to process the data */ - /* We cannot support detached encryption */ - p7bio=PKCS7_dataDecode(p7,pkey,detached,x509); - - if (p7bio == NULL) - { - printf("problems decoding\n"); - goto err; - } - - /* We now have to 'read' from p7bio to calculate digests etc. */ - for (;;) - { - i=BIO_read(p7bio,buf,sizeof(buf)); - /* print it? */ - if (i <= 0) break; - fwrite(buf,1, i, stdout); - } - - /* We can now verify signatures */ - sk=PKCS7_get_signer_info(p7); - if (sk == NULL) - { - fprintf(stderr, "there are no signatures on this data\n"); - } - else - { - /* Ok, first we need to, for each subject entry, - * see if we can verify */ - ERR_clear_error(); - for (i=0; ierror) - { - case X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT: - X509_NAME_oneline(X509_get_issuer_name(ctx->current_cert),buf,256); - BIO_printf(bio_err,"issuer= %s\n",buf); - break; - case X509_V_ERR_CERT_NOT_YET_VALID: - case X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD: - BIO_printf(bio_err,"notBefore="); - ASN1_UTCTIME_print(bio_err,X509_get_notBefore(ctx->current_cert)); - BIO_printf(bio_err,"\n"); - break; - case X509_V_ERR_CERT_HAS_EXPIRED: - case X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD: - BIO_printf(bio_err,"notAfter="); - ASN1_UTCTIME_print(bio_err,X509_get_notAfter(ctx->current_cert)); - BIO_printf(bio_err,"\n"); - break; - } - BIO_printf(bio_err,"verify return:%d\n",ok); - return(ok); - } diff --git a/drivers/builtin_openssl2/crypto/pkcs7/des.pem b/drivers/builtin_openssl2/crypto/pkcs7/des.pem deleted file mode 100644 index 62d1657e3e..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/des.pem +++ /dev/null @@ -1,15 +0,0 @@ - -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHmMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR+MA0GCSqGSIb3DQEBAQUABEC2vXI1xQDW6lUHM3zQ -/9uBEBOO5A3TtkrklAXq7v01gsIC21t52qSk36REXY+slhNZ0OQ349tgkTsoETHFLoEwMIHw -AgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMI -QnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR9MA0G -CSqGSIb3DQEBAQUABEB8ujxbabxXUYJhopuDm3oDq4JNqX6Io4p3ro+ShqfIndsXTZ1v5a2N -WtLLCWlHn/habjBwZ/DgQgcKASbZ7QxNMIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIA -oAQIbsL5v1wX98KggAQoAaJ4WHm68fXY1WE5OIjfVBIDpO1K+i8dmKhjnAjrjoyZ9Bwc8rDL -lgQg4CXb805h5xl+GfvSwUaHJayte1m2mcOhs3J2YyqbQ+MEIMIiJQccmhO3oDKm36CFvYR8 -5PjpclVcZyX2ngbwPFMnBAgy0clOAE6UKAAAAAAAAAAAAAA= - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/doc b/drivers/builtin_openssl2/crypto/pkcs7/doc deleted file mode 100644 index d2e8b7b2a3..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/doc +++ /dev/null @@ -1,24 +0,0 @@ -int PKCS7_set_content_type(PKCS7 *p7, int type); -Call to set the type of PKCS7 object we are working on - -int PKCS7_SIGNER_INFO_set(PKCS7_SIGNER_INFO *p7i, X509 *x509, EVP_PKEY *pkey, - EVP_MD *dgst); -Use this to setup a signer info -There will also be functions to add signed and unsigned attributes. - -int PKCS7_add_signer(PKCS7 *p7, PKCS7_SIGNER_INFO *p7i); -Add a signer info to the content. - -int PKCS7_add_certificae(PKCS7 *p7, X509 *x509); -int PKCS7_add_crl(PKCS7 *p7, X509_CRL *x509); - ----- - -p7=PKCS7_new(); -PKCS7_set_content_type(p7,NID_pkcs7_signed); - -signer=PKCS7_SINGNER_INFO_new(); -PKCS7_SIGNER_INFO_set(signer,x509,pkey,EVP_md5()); -PKCS7_add_signer(py,signer); - -we are now setup. diff --git a/drivers/builtin_openssl2/crypto/pkcs7/enc.c b/drivers/builtin_openssl2/crypto/pkcs7/enc.c deleted file mode 100644 index 7417f8a4e0..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/enc.c +++ /dev/null @@ -1,174 +0,0 @@ -/* crypto/pkcs7/enc.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ -#include -#include -#include -#include -#include -#include - -int main(argc,argv) -int argc; -char *argv[]; - { - X509 *x509; - PKCS7 *p7; - BIO *in; - BIO *data,*p7bio; - char buf[1024*4]; - int i; - int nodetach=1; - char *keyfile = NULL; - const EVP_CIPHER *cipher=NULL; - STACK_OF(X509) *recips=NULL; - - OpenSSL_add_all_algorithms(); - - data=BIO_new(BIO_s_file()); - while(argc > 1) - { - if (strcmp(argv[1],"-nd") == 0) - { - nodetach=1; - argv++; argc--; - } - else if ((strcmp(argv[1],"-c") == 0) && (argc >= 2)) { - if(!(cipher = EVP_get_cipherbyname(argv[2]))) { - fprintf(stderr, "Unknown cipher %s\n", argv[2]); - goto err; - } - argc-=2; - argv+=2; - } else if ((strcmp(argv[1],"-k") == 0) && (argc >= 2)) { - keyfile = argv[2]; - argc-=2; - argv+=2; - if (!(in=BIO_new_file(keyfile,"r"))) goto err; - if (!(x509=PEM_read_bio_X509(in,NULL,NULL,NULL))) - goto err; - if(!recips) recips = sk_X509_new_null(); - sk_X509_push(recips, x509); - BIO_free(in); - } else break; - } - - if(!recips) { - fprintf(stderr, "No recipients\n"); - goto err; - } - - if (!BIO_read_filename(data,argv[1])) goto err; - - p7=PKCS7_new(); -#if 0 - BIO_reset(in); - if ((pkey=PEM_read_bio_PrivateKey(in,NULL,NULL)) == NULL) goto err; - BIO_free(in); - PKCS7_set_type(p7,NID_pkcs7_signedAndEnveloped); - - if (PKCS7_add_signature(p7,x509,pkey,EVP_sha1()) == NULL) goto err; - /* we may want to add more */ - PKCS7_add_certificate(p7,x509); -#else - PKCS7_set_type(p7,NID_pkcs7_enveloped); -#endif - if(!cipher) { -#ifndef OPENSSL_NO_DES - cipher = EVP_des_ede3_cbc(); -#else - fprintf(stderr, "No cipher selected\n"); - goto err; -#endif - } - - if (!PKCS7_set_cipher(p7,cipher)) goto err; - for(i = 0; i < sk_X509_num(recips); i++) { - if (!PKCS7_add_recipient(p7,sk_X509_value(recips, i))) goto err; - } - sk_X509_pop_free(recips, X509_free); - - /* Set the content of the signed to 'data' */ - /* PKCS7_content_new(p7,NID_pkcs7_data); not used in envelope */ - - /* could be used, but not in this version :-) - if (!nodetach) PKCS7_set_detached(p7,1); - */ - - if ((p7bio=PKCS7_dataInit(p7,NULL)) == NULL) goto err; - - for (;;) - { - i=BIO_read(data,buf,sizeof(buf)); - if (i <= 0) break; - BIO_write(p7bio,buf,i); - } - BIO_flush(p7bio); - - if (!PKCS7_dataFinal(p7,p7bio)) goto err; - BIO_free(p7bio); - - PEM_write_PKCS7(stdout,p7); - PKCS7_free(p7); - - exit(0); -err: - ERR_load_crypto_strings(); - ERR_print_errors_fp(stderr); - exit(1); - } - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/es1.pem b/drivers/builtin_openssl2/crypto/pkcs7/es1.pem deleted file mode 100644 index 47112a238f..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/es1.pem +++ /dev/null @@ -1,66 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHmMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0ECAgRuMA0GCSqGSIb3DQEBAQUABEDWak0y/5XZJhQJeCLo -KECcHXkTEbjzYkYNHIinbiPmRK4QbNfs9z2mA3z/c2ykQ4eAqFR2jyNrUMN/+I5XEiv6MIHw -AgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMI -QnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR9MA0G -CSqGSIb3DQEBAQUABEAWg9+KgtCjc77Jdj1Ve4wGgHjVHbbSYEA1ZqKFDoi15vSr9hfpHmC4 -ycZzcRo16JkTfolefiHZzmyjVz94vSN6MIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIA -oAQI7X4Tk4mcbV6ggASBsHl1mCaJ3RhXWlNPCgCRU53d7M5x6TDZRkvwdtdvW96m1lupT03F -XtonkBqk7oMkH7kGfs5/REQOPjx0QE2Ixmgt1W3szum82EZwA7pZNppcraK7W/odw/7bYZO+ -II3HPmRklE2N9qiu1LPaPUsnYogkO6SennyeL5tZ382vBweL/8pnG0qsbT1OBb65v+llnsjT -pa1T/p+fIx/iJJGE6K9fYFokC6gXLQ6ozXRdOu5oBDB8mPCYYvAqKycidM/MrGGUkpEtS4f0 -lS31PwQi5YTim8Ig3/TOwVpPX32i46FTuEIEIMHkD/OvpfwCCzXUHHJnKnKUAUvIsSY3vGBs -8ezpUDfBBBj9LHDy32hZ2tQilkDefP5VM2LLdrWgamYEgfiyITQvn08Ul5lQOQxbFKBheFq5 -otCCN4MR+w5eq12xQu6y+f9z0159ag2ru87D0lLtUtXXtCELbO1nUkT2sJ0k/iDs9TOXr6Cx -go1XKYho83hlkXYiCteVizdAbgVGNsNRD4wtIdajsorET/LuJECgp11YeL9w1dlDB0HLEZfi -XCsUphH4jGagba3hDeUSibnjSiJlN0ukfuQurBBbI2UkBAujiEAubKPn7C1FZJRSw6CPPX5t -KEpmcqT1JNk6LO8Js6/1sCmmBh1VGCy1+EuTI9J1p7Dagf4nQ8cHitoCRpHuKZlFHnZyv7tw -Rn/KOhHaYP2VzAh40gQIvKMAAWh9oFsEEIMwIoOmLwLH5wf+8QdbDhoECH8HwZt9a12dBAjL -r4j2zlvtfgQIt7nmEM3wz1EECKlc3EIy1irCBBCAKINcermK3A+jI6ISN2RzBFA3dsh/xwMu -l61aWMBBZzEz/SF92k6n35KZhCC0d6fIVC/1WMv0fnCwQ8oEDynSre216VEFiYKBaQLJe5o/ -mTAxC7Ht3goXnuc+i1FItOkLrgRI/wyvTICEn2WsNZiMADnGaee2bqPnUopo+VMGexJEtCPk -l0ZNlDJGquPDkpUwaEtecVZzCNyVPYyyF4J/l8rmGDhDdYUIC8IKBEg/ip/E0BuubBLWVbv+ -HRl4QrnGpyCyeXRXXK603QP3sT1Zbbm1v5pI/loOhVHi724LmtXHSyp5qv9MDcxE1PoX10LY -gBRtlwwESPeCF8bK5jk4xIQMhK5NMHj1Y1KQWTZ9NGITBL4hjRq2qp4Qk5GIpGgOVPopAuCo -TIyPikpqBRNtLSPRSsDs6QPUPzWBh6JgxwRQblnDKKUkxUcnJiD4i9QtGa/ZabMn4KxtNOBL -5JSh1nJkaLXCZY070131WWPAByLcd5TiXq8x84pmzV5NNk4tiMpoXhJNsx8e4rskQQlKd6ME -SCe2eYDHKcKPX3WJbUzhrJSQ92/aWnI2iUY8WQ+kSNyiZ2QUjyuUg9Z66g/0d2STlvPOBHT/ -y5ODP2CwbcWX4QmCbUc9TT66fQRIrRVuwvtOfnUueyGgYhJ3HpAJfVaB/7kap5bj7Fi/azW4 -9JDfd1bC/W9h0Kyk7RO2gxvE0hIHc26mZJHTm9MNP5D328MnM2MdBEjKjQBtgrp+lFIii7MP -nGHFTKUkG4WAIZJCf/CsT+p6/SW0qG71Me/YcSw5STB24j+a+HgMV8RVIeUlkP4z0IWWrSoB -Gh4d/Z0EUMCVHs/HZ/bWgiyhtHpvuVAzidm8D81p1LJ5BQX5/5f/m+q5+fS/npL27dTEbNqs -LSB6ij3MZAi7LwHWpTn9zWnDajCMEj9vlaV7mcKtHK5iBEg85agFi1h3MvicqLtoFe5hVv9T -tG0j6CRkjkixPzivltlrf44KHv14gLM0XJxCGyq7vd3l8QYr3+9at0zNnX/yqTiBnsnE5dUE -SIgrYuz87M2gi/ER9PcDoTtONH3+CkcqVy03q/Sj8cVWD/b1KgEhqnNOfc8Ak9PctyR/ItcR -8Me5XVn1GJKkQJk4O29fxvgNoAQIrIESvUWGshAEQByXiFoFTDUByjTlgjcy77H1lrH+y3P/ -wAInJjJAut9kCNyGJV0PA4kdPB5USWltuO6t8gk4Pd2YBMl09zqUWkAEUCjFrtZ3mapjcGZI -uQTASKR5LSjXoWxTT5gae/+64MerF/oCEeO3ehRTpjnPrsiRDo0rWIQTaj9+Nro8Z2xtWstw -RnfoAHIxV1lEamPwjsceBEi2SD9hiifFeO5ECiVoaE1FdXUXhU+jwYAMx6jHWO9hMkYzS9pM -Y3IyWR5ybtOjiQgkUdvRJPUPGf5DVVMPnymGX25aDh5PYpIESPbsM9akCpOOVuscywcUswmU -o7dXvlB48WWCfg/al3BQKAZbn5ZXtWNwpUZkrEdHsrxAVv3rxRcdkT3Z1fzUbIuYkLJN200o -WgRIJvn6RO8KEj7/HOg2sYuuM8nz1kR0TSgwX7/0y/7JfjBa0JIlP7o75sNJscE8oyoIMzuy -Dvn6/U9g3BCDXn83A/s+ke60qn9gBFC6NAeLOlXal1YVWYhMQNOqCyUfAjiXBTawaysQb1Mk -YgeNlF8xuEFcUQWIP+vNG7FJ5JPMaMRL4YEoaQ3sVFhYOERJR1cSb+8xt4QCYtBKQgRIUOmJ -CHW5o1hXJWJiTkZK2qWFcEMzTINSj5EpYFySr8aVBjkRnI7vxegRT/+XZZXoYedQ3UNsnGI3 -DdkWii5VzX0PNF6C60pfBEiVpausYuX7Wjb3Lfm8cBj7GgN69i6Pm2gxtobVcmpo2nS4D714 -ePyhlX9n8kJ6QAcqWMRj22smDPrHVGNTizfzHBh5zNllK9gESJizILOWI327og3ZWp+qUht5 -kNDJCzMK7Z09UAy+h+vq0VTQuEo3FgLzVdqkJujjSL4Nx97lXg51AovrEn3nd4evydwcjKLX -1wRIo72NaeWuUEQ+rt1SlCsOJ7k1ioJSqhrPOfvwcaFcb4beVet1JWiy4yvowTjLDGbUje2s -xjrlVt4BJWI/uA6jbQsrxSe89ADZBAi5YAlR4qszeAQIXD3VSBVKbRUECNTtyvw9vvqXBAhb -IZNn4H4cxgQI+XW7GkfL+ekECCCCg2reMyGDBAh1PYqkg3lw3gQQkNlggEPU+BH8eh7Gm7n7 -7AQIjC5EWbkil5cEEKcpuqwTWww/X89KnQAg8TcECJPomqHvrlZFBBiRSuIiHpmN+PaujXpv -qZV2VhjkB2j09GEECOIdv8AVOJgKBAjlHgIqAD9jZQQIXHbs44+wogcEIGGqTACRJxrhMcMG -X8drNjksIPt+snxTXUBIkTVpZWoABAh6unXPTyIr8QQgBF8xKoX27MWk7iTNmkSNZggZXa2a -DWCGHSYLngbSOHIECD9XmO6VsvTgBAjfqB70CEW4WwQIVIBkbCocznUEEHB/zFXy/sR4OYHe -UfbNPnIEEDWBB/NTCLMGE+o8BfyujcAECFik7GQnnF9VBBAhLXExQeWAofZNc6NtN7qZBCC1 -gVIS3ruTwKltmcrgx3heT3M8ZJhCfWa+6KzchnmKygQQ+1NL5sSzR4m/fdrqxHFyUAQYCT2x -PamQr3wK3h0lyZER+4H0zPM86AhFBBC3CkmvL2vjflMfujnzPBVpBBge9rMbI5+0q9DLrTiT -5F3AIgXLpD8PQWAECHkHVo6RomV3BAgMbi8E271UeAQIqtS8wnI3XngECG3TWmOMb3/iBEha -y+mvCS6I3n3JfL8e1B5P4qX9/czJRaERLuKpGNjLiL4A+zxN0LZ0UHd0qfmJjwOTxAx3iJAC -lGXX4nB9ATYPUT5EU+o1Y4sECN01pP6vWNIdBDAsiE0Ts8/9ltJlqX2B3AoOM4qOt9EaCjXf -lB+aEmrhtjUwuZ6GqS5Ke7P6XnakTk4ECCLIMatNdootAAAAAAAAAAAAAA== ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/example.c b/drivers/builtin_openssl2/crypto/pkcs7/example.c deleted file mode 100644 index 2953d04b5c..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/example.c +++ /dev/null @@ -1,329 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int add_signed_time(PKCS7_SIGNER_INFO *si) - { - ASN1_UTCTIME *sign_time; - - /* The last parameter is the amount to add/subtract from the current - * time (in seconds) */ - sign_time=X509_gmtime_adj(NULL,0); - PKCS7_add_signed_attribute(si,NID_pkcs9_signingTime, - V_ASN1_UTCTIME,(char *)sign_time); - return(1); - } - -ASN1_UTCTIME *get_signed_time(PKCS7_SIGNER_INFO *si) - { - ASN1_TYPE *so; - - so=PKCS7_get_signed_attribute(si,NID_pkcs9_signingTime); - if (so->type == V_ASN1_UTCTIME) - return so->value.utctime; - return NULL; - } - -static int signed_string_nid= -1; - -void add_signed_string(PKCS7_SIGNER_INFO *si, char *str) - { - ASN1_OCTET_STRING *os; - - /* To a an object of OID 1.2.3.4.5, which is an octet string */ - if (signed_string_nid == -1) - signed_string_nid= - OBJ_create("1.2.3.4.5","OID_example","Our example OID"); - os=ASN1_OCTET_STRING_new(); - ASN1_OCTET_STRING_set(os,(unsigned char*)str,strlen(str)); - /* When we add, we do not free */ - PKCS7_add_signed_attribute(si,signed_string_nid, - V_ASN1_OCTET_STRING,(char *)os); - } - -int get_signed_string(PKCS7_SIGNER_INFO *si, char *buf, int len) - { - ASN1_TYPE *so; - ASN1_OCTET_STRING *os; - int i; - - if (signed_string_nid == -1) - signed_string_nid= - OBJ_create("1.2.3.4.5","OID_example","Our example OID"); - /* To retrieve */ - so=PKCS7_get_signed_attribute(si,signed_string_nid); - if (so != NULL) - { - if (so->type == V_ASN1_OCTET_STRING) - { - os=so->value.octet_string; - i=os->length; - if ((i+1) > len) - i=len-1; - memcpy(buf,os->data,i); - return(i); - } - } - return(0); - } - -static int signed_seq2string_nid= -1; -/* ########################################### */ -int add_signed_seq2string(PKCS7_SIGNER_INFO *si, char *str1, char *str2) - { - /* To add an object of OID 1.9.999, which is a sequence containing - * 2 octet strings */ - unsigned char *p; - ASN1_OCTET_STRING *os1,*os2; - ASN1_STRING *seq; - unsigned char *data; - int i,total; - - if (signed_seq2string_nid == -1) - signed_seq2string_nid= - OBJ_create("1.9.9999","OID_example","Our example OID"); - - os1=ASN1_OCTET_STRING_new(); - os2=ASN1_OCTET_STRING_new(); - ASN1_OCTET_STRING_set(os1,(unsigned char*)str1,strlen(str1)); - ASN1_OCTET_STRING_set(os2,(unsigned char*)str1,strlen(str1)); - i =i2d_ASN1_OCTET_STRING(os1,NULL); - i+=i2d_ASN1_OCTET_STRING(os2,NULL); - total=ASN1_object_size(1,i,V_ASN1_SEQUENCE); - - data=malloc(total); - p=data; - ASN1_put_object(&p,1,i,V_ASN1_SEQUENCE,V_ASN1_UNIVERSAL); - i2d_ASN1_OCTET_STRING(os1,&p); - i2d_ASN1_OCTET_STRING(os2,&p); - - seq=ASN1_STRING_new(); - ASN1_STRING_set(seq,data,total); - free(data); - ASN1_OCTET_STRING_free(os1); - ASN1_OCTET_STRING_free(os2); - - PKCS7_add_signed_attribute(si,signed_seq2string_nid, - V_ASN1_SEQUENCE,(char *)seq); - return(1); - } - -/* For this case, I will malloc the return strings */ -int get_signed_seq2string(PKCS7_SIGNER_INFO *si, char **str1, char **str2) - { - ASN1_TYPE *so; - - if (signed_seq2string_nid == -1) - signed_seq2string_nid= - OBJ_create("1.9.9999","OID_example","Our example OID"); - /* To retrieve */ - so=PKCS7_get_signed_attribute(si,signed_seq2string_nid); - if (so && (so->type == V_ASN1_SEQUENCE)) - { - ASN1_const_CTX c; - ASN1_STRING *s; - long length; - ASN1_OCTET_STRING *os1,*os2; - - s=so->value.sequence; - c.p=ASN1_STRING_data(s); - c.max=c.p+ASN1_STRING_length(s); - if (!asn1_GetSequence(&c,&length)) goto err; - /* Length is the length of the seqence */ - - c.q=c.p; - if ((os1=d2i_ASN1_OCTET_STRING(NULL,&c.p,c.slen)) == NULL) - goto err; - c.slen-=(c.p-c.q); - - c.q=c.p; - if ((os2=d2i_ASN1_OCTET_STRING(NULL,&c.p,c.slen)) == NULL) - goto err; - c.slen-=(c.p-c.q); - - if (!asn1_const_Finish(&c)) goto err; - *str1=malloc(os1->length+1); - *str2=malloc(os2->length+1); - memcpy(*str1,os1->data,os1->length); - memcpy(*str2,os2->data,os2->length); - (*str1)[os1->length]='\0'; - (*str2)[os2->length]='\0'; - ASN1_OCTET_STRING_free(os1); - ASN1_OCTET_STRING_free(os2); - return(1); - } -err: - return(0); - } - - -/* ####################################### - * THE OTHER WAY TO DO THINGS - * ####################################### - */ -X509_ATTRIBUTE *create_time(void) - { - ASN1_UTCTIME *sign_time; - X509_ATTRIBUTE *ret; - - /* The last parameter is the amount to add/subtract from the current - * time (in seconds) */ - sign_time=X509_gmtime_adj(NULL,0); - ret=X509_ATTRIBUTE_create(NID_pkcs9_signingTime, - V_ASN1_UTCTIME,(char *)sign_time); - return(ret); - } - -ASN1_UTCTIME *sk_get_time(STACK_OF(X509_ATTRIBUTE) *sk) - { - ASN1_TYPE *so; - PKCS7_SIGNER_INFO si; - - si.auth_attr=sk; - so=PKCS7_get_signed_attribute(&si,NID_pkcs9_signingTime); - if (so->type == V_ASN1_UTCTIME) - return so->value.utctime; - return NULL; - } - -X509_ATTRIBUTE *create_string(char *str) - { - ASN1_OCTET_STRING *os; - X509_ATTRIBUTE *ret; - - /* To a an object of OID 1.2.3.4.5, which is an octet string */ - if (signed_string_nid == -1) - signed_string_nid= - OBJ_create("1.2.3.4.5","OID_example","Our example OID"); - os=ASN1_OCTET_STRING_new(); - ASN1_OCTET_STRING_set(os,(unsigned char*)str,strlen(str)); - /* When we add, we do not free */ - ret=X509_ATTRIBUTE_create(signed_string_nid, - V_ASN1_OCTET_STRING,(char *)os); - return(ret); - } - -int sk_get_string(STACK_OF(X509_ATTRIBUTE) *sk, char *buf, int len) - { - ASN1_TYPE *so; - ASN1_OCTET_STRING *os; - int i; - PKCS7_SIGNER_INFO si; - - si.auth_attr=sk; - - if (signed_string_nid == -1) - signed_string_nid= - OBJ_create("1.2.3.4.5","OID_example","Our example OID"); - /* To retrieve */ - so=PKCS7_get_signed_attribute(&si,signed_string_nid); - if (so != NULL) - { - if (so->type == V_ASN1_OCTET_STRING) - { - os=so->value.octet_string; - i=os->length; - if ((i+1) > len) - i=len-1; - memcpy(buf,os->data,i); - return(i); - } - } - return(0); - } - -X509_ATTRIBUTE *add_seq2string(PKCS7_SIGNER_INFO *si, char *str1, char *str2) - { - /* To add an object of OID 1.9.999, which is a sequence containing - * 2 octet strings */ - unsigned char *p; - ASN1_OCTET_STRING *os1,*os2; - ASN1_STRING *seq; - X509_ATTRIBUTE *ret; - unsigned char *data; - int i,total; - - if (signed_seq2string_nid == -1) - signed_seq2string_nid= - OBJ_create("1.9.9999","OID_example","Our example OID"); - - os1=ASN1_OCTET_STRING_new(); - os2=ASN1_OCTET_STRING_new(); - ASN1_OCTET_STRING_set(os1,(unsigned char*)str1,strlen(str1)); - ASN1_OCTET_STRING_set(os2,(unsigned char*)str1,strlen(str1)); - i =i2d_ASN1_OCTET_STRING(os1,NULL); - i+=i2d_ASN1_OCTET_STRING(os2,NULL); - total=ASN1_object_size(1,i,V_ASN1_SEQUENCE); - - data=malloc(total); - p=data; - ASN1_put_object(&p,1,i,V_ASN1_SEQUENCE,V_ASN1_UNIVERSAL); - i2d_ASN1_OCTET_STRING(os1,&p); - i2d_ASN1_OCTET_STRING(os2,&p); - - seq=ASN1_STRING_new(); - ASN1_STRING_set(seq,data,total); - free(data); - ASN1_OCTET_STRING_free(os1); - ASN1_OCTET_STRING_free(os2); - - ret=X509_ATTRIBUTE_create(signed_seq2string_nid, - V_ASN1_SEQUENCE,(char *)seq); - return(ret); - } - -/* For this case, I will malloc the return strings */ -int sk_get_seq2string(STACK_OF(X509_ATTRIBUTE) *sk, char **str1, char **str2) - { - ASN1_TYPE *so; - PKCS7_SIGNER_INFO si; - - if (signed_seq2string_nid == -1) - signed_seq2string_nid= - OBJ_create("1.9.9999","OID_example","Our example OID"); - - si.auth_attr=sk; - /* To retrieve */ - so=PKCS7_get_signed_attribute(&si,signed_seq2string_nid); - if (so->type == V_ASN1_SEQUENCE) - { - ASN1_const_CTX c; - ASN1_STRING *s; - long length; - ASN1_OCTET_STRING *os1,*os2; - - s=so->value.sequence; - c.p=ASN1_STRING_data(s); - c.max=c.p+ASN1_STRING_length(s); - if (!asn1_GetSequence(&c,&length)) goto err; - /* Length is the length of the seqence */ - - c.q=c.p; - if ((os1=d2i_ASN1_OCTET_STRING(NULL,&c.p,c.slen)) == NULL) - goto err; - c.slen-=(c.p-c.q); - - c.q=c.p; - if ((os2=d2i_ASN1_OCTET_STRING(NULL,&c.p,c.slen)) == NULL) - goto err; - c.slen-=(c.p-c.q); - - if (!asn1_const_Finish(&c)) goto err; - *str1=malloc(os1->length+1); - *str2=malloc(os2->length+1); - memcpy(*str1,os1->data,os1->length); - memcpy(*str2,os2->data,os2->length); - (*str1)[os1->length]='\0'; - (*str2)[os2->length]='\0'; - ASN1_OCTET_STRING_free(os1); - ASN1_OCTET_STRING_free(os2); - return(1); - } -err: - return(0); - } - - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/example.h b/drivers/builtin_openssl2/crypto/pkcs7/example.h deleted file mode 100644 index 96167de188..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/example.h +++ /dev/null @@ -1,57 +0,0 @@ -/* ==================================================================== - * Copyright (c) 1999 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -int add_signed_time(PKCS7_SIGNER_INFO *si); -ASN1_UTCTIME *get_signed_time(PKCS7_SIGNER_INFO *si); -int get_signed_seq2string(PKCS7_SIGNER_INFO *si, char **str1, char **str2); diff --git a/drivers/builtin_openssl2/crypto/pkcs7/info.pem b/drivers/builtin_openssl2/crypto/pkcs7/info.pem deleted file mode 100644 index 989baf8709..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/info.pem +++ /dev/null @@ -1,57 +0,0 @@ -issuer :/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=DEMONSTRATION AND TESTING/CN=DEMO ZERO VALUE CA -subject:/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=SMIME 003/CN=Information/Email=info@cryptsoft.com -serial :047D - -Certificate: - Data: - Version: 3 (0x2) - Serial Number: 1149 (0x47d) - Signature Algorithm: md5withRSAEncryption - Issuer: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=DEMONSTRATION AND TESTING, CN=DEMO ZERO VALUE CA - Validity - Not Before: May 13 05:40:58 1998 GMT - Not After : May 12 05:40:58 2000 GMT - Subject: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=SMIME 003, CN=Information/Email=info@cryptsoft.com - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - Modulus: - 00:ad:e7:23:89:ee:0d:87:b7:9c:32:44:4b:95:81: - 73:dd:22:80:4b:2d:c5:60:b8:fe:1e:18:63:ef:dc: - 89:89:22:df:95:3c:7a:db:3d:9a:06:a8:08:d6:29: - fd:ef:41:09:91:ed:bc:ad:98:f9:f6:28:90:62:6f: - e7:e7:0c:4d:0b - Exponent: 65537 (0x10001) - X509v3 extensions: - Netscape Comment: - Generated with SSLeay - Signature Algorithm: md5withRSAEncryption - 52:15:ea:88:f4:f0:f9:0b:ef:ce:d5:f8:83:40:61:16:5e:55: - f9:ce:2d:d1:8b:31:5c:03:c6:2d:10:7c:61:d5:5c:0a:42:97: - d1:fd:65:b6:b6:84:a5:39:ec:46:ec:fc:e0:0d:d9:22:da:1b: - 50:74:ad:92:cb:4e:90:e5:fa:7d - ------BEGIN CERTIFICATE----- -MIICTDCCAfagAwIBAgICBH0wDQYJKoZIhvcNAQEEBQAwgZIxCzAJBgNVBAYTAkFV -MRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UE -ChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsTGURFTU9OU1RSQVRJT04gQU5E -IFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBWQUxVRSBDQTAeFw05ODA1MTMw -NTQwNThaFw0wMDA1MTIwNTQwNThaMIGeMQswCQYDVQQGEwJBVTETMBEGA1UECBMK -UXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMRIwEAYDVQQLEwlTTUlNRSAwMDMxFDASBgNVBAMTC0luZm9ybWF0 -aW9uMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGNyeXB0c29mdC5jb20wXDANBgkqhkiG -9w0BAQEFAANLADBIAkEArecjie4Nh7ecMkRLlYFz3SKASy3FYLj+Hhhj79yJiSLf -lTx62z2aBqgI1in970EJke28rZj59iiQYm/n5wxNCwIDAQABoygwJjAkBglghkgB -hvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EA -UhXqiPTw+QvvztX4g0BhFl5V+c4t0YsxXAPGLRB8YdVcCkKX0f1ltraEpTnsRuz8 -4A3ZItobUHStkstOkOX6fQ== ------END CERTIFICATE----- - ------BEGIN RSA PRIVATE KEY----- -MIIBOgIBAAJBAK3nI4nuDYe3nDJES5WBc90igEstxWC4/h4YY+/ciYki35U8ets9 -mgaoCNYp/e9BCZHtvK2Y+fYokGJv5+cMTQsCAwEAAQJBAIHpvXvqEcOEoDRRHuIG -fkcB4jPHcr9KE9TpxabH6xs9beN6OJnkePXAHwaz5MnUgSnbpOKq+cw8miKjXwe/ -zVECIQDVLwncT2lRmXarEYHzb+q/0uaSvKhWKKt3kJasLNTrAwIhANDUc/ghut29 -p3jJYjurzUKuG774/5eLjPLsxPPIZzNZAiA/10hSq41UnGqHLEUIS9m2/EeEZe7b -bm567dfRU9OnVQIgDo8ROrZXSchEGbaog5J5r/Fle83uO8l93R3GqVxKXZkCIFfk -IPD5PIYQAyyod3hyKKza7ZP4CGY4oOfZetbkSGGG ------END RSA PRIVATE KEY----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/infokey.pem b/drivers/builtin_openssl2/crypto/pkcs7/infokey.pem deleted file mode 100644 index 1e2acc954d..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/infokey.pem +++ /dev/null @@ -1,9 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIBOgIBAAJBAK3nI4nuDYe3nDJES5WBc90igEstxWC4/h4YY+/ciYki35U8ets9 -mgaoCNYp/e9BCZHtvK2Y+fYokGJv5+cMTQsCAwEAAQJBAIHpvXvqEcOEoDRRHuIG -fkcB4jPHcr9KE9TpxabH6xs9beN6OJnkePXAHwaz5MnUgSnbpOKq+cw8miKjXwe/ -zVECIQDVLwncT2lRmXarEYHzb+q/0uaSvKhWKKt3kJasLNTrAwIhANDUc/ghut29 -p3jJYjurzUKuG774/5eLjPLsxPPIZzNZAiA/10hSq41UnGqHLEUIS9m2/EeEZe7b -bm567dfRU9OnVQIgDo8ROrZXSchEGbaog5J5r/Fle83uO8l93R3GqVxKXZkCIFfk -IPD5PIYQAyyod3hyKKza7ZP4CGY4oOfZetbkSGGG ------END RSA PRIVATE KEY----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/p7/a1 b/drivers/builtin_openssl2/crypto/pkcs7/p7/a1 deleted file mode 100644 index 56ca943762..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/p7/a1 +++ /dev/null @@ -1,2 +0,0 @@ -j,H>__DzEL VJ觬E3Yx%_k -3)DLSc8% M \ No newline at end of file diff --git a/drivers/builtin_openssl2/crypto/pkcs7/p7/a2 b/drivers/builtin_openssl2/crypto/pkcs7/p7/a2 deleted file mode 100644 index 23d8fb5e93..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/p7/a2 +++ /dev/null @@ -1 +0,0 @@ -k~@a,NM͹ Uo_Bqrm?٠t?tρId2 \ No newline at end of file diff --git a/drivers/builtin_openssl2/crypto/pkcs7/p7/cert.p7c b/drivers/builtin_openssl2/crypto/pkcs7/p7/cert.p7c deleted file mode 100644 index 2b75ec05f7..0000000000 Binary files a/drivers/builtin_openssl2/crypto/pkcs7/p7/cert.p7c and /dev/null differ diff --git a/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7m b/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7m deleted file mode 100644 index 2b6e6f82ba..0000000000 Binary files a/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7m and /dev/null differ diff --git a/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7s b/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7s deleted file mode 100644 index 2b5d4fb0e3..0000000000 Binary files a/drivers/builtin_openssl2/crypto/pkcs7/p7/smime.p7s and /dev/null differ diff --git a/drivers/builtin_openssl2/crypto/pkcs7/pk7_smime.c b/drivers/builtin_openssl2/crypto/pkcs7/pk7_smime.c index dbd4100c8d..dc9b484078 100644 --- a/drivers/builtin_openssl2/crypto/pkcs7/pk7_smime.c +++ b/drivers/builtin_openssl2/crypto/pkcs7/pk7_smime.c @@ -256,8 +256,8 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, X509_STORE_CTX cert_ctx; char buf[4096]; int i, j = 0, k, ret = 0; - BIO *p7bio; - BIO *tmpin, *tmpout; + BIO *p7bio = NULL; + BIO *tmpin = NULL, *tmpout = NULL; if (!p7) { PKCS7err(PKCS7_F_PKCS7_VERIFY, PKCS7_R_INVALID_NULL_POINTER); @@ -277,7 +277,18 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, #if 0 /* * NB: this test commented out because some versions of Netscape - * illegally include zero length content when signing data. + * illegally include zero length content when signing data. Also + * Microsoft Authenticode includes a SpcIndirectDataContent data + * structure which describes the content to be protected by the + * signature, rather than directly embedding that content. So + * Authenticode implementations are also expected to use + * PKCS7_verify() with explicit external data, on non-detached + * PKCS#7 signatures. + * + * In OpenSSL 1.1 a new flag PKCS7_NO_DUAL_CONTENT has been + * introduced to disable this sanity check. For the 1.0.2 branch + * this change is not acceptable, so the check remains completely + * commented out (as it has been for a long time). */ /* Check for data and content: two sets of data */ @@ -295,7 +306,6 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, } signers = PKCS7_get0_signers(p7, certs, flags); - if (!signers) return 0; @@ -308,14 +318,12 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, if (!X509_STORE_CTX_init(&cert_ctx, store, signer, p7->d.sign->cert)) { PKCS7err(PKCS7_F_PKCS7_VERIFY, ERR_R_X509_LIB); - sk_X509_free(signers); - return 0; + goto err; } X509_STORE_CTX_set_default(&cert_ctx, "smime_sign"); } else if (!X509_STORE_CTX_init(&cert_ctx, store, signer, NULL)) { PKCS7err(PKCS7_F_PKCS7_VERIFY, ERR_R_X509_LIB); - sk_X509_free(signers); - return 0; + goto err; } if (!(flags & PKCS7_NOCRL)) X509_STORE_CTX_set0_crls(&cert_ctx, p7->d.sign->crl); @@ -328,8 +336,7 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, PKCS7_R_CERTIFICATE_VERIFY_ERROR); ERR_add_error_data(2, "Verify error:", X509_verify_cert_error_string(j)); - sk_X509_free(signers); - return 0; + goto err; } /* Check for revocation status here */ } @@ -348,7 +355,7 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, tmpin = BIO_new_mem_buf(ptr, len); if (tmpin == NULL) { PKCS7err(PKCS7_F_PKCS7_VERIFY, ERR_R_MALLOC_FAILURE); - return 0; + goto err; } } else tmpin = indata; @@ -398,15 +405,12 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, ret = 1; err: - if (tmpin == indata) { if (indata) BIO_pop(p7bio); } BIO_free_all(p7bio); - sk_X509_free(signers); - return ret; } diff --git a/drivers/builtin_openssl2/crypto/pkcs7/server.pem b/drivers/builtin_openssl2/crypto/pkcs7/server.pem deleted file mode 100644 index d0fc265f04..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/server.pem +++ /dev/null @@ -1,52 +0,0 @@ -subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert -issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA ------BEGIN CERTIFICATE----- -MIID5zCCAs+gAwIBAgIJALnu1NlVpZ6zMA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT -VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt -ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZDELMAkG -A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU -RVNUSU5HIFBVUlBPU0VTIE9OTFkxGTAXBgNVBAMMEFRlc3QgU2VydmVyIENlcnQw -ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDzhPOSNtyyRspmeuUpxfNJ -KCLTuf7g3uQ4zu4iHOmRO5TQci+HhVlLZrHF9XqFXcIP0y4pWDbMSGuiorUmzmfi -R7bfSdI/+qIQt8KXRH6HNG1t8ou0VSvWId5TS5Dq/er5ODUr9OaaDva7EquHIcMv -vPQGuI+OEAcnleVCy9HVEIySrO4P3CNIicnGkwwiAud05yUAq/gPXBC1hTtmlPD7 -TVcGVSEiJdvzqqlgv02qedGrkki6GY4S7GjZxrrf7Foc2EP+51LJzwLQx3/JfrCU -41NEWAsu/Sl0tQabXESN+zJ1pDqoZ3uHMgpQjeGiE0olr+YcsSW/tJmiU9OiAr8R -AgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJYIZI -AYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQW -BBSCvM8AABPR9zklmifnr9LvIBturDAfBgNVHSMEGDAWgBQ2w2yI55X+sL3szj49 -hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEAqb1NV0B0/pbpK9Z4/bNjzPQLTRLK -WnSNm/Jh5v0GEUOE/Beg7GNjNrmeNmqxAlpqWz9qoeoFZax+QBpIZYjROU3TS3fp -yLsrnlr0CDQ5R7kCCDGa8dkXxemmpZZLbUCpW2Uoy8sAA4JjN9OtsZY7dvUXFgJ7 -vVNTRnI01ghknbtD+2SxSQd3CWF6QhcRMAzZJ1z1cbbwGDDzfvGFPzJ+Sq+zEPds -xoVLLSetCiBc+40ZcDS5dV98h9XD7JMTQfxzA7mNGv73JoZJA6nFgj+ADSlJsY/t -JBv+z1iQRueoh9Qeee+ZbRifPouCB8FDx+AltvHTANdAq0t/K3o+pplMVA== ------END CERTIFICATE----- ------BEGIN RSA PRIVATE KEY----- -MIIEpAIBAAKCAQEA84TzkjbcskbKZnrlKcXzSSgi07n+4N7kOM7uIhzpkTuU0HIv -h4VZS2axxfV6hV3CD9MuKVg2zEhroqK1Js5n4ke230nSP/qiELfCl0R+hzRtbfKL -tFUr1iHeU0uQ6v3q+Tg1K/Tmmg72uxKrhyHDL7z0BriPjhAHJ5XlQsvR1RCMkqzu -D9wjSInJxpMMIgLndOclAKv4D1wQtYU7ZpTw+01XBlUhIiXb86qpYL9NqnnRq5JI -uhmOEuxo2ca63+xaHNhD/udSyc8C0Md/yX6wlONTRFgLLv0pdLUGm1xEjfsydaQ6 -qGd7hzIKUI3hohNKJa/mHLElv7SZolPTogK/EQIDAQABAoIBAADq9FwNtuE5IRQn -zGtO4q7Y5uCzZ8GDNYr9RKp+P2cbuWDbvVAecYq2NV9QoIiWJOAYZKklOvekIju3 -r0UZLA0PRiIrTg6NrESx3JrjWDK8QNlUO7CPTZ39/K+FrmMkV9lem9yxjJjyC34D -AQB+YRTx+l14HppjdxNwHjAVQpIx/uO2F5xAMuk32+3K+pq9CZUtrofe1q4Agj9R -5s8mSy9pbRo9kW9wl5xdEotz1LivFOEiqPUJTUq5J5PeMKao3vdK726XI4Z455Nm -W2/MA0YV0ug2FYinHcZdvKM6dimH8GLfa3X8xKRfzjGjTiMSwsdjgMa4awY3tEHH -674jhAECgYEA/zqMrc0zsbNk83sjgaYIug5kzEpN4ic020rSZsmQxSCerJTgNhmg -utKSCt0Re09Jt3LqG48msahX8ycqDsHNvlEGPQSbMu9IYeO3Wr3fAm75GEtFWePY -BhM73I7gkRt4s8bUiUepMG/wY45c5tRF23xi8foReHFFe9MDzh8fJFECgYEA9EFX -4qAik1pOJGNei9BMwmx0I0gfVEIgu0tzeVqT45vcxbxr7RkTEaDoAG6PlbWP6D9a -WQNLp4gsgRM90ZXOJ4up5DsAWDluvaF4/omabMA+MJJ5kGZ0gCj5rbZbKqUws7x8 -bp+6iBfUPJUbcqNqFmi/08Yt7vrDnMnyMw2A/sECgYEAiiuRMxnuzVm34hQcsbhH -6ymVqf7j0PW2qK0F4H1ocT9qhzWFd+RB3kHWrCjnqODQoI6GbGr/4JepHUpre1ex -4UEN5oSS3G0ru0rC3U4C59dZ5KwDHFm7ffZ1pr52ljfQDUsrjjIMRtuiwNK2OoRa -WSsqiaL+SDzSB+nBmpnAizECgYBdt/y6rerWUx4MhDwwtTnel7JwHyo2MDFS6/5g -n8qC2Lj6/fMDRE22w+CA2esp7EJNQJGv+b27iFpbJEDh+/Lf5YzIT4MwVskQ5bYB -JFcmRxUVmf4e09D7o705U/DjCgMH09iCsbLmqQ38ONIRSHZaJtMDtNTHD1yi+jF+ -OT43gQKBgQC/2OHZoko6iRlNOAQ/tMVFNq7fL81GivoQ9F1U0Qr+DH3ZfaH8eIkX -xT0ToMPJUzWAn8pZv0snA0um6SIgvkCuxO84OkANCVbttzXImIsL7pFzfcwV/ERK -UM6j0ZuSMFOCr/lGPAoOQU0fskidGEHi1/kW+suSr28TqsyYZpwBDQ== ------END RSA PRIVATE KEY----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/sign.c b/drivers/builtin_openssl2/crypto/pkcs7/sign.c deleted file mode 100644 index 22f53fb601..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/sign.c +++ /dev/null @@ -1,160 +0,0 @@ -/* demos/sign/sign.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * sign-it.cpp - Simple test app using SSLeay envelopes to sign data - * 29.9.1996, Sampo Kellomaki - */ - -/* converted to C - eay :-) */ - -/* - * reformated a bit and converted to use the more common functions: this was - * initially written at the dawn of time :-) - Steve. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -int main() -{ - int err; - int sig_len; - unsigned char sig_buf[4096]; - static char certfile[] = "cert.pem"; - static char keyfile[] = "key.pem"; - static char data[] = "I owe you..."; - EVP_MD_CTX md_ctx; - EVP_PKEY *pkey; - FILE *fp; - X509 *x509; - - /* - * Just load the crypto library error strings, SSL_load_error_strings() - * loads the crypto AND the SSL ones - */ - /* SSL_load_error_strings(); */ - ERR_load_crypto_strings(); - - /* Read private key */ - - fp = fopen(keyfile, "r"); - if (fp == NULL) - exit(1); - pkey = PEM_read_PrivateKey(fp, NULL, NULL, NULL); - fclose(fp); - - if (pkey == NULL) { - ERR_print_errors_fp(stderr); - exit(1); - } - - /* Do the signature */ - - EVP_SignInit(&md_ctx, EVP_sha1()); - EVP_SignUpdate(&md_ctx, data, strlen(data)); - sig_len = sizeof(sig_buf); - err = EVP_SignFinal(&md_ctx, sig_buf, &sig_len, pkey); - - if (err != 1) { - ERR_print_errors_fp(stderr); - exit(1); - } - - EVP_PKEY_free(pkey); - - /* Read public key */ - - fp = fopen(certfile, "r"); - if (fp == NULL) - exit(1); - x509 = PEM_read_X509(fp, NULL, NULL, NULL); - fclose(fp); - - if (x509 == NULL) { - ERR_print_errors_fp(stderr); - exit(1); - } - - /* Get public key - eay */ - pkey = X509_get_pubkey(x509); - if (pkey == NULL) { - ERR_print_errors_fp(stderr); - exit(1); - } - - /* Verify the signature */ - - EVP_VerifyInit(&md_ctx, EVP_sha1()); - EVP_VerifyUpdate(&md_ctx, data, strlen((char *)data)); - err = EVP_VerifyFinal(&md_ctx, sig_buf, sig_len, pkey); - EVP_PKEY_free(pkey); - - if (err != 1) { - ERR_print_errors_fp(stderr); - exit(1); - } - printf("Signature Verified Ok.\n"); - return (0); -} diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/3des.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/3des.pem deleted file mode 100644 index b2b5081a10..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/3des.pem +++ /dev/null @@ -1,16 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHmMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR+MA0GCSqGSIb3DQEBAQUABEC2vXI1xQDW6lUHM3zQ -/9uBEBOO5A3TtkrklAXq7v01gsIC21t52qSk36REXY+slhNZ0OQ349tgkTsoETHFLoEwMIHw -AgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMI -QnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR9MA0G -CSqGSIb3DQEBAQUABEB8ujxbabxXUYJhopuDm3oDq4JNqX6Io4p3ro+ShqfIndsXTZ1v5a2N -WtLLCWlHn/habjBwZ/DgQgcKASbZ7QxNMIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIA -oAQIbsL5v1wX98KggAQoAaJ4WHm68fXY1WE5OIjfVBIDpO1K+i8dmKhjnAjrjoyZ9Bwc8rDL -lgQg4CXb805h5xl+GfvSwUaHJayte1m2mcOhs3J2YyqbQ+MEIMIiJQccmhO3oDKm36CFvYR8 -5PjpclVcZyX2ngbwPFMnBAgy0clOAE6UKAAAAAAAAAAAAAA= ------END PKCS7----- - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/3dess.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/3dess.pem deleted file mode 100644 index 23f013516a..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/3dess.pem +++ /dev/null @@ -1,32 +0,0 @@ ------BEGIN PKCS7----- -MIIGHgYJKoZIhvcNAQcCoIIGDzCCBgsCAQExCzAJBgUrDgMCGgUAMAsGCSqGSIb3DQEHAaCC -BGswggJTMIIB/aADAgECAgIEfjANBgkqhkiG9w0BAQQFADCBkjELMAkGA1UEBhMCQVUxEzAR -BgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNv -ZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UE -AxMSREVNTyBaRVJPIFZBTFVFIENBMB4XDTk4MDUxMzA2MjY1NloXDTAwMDUxMjA2MjY1Nlow -gaUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFu -ZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxEjAQBgNVBAsTCVNNSU1FIDAwMzEZMBcG -A1UEAxMQQW5nZWxhIHZhbiBMZWVudDEjMCEGCSqGSIb3DQEJARYUYW5nZWxhQGNyeXB0c29m -dC5jb20wXDANBgkqhkiG9w0BAQEFAANLADBIAkEAuC3+7dAb2LhuO7gt2cTM8vsNjhG5JfDh -hX1Vl/wVGbKEEj0MA6vWEolvefQlxB+EzwCtR0YZ7eEC/T/4JoCyeQIDAQABoygwJjAkBglg -hkgBhvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EAUnSP -igs6TMFISTjw8cBtJYb98czgAVkVFjKyJQwYMH8FbDnCyx6NocM555nsyDstaw8fKR11Khds -syd3ikkrhDCCAhAwggG6AgEDMA0GCSqGSIb3DQEBBAUAMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0EwHhcNOTgwMzAzMDc0MTMyWhcNMDgwMjI5MDc0MTMyWjCB -kjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5l -MRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBB -TkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENBMFwwDQYJKoZIhvcNAQEB -BQADSwAwSAJBAL+0E2fLej3FSCwe2A2iRnMuC3z12qHIp6Ky1wo2zZcxft7AI+RfkrWrSGtf -mfzBEuPrLdfulncC5Y1pNcM8RTUCAwEAATANBgkqhkiG9w0BAQQFAANBAGSbLMphL6F5pp3s -8o0Xyh86FHFdpVOwYx09ELLkuG17V/P9pgIc0Eo/gDMbN+KT3IdgECf8S//pCRA6RrNjcXIx -ggF7MIIBdwIBATCBmTCBkjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAP -BgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZ -REVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENB -AgIEfjAJBgUrDgMCGgUAoHowGAYJKoZIhvcNAQkDMQsGCSqGSIb3DQEHATAbBgkqhkiG9w0B -CQ8xDjAMMAoGCCqGSIb3DQMHMBwGCSqGSIb3DQEJBTEPFw05ODA1MTQwMzM5MzdaMCMGCSqG -SIb3DQEJBDEWBBQstNMnSV26ba8PapQEDhO21yNFrjANBgkqhkiG9w0BAQEFAARAW9Xb9YXv -BfcNkutgFX9Gr8iXhBVsNtGEVrjrpkQwpKa7jHI8SjAlLhk/4RFwDHf+ISB9Np3Z1WDWnLcA -9CWR6g== ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/c.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/c.pem deleted file mode 100644 index a4b55e321a..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/c.pem +++ /dev/null @@ -1,48 +0,0 @@ -issuer :/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=DEMONSTRATION AND TESTING/CN=DEMO ZERO VALUE CA -subject:/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=SMIME 003/CN=Information/Email=info@cryptsoft.com -serial :047D - -Certificate: - Data: - Version: 3 (0x2) - Serial Number: 1149 (0x47d) - Signature Algorithm: md5withRSAEncryption - Issuer: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=DEMONSTRATION AND TESTING, CN=DEMO ZERO VALUE CA - Validity - Not Before: May 13 05:40:58 1998 GMT - Not After : May 12 05:40:58 2000 GMT - Subject: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=SMIME 003, CN=Information/Email=info@cryptsoft.com - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - Modulus: - 00:ad:e7:23:89:ee:0d:87:b7:9c:32:44:4b:95:81: - 73:dd:22:80:4b:2d:c5:60:b8:fe:1e:18:63:ef:dc: - 89:89:22:df:95:3c:7a:db:3d:9a:06:a8:08:d6:29: - fd:ef:41:09:91:ed:bc:ad:98:f9:f6:28:90:62:6f: - e7:e7:0c:4d:0b - Exponent: 65537 (0x10001) - X509v3 extensions: - Netscape Comment: - Generated with SSLeay - Signature Algorithm: md5withRSAEncryption - 52:15:ea:88:f4:f0:f9:0b:ef:ce:d5:f8:83:40:61:16:5e:55: - f9:ce:2d:d1:8b:31:5c:03:c6:2d:10:7c:61:d5:5c:0a:42:97: - d1:fd:65:b6:b6:84:a5:39:ec:46:ec:fc:e0:0d:d9:22:da:1b: - 50:74:ad:92:cb:4e:90:e5:fa:7d - ------BEGIN CERTIFICATE----- -MIICTDCCAfagAwIBAgICBH0wDQYJKoZIhvcNAQEEBQAwgZIxCzAJBgNVBAYTAkFV -MRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UE -ChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsTGURFTU9OU1RSQVRJT04gQU5E -IFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBWQUxVRSBDQTAeFw05ODA1MTMw -NTQwNThaFw0wMDA1MTIwNTQwNThaMIGeMQswCQYDVQQGEwJBVTETMBEGA1UECBMK -UXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMRIwEAYDVQQLEwlTTUlNRSAwMDMxFDASBgNVBAMTC0luZm9ybWF0 -aW9uMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGNyeXB0c29mdC5jb20wXDANBgkqhkiG -9w0BAQEFAANLADBIAkEArecjie4Nh7ecMkRLlYFz3SKASy3FYLj+Hhhj79yJiSLf -lTx62z2aBqgI1in970EJke28rZj59iiQYm/n5wxNCwIDAQABoygwJjAkBglghkgB -hvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EA -UhXqiPTw+QvvztX4g0BhFl5V+c4t0YsxXAPGLRB8YdVcCkKX0f1ltraEpTnsRuz8 -4A3ZItobUHStkstOkOX6fQ== ------END CERTIFICATE----- - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/ff b/drivers/builtin_openssl2/crypto/pkcs7/t/ff deleted file mode 100644 index 23f013516a..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/ff +++ /dev/null @@ -1,32 +0,0 @@ ------BEGIN PKCS7----- -MIIGHgYJKoZIhvcNAQcCoIIGDzCCBgsCAQExCzAJBgUrDgMCGgUAMAsGCSqGSIb3DQEHAaCC -BGswggJTMIIB/aADAgECAgIEfjANBgkqhkiG9w0BAQQFADCBkjELMAkGA1UEBhMCQVUxEzAR -BgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNv -ZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UE -AxMSREVNTyBaRVJPIFZBTFVFIENBMB4XDTk4MDUxMzA2MjY1NloXDTAwMDUxMjA2MjY1Nlow -gaUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFu -ZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxEjAQBgNVBAsTCVNNSU1FIDAwMzEZMBcG -A1UEAxMQQW5nZWxhIHZhbiBMZWVudDEjMCEGCSqGSIb3DQEJARYUYW5nZWxhQGNyeXB0c29m -dC5jb20wXDANBgkqhkiG9w0BAQEFAANLADBIAkEAuC3+7dAb2LhuO7gt2cTM8vsNjhG5JfDh -hX1Vl/wVGbKEEj0MA6vWEolvefQlxB+EzwCtR0YZ7eEC/T/4JoCyeQIDAQABoygwJjAkBglg -hkgBhvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EAUnSP -igs6TMFISTjw8cBtJYb98czgAVkVFjKyJQwYMH8FbDnCyx6NocM555nsyDstaw8fKR11Khds -syd3ikkrhDCCAhAwggG6AgEDMA0GCSqGSIb3DQEBBAUAMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0EwHhcNOTgwMzAzMDc0MTMyWhcNMDgwMjI5MDc0MTMyWjCB -kjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5l -MRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBB -TkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENBMFwwDQYJKoZIhvcNAQEB -BQADSwAwSAJBAL+0E2fLej3FSCwe2A2iRnMuC3z12qHIp6Ky1wo2zZcxft7AI+RfkrWrSGtf -mfzBEuPrLdfulncC5Y1pNcM8RTUCAwEAATANBgkqhkiG9w0BAQQFAANBAGSbLMphL6F5pp3s -8o0Xyh86FHFdpVOwYx09ELLkuG17V/P9pgIc0Eo/gDMbN+KT3IdgECf8S//pCRA6RrNjcXIx -ggF7MIIBdwIBATCBmTCBkjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAP -BgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZ -REVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENB -AgIEfjAJBgUrDgMCGgUAoHowGAYJKoZIhvcNAQkDMQsGCSqGSIb3DQEHATAbBgkqhkiG9w0B -CQ8xDjAMMAoGCCqGSIb3DQMHMBwGCSqGSIb3DQEJBTEPFw05ODA1MTQwMzM5MzdaMCMGCSqG -SIb3DQEJBDEWBBQstNMnSV26ba8PapQEDhO21yNFrjANBgkqhkiG9w0BAQEFAARAW9Xb9YXv -BfcNkutgFX9Gr8iXhBVsNtGEVrjrpkQwpKa7jHI8SjAlLhk/4RFwDHf+ISB9Np3Z1WDWnLcA -9CWR6g== ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e deleted file mode 100644 index aafae69fc9..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e +++ /dev/null @@ -1,20 +0,0 @@ - -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHCMIHMAgEAMHYwYjERMA8GA1UEBxMISW50ZXJuZXQxFzAV -BgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJpU2lnbiBDbGFzcyAxIENBIC0gSW5k -aXZpZHVhbCBTdWJzY3JpYmVyAhBgQJiC3qfbCbjdj5INYLnKMA0GCSqGSIb3DQEBAQUABECMzu8y -wQ/qZbO8cAGMRBF+mPruv3+Dvb9aWNZ2k8njUgqF6mcdhVB2MkGcsG3memRXJBixvMYWVkU3qK4Z -VuKsMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UE -BxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgRuMA0GCSqG -SIb3DQEBAQUABEBcWwYFHJbJGhiztt7lzue3Lc9CH5WAbyR+2BZ3uv+JxZfRs1PuaWPOwRa0Vgs3 -YwSJoRfxQj2Gk0wFqG1qt6d1MIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIAoAQI8vRlP/Nx -2iSggASCAZhR5srxyspy7DfomRJ9ff8eMCtaNwEoEx7G25PZRonC57hBvGoScLtEPU3Wp9FEbPN7 -oJESeC+AqMTyTLNy8aQsyC5s53E9UkoIvg62ekYZBbXZqXsrxx4PhiiX3NH8GVh42phB0Chjw0nK -HZeRDmxGY3Cmk+J+l0uVKxbNIfJIKOguLBnhqmnKH/PrnzDt591u0ULy2aTLqRm+4/1Yat/QPb6J -eoKGwNPBbS9ogBdrCNCp9ZFg3Xar2AtQHzyTQIfYeH3SRQUpKmRm5U5o9p5emgEdT+ZfJm/J4tSH -OmbgAFsbHQakA4MBZ4J5qfDJhOA2g5lWk1hIeu5Dn/AaLRZd0yz3oY0Ieo/erPWx/bCqtBzYbMe9 -qSFTedKlbc9EGe3opOTdBZVzK8KH3w3zsy5luxKdOUG59YYb5F1IZiWGiDyuo/HuacX+griu5LeD -bEzOtZnko+TZXvWIko30fD79j3T4MRRhWXbgj2HKza+4vJ0mzcC/1+GPsJjAEAA/JgIEDU4w6/DI -/HQHhLAO3G+9xKD7MvmrzkoAAAAAAAAAAAAA - - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e.pem deleted file mode 100644 index a2a5e24e74..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-e.pem +++ /dev/null @@ -1,22 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIIDkAIBADGCAcIwgcwCAQAwdjBiMREwDwYDVQQHEwhJ -bnRlcm5ldDEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xNDAyBgNVBAsTK1ZlcmlT -aWduIENsYXNzIDEgQ0EgLSBJbmRpdmlkdWFsIFN1YnNjcmliZXICEGBAmILep9sJ -uN2Pkg1gucowDQYJKoZIhvcNAQEBBQAEQIzO7zLBD+pls7xwAYxEEX6Y+u6/f4O9 -v1pY1naTyeNSCoXqZx2FUHYyQZywbeZ6ZFckGLG8xhZWRTeorhlW4qwwgfACAQAw -gZkwgZIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsT -GURFTU9OU1RSQVRJT04gQU5EIFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBW -QUxVRSBDQQICBG4wDQYJKoZIhvcNAQEBBQAEQFxbBgUclskaGLO23uXO57ctz0If -lYBvJH7YFne6/4nFl9GzU+5pY87BFrRWCzdjBImhF/FCPYaTTAWobWq3p3UwggHD -BgkqhkiG9w0BBwEwGgYIKoZIhvcNAwIwDgICAKAECPL0ZT/zcdokgIIBmFHmyvHK -ynLsN+iZEn19/x4wK1o3ASgTHsbbk9lGicLnuEG8ahJwu0Q9Tdan0URs83ugkRJ4 -L4CoxPJMs3LxpCzILmzncT1SSgi+DrZ6RhkFtdmpeyvHHg+GKJfc0fwZWHjamEHQ -KGPDScodl5EObEZjcKaT4n6XS5UrFs0h8kgo6C4sGeGqacof8+ufMO3n3W7RQvLZ -pMupGb7j/Vhq39A9vol6gobA08FtL2iAF2sI0Kn1kWDddqvYC1AfPJNAh9h4fdJF -BSkqZGblTmj2nl6aAR1P5l8mb8ni1Ic6ZuAAWxsdBqQDgwFngnmp8MmE4DaDmVaT -WEh67kOf8BotFl3TLPehjQh6j96s9bH9sKq0HNhsx72pIVN50qVtz0QZ7eik5N0F -lXMrwoffDfOzLmW7Ep05Qbn1hhvkXUhmJYaIPK6j8e5pxf6CuK7kt4NsTM61meSj -5Nle9YiSjfR8Pv2PdPgxFGFZduCPYcrNr7i8nSbNwL/X4Y+wmMAQAD8mAgQNTjDr -8Mj8dAeEsA7cb73EoPsy+avOSgAAAAA= ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01 b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01 deleted file mode 100644 index 2c93ab6462..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01 +++ /dev/null @@ -1,62 +0,0 @@ - -MIAGCSqGSIb3DQEHA6CAMIACAQAxgfMwgfACAQAwgZkwgZIxCzAJBgNVBAYTAkFVMRMwEQYD -VQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRzb2Z0 -IFB0eSBMdGQxIjAgBgNVBAsTGURFTU9OU1RSQVRJT04gQU5EIFRFU1RJTkcxGzAZBgNVBAMT -EkRFTU8gWkVSTyBWQUxVRSBDQQICBG4wDQYJKoZIhvcNAQEBBQAEQKvMaW8xh6oF/X+CJivz -IZV7yHxlp4O3NHQtWG0A8MOZB+CtKlU7/6g5e/a9Du/TOqxRMqtYRp63pa2Q/mM4IYMwgAYJ -KoZIhvcNAQcBMBoGCCqGSIb3DQMCMA4CAgCgBAifz6RvzOPYlKCABIGwxtGA/FLBBRs1wbBP -gDCbSG0yCwjJNsFg89/k6xuXo8c5YTwsw8+XlIVq03navpew6XxxzY090rD2OJ0t6HA6GqrI -pd8WiSh/Atqn0yfLFmkLqgIAPRfzxUxqUocxLpQsLIFp2YNUGE+yps+UZmIjw/WHfdqrcWTm -STSvKuy3UkIJZCkGDBpTvqk4BFaHh4oTXEpgpNY+GKxjf9TDN9GQPqQZR7sgQki4t2g4/Saq -Kl4EMISgluk6swdND0tiHY7v5d6YR29ePCl2/STJ98eJpWkEEC22GNNvOy7ru/Rv2He4MgQg -optd7sk9MMd9xhJppg7CcH/yDx//HrtgpOcWmn6VxpgECFqon4uXkQtIBIH4PaNclFn7/hLx -Pw2VmBGaC0SYF3U1jyN96EBxdjqy8Aa6ByMXYDW5BcfqniD5mYXfw+b81lh1kutxaPaV4YJ9 -ZlRUW752N7VHo/fG0/fukoe5W9a8kIhgLpygllb/GP4oSF4wM6n1/OgRzZj2IWFiobKO4d/t -Mnh+C+PoEVAuFZcxQwi9GqvsK5OoIjVwNx0XcVSOl1TTYS9SwC7ugMBCab73JiruC24pL78Y -M+NaIpIQ3On4DokJA2ZHtjBjZIxF4tKA144RvFN6pBd6TVE5XM6KD/Vh9bjSmujtEAfdQ3Te -dvKJsbZuu0stErbvWcRy11I328l557EECAJT7d44OJ3rBBBj6bnnx6dDU2SRqp2CEoQaBAhK -RBuyhNxkygQIOY9/NhwqAJAECOvX0Zd0DqgoBAjobPpMHhVV3gQQWLU2vEoZ51BwzxdzCmxO -wwQI4oKfudaNqoAESKzBNAqv5kGumHOlMKsRfrs7jZCcSaOuEj97pYx08FLEgF23cav39MOQ -NUEM1dNU+EYslL4o3RoSHRjUgPU+2t9c0prS9A/bPARIEOP94PynaTNxwHi3VTK7SzuQmgzA -4n942E9joSiqsQPlsKAb3sPUaLC3SuUxSjNBgfpvD0bmrA/5h+WZoYXvIogFpwjkSmnFBEie -0lh5Ov1aRrvCw5/j3Q/W/4ZtN5U+aeVBJMtA8n0Mxd5kPxHbNVh4oGprZ6wEegV8ht3voyZa -mZ5Cyxc8ffMYnM/JJI6/oEYEUEMyyiS5FnYyvxKzfMtyn2lZ2st9nZGNNgMc9N62r5HgNbdD -FHuRdKKzV+8kQfuMc3mOPpK1t9TFY+QgrxiB5p6S7VooI97YtP3PbfknszCEBEh4PdXYbbaR -3AacN3Q5kYYmWsq3WW6xgrg0mmEGosGvwSQxBBuiXZrxScCa4ivEq05UZwyShePvKduOvnUE -2zDO6IXFLZxhTZAESEm9/FovLgGAiJ7iMGmYvsISLJScwG4n+wrSaQNQXizs9N3ykys54wBN -d/+BQ4F7pncHhDQ2Dyt5MekB8Y8iNOocUTFCu524vQRIaWCXmXP3vU7D21dp0XnAMzRQJ565 -JV3aHRoY7XDa4LePa7PP9ywyafOE5yCW7ndqx3J+2JhTDvSFsW8/q3H3iyeFhykuJVS6BFDK -6CmKbnyyjOfE2iLGJmTFa905V2KrVDCmlEu/xyGMs80yTyZC+ySzM83FMVvLEQmSzcTNUZVp -DfA1kNXbXkPouBXXT6g8r8JCRljaKKABmgRIlMheOJQRUUU4cgvhMreXPayhq5Ao4VMSCkA5 -hYRCBczm4Di/MMohF0SxIsdRY6gY9CPnrBXAsY6h1RbR7Tw0iQZmeXi52DCiBEj0by+SYMAa -9z0CReIzl8JLL6EVIFz8kFxlkGWjr4dnOzhhPOq/mCpp0WxbavDfdhE87MdXJZBnLwoT62QG -955HlAoEQBOGJbcESCgd5XSirZ9Y3AbCfuKOqoMBvEUGn+w/pMaqnGvnr5FZhuBDKrhRXqtx -QsxA//drGUxsrZOuSL/0+fbvo7n2h1Z8Ny86jOvVZAQIAjw2l1Yc5RAESNc9i3I8pKEOVQf/ -UBczJ0NR9aTEF80dRg2lpXwD0ho4N0AvSiVbgxC7cPZHQwIqvq9LHRUs/4n+Vu3SVYU3cAxo -lUTiCGUSlARIF+TD57SI5+RI+MNtnD9rs4E1ml51YoHGWFj3UPriDmY0FKEwIgqtMXMY3fZ9 -Kq8d83bjDzxwbDX7WwR7KbSeJWT42pCz7kM+BEjjPsOnZHuusXT3x2rrsBnYtYsbt98mSFiS -KzTtFmXfkOBbCQdit1P76QnYJ1aXMGs6zP6GypQTadK/zYWvlm38QkVwueaJ0woESKW2pqKA -70h2UMDHOrpepU1lj0YMzmotDHSTU3L909VvUMNg9uqfrQ6mSkb9j5Tl8oF2otOw5EzA1Yda -KPmgsv62RWLYl80wXQRQwG0e/mgG75jp9lOhJdVXqcYbQpS9viwVaVkwH+69mu/bQI4gjoEs -UYX6O71Re2z+cYhcm9UrK+DXuSFBXQOIlAFxKMW4B0apd6fU84FsZLMESOorXE5OE0A2B2ji -J8QI0Exk4hUvWrMNJfUZwFyS7E05xV9ORuX1xmsKqkT4tVR5Nqln4vhvAY860VBoloz0CDkd -8seSBEjeMgRI9FvpYuflIeHg9urkwp6N+1f0DrJJhJY9ZQ0HTQhziJmIfvbEjNqCl7hEC28+ -F8I5tuViLgfSwcFFCvnS6WFoN4X6QdFdqMCbBEjdlI1c+IQGA/IuTDMJYCuQ/v+8BG5ZeWVH -icPZmXfRat9eFK1dGKAJef6+Tf9HPuDjSpDyffrifsp7Dc34lmm7GN1+ON3ZMtwEUNm6epb8 -1RKWjoI7jIKUV/M2p/0eeGSqs4b06KF/VR6dBwsJVL5DpnTsp3MV4j/CAOlRdSPZ5++tsKbM -aplk+ceqQtpEFz1MYTtVV4+rlrWaBEA1okJyNZ5/tNOwM7B+XfOZ0xw+uyVi9v4byTZM2Qds -J+d3YGYLAugTGHISLqQEerD8/gGK+/SL06b2gNedXPHtBAiBKX+Mdy3wFQQIqE9gVgvrFNUE -CKKoTFoMGqnPBAjDPgLCklNfrwQI3Ek1vSq68w8ECBodu2FOZJVkBAgzwjfSr2N9WQQQTCoQ -KkAbrS9tnjXn1I3+ZwQIrPx3eINo/YUECIeYWCFskxlYBAiDUdvZXwD3vgQIkEyZbbZWbUUE -CH4+odl1Isk3BBj68fkqJ0fKJRWVLWuW/O3VE4BOPKwFlaIECFseVTdDUho8BAj+cOKvV2WA -hgQgaXr+wwq+ItblG0Qxz8IVUXX6PV2mIdHwz4SCCvnCsaIECJhBYxdfLI/XBCDswamPn9MR -yXi2HVQBineV+GtWVkIoZ2dCLFB9mQRMoAQI0nUR5a5AOJoECA+AunKlAlx8BAi5RtFeF4g1 -FQQIz/ie+16LlQcECOmNuVg5DXjMBAjH2nkfpXZgWwQIVdLuO/+kuHAECO/5rEHmyI9vBBD4 -16BU4Rd3YerDQnHtrwOQBCCkho1XxK5Maz8KLCNi20wvcGt8wsIXlj2h5q9ITBq7IgQQvKVY -4OfJ7bKbItP2dylwQgQYPIGxwkkbRXNraONYvN19G8UdF35rFOuIBAjf0sKz/618ZQQIxObr -xJkRe0sECIC+ssnjEb2NBBBI+XM4OntVWGsRV9Td3sFgBAinGwIroo8O0gQQMGAwgc9PaLaG -gBCiwSTrYQQIVHjfCQgOtygEUIoraFoANfhZgIShpOd/RRxFU4/7xZR5tMdGoYz/g0thR0lM -+Hi88FtFD4mAh/Oat4Ri8B7bv04aokjN2UHz6nPbHHjZ8zIqpbYTCy043GNZBAhOqjyB2JbD -NwQoR23XCYD9x6E20ChHJRXmaHwyMdYXKl5CUxypl7ois+sy2D7jDukS3wQIsTyyPgJi0GsA -AAAAAAAAAAAA - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01.pem deleted file mode 100644 index 9abf00b2f2..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-01.pem +++ /dev/null @@ -1,66 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIILyAIBADGB8zCB8AIBADCBmTCBkjELMAkGA1UEBhMC -QVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5lMRowGAYD -VQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBB -TkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENBAgIEbjANBgkq -hkiG9w0BAQEFAARAq8xpbzGHqgX9f4ImK/MhlXvIfGWng7c0dC1YbQDww5kH4K0q -VTv/qDl79r0O79M6rFEyq1hGnrelrZD+YzghgzCCCssGCSqGSIb3DQEHATAaBggq -hkiG9w0DAjAOAgIAoAQIn8+kb8zj2JSAggqgxtGA/FLBBRs1wbBPgDCbSG0yCwjJ -NsFg89/k6xuXo8c5YTwsw8+XlIVq03navpew6XxxzY090rD2OJ0t6HA6GqrIpd8W -iSh/Atqn0yfLFmkLqgIAPRfzxUxqUocxLpQsLIFp2YNUGE+yps+UZmIjw/WHfdqr -cWTmSTSvKuy3UkIJZCkGDBpTvqk4BFaHh4oTXEpgpNY+GKxjf9TDN9GQPqQZR7sg -Qki4t2g4/SaqKl6EoJbpOrMHTQ9LYh2O7+XemEdvXjwpdv0kyffHiaVpBBAtthjT -bzsu67v0b9h3uDKim13uyT0wx33GEmmmDsJwf/IPH/8eu2Ck5xaafpXGmFqon4uX -kQtIPaNclFn7/hLxPw2VmBGaC0SYF3U1jyN96EBxdjqy8Aa6ByMXYDW5BcfqniD5 -mYXfw+b81lh1kutxaPaV4YJ9ZlRUW752N7VHo/fG0/fukoe5W9a8kIhgLpygllb/ -GP4oSF4wM6n1/OgRzZj2IWFiobKO4d/tMnh+C+PoEVAuFZcxQwi9GqvsK5OoIjVw -Nx0XcVSOl1TTYS9SwC7ugMBCab73JiruC24pL78YM+NaIpIQ3On4DokJA2ZHtjBj -ZIxF4tKA144RvFN6pBd6TVE5XM6KD/Vh9bjSmujtEAfdQ3TedvKJsbZuu0stErbv -WcRy11I328l557ECU+3eODid62PpuefHp0NTZJGqnYIShBpKRBuyhNxkyjmPfzYc -KgCQ69fRl3QOqCjobPpMHhVV3li1NrxKGedQcM8XcwpsTsPigp+51o2qgKzBNAqv -5kGumHOlMKsRfrs7jZCcSaOuEj97pYx08FLEgF23cav39MOQNUEM1dNU+EYslL4o -3RoSHRjUgPU+2t9c0prS9A/bPBDj/eD8p2kzccB4t1Uyu0s7kJoMwOJ/eNhPY6Eo -qrED5bCgG97D1Giwt0rlMUozQYH6bw9G5qwP+YflmaGF7yKIBacI5EppxZ7SWHk6 -/VpGu8LDn+PdD9b/hm03lT5p5UEky0DyfQzF3mQ/Eds1WHigamtnrAR6BXyG3e+j -JlqZnkLLFzx98xicz8kkjr+gRkMyyiS5FnYyvxKzfMtyn2lZ2st9nZGNNgMc9N62 -r5HgNbdDFHuRdKKzV+8kQfuMc3mOPpK1t9TFY+QgrxiB5p6S7VooI97YtP3Pbfkn -szCEeD3V2G22kdwGnDd0OZGGJlrKt1lusYK4NJphBqLBr8EkMQQbol2a8UnAmuIr -xKtOVGcMkoXj7ynbjr51BNswzuiFxS2cYU2QSb38Wi8uAYCInuIwaZi+whIslJzA -bif7CtJpA1BeLOz03fKTKznjAE13/4FDgXumdweENDYPK3kx6QHxjyI06hxRMUK7 -nbi9aWCXmXP3vU7D21dp0XnAMzRQJ565JV3aHRoY7XDa4LePa7PP9ywyafOE5yCW -7ndqx3J+2JhTDvSFsW8/q3H3iyeFhykuJVS6yugpim58soznxNoixiZkxWvdOVdi -q1QwppRLv8chjLPNMk8mQvskszPNxTFbyxEJks3EzVGVaQ3wNZDV215D6LgV10+o -PK/CQkZY2iigAZqUyF44lBFRRThyC+Eyt5c9rKGrkCjhUxIKQDmFhEIFzObgOL8w -yiEXRLEix1FjqBj0I+esFcCxjqHVFtHtPDSJBmZ5eLnYMKL0by+SYMAa9z0CReIz -l8JLL6EVIFz8kFxlkGWjr4dnOzhhPOq/mCpp0WxbavDfdhE87MdXJZBnLwoT62QG -955HlAoEQBOGJbcoHeV0oq2fWNwGwn7ijqqDAbxFBp/sP6TGqpxr56+RWYbgQyq4 -UV6rcULMQP/3axlMbK2Trki/9Pn276O59odWfDcvOozr1WQCPDaXVhzlENc9i3I8 -pKEOVQf/UBczJ0NR9aTEF80dRg2lpXwD0ho4N0AvSiVbgxC7cPZHQwIqvq9LHRUs -/4n+Vu3SVYU3cAxolUTiCGUSlBfkw+e0iOfkSPjDbZw/a7OBNZpedWKBxlhY91D6 -4g5mNBShMCIKrTFzGN32fSqvHfN24w88cGw1+1sEeym0niVk+NqQs+5DPuM+w6dk -e66xdPfHauuwGdi1ixu33yZIWJIrNO0WZd+Q4FsJB2K3U/vpCdgnVpcwazrM/obK -lBNp0r/Nha+WbfxCRXC55onTCqW2pqKA70h2UMDHOrpepU1lj0YMzmotDHSTU3L9 -09VvUMNg9uqfrQ6mSkb9j5Tl8oF2otOw5EzA1YdaKPmgsv62RWLYl80wXcBtHv5o -Bu+Y6fZToSXVV6nGG0KUvb4sFWlZMB/uvZrv20COII6BLFGF+ju9UXts/nGIXJvV -Kyvg17khQV0DiJQBcSjFuAdGqXen1POBbGSz6itcTk4TQDYHaOInxAjQTGTiFS9a -sw0l9RnAXJLsTTnFX05G5fXGawqqRPi1VHk2qWfi+G8BjzrRUGiWjPQIOR3yx5IE -SN4y9FvpYuflIeHg9urkwp6N+1f0DrJJhJY9ZQ0HTQhziJmIfvbEjNqCl7hEC28+ -F8I5tuViLgfSwcFFCvnS6WFoN4X6QdFdqMCb3ZSNXPiEBgPyLkwzCWArkP7/vARu -WXllR4nD2Zl30WrfXhStXRigCXn+vk3/Rz7g40qQ8n364n7Kew3N+JZpuxjdfjjd -2TLc2bp6lvzVEpaOgjuMgpRX8zan/R54ZKqzhvTooX9VHp0HCwlUvkOmdOyncxXi -P8IA6VF1I9nn762wpsxqmWT5x6pC2kQXPUxhO1VXj6uWtZo1okJyNZ5/tNOwM7B+ -XfOZ0xw+uyVi9v4byTZM2QdsJ+d3YGYLAugTGHISLqQEerD8/gGK+/SL06b2gNed -XPHtgSl/jHct8BWoT2BWC+sU1aKoTFoMGqnPwz4CwpJTX6/cSTW9KrrzDxodu2FO -ZJVkM8I30q9jfVlMKhAqQButL22eNefUjf5nrPx3eINo/YWHmFghbJMZWINR29lf -APe+kEyZbbZWbUV+PqHZdSLJN/rx+SonR8olFZUta5b87dUTgE48rAWVolseVTdD -Uho8/nDir1dlgIZpev7DCr4i1uUbRDHPwhVRdfo9XaYh0fDPhIIK+cKxophBYxdf -LI/X7MGpj5/TEcl4th1UAYp3lfhrVlZCKGdnQixQfZkETKDSdRHlrkA4mg+AunKl -Alx8uUbRXheINRXP+J77XouVB+mNuVg5DXjMx9p5H6V2YFtV0u47/6S4cO/5rEHm -yI9v+NegVOEXd2Hqw0Jx7a8DkKSGjVfErkxrPwosI2LbTC9wa3zCwheWPaHmr0hM -GrsivKVY4OfJ7bKbItP2dylwQjyBscJJG0Vza2jjWLzdfRvFHRd+axTriN/SwrP/ -rXxlxObrxJkRe0uAvrLJ4xG9jUj5czg6e1VYaxFX1N3ewWCnGwIroo8O0jBgMIHP -T2i2hoAQosEk62FUeN8JCA63KIoraFoANfhZgIShpOd/RRxFU4/7xZR5tMdGoYz/ -g0thR0lM+Hi88FtFD4mAh/Oat4Ri8B7bv04aokjN2UHz6nPbHHjZ8zIqpbYTCy04 -3GNZTqo8gdiWwzdHbdcJgP3HoTbQKEclFeZofDIx1hcqXkJTHKmXuiKz6zLYPuMO -6RLfsTyyPgJi0GsAAAAA ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02 b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02 deleted file mode 100644 index 7017055965..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02 +++ /dev/null @@ -1,90 +0,0 @@ - -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHCMIHMAgEAMHYwYjERMA8GA1UEBxMISW50ZXJuZXQxFzAV -BgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJpU2lnbiBDbGFzcyAxIENBIC0gSW5k -aXZpZHVhbCBTdWJzY3JpYmVyAhBgQJiC3qfbCbjdj5INYLnKMA0GCSqGSIb3DQEBAQUABEACr4tn -kSzvo3aIlHfJLGbfokNCV6FjdDP1vQhL+kdXONqcFCEf9ReETCvaHslIr/Wepc5j2hjZselzgqLn -rM1ZMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UE -BxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgRuMA0GCSqG -SIb3DQEBAQUABEBanBxKOvUoRn3DiFY55lly2TPu2Cv+dI/GLrzW6qvnUMZPWGPGaUlPyWLMZrXJ -xGXZUiRJKTBwDu91fnodUEK9MIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIAoAQImxKZEDWP -EuOggASCBACBi1bX/qc3geqFyfRpX7JyIo/g4CDr62GlwvassAGlIO8zJ5Z/UDIIooeV6QS4D4OW -PymKd0WXhwcJI0yBcJTWEoxND27LM7CWFJpA07AoxVCRHTOPgm794NynLecNUOqVTFyS4CRuLhVG -PAk0nFZG/RE2yMtx4rAkSiVgOexES7wq/xWuoDSSmuTMNQOTbKfkEKqdFLkM/d62gD2wnaph7vKk -PPK82wdZP8rF3nUUC5c4ahbNoa8g+5B3tIF/Jz3ZZK3vGLU0IWO+i7W451dna13MglDDjXOeikNl -XLsQdAVo0nsjfGu+f66besJojPzysNA+IEZl6gNWUetl9lim4SqrxubUExdS2rmXnXXmEuEW/HC7 -dlTAeYq5Clqx5id6slhC2C2oegMww3XH9yxHw6OqzvXY6pVPEScEtBMQLgaKFQT+m2SRtbTVFG7c -QcnUODyVB1IbpQTF1DHeeOX1W/HfpWZym8dzkti6SCyeumHmqO406xDiIMVKtHOqM86nEHuAMZsr -cLy+ey6TEJvR6S4N8QRzng8JJDZDTJXQN6q84aEudsnOrw2KyOVwPpI6ey4qBsHUgQ8kAFy5lsQa -WV45h6exgUwbBcKLgPZGFj+OdD2RKJsTb83/UqbJS5Q/lGXhzBlnaYucyJxEprRxbntmcnOEPFJe -+tRDUwOTd7qlJljdhIJL+uDcooL9Ahgo6Cwep6tduekv2cSEohJeTE8Dvy34YRhMbLvnFNdmnpNy -rNZDYVVxxaKoyd2AfB8NPFZh1VdAYfI3R1QAQ2kXEef5NNIfVQfMzD9akJn4RP+Kv32Qaxm4FrnK -xmwRyGJShavIBc2ax+F1r1+NZXuSBHn5vfoRTxOk0ST4dXsw74dnlYUMRaSu4qqUdM9jsXSyeX4Z -gQgkR2bkaYO6ezFgenFIa7QWVw8rXZAEZ5aibCxbnY1VE41PYIvhlLdbFJhH9gY22s+fFAuwnzyA -SRjC40A9aAEItRlaPStWSGiqlLRgNkBBwdpv2l2YPBd2QzHx6ek6XGrvRJuAC+Nh62rtQKwpNH54 -YAOHW55maBFW2SQ3TF+cZ6NbbqhCmHTyyR7mcSYc9sXSVDWEhYKQ1iyU870zhHWVpvglZizZetJC -ZFjYex3b1ngVdcgargOvpPq9urCKKi2mbkqv/EFpzSWGXkKSpfCG/XfMnEOtkNrB8S06vnk2JcJB -OBqJot+uuSH5hOg0vTpxX2DuONJSiWSWyfRE/lTfJJFXwhod7SXclUyXPeSyibcSic2hVAzDmwjD -31js/j2k02PI/agPhr3UQ8cMgcNAiaoCKbNaWfn6BGbCAbTchxzUlo2cSJiLlrX2IDZmfXbXmZCo -m1smWIG+BIIEALiuAxDb6dWLAYyVBoN9hYI4AiPeZAY9MtvQ6AV8o2/EFm6PvYGXy3Hei5830CH0 -PBeX7Kdd6ff1y33TW/l5qSkIL1ULTGR7okFfJePHDmq1dFt6/JOMptiQ8WSu7CsJQvZ9VTFXeYFc -ZqCPPZc1NrPegNK70Zf9QxWIbDAevJ5KLBf1c6j8pU2/6LnvDY6VjaTvYSgr7vTR8eVzH4Rm77W0 -iOHxg5VcODv6cGSVyuvbX8UAGo8Cmb58ERDtBDJBQXVpWKLNAuDJ9GX8n2zNkpjZLbPSkcmuhqGa -BJBE/BaCTkUQWlY9dIbRtEnxIU1mfbPPdx1Ppa8DqGDjSOsQdKcKYNNZtayEw++EIpmpdBNsKphC -fB8UEK2Wkk4ZVW+qyGoi/r0MFsvO1NmSOOZ0o/jy/YHmoeURHhPy97AO3eVTkEAa5CfJEJybmo56 -7CDw/FwoGAUCgsoz7rlxzMudr/IhHIH+APinncxXlHO2ecvHD9i8DaHGA8tVifgsUhqQoZieULut -eF94O5UAxOkv41UZssYTwN4nYrN1QkesZl3BX4ORS4EE30/PQ23ARf3WZptZrCJevGm2ZYzGeh8x -g17mCDfiLO+bff4qP/4mC96Pu4ia6j4to5BwKIJS/+DCuoD8WeSKF4pugXQkMUiHdQnNnVP9Sp2O -/4ly5mO8JzrQC59V2bnTNBqPhpno8kfJvK5TypPSVC+bTzern3rJ6UceB3srcn9zxKx9GdNydJQj -yWjv8ec3n3d1nuQwhz5Q053NBhIjwoGg3Go7LO6i78ZOlpF7dcoAO13NfHLyNjnyHCaiWtVRTct9 -rLf5vN00urSn8YJngHk1eTKK8nHGIcOg6YdYDOD2nE5XwRijKmieG8Xa3eKRzfbL06GrBQENle6J -mC131bp3cRVxpjq+o6RAbGoMm4yICsL4eTarCQrsyHmoPHqr91UHo91avyxU7knWmEhX27ybmsrs -8aeZwPHixL14TeyhruCqRVvkf1Ks7P+z8MPUboGNqQe2WLN8ktCGEr15O8MJR/em86G03Jfo4oaw -/DVUH5RwLT6acedOGuzMh/2r8BcmemhVQ8/cWvV4YJ0tOW4hzyVHC5hQf8sZ3LzxXLH6Ohnrbprh -xvrdbaSdChWZDDP0bCCbxEhkwuBkBeKZrMbwRTP+TPTPYLVTH/CmKLzKh/114tkGkyO3hHS4qExU -V39F2Sj4mylx+hD0+20D9pntpNi7htccGlOm6yNM69at/3+kLgJJyoIlaxLcCUYHNMifDt+T3p/t -5U4XmD53uUQ6M8dvj/udqPekNSUfse15yrd9pjOt5PcJuqW28q0sFHf9pHIgz3XZFMe5PD7ppw6r -S+C6Ir4PrYIEggQA7ZDVtiCm+BbtNNB/UJm79/OQ5mp5bTI0kPmDeycaWTa0Ojpum+c/dpG/iJOB -DICj7jHOXSHT7JlGyX6aSFJUltucAnZvwzhPDmdDaIDiKSk85GqgdDWVfGosSCX9Ph/T3WpIxnwf -WSDRtIHkWTjly+pe4yy5K6/XISy/L5Zh/fhiI5fjHjgzmlibs2ru4nVw6hBhUvlSSe2BEs5d9h/y -NH8Wy3qvb2D3jh7hkepFtZJGNTHp8ZUC7Ns2JIpQYObsaxdI65i3mMOu7fRwI+0/4ejsWhP6KCEi -LgwvLg0qM82ma6YB7qHAHboaczRVEffDcJUG4a5uycB0DoZFn+uEaEFyili20hCn4hVfsqUQk2PT -8Mo1tSl5e30xI1YJZrRgiJm9nHRX6fLizngP+ILJLPHZsPvlSVIfY+/v/FR8feKOjaGhyGF51BAx -aM2NIQ4jMP5/X+U5gQybi0E6u7rroDhaHsKmCMgXqszwXWCpedA/sEbeHpiTC59YlPPSlIOMc9vP -Ko/mQCfWy/9icUaIfKQldvkllUxxNkqu6AbIpHVscbAEzSPs5xbQXU8EZNNCDisFnnpY3nQ3eLnl -m89saTJxRb7NWHRMlmPv7qgD7uMIq3vdOGA7i5wT9MeoNIgK1/DsgH30s6RWjJy4YyyLmRTXPzbj -hbQVpEmiMRbEidIvUx2OjKVxVQIcgtLsa2lvHQ4XL1cpLr5GVtOgy0fMg5OCDUUDsvjgjgLQ3P2U -p2nVY5FM6/QpPc5DTLuuR9ekI2/c9Biz09RtcYDUQK2ajdo8h1IyKqHFoB7h48OXxXKKY94DY0TG -x6PonB/epj8orAw4QKmm5M0vXYwBOqRymCTHTqOJGObdLx1euFFyqguzHJOU2gAGZI0z9Lg1yRuF -yhdPZyuniIcmtLNxRZ1duYHErcAyX56qndmLXt7UVkATai/rIMuoJLfAsUnVuTUS5p7tJM754UZT -7lTcXvDJgOUNnBRaIcxC3pxvbrYDJ2iFJ72xkxUP2p74gucqg25XnCVmQuLg6zDDxF6CLuw9isxy -Xg4pkneMN//7fpp8GYl9nyZm2yqYYM+jcw0fcVc64L+X4w/gL3H2UMGgxIHSJp7HIG7VKHtXrNyj -dPXXPVUsMsAAimqOr0Lr2sZWirfuivLaPTqhbkvG5PF7K3gT80AOIcd/6EIHBy2hZ7ukfjHmdP4L -yQOhTQklaKzGHI0mypq0uFLWJOUlZnVrMiLP1xrWkpC8Ro9eo6mfjjQ45z8adC43a47klwTEzvod -3rNEFIGJJUEjAN3mbqie7IxoSJknBBJK0D9lZEQ8lZWlq7vuN8JdqPM6xh155jMVsPwjLK6Tzkj5 -BpRD9Tgm3u6HPQSCBADgkWEN75Mu9TGosXY0xm1k6K6sPv8L949CrLWo4r1I2LA072bTGvQP28Vs -hUA76jgcT1ocC++9PoktIK10YCq5w+FfMAQ04KeCXuAdmiY2iAT4Slea61PMCMta3mVGyLUZCLEm -P+I0UKR5mlO0fGEcjU9j8TmbjZqxNFqloLsU7oSi7Os0EtYHkdAVrExUyOc/ZDie6fBjdLTmLdCm -bE9JNwjlbXypdTZupGgLNhKGDIskUAAMwZYayI6YfSIMkNCeAYTnjOuGZZ1msCXGXsfMBR1sfUIj -9UeGjwD8gq+UVVHX/oeoH/m0eJ5ppqi3+nUlgc9DvpYsC/Fg0G2KuYb9B+VJ+a4GMzQSPREoFtQp -B9dtLkBb7Ha/hpGWTIdqzW0eAo5llyN8FNvl2Fu2IcLaNmWFO69gLjRKQopp0dvFOuwAVI6fvGDj -p1WigoNbFZl8N+iiWmzKOjoG2ZLbez1clZCms/JPJrXhEMMOxWpVzkQyN336VWHmGgMcjaKCGSeA -2nnESIGuiCXMrkHlGfabYIsKcHFCo2t13uXyZPf0zSPTkuD0Eh92wqC9pvA3gvrrCUfo9Mn3bs+e -KWKmDlpcs8mDn032oIg+zrQhIduMqXVn3evzeVM3B5MBOGMvg51/SXg7R+MC/463juQQEb9IVe/I -YGnO//oWm9lw/377Af/qH+FnN02obJw1FvesQIs9e5RHNQykKbO+vmVJQl1nd9DZWrHDNO7/80Yz -2hCm7Tws5nSRN2iFlyRaYJHr7ypxkU2rCak2r6ua7XDwu1qU2RT3+qPjT1RuxQ2oTlHyGkKPMZGC -Rc+CSWz5aeeCmHZVwdb3nC8YpfsujMiYqygLeuQ82pjKuR7DIKGmnfcOLdv5F+Ek2Wyy0D98iSgk -+aoQGYLhL9llU13pn21uRsDY5uGcXiIw1IETFlTdgENEv8futZuJsegrp7fmFXyNoNyFNyypeDrM -6ZqR4vKxFjg3tKKeVpkw/W4EAklzMxmNiazGNDBHsnYV3rwPlKa+HeeE2YxnsKwGLCNgRYUXTaJk -461vS160z3dvh/mLfdZ7MYCkmO3bNE3ELUDAw7YQkSuo9ujzdFKte9LC34sjg9fOex3ThAg5Y50n -wYm4zBmGM7yEqL8O6QgnM6tIDFS9XryDaLNzcGhMWqMvhzO6sC/AA2WfLgwS517Cp03IkJQWqG9q -w52+E+GAtpioJfczEhlv9BrhjttdugRSjJrG8SYVYE4zG3Aur5eNBoGaALIOHOtPw8+JovQmIWcF -oaJ/WQuglFrWtew51IK6F8RiHAOBVavZOuZcO7tV+5enVfreOd0rX8ZOy4hYmHhmF1hOrrWOn+Ee -E0SYKonXN01BM9xMBIIBSLCvNAppnGPTUGjwbMJRg1VJ2KMiBWH5oJp8tyfIAxMuWFdtaLYbRSOD -XbOAshPVK8JAY8DQDkzqaCTAkLTfSRAt9yY6SbUpMsRv7xa8nMZNJBJzJT9b/wNjgiOJgaGuJMkV -2g/DX2jfP3PrMM/Sbnz7edORXHj1Pa5XTT8nG5MS0FuZgvevdq3o/gVVAz+ZCKOH3ShMzZvfp01l -SX5gaJTflmU6cdNwtn2yZ6IScF7OrjUeA9iEoSVR9dQcA+4lB3RAG3LMwcnxXY35D7+PMJzHIZdF -cSnq+n03ACY2/E/T31iijRH29rvYHGI+mP/ieYs45iq4fTWo6i1HofeWLdP0fX7xW3XO0/hWYFiw -BxKu66whAbRhaib3XJNvetVs25ToYXyiDpjG+cd5rCMei8sGQwTBj9Zeh0URoeMW1inTP0JvCmMU -rZgAAAAAAAAAAAAA - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02.pem deleted file mode 100644 index 279c5d830b..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-enc-02.pem +++ /dev/null @@ -1,106 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIITQAIBADGCAcIwgcwCAQAwdjBiMREwDwYDVQQHEwhJ -bnRlcm5ldDEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xNDAyBgNVBAsTK1ZlcmlT -aWduIENsYXNzIDEgQ0EgLSBJbmRpdmlkdWFsIFN1YnNjcmliZXICEGBAmILep9sJ -uN2Pkg1gucowDQYJKoZIhvcNAQEBBQAEQAKvi2eRLO+jdoiUd8ksZt+iQ0JXoWN0 -M/W9CEv6R1c42pwUIR/1F4RMK9oeyUiv9Z6lzmPaGNmx6XOCoueszVkwgfACAQAw -gZkwgZIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsT -GURFTU9OU1RSQVRJT04gQU5EIFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBW -QUxVRSBDQQICBG4wDQYJKoZIhvcNAQEBBQAEQFqcHEo69ShGfcOIVjnmWXLZM+7Y -K/50j8YuvNbqq+dQxk9YY8ZpSU/JYsxmtcnEZdlSJEkpMHAO73V+eh1QQr0wghFz -BgkqhkiG9w0BBwEwGgYIKoZIhvcNAwIwDgICAKAECJsSmRA1jxLjgIIRSIGLVtf+ -pzeB6oXJ9GlfsnIij+DgIOvrYaXC9qywAaUg7zMnln9QMgiih5XpBLgPg5Y/KYp3 -RZeHBwkjTIFwlNYSjE0PbsszsJYUmkDTsCjFUJEdM4+Cbv3g3Kct5w1Q6pVMXJLg -JG4uFUY8CTScVkb9ETbIy3HisCRKJWA57ERLvCr/Fa6gNJKa5Mw1A5Nsp+QQqp0U -uQz93raAPbCdqmHu8qQ88rzbB1k/ysXedRQLlzhqFs2hryD7kHe0gX8nPdlkre8Y -tTQhY76LtbjnV2drXcyCUMONc56KQ2VcuxB0BWjSeyN8a75/rpt6wmiM/PKw0D4g -RmXqA1ZR62X2WKbhKqvG5tQTF1LauZeddeYS4Rb8cLt2VMB5irkKWrHmJ3qyWELY -Lah6AzDDdcf3LEfDo6rO9djqlU8RJwS0ExAuBooVBP6bZJG1tNUUbtxBydQ4PJUH -UhulBMXUMd545fVb8d+lZnKbx3OS2LpILJ66Yeao7jTrEOIgxUq0c6ozzqcQe4Ax -mytwvL57LpMQm9HpLg3xBHOeDwkkNkNMldA3qrzhoS52yc6vDYrI5XA+kjp7LioG -wdSBDyQAXLmWxBpZXjmHp7GBTBsFwouA9kYWP450PZEomxNvzf9SpslLlD+UZeHM -GWdpi5zInESmtHFue2Zyc4Q8Ul761ENTA5N3uqUmWN2Egkv64Nyigv0CGCjoLB6n -q1256S/ZxISiEl5MTwO/LfhhGExsu+cU12aek3Ks1kNhVXHFoqjJ3YB8Hw08VmHV -V0Bh8jdHVABDaRcR5/k00h9VB8zMP1qQmfhE/4q/fZBrGbgWucrGbBHIYlKFq8gF -zZrH4XWvX41le5IEefm9+hFPE6TRJPh1ezDvh2eVhQxFpK7iqpR0z2OxdLJ5fhmB -CCRHZuRpg7p7MWB6cUhrtBZXDytdkARnlqJsLFudjVUTjU9gi+GUt1sUmEf2Bjba -z58UC7CfPIBJGMLjQD1oAQi1GVo9K1ZIaKqUtGA2QEHB2m/aXZg8F3ZDMfHp6Tpc -au9Em4AL42Hrau1ArCk0fnhgA4dbnmZoEVbZJDdMX5xno1tuqEKYdPLJHuZxJhz2 -xdJUNYSFgpDWLJTzvTOEdZWm+CVmLNl60kJkWNh7HdvWeBV1yBquA6+k+r26sIoq -LaZuSq/8QWnNJYZeQpKl8Ib9d8ycQ62Q2sHxLTq+eTYlwkE4Gomi3665IfmE6DS9 -OnFfYO440lKJZJbJ9ET+VN8kkVfCGh3tJdyVTJc95LKJtxKJzaFUDMObCMPfWOz+ -PaTTY8j9qA+GvdRDxwyBw0CJqgIps1pZ+foEZsIBtNyHHNSWjZxImIuWtfYgNmZ9 -dteZkKibWyZYgb64rgMQ2+nViwGMlQaDfYWCOAIj3mQGPTLb0OgFfKNvxBZuj72B -l8tx3oufN9Ah9DwXl+ynXen39ct901v5eakpCC9VC0xke6JBXyXjxw5qtXRbevyT -jKbYkPFkruwrCUL2fVUxV3mBXGagjz2XNTaz3oDSu9GX/UMViGwwHryeSiwX9XOo -/KVNv+i57w2OlY2k72EoK+700fHlcx+EZu+1tIjh8YOVXDg7+nBklcrr21/FABqP -Apm+fBEQ7QQyQUF1aViizQLgyfRl/J9szZKY2S2z0pHJroahmgSQRPwWgk5FEFpW -PXSG0bRJ8SFNZn2zz3cdT6WvA6hg40jrEHSnCmDTWbWshMPvhCKZqXQTbCqYQnwf -FBCtlpJOGVVvqshqIv69DBbLztTZkjjmdKP48v2B5qHlER4T8vewDt3lU5BAGuQn -yRCcm5qOeuwg8PxcKBgFAoLKM+65cczLna/yIRyB/gD4p53MV5RztnnLxw/YvA2h -xgPLVYn4LFIakKGYnlC7rXhfeDuVAMTpL+NVGbLGE8DeJ2KzdUJHrGZdwV+DkUuB -BN9Pz0NtwEX91mabWawiXrxptmWMxnofMYNe5gg34izvm33+Kj/+Jgvej7uImuo+ -LaOQcCiCUv/gwrqA/FnkiheKboF0JDFIh3UJzZ1T/Uqdjv+JcuZjvCc60AufVdm5 -0zQaj4aZ6PJHybyuU8qT0lQvm083q596yelHHgd7K3J/c8SsfRnTcnSUI8lo7/Hn -N593dZ7kMIc+UNOdzQYSI8KBoNxqOyzuou/GTpaRe3XKADtdzXxy8jY58hwmolrV -UU3Lfay3+bzdNLq0p/GCZ4B5NXkyivJxxiHDoOmHWAzg9pxOV8EYoyponhvF2t3i -kc32y9OhqwUBDZXuiZgtd9W6d3EVcaY6vqOkQGxqDJuMiArC+Hk2qwkK7Mh5qDx6 -q/dVB6PdWr8sVO5J1phIV9u8m5rK7PGnmcDx4sS9eE3soa7gqkVb5H9SrOz/s/DD -1G6BjakHtlizfJLQhhK9eTvDCUf3pvOhtNyX6OKGsPw1VB+UcC0+mnHnThrszIf9 -q/AXJnpoVUPP3Fr1eGCdLTluIc8lRwuYUH/LGdy88Vyx+joZ626a4cb63W2knQoV -mQwz9Gwgm8RIZMLgZAXimazG8EUz/kz0z2C1Ux/wpii8yof9deLZBpMjt4R0uKhM -VFd/Rdko+JspcfoQ9PttA/aZ7aTYu4bXHBpTpusjTOvWrf9/pC4CScqCJWsS3AlG -BzTInw7fk96f7eVOF5g+d7lEOjPHb4/7naj3pDUlH7Htecq3faYzreT3CbqltvKt -LBR3/aRyIM912RTHuTw+6acOq0vguiK+D62C7ZDVtiCm+BbtNNB/UJm79/OQ5mp5 -bTI0kPmDeycaWTa0Ojpum+c/dpG/iJOBDICj7jHOXSHT7JlGyX6aSFJUltucAnZv -wzhPDmdDaIDiKSk85GqgdDWVfGosSCX9Ph/T3WpIxnwfWSDRtIHkWTjly+pe4yy5 -K6/XISy/L5Zh/fhiI5fjHjgzmlibs2ru4nVw6hBhUvlSSe2BEs5d9h/yNH8Wy3qv -b2D3jh7hkepFtZJGNTHp8ZUC7Ns2JIpQYObsaxdI65i3mMOu7fRwI+0/4ejsWhP6 -KCEiLgwvLg0qM82ma6YB7qHAHboaczRVEffDcJUG4a5uycB0DoZFn+uEaEFyili2 -0hCn4hVfsqUQk2PT8Mo1tSl5e30xI1YJZrRgiJm9nHRX6fLizngP+ILJLPHZsPvl -SVIfY+/v/FR8feKOjaGhyGF51BAxaM2NIQ4jMP5/X+U5gQybi0E6u7rroDhaHsKm -CMgXqszwXWCpedA/sEbeHpiTC59YlPPSlIOMc9vPKo/mQCfWy/9icUaIfKQldvkl -lUxxNkqu6AbIpHVscbAEzSPs5xbQXU8EZNNCDisFnnpY3nQ3eLnlm89saTJxRb7N -WHRMlmPv7qgD7uMIq3vdOGA7i5wT9MeoNIgK1/DsgH30s6RWjJy4YyyLmRTXPzbj -hbQVpEmiMRbEidIvUx2OjKVxVQIcgtLsa2lvHQ4XL1cpLr5GVtOgy0fMg5OCDUUD -svjgjgLQ3P2Up2nVY5FM6/QpPc5DTLuuR9ekI2/c9Biz09RtcYDUQK2ajdo8h1Iy -KqHFoB7h48OXxXKKY94DY0TGx6PonB/epj8orAw4QKmm5M0vXYwBOqRymCTHTqOJ -GObdLx1euFFyqguzHJOU2gAGZI0z9Lg1yRuFyhdPZyuniIcmtLNxRZ1duYHErcAy -X56qndmLXt7UVkATai/rIMuoJLfAsUnVuTUS5p7tJM754UZT7lTcXvDJgOUNnBRa -IcxC3pxvbrYDJ2iFJ72xkxUP2p74gucqg25XnCVmQuLg6zDDxF6CLuw9isxyXg4p -kneMN//7fpp8GYl9nyZm2yqYYM+jcw0fcVc64L+X4w/gL3H2UMGgxIHSJp7HIG7V -KHtXrNyjdPXXPVUsMsAAimqOr0Lr2sZWirfuivLaPTqhbkvG5PF7K3gT80AOIcd/ -6EIHBy2hZ7ukfjHmdP4LyQOhTQklaKzGHI0mypq0uFLWJOUlZnVrMiLP1xrWkpC8 -Ro9eo6mfjjQ45z8adC43a47klwTEzvod3rNEFIGJJUEjAN3mbqie7IxoSJknBBJK -0D9lZEQ8lZWlq7vuN8JdqPM6xh155jMVsPwjLK6Tzkj5BpRD9Tgm3u6HPeCRYQ3v -ky71MaixdjTGbWTorqw+/wv3j0KstajivUjYsDTvZtMa9A/bxWyFQDvqOBxPWhwL -770+iS0grXRgKrnD4V8wBDTgp4Je4B2aJjaIBPhKV5rrU8wIy1reZUbItRkIsSY/ -4jRQpHmaU7R8YRyNT2PxOZuNmrE0WqWguxTuhKLs6zQS1geR0BWsTFTI5z9kOJ7p -8GN0tOYt0KZsT0k3COVtfKl1Nm6kaAs2EoYMiyRQAAzBlhrIjph9IgyQ0J4BhOeM -64ZlnWawJcZex8wFHWx9QiP1R4aPAPyCr5RVUdf+h6gf+bR4nmmmqLf6dSWBz0O+ -liwL8WDQbYq5hv0H5Un5rgYzNBI9ESgW1CkH120uQFvsdr+GkZZMh2rNbR4CjmWX -I3wU2+XYW7Yhwto2ZYU7r2AuNEpCimnR28U67ABUjp+8YOOnVaKCg1sVmXw36KJa -bMo6OgbZktt7PVyVkKaz8k8mteEQww7FalXORDI3ffpVYeYaAxyNooIZJ4DaecRI -ga6IJcyuQeUZ9ptgiwpwcUKja3Xe5fJk9/TNI9OS4PQSH3bCoL2m8DeC+usJR+j0 -yfduz54pYqYOWlyzyYOfTfagiD7OtCEh24ypdWfd6/N5UzcHkwE4Yy+DnX9JeDtH -4wL/jreO5BARv0hV78hgac7/+hab2XD/fvsB/+of4Wc3TahsnDUW96xAiz17lEc1 -DKQps76+ZUlCXWd30NlascM07v/zRjPaEKbtPCzmdJE3aIWXJFpgkevvKnGRTasJ -qTavq5rtcPC7WpTZFPf6o+NPVG7FDahOUfIaQo8xkYJFz4JJbPlp54KYdlXB1vec -Lxil+y6MyJirKAt65DzamMq5HsMgoaad9w4t2/kX4STZbLLQP3yJKCT5qhAZguEv -2WVTXemfbW5GwNjm4ZxeIjDUgRMWVN2AQ0S/x+61m4mx6Cunt+YVfI2g3IU3LKl4 -OszpmpHi8rEWODe0op5WmTD9bgQCSXMzGY2JrMY0MEeydhXevA+Upr4d54TZjGew -rAYsI2BFhRdNomTjrW9LXrTPd2+H+Yt91nsxgKSY7ds0TcQtQMDDthCRK6j26PN0 -Uq170sLfiyOD1857HdOECDljnSfBibjMGYYzvISovw7pCCczq0gMVL1evINos3Nw -aExaoy+HM7qwL8ADZZ8uDBLnXsKnTciQlBaob2rDnb4T4YC2mKgl9zMSGW/0GuGO -2126BFKMmsbxJhVgTjMbcC6vl40GgZoAsg4c60/Dz4mi9CYhZwWhon9ZC6CUWta1 -7DnUgroXxGIcA4FVq9k65lw7u1X7l6dV+t453Stfxk7LiFiYeGYXWE6utY6f4R4T -RJgqidc3TUEz3EywrzQKaZxj01Bo8GzCUYNVSdijIgVh+aCafLcnyAMTLlhXbWi2 -G0Ujg12zgLIT1SvCQGPA0A5M6mgkwJC030kQLfcmOkm1KTLEb+8WvJzGTSQScyU/ -W/8DY4IjiYGhriTJFdoPw19o3z9z6zDP0m58+3nTkVx49T2uV00/JxuTEtBbmYL3 -r3at6P4FVQM/mQijh90oTM2b36dNZUl+YGiU35ZlOnHTcLZ9smeiEnBezq41HgPY -hKElUfXUHAPuJQd0QBtyzMHJ8V2N+Q+/jzCcxyGXRXEp6vp9NwAmNvxP099Yoo0R -9va72BxiPpj/4nmLOOYquH01qOotR6H3li3T9H1+8Vt1ztP4VmBYsAcSruusIQG0 -YWom91yTb3rVbNuU6GF8og6YxvnHeawjHovLBkMEwY/WXodFEaHjFtYp0z9Cbwpj -FK2YAAAAAA== ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e deleted file mode 100644 index 0067794d70..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e +++ /dev/null @@ -1,91 +0,0 @@ - -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHCMIHMAgEAMHYwYjERMA8GA1UEBxMISW50ZXJuZXQxFzAV -BgNVBAoTDlZlcmlTaWduLCBJbmMuMTQwMgYDVQQLEytWZXJpU2lnbiBDbGFzcyAxIENBIC0gSW5k -aXZpZHVhbCBTdWJzY3JpYmVyAhBgQJiC3qfbCbjdj5INYLnKMA0GCSqGSIb3DQEBAQUABECjscaS -G0U299fqiEAgTqTFQBp8Ai6zzjl557cVb3k6z4QZ7CbqBjSXAjLbh5e7S5Hd/FrFcDnxl1Ka06ha -VHGPMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UE -BxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgRuMA0GCSqG -SIb3DQEBAQUABECsyHXZ1xaiv0UQRvOmVYsaF38AL2XX75wxbCsz5/wOg7g3RP4aicZxaR4sBog0 -f2G1o9om/hu+A0rIYF/L4/GUMIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIAoAQIsozQrnwj -cc2ggASCBAAQz/LPoJe/+iYWeTwSebz6Q9UeKZzQ2UWm7GLtEM3s3c9SCvpmkwIRdEhLjWaBJMyI -DiL7t1I1vMf9inB8LXgAcIEYkpNScjS8ERA9Ebb7ieNKSBg7w7B8ATHFxLSlDADqRgoZrB1Ctfgf -ximp3EgxTgnhtyQhZxXW7kBQyFRwumplrJXOp7albP7IothrOKncw30IJT1fwPxWNMItI9juXF0U -CbWVSjPzGBo4+XNXMvUO6MplOQEz/ywEQ9E8OZAQex1Zw9qq5ppsXB2pMsYV5sLJGikukMYKquiz -3YK+tN6J8ahLcDUs+VGwqvZi17gpBTlbEP+ZmXJpnO63t1yTEB0V5AZcRKWUOhzlCBM5YUagqNoY -cpsmSvOK6bYzkUKOrzWpDCAtGZ/Dvul5dTZZmxs2WpM+iyeHXMxO3huy8K1brPTqt1f1sHhuq1jD -1eXedaCjIgUW9qV18vNAQCof/Yb6T/1fxztf/jD7pPLQJ+7LJkKCAEHGcaizpoKqhYcttaEhLq1G -O+Ohqf7yFegMdTJ3wwP324w5ZYSU5fLo2Z34/Edf6EGvXyTIqVfAmEBALd6JGVdN5GlYYTxrL+eO -P80Z4ao4YKoxwEmRp5bmQsQ8B29QhOFKmC6eiG5B96qLMtp7Zmu1grDNxTd6OXShWVwYARD0/B1P -Sy0PAfk9Gb4fAkO9fZJDQYZ7s0mM5iOPEeSR7820TolOb+KfRabLA9d714jsc2jEykKlpP66Bh4j -aCsyqJ0uUQcE8SnzrKAqGwgWiCGQpiTa+HBiP6eRlRGOKQj5Y06vcNx6Ija4cGe6+yCN8HV8tCY0 -okZK98NQCl5t79R/ZB2c3NvBJH+/g3ulU48ikT3tVmDxE3mOZofZyGFEM99P+YCMScLDxTl3hzGy -0YkI8U855P7qOAbcFfh2T5n+LSELwLhbkymEfZT917GWTfmypBWMvJx0WHeDhKwQYPdzbKgWETnc -yeKasaCW+oLdhBwrd6Ws2r4MA8cwiYXDLbwYmCxJA8VF++8kubF2HJOjSyMBS+QT2PSV/0D9UWoi -Vfk7R4OvWBJVvq7nV+lXS0O5igjExxlmx1OaBfg7+Cr/MbK4zVNrKSJn82NnKKt6LC6RaTmvFYay -0sDFxQ7Xo+Th6tDNKmKWJt6Kegfjc+qTWJTKb3kL+UI8vS0zTLy1+M/rZ4ekos/JiS5rYIcAswvg -58kBgp/0rc6upBeWjBaK5O0aLAeBQfLulo1axWX04OSVKmYeoAltyR6UO9ME3acurQyg7Ta24yqO -whi/PrIaEiO7dsWvFtzsshVzBLic02NlAkPkMUzliPYnZHWQglDAVxL5K2qhvK1OFCkQpIgBsBDM -6KYRL/mkBIIEALIl927rIkaN37/BQIcxLcSa05YfC0Hl3mxWESt1A0D4lA37A9S8EbYmDfAYlMc0 -3HhZGdZEtawfpJFyDHzNZceNWBch6nxeNZCY4YFdsbzuGS0RKpwNA9S/czOJ4p9ymBCxuhGepI3U -PKbC8C749Www1/wMdAot1n+K7M/PBGR8hWmaH5SS7U3yMwAB1fq2NDjx4ur+Um+MclSdN01MDXzG -EO+eAo1pdAY8479234l8dB2YVAhZ1ZlJ4KmbqMKJrGJXnQUEYS6/cTDRjsUocsoW7uGg1ci2GiHa -qjlkfpBfie3SdhFW/K8hwAH0HALs56oFN66wUkP/AaJAPfIUNhR6RpHKzZ9zCC42oB2mNawQRMnF -ETBl1s/SwMxLKRp7jAfKs4NZxSY6I9z/2dTpzS3tsHMjxVDuxkolvRNWBILEMeL1CBvip2HhmoUw -/Sz5NDgyzk1aQLV6DQNJ2RZLMZDRCtSwZSBu6lhhSgTJGazP0+NbqXXC5aQTrqrFIcWyDXz+ADle -kszzYM/gSaQTCALTwfDDaU9Ek3xVgW+XBtExtJ3U+0AN3l0j86rUIdIvp6eWdxWQqv9LtpoorKMD -KfUc5PYV09Z1JgsT4X51Zzq+74l5dz7udIM7UNbdTpmRm9PDj3TUbGCvNR9hqOEGTLbkvb1ZR24a -h6uGRl2znB25IpDAGRhNRb9is/pO2tvHwHTDMOjrgvZG/pNvXgSUxz0pRjUjXIcqBe2X2gcQfeal -r8gY76o83WEGL6ODryV9vTQVHt52+izgpYoBZaVlpgqbZl54c+OE0Zxf9RwXwDbcYu5Ku5E0MPL0 -qUjc0y2+Y6E4P5bAWaZGMGT+ORkyVUzcaWmM/+XlO7PER5wrWlCIMZCX1L/nvioY0q0CKqALn7DJ -QU+qenbwrb6uwS7uNZY6V86s0aDYpU7yRyqxC5SbuyNJb02gdxUCgpIscFaMUjMVRml4M4BIjX/b -U+HgHoVMUm8SnN9gRcT2izPrgOGVcMTJjfenzoCKoCPo9RjgGMctgB4DvKamErNU7OrilIfuoqzE -PNSeP9SPw/zkDmNvMebM499We9CVnsHUWqF00/ZJWoua77+0f1bLS/tmci1JBvIcMo/4SJvgH+KF -o0gijP9gqAPd5iCOnpnJlHUqRIym42SmyKEDuzdSwXKjAR6j7uXda39JyMJr8gGzEsu0jYRkAmj1 -YdiqwKXUcLMkcj1AKeU/PxTUVw0YKsv/rowrPYww3xQUWqNivrXB7GCHE3BzsYNdHsmziaGIXQbA -+EBHdkuKrM8BcC+fxhF/l/KUxngsD1E75IcUv8zFDF+sk4CBYHqks9S4JYlcubuizqsILbdGzIMN -Z7w34k0XT+sEggQAyzr8MHeIJGsT+AYnZr08PeTbyr01JEoT7lPYT6PzX4F63QKKDl+mB+PwLMzY -CXrxZcUmuay6/MV8w/f5T6vQXdoSw5puWodBYwVReYh1IaEN+jiTapm9YBVmcIsJPO6abHowknSV -OWSvST0AtAX57fFOTckm+facfBK9s9T1lUUgF44Bh5e8f9qKqfOV44nqdCOEyUm0Dao497ieN4Eg -XBLNvOZY9+irMiXjp0lcyFvhrJOczfyCr9EiiaiH1TfSzKGKsf2W84iKn/JH6x2eOo7xjwJ40BQD -c6S1cUNEuqBhP6by0FioOXYOKVyifpxk84Eb+F/4CNdTJTvCPwsiegdfsX/Q53DvKVtXp9Ycam5J -TmKRHXK/bMHF4ONv3p/O/kn/BqRx+fbbP2eMX8Z1F/ltHKfp6B+06HljUwQLBJs9XtCfqH5Zgdz9 -gad5WZF5ykFArmHDgeFlgggvbZ7z9vqnjN/TH68TxJzauYQ5vLHQ6wGXik4/4uq7/TqNmhxlQEM4 -zVkwsn203bUmKLyz+yl1zItDpn5zy1uXfGo99rBdUzdbdE9LmEFPMaFsaHd4a8oDaUroD7FgCbeD -JJVld3ac6F8+3QbExPs48OrgA1kI3/UwXr52ldjiYzTLfAGR9BjqNFTw45FUHuMf8TEM5hcHx56w -95eKAqraDk28o9k+M2UKpcmrdlWoWzdqVVFeWGpM8x9Y9Nt0lf/4VUQgrXjqTkUCQkJyqTeTeGgH -rn3QBk2XAgpxZhaJs3InW0BkAlBmK99cMinUiJeFt5a4p5wPeXrVuh6V9m7Mpl9hzpogg++EZqah -fzzNnDgxOZfW342DX052PdgXo0NnkhCk005LvFt6M2mRn0fLgNVfyUZZoOp8cO5ZWbhXXlrhrgUt -j2zKPK6Q94Zj4kdXHBGpAkrB8ZQ4EGGODE0Dqusm8WPXzB+9236IMHPU7lFbyjBrFNI7O4jg+qRI -Ipi+7tX0FsilqEbmjG+OPwhZXrdqUqyF+rjKQuSRq7lOeDB4c6S2dq4OOny01i5HCbbyc9UvSHRm -hOhGqUlzHyHLo3W7j+26V/MhkDXJ+Tx+qfylv4pbliwTteJJj+CZwzjv29qb6lxYi+38Bw10ERap -m8UCRFBecVN7xXlcIfyeAl666Vi7EBJZv3EdFNrx1nlLwM65nYya7uj6L7IwJWotIUx8E0XH0/cU -xS/dG8bxf9L/8652h5gq3LI+wTNGuEX0DMuz7BGQG+NtgabrZ6SsKGthGa7eULTpz0McWTLRU0y/ -/tkckpm5pDnXSFbIMskwwjECz82UZBSPpigdN/Pjg5d+0yWu7s3VJxw4ENWPPpzZ+j7sOXmdvn9P -O1tQd60EO+3awASCBAAZQvWV3/yJ6FxPttbP+qeURpJoPEZfpN2UYZmd8HqtR0YbaOZ6Rln9nvpd -K9fylXdw9z2xeCbjDWUttJB4VqZxGJM8eCTC1VDVyAOsQ5n7SY55dMkQbU+o4Z/4J5m8+wz50BBI -LfruL1eZ6/CF6CdvxVRiJ10sXc0Tn2sVMXqkw7Adp1GYoCI9c6VFSFK74+n+y7LVFQ5HBnbQyKJc -dvdLOXwZOPaFHC5UNXRmOpcwdPqyXUe+xIsOMYbzdlAnI9eGDNeRDktUa/Rh0CbZCxjmJzoZEYOE -ZjsYZlEfp1Kb61t8z4m28hGLEg88T1Ihmxa2HeUWes1RpmgIOP+/2Lb3smj/l/fpSu4gabFgyCAV -H5HdCYMScUv8SVu55+tpeO8ELoHHQUXV4rr084O4budzhgNSOPyLGDl5sfDUXiyusPCxS4JVO/KY -6V2Qrtg/q2wtmXpEkZnGT+Qi3WDzwt4W81alztnYMP17oGLmxX71KV9OEiMZjI4WaaGt+OOINLtR -qefioZ1NI2L1s5M0tybwTsyU9WERM+3pUwXIfJVsbMZRlNaO2OogcHbaR4UWvhOj+3CTG1sThiYQ -MxMnp1Rpqx3nhyzqLO3TRrkYvxnA3cdPBn9EeqpgBMg7X3hCiMV3Fl5cj/WOMhtHYgY7BgeCXo46 -EFVZ4+WroGZ46xGiRDiIblo8bzLd7QCxvukzxy3mUDgsZQ8pds4N28weSUhBk5MAPbfBpRvXUVJx -MhKqXucQU1Md1qSGLbuuIQuz9pAGp1JFUx/vEkCgm74daSoVWCZuB+1ZE4f48clvrBj51xMNf8CP -EFE7vySzVb6X2H1i5X3Z+Y3DdIcWw4Y2FClfcJk4Mwq8Cq2GALGFEge9YSEE9YmyuU6OFeU0ICon -iXAgZ72SM8fBwJPruLFbdsNYKW+oAfmPisXSWMcZmdSbfk0GYv+vKtu3eegSbWw1UsCVtZOh9E5Z -uQ83l59CBqO9sV/SFU3WrrJ0qNWxrmXu9nJn5Qf5iCRoFGYNHYHkIG5FS6N00GEDZxGkxmro2d++ -Adj5LVHc/b1cYWmrux+jEqI8ZK8cyTB0XMbBA/HYbx9NXazr7znP4/Mlv3pZToEcYt+lgLHAArtU -AdhybhbLIwNMq0gr6EwtDklBa3ns4Wx/rJU8H7LGs6gV8uqeaSketv+nz+sQhfctxZ1rx+5qzXfy -FOQVpO23KDQunBi1Bl9k61Di4q9JWcyADBXPHXJzp7mL8Fk7zdvMAEfuED1phdRm6GgDYoYUs4yQ -IrhSjFlWyk7hT8475xk3BIv++obvWSAv/3+pF6A6U2RXDChVmnG0JnPa9wYYtdzBmLfZKBjX+DjD -yEMsuhPsCzuN4R6tBIIBWCVRKmKwdkatmpsQBgDw48u0/Arffl5/DRlS9ee+QffFecUitDdCK+kt -X5L2fGYrL5g6SltncMIeV1ptx4nuSjC/O944q1KYtqvQiPFWJqEXIRMNbbYOC47sjLza0tEFrimN -wxcrWGSzsy5R9beFQ1aHPcMrDWfCoviNRk2qPtxuKIC5Qk2ZuOmJLjCiLwUGEb0/1Mpzv3MqQa7d -mRayXg3DZWJPajxNZv6eS357ElMvwGQmqafb2mlQJwWLsg9m9PG7uqEoyrqSc6MiuY+icLEFib9j -OfRQrx70rTSKUfTr4MtP0aZZAefjCrpVIyTekhFDOk0Nmx057eonlyGgmGpl5/Uo+t1J1Z11Ya/l -bNbfmebRISJeTVW0I8FhseAZMI1GSwp/ludJxSLYOgyRkh+GX134MexNo7O9F1SxLCfWaSG9Fc3s -5ify04ua9/t8SGrYZPm/l3MkAAAAAAAAAAAAAA== - - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e.pem deleted file mode 100644 index 55dbd8f80b..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/msie-s-a-e.pem +++ /dev/null @@ -1,106 +0,0 @@ ------BEGIN PKCS7----- -MIAGCSqGSIb3DQEHA6CAMIITUAIBADGCAcIwgcwCAQAwdjBiMREwDwYDVQQHEwhJ -bnRlcm5ldDEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xNDAyBgNVBAsTK1ZlcmlT -aWduIENsYXNzIDEgQ0EgLSBJbmRpdmlkdWFsIFN1YnNjcmliZXICEGBAmILep9sJ -uN2Pkg1gucowDQYJKoZIhvcNAQEBBQAEQKOxxpIbRTb31+qIQCBOpMVAGnwCLrPO -OXnntxVveTrPhBnsJuoGNJcCMtuHl7tLkd38WsVwOfGXUprTqFpUcY8wgfACAQAw -gZkwgZIxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQH -EwhCcmlzYmFuZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsT -GURFTU9OU1RSQVRJT04gQU5EIFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBW -QUxVRSBDQQICBG4wDQYJKoZIhvcNAQEBBQAEQKzIddnXFqK/RRBG86ZVixoXfwAv -ZdfvnDFsKzPn/A6DuDdE/hqJxnFpHiwGiDR/YbWj2ib+G74DSshgX8vj8ZQwghGD -BgkqhkiG9w0BBwEwGgYIKoZIhvcNAwIwDgICAKAECLKM0K58I3HNgIIRWBDP8s+g -l7/6JhZ5PBJ5vPpD1R4pnNDZRabsYu0Qzezdz1IK+maTAhF0SEuNZoEkzIgOIvu3 -UjW8x/2KcHwteABwgRiSk1JyNLwRED0RtvuJ40pIGDvDsHwBMcXEtKUMAOpGChms -HUK1+B/GKancSDFOCeG3JCFnFdbuQFDIVHC6amWslc6ntqVs/sii2Gs4qdzDfQgl -PV/A/FY0wi0j2O5cXRQJtZVKM/MYGjj5c1cy9Q7oymU5ATP/LARD0Tw5kBB7HVnD -2qrmmmxcHakyxhXmwskaKS6Qxgqq6LPdgr603onxqEtwNSz5UbCq9mLXuCkFOVsQ -/5mZcmmc7re3XJMQHRXkBlxEpZQ6HOUIEzlhRqCo2hhymyZK84rptjORQo6vNakM -IC0Zn8O+6Xl1NlmbGzZakz6LJ4dczE7eG7LwrVus9Oq3V/WweG6rWMPV5d51oKMi -BRb2pXXy80BAKh/9hvpP/V/HO1/+MPuk8tAn7ssmQoIAQcZxqLOmgqqFhy21oSEu -rUY746Gp/vIV6Ax1MnfDA/fbjDllhJTl8ujZnfj8R1/oQa9fJMipV8CYQEAt3okZ -V03kaVhhPGsv544/zRnhqjhgqjHASZGnluZCxDwHb1CE4UqYLp6IbkH3qosy2ntm -a7WCsM3FN3o5dKFZXBgBEPT8HU9LLQ8B+T0Zvh8CQ719kkNBhnuzSYzmI48R5JHv -zbROiU5v4p9FpssD13vXiOxzaMTKQqWk/roGHiNoKzKonS5RBwTxKfOsoCobCBaI -IZCmJNr4cGI/p5GVEY4pCPljTq9w3HoiNrhwZ7r7II3wdXy0JjSiRkr3w1AKXm3v -1H9kHZzc28Ekf7+De6VTjyKRPe1WYPETeY5mh9nIYUQz30/5gIxJwsPFOXeHMbLR -iQjxTznk/uo4BtwV+HZPmf4tIQvAuFuTKYR9lP3XsZZN+bKkFYy8nHRYd4OErBBg -93NsqBYROdzJ4pqxoJb6gt2EHCt3pazavgwDxzCJhcMtvBiYLEkDxUX77yS5sXYc -k6NLIwFL5BPY9JX/QP1RaiJV+TtHg69YElW+rudX6VdLQ7mKCMTHGWbHU5oF+Dv4 -Kv8xsrjNU2spImfzY2coq3osLpFpOa8VhrLSwMXFDtej5OHq0M0qYpYm3op6B+Nz -6pNYlMpveQv5Qjy9LTNMvLX4z+tnh6Siz8mJLmtghwCzC+DnyQGCn/Stzq6kF5aM -Fork7RosB4FB8u6WjVrFZfTg5JUqZh6gCW3JHpQ70wTdpy6tDKDtNrbjKo7CGL8+ -shoSI7t2xa8W3OyyFXMEuJzTY2UCQ+QxTOWI9idkdZCCUMBXEvkraqG8rU4UKRCk -iAGwEMzophEv+aSyJfdu6yJGjd+/wUCHMS3EmtOWHwtB5d5sVhErdQNA+JQN+wPU -vBG2Jg3wGJTHNNx4WRnWRLWsH6SRcgx8zWXHjVgXIep8XjWQmOGBXbG87hktESqc -DQPUv3MzieKfcpgQsboRnqSN1DymwvAu+PVsMNf8DHQKLdZ/iuzPzwRkfIVpmh+U -ku1N8jMAAdX6tjQ48eLq/lJvjHJUnTdNTA18xhDvngKNaXQGPOO/dt+JfHQdmFQI -WdWZSeCpm6jCiaxiV50FBGEuv3Ew0Y7FKHLKFu7hoNXIthoh2qo5ZH6QX4nt0nYR -VvyvIcAB9BwC7OeqBTeusFJD/wGiQD3yFDYUekaRys2fcwguNqAdpjWsEETJxREw -ZdbP0sDMSykae4wHyrODWcUmOiPc/9nU6c0t7bBzI8VQ7sZKJb0TVgSCxDHi9Qgb -4qdh4ZqFMP0s+TQ4Ms5NWkC1eg0DSdkWSzGQ0QrUsGUgbupYYUoEyRmsz9PjW6l1 -wuWkE66qxSHFsg18/gA5XpLM82DP4EmkEwgC08Hww2lPRJN8VYFvlwbRMbSd1PtA -Dd5dI/Oq1CHSL6enlncVkKr/S7aaKKyjAyn1HOT2FdPWdSYLE+F+dWc6vu+JeXc+ -7nSDO1DW3U6ZkZvTw4901GxgrzUfYajhBky25L29WUduGoerhkZds5wduSKQwBkY -TUW/YrP6Ttrbx8B0wzDo64L2Rv6Tb14ElMc9KUY1I1yHKgXtl9oHEH3mpa/IGO+q -PN1hBi+jg68lfb00FR7edvos4KWKAWWlZaYKm2ZeeHPjhNGcX/UcF8A23GLuSruR -NDDy9KlI3NMtvmOhOD+WwFmmRjBk/jkZMlVM3GlpjP/l5TuzxEecK1pQiDGQl9S/ -574qGNKtAiqgC5+wyUFPqnp28K2+rsEu7jWWOlfOrNGg2KVO8kcqsQuUm7sjSW9N -oHcVAoKSLHBWjFIzFUZpeDOASI1/21Ph4B6FTFJvEpzfYEXE9osz64DhlXDEyY33 -p86AiqAj6PUY4BjHLYAeA7ymphKzVOzq4pSH7qKsxDzUnj/Uj8P85A5jbzHmzOPf -VnvQlZ7B1FqhdNP2SVqLmu+/tH9Wy0v7ZnItSQbyHDKP+Eib4B/ihaNIIoz/YKgD -3eYgjp6ZyZR1KkSMpuNkpsihA7s3UsFyowEeo+7l3Wt/ScjCa/IBsxLLtI2EZAJo -9WHYqsCl1HCzJHI9QCnlPz8U1FcNGCrL/66MKz2MMN8UFFqjYr61wexghxNwc7GD -XR7Js4mhiF0GwPhAR3ZLiqzPAXAvn8YRf5fylMZ4LA9RO+SHFL/MxQxfrJOAgWB6 -pLPUuCWJXLm7os6rCC23RsyDDWe8N+JNF0/ryzr8MHeIJGsT+AYnZr08PeTbyr01 -JEoT7lPYT6PzX4F63QKKDl+mB+PwLMzYCXrxZcUmuay6/MV8w/f5T6vQXdoSw5pu -WodBYwVReYh1IaEN+jiTapm9YBVmcIsJPO6abHowknSVOWSvST0AtAX57fFOTckm -+facfBK9s9T1lUUgF44Bh5e8f9qKqfOV44nqdCOEyUm0Dao497ieN4EgXBLNvOZY -9+irMiXjp0lcyFvhrJOczfyCr9EiiaiH1TfSzKGKsf2W84iKn/JH6x2eOo7xjwJ4 -0BQDc6S1cUNEuqBhP6by0FioOXYOKVyifpxk84Eb+F/4CNdTJTvCPwsiegdfsX/Q -53DvKVtXp9Ycam5JTmKRHXK/bMHF4ONv3p/O/kn/BqRx+fbbP2eMX8Z1F/ltHKfp -6B+06HljUwQLBJs9XtCfqH5Zgdz9gad5WZF5ykFArmHDgeFlgggvbZ7z9vqnjN/T -H68TxJzauYQ5vLHQ6wGXik4/4uq7/TqNmhxlQEM4zVkwsn203bUmKLyz+yl1zItD -pn5zy1uXfGo99rBdUzdbdE9LmEFPMaFsaHd4a8oDaUroD7FgCbeDJJVld3ac6F8+ -3QbExPs48OrgA1kI3/UwXr52ldjiYzTLfAGR9BjqNFTw45FUHuMf8TEM5hcHx56w -95eKAqraDk28o9k+M2UKpcmrdlWoWzdqVVFeWGpM8x9Y9Nt0lf/4VUQgrXjqTkUC -QkJyqTeTeGgHrn3QBk2XAgpxZhaJs3InW0BkAlBmK99cMinUiJeFt5a4p5wPeXrV -uh6V9m7Mpl9hzpogg++EZqahfzzNnDgxOZfW342DX052PdgXo0NnkhCk005LvFt6 -M2mRn0fLgNVfyUZZoOp8cO5ZWbhXXlrhrgUtj2zKPK6Q94Zj4kdXHBGpAkrB8ZQ4 -EGGODE0Dqusm8WPXzB+9236IMHPU7lFbyjBrFNI7O4jg+qRIIpi+7tX0FsilqEbm -jG+OPwhZXrdqUqyF+rjKQuSRq7lOeDB4c6S2dq4OOny01i5HCbbyc9UvSHRmhOhG -qUlzHyHLo3W7j+26V/MhkDXJ+Tx+qfylv4pbliwTteJJj+CZwzjv29qb6lxYi+38 -Bw10ERapm8UCRFBecVN7xXlcIfyeAl666Vi7EBJZv3EdFNrx1nlLwM65nYya7uj6 -L7IwJWotIUx8E0XH0/cUxS/dG8bxf9L/8652h5gq3LI+wTNGuEX0DMuz7BGQG+Nt -gabrZ6SsKGthGa7eULTpz0McWTLRU0y//tkckpm5pDnXSFbIMskwwjECz82UZBSP -pigdN/Pjg5d+0yWu7s3VJxw4ENWPPpzZ+j7sOXmdvn9PO1tQd60EO+3awBlC9ZXf -/InoXE+21s/6p5RGkmg8Rl+k3ZRhmZ3weq1HRhto5npGWf2e+l0r1/KVd3D3PbF4 -JuMNZS20kHhWpnEYkzx4JMLVUNXIA6xDmftJjnl0yRBtT6jhn/gnmbz7DPnQEEgt -+u4vV5nr8IXoJ2/FVGInXSxdzROfaxUxeqTDsB2nUZigIj1zpUVIUrvj6f7LstUV -DkcGdtDIolx290s5fBk49oUcLlQ1dGY6lzB0+rJdR77Eiw4xhvN2UCcj14YM15EO -S1Rr9GHQJtkLGOYnOhkRg4RmOxhmUR+nUpvrW3zPibbyEYsSDzxPUiGbFrYd5RZ6 -zVGmaAg4/7/YtveyaP+X9+lK7iBpsWDIIBUfkd0JgxJxS/xJW7nn62l47wQugcdB -RdXiuvTzg7hu53OGA1I4/IsYOXmx8NReLK6w8LFLglU78pjpXZCu2D+rbC2ZekSR -mcZP5CLdYPPC3hbzVqXO2dgw/XugYubFfvUpX04SIxmMjhZpoa3444g0u1Gp5+Kh -nU0jYvWzkzS3JvBOzJT1YREz7elTBch8lWxsxlGU1o7Y6iBwdtpHhRa+E6P7cJMb -WxOGJhAzEyenVGmrHeeHLOos7dNGuRi/GcDdx08Gf0R6qmAEyDtfeEKIxXcWXlyP -9Y4yG0diBjsGB4JejjoQVVnj5augZnjrEaJEOIhuWjxvMt3tALG+6TPHLeZQOCxl -Dyl2zg3bzB5JSEGTkwA9t8GlG9dRUnEyEqpe5xBTUx3WpIYtu64hC7P2kAanUkVT -H+8SQKCbvh1pKhVYJm4H7VkTh/jxyW+sGPnXEw1/wI8QUTu/JLNVvpfYfWLlfdn5 -jcN0hxbDhjYUKV9wmTgzCrwKrYYAsYUSB71hIQT1ibK5To4V5TQgKieJcCBnvZIz -x8HAk+u4sVt2w1gpb6gB+Y+KxdJYxxmZ1Jt+TQZi/68q27d56BJtbDVSwJW1k6H0 -Tlm5DzeXn0IGo72xX9IVTdausnSo1bGuZe72cmflB/mIJGgUZg0dgeQgbkVLo3TQ -YQNnEaTGaujZ374B2PktUdz9vVxhaau7H6MSojxkrxzJMHRcxsED8dhvH01drOvv -Oc/j8yW/ellOgRxi36WAscACu1QB2HJuFssjA0yrSCvoTC0OSUFreezhbH+slTwf -ssazqBXy6p5pKR62/6fP6xCF9y3FnWvH7mrNd/IU5BWk7bcoNC6cGLUGX2TrUOLi -r0lZzIAMFc8dcnOnuYvwWTvN28wAR+4QPWmF1GboaANihhSzjJAiuFKMWVbKTuFP -zjvnGTcEi/76hu9ZIC//f6kXoDpTZFcMKFWacbQmc9r3Bhi13MGYt9koGNf4OMPI -Qyy6E+wLO43hHq0lUSpisHZGrZqbEAYA8OPLtPwK335efw0ZUvXnvkH3xXnFIrQ3 -QivpLV+S9nxmKy+YOkpbZ3DCHldabceJ7kowvzveOKtSmLar0IjxViahFyETDW22 -DguO7Iy82tLRBa4pjcMXK1hks7MuUfW3hUNWhz3DKw1nwqL4jUZNqj7cbiiAuUJN -mbjpiS4woi8FBhG9P9TKc79zKkGu3ZkWsl4Nw2ViT2o8TWb+nkt+exJTL8BkJqmn -29ppUCcFi7IPZvTxu7qhKMq6knOjIrmPonCxBYm/Yzn0UK8e9K00ilH06+DLT9Gm -WQHn4wq6VSMk3pIRQzpNDZsdOe3qJ5choJhqZef1KPrdSdWddWGv5WzW35nm0SEi -Xk1VtCPBYbHgGTCNRksKf5bnScUi2DoMkZIfhl9d+DHsTaOzvRdUsSwn1mkhvRXN -7OYn8tOLmvf7fEhq2GT5v5dzJAAAAAA= ------END PKCS7----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/nav-smime b/drivers/builtin_openssl2/crypto/pkcs7/t/nav-smime deleted file mode 100644 index 6ee4b597a1..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/nav-smime +++ /dev/null @@ -1,157 +0,0 @@ -From angela@c2.net.au Thu May 14 13:32:27 1998 -X-UIDL: 83c94dd550e54329bf9571b72038b8c8 -Return-Path: angela@c2.net.au -Received: from cryptsoft.com (play.cryptsoft.com [203.56.44.3]) by pandora.cryptsoft.com (8.8.3/8.7.3) with ESMTP id NAA27838 for ; Thu, 14 May 1998 13:32:26 +1000 (EST) -Message-ID: <355A6779.4B63E64C@cryptsoft.com> -Date: Thu, 14 May 1998 13:39:37 +1000 -From: Angela van Lent -X-Mailer: Mozilla 4.03 [en] (Win95; U) -MIME-Version: 1.0 -To: tjh@cryptsoft.com -Subject: signed -Content-Type: multipart/signed; protocol="application/x-pkcs7-signature"; micalg=sha1; boundary="------------ms9A58844C95949ECC78A1C54C" -Content-Length: 2604 -Status: OR - -This is a cryptographically signed message in MIME format. - ---------------ms9A58844C95949ECC78A1C54C -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit - -signed body - ---------------ms9A58844C95949ECC78A1C54C -Content-Type: application/x-pkcs7-signature; name="smime.p7s" -Content-Transfer-Encoding: base64 -Content-Disposition: attachment; filename="smime.p7s" -Content-Description: S/MIME Cryptographic Signature - -MIIGHgYJKoZIhvcNAQcCoIIGDzCCBgsCAQExCzAJBgUrDgMCGgUAMAsGCSqGSIb3DQEHAaCC -BGswggJTMIIB/aADAgECAgIEfjANBgkqhkiG9w0BAQQFADCBkjELMAkGA1UEBhMCQVUxEzAR -BgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNv -ZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UE -AxMSREVNTyBaRVJPIFZBTFVFIENBMB4XDTk4MDUxMzA2MjY1NloXDTAwMDUxMjA2MjY1Nlow -gaUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFu -ZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxEjAQBgNVBAsTCVNNSU1FIDAwMzEZMBcG -A1UEAxMQQW5nZWxhIHZhbiBMZWVudDEjMCEGCSqGSIb3DQEJARYUYW5nZWxhQGNyeXB0c29m -dC5jb20wXDANBgkqhkiG9w0BAQEFAANLADBIAkEAuC3+7dAb2LhuO7gt2cTM8vsNjhG5JfDh -hX1Vl/wVGbKEEj0MA6vWEolvefQlxB+EzwCtR0YZ7eEC/T/4JoCyeQIDAQABoygwJjAkBglg -hkgBhvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EAUnSP -igs6TMFISTjw8cBtJYb98czgAVkVFjKyJQwYMH8FbDnCyx6NocM555nsyDstaw8fKR11Khds -syd3ikkrhDCCAhAwggG6AgEDMA0GCSqGSIb3DQEBBAUAMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0EwHhcNOTgwMzAzMDc0MTMyWhcNMDgwMjI5MDc0MTMyWjCB -kjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5l -MRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBB -TkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENBMFwwDQYJKoZIhvcNAQEB -BQADSwAwSAJBAL+0E2fLej3FSCwe2A2iRnMuC3z12qHIp6Ky1wo2zZcxft7AI+RfkrWrSGtf -mfzBEuPrLdfulncC5Y1pNcM8RTUCAwEAATANBgkqhkiG9w0BAQQFAANBAGSbLMphL6F5pp3s -8o0Xyh86FHFdpVOwYx09ELLkuG17V/P9pgIc0Eo/gDMbN+KT3IdgECf8S//pCRA6RrNjcXIx -ggF7MIIBdwIBATCBmTCBkjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAP -BgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZ -REVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENB -AgIEfjAJBgUrDgMCGgUAoHowGAYJKoZIhvcNAQkDMQsGCSqGSIb3DQEHATAbBgkqhkiG9w0B -CQ8xDjAMMAoGCCqGSIb3DQMHMBwGCSqGSIb3DQEJBTEPFw05ODA1MTQwMzM5MzdaMCMGCSqG -SIb3DQEJBDEWBBQstNMnSV26ba8PapQEDhO21yNFrjANBgkqhkiG9w0BAQEFAARAW9Xb9YXv -BfcNkutgFX9Gr8iXhBVsNtGEVrjrpkQwpKa7jHI8SjAlLhk/4RFwDHf+ISB9Np3Z1WDWnLcA -9CWR6g== ---------------ms9A58844C95949ECC78A1C54C-- - - -From angela@c2.net.au Thu May 14 13:33:16 1998 -X-UIDL: 8f076c44ff7c5967fd5b00c4588a8731 -Return-Path: angela@c2.net.au -Received: from cryptsoft.com (play.cryptsoft.com [203.56.44.3]) by pandora.cryptsoft.com (8.8.3/8.7.3) with ESMTP id NAA27847 for ; Thu, 14 May 1998 13:33:15 +1000 (EST) -Message-ID: <355A67AB.2AF38806@cryptsoft.com> -Date: Thu, 14 May 1998 13:40:27 +1000 -From: Angela van Lent -X-Mailer: Mozilla 4.03 [en] (Win95; U) -MIME-Version: 1.0 -To: tjh@cryptsoft.com -Subject: signed -Content-Type: multipart/signed; protocol="application/x-pkcs7-signature"; micalg=sha1; boundary="------------msD7863B84BD61E02C407F2F5E" -Content-Length: 2679 -Status: OR - -This is a cryptographically signed message in MIME format. - ---------------msD7863B84BD61E02C407F2F5E -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit - -signed body 2 - ---------------msD7863B84BD61E02C407F2F5E -Content-Type: application/x-pkcs7-signature; name="smime.p7s" -Content-Transfer-Encoding: base64 -Content-Disposition: attachment; filename="smime.p7s" -Content-Description: S/MIME Cryptographic Signature - -MIIGVgYJKoZIhvcNAQcCoIIGRzCCBkMCAQExCzAJBgUrDgMCGgUAMAsGCSqGSIb3DQEHAaCC -BGswggJTMIIB/aADAgECAgIEfjANBgkqhkiG9w0BAQQFADCBkjELMAkGA1UEBhMCQVUxEzAR -BgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNv -ZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UE -AxMSREVNTyBaRVJPIFZBTFVFIENBMB4XDTk4MDUxMzA2MjY1NloXDTAwMDUxMjA2MjY1Nlow -gaUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFu -ZTEaMBgGA1UEChMRQ3J5cHRzb2Z0IFB0eSBMdGQxEjAQBgNVBAsTCVNNSU1FIDAwMzEZMBcG -A1UEAxMQQW5nZWxhIHZhbiBMZWVudDEjMCEGCSqGSIb3DQEJARYUYW5nZWxhQGNyeXB0c29m -dC5jb20wXDANBgkqhkiG9w0BAQEFAANLADBIAkEAuC3+7dAb2LhuO7gt2cTM8vsNjhG5JfDh -hX1Vl/wVGbKEEj0MA6vWEolvefQlxB+EzwCtR0YZ7eEC/T/4JoCyeQIDAQABoygwJjAkBglg -hkgBhvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EAUnSP -igs6TMFISTjw8cBtJYb98czgAVkVFjKyJQwYMH8FbDnCyx6NocM555nsyDstaw8fKR11Khds -syd3ikkrhDCCAhAwggG6AgEDMA0GCSqGSIb3DQEBBAUAMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0EwHhcNOTgwMzAzMDc0MTMyWhcNMDgwMjI5MDc0MTMyWjCB -kjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAPBgNVBAcTCEJyaXNiYW5l -MRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZREVNT05TVFJBVElPTiBB -TkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENBMFwwDQYJKoZIhvcNAQEB -BQADSwAwSAJBAL+0E2fLej3FSCwe2A2iRnMuC3z12qHIp6Ky1wo2zZcxft7AI+RfkrWrSGtf -mfzBEuPrLdfulncC5Y1pNcM8RTUCAwEAATANBgkqhkiG9w0BAQQFAANBAGSbLMphL6F5pp3s -8o0Xyh86FHFdpVOwYx09ELLkuG17V/P9pgIc0Eo/gDMbN+KT3IdgECf8S//pCRA6RrNjcXIx -ggGzMIIBrwIBATCBmTCBkjELMAkGA1UEBhMCQVUxEzARBgNVBAgTClF1ZWVuc2xhbmQxETAP -BgNVBAcTCEJyaXNiYW5lMRowGAYDVQQKExFDcnlwdHNvZnQgUHR5IEx0ZDEiMCAGA1UECxMZ -REVNT05TVFJBVElPTiBBTkQgVEVTVElORzEbMBkGA1UEAxMSREVNTyBaRVJPIFZBTFVFIENB -AgIEfjAJBgUrDgMCGgUAoIGxMBgGCSqGSIb3DQEJAzELBgkqhkiG9w0BBwEwHAYJKoZIhvcN -AQkFMQ8XDTk4MDUxNDAzNDAyN1owIwYJKoZIhvcNAQkEMRYEFOKcV8mNYJnM8rHQajcSEqJN -rwdDMFIGCSqGSIb3DQEJDzFFMEMwCgYIKoZIhvcNAwcwDgYIKoZIhvcNAwICAgCAMAcGBSsO -AwIHMA0GCCqGSIb3DQMCAgFAMA0GCCqGSIb3DQMCAgEoMA0GCSqGSIb3DQEBAQUABEADPE/N -coH+zTFuX5YpolupTKxKK8eEjc48TuADuO8bIHHDE/fEYaWunlwDuTlcFJl1ig0idffPB1qC -Zp8SSVVY ---------------msD7863B84BD61E02C407F2F5E-- - - -From angela@c2.net.au Thu May 14 14:05:32 1998 -X-UIDL: a7d629b4b9acacaee8b39371b860a32a -Return-Path: angela@c2.net.au -Received: from cryptsoft.com (play.cryptsoft.com [203.56.44.3]) by pandora.cryptsoft.com (8.8.3/8.7.3) with ESMTP id OAA28033 for ; Thu, 14 May 1998 14:05:32 +1000 (EST) -Message-ID: <355A6F3B.AC385981@cryptsoft.com> -Date: Thu, 14 May 1998 14:12:43 +1000 -From: Angela van Lent -X-Mailer: Mozilla 4.03 [en] (Win95; U) -MIME-Version: 1.0 -To: tjh@cryptsoft.com -Subject: encrypted -Content-Type: application/x-pkcs7-mime; name="smime.p7m" -Content-Transfer-Encoding: base64 -Content-Disposition: attachment; filename="smime.p7m" -Content-Description: S/MIME Encrypted Message -Content-Length: 905 -Status: OR - -MIAGCSqGSIb3DQEHA6CAMIACAQAxggHmMIHwAgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEG -A1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNUUkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQD -ExJERU1PIFpFUk8gVkFMVUUgQ0ECAgR+MA0GCSqGSIb3DQEBAQUABEA92N29Yk39RUY2tIVd -exGT2MFX3J6H8LB8aDRJjw7843ALgJ5zXpM5+f80QkAWwEN2A6Pl3VxiCeKLi435zXVyMIHw -AgEAMIGZMIGSMQswCQYDVQQGEwJBVTETMBEGA1UECBMKUXVlZW5zbGFuZDERMA8GA1UEBxMI -QnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29mdCBQdHkgTHRkMSIwIAYDVQQLExlERU1PTlNU -UkFUSU9OIEFORCBURVNUSU5HMRswGQYDVQQDExJERU1PIFpFUk8gVkFMVUUgQ0ECAgRuMA0G -CSqGSIb3DQEBAQUABECR9IfyHtvnjFmZ8B2oUCEs1vxMsG0u1kxKE4RMPFyDqDCEARq7zXMg -nzSUI7Wgv5USSKDqcLRJeW+jvYURv/nJMIAGCSqGSIb3DQEHATAaBggqhkiG9w0DAjAOAgIA -oAQIrLqrij2ZMpeggAQoibtn6reRZWuWk5Iv5IAhgitr8EYE4w4ySQ7EMB6mTlBoFpccUMWX -BwQgQn1UoWCvYAlhDzURdbui64Dc0rS2wtj+kE/InS6y25EEEPe4NUKaF8/UlE+lo3LtILQE -CL3uV8k7m0iqAAAAAAAAAAAAAA== - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/s.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/s.pem deleted file mode 100644 index 4fa925b182..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/s.pem +++ /dev/null @@ -1,57 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIBOgIBAAJBAK3nI4nuDYe3nDJES5WBc90igEstxWC4/h4YY+/ciYki35U8ets9 -mgaoCNYp/e9BCZHtvK2Y+fYokGJv5+cMTQsCAwEAAQJBAIHpvXvqEcOEoDRRHuIG -fkcB4jPHcr9KE9TpxabH6xs9beN6OJnkePXAHwaz5MnUgSnbpOKq+cw8miKjXwe/ -zVECIQDVLwncT2lRmXarEYHzb+q/0uaSvKhWKKt3kJasLNTrAwIhANDUc/ghut29 -p3jJYjurzUKuG774/5eLjPLsxPPIZzNZAiA/10hSq41UnGqHLEUIS9m2/EeEZe7b -bm567dfRU9OnVQIgDo8ROrZXSchEGbaog5J5r/Fle83uO8l93R3GqVxKXZkCIFfk -IPD5PIYQAyyod3hyKKza7ZP4CGY4oOfZetbkSGGG ------END RSA PRIVATE KEY----- -issuer :/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=DEMONSTRATION AND TESTING/CN=DEMO ZERO VALUE CA -subject:/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=SMIME 003/CN=Information/Email=info@cryptsoft.com -serial :047D - -Certificate: - Data: - Version: 3 (0x2) - Serial Number: 1149 (0x47d) - Signature Algorithm: md5withRSAEncryption - Issuer: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=DEMONSTRATION AND TESTING, CN=DEMO ZERO VALUE CA - Validity - Not Before: May 13 05:40:58 1998 GMT - Not After : May 12 05:40:58 2000 GMT - Subject: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=SMIME 003, CN=Information/Email=info@cryptsoft.com - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - Modulus: - 00:ad:e7:23:89:ee:0d:87:b7:9c:32:44:4b:95:81: - 73:dd:22:80:4b:2d:c5:60:b8:fe:1e:18:63:ef:dc: - 89:89:22:df:95:3c:7a:db:3d:9a:06:a8:08:d6:29: - fd:ef:41:09:91:ed:bc:ad:98:f9:f6:28:90:62:6f: - e7:e7:0c:4d:0b - Exponent: 65537 (0x10001) - X509v3 extensions: - Netscape Comment: - Generated with SSLeay - Signature Algorithm: md5withRSAEncryption - 52:15:ea:88:f4:f0:f9:0b:ef:ce:d5:f8:83:40:61:16:5e:55: - f9:ce:2d:d1:8b:31:5c:03:c6:2d:10:7c:61:d5:5c:0a:42:97: - d1:fd:65:b6:b6:84:a5:39:ec:46:ec:fc:e0:0d:d9:22:da:1b: - 50:74:ad:92:cb:4e:90:e5:fa:7d - ------BEGIN CERTIFICATE----- -MIICTDCCAfagAwIBAgICBH0wDQYJKoZIhvcNAQEEBQAwgZIxCzAJBgNVBAYTAkFV -MRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UE -ChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsTGURFTU9OU1RSQVRJT04gQU5E -IFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBWQUxVRSBDQTAeFw05ODA1MTMw -NTQwNThaFw0wMDA1MTIwNTQwNThaMIGeMQswCQYDVQQGEwJBVTETMBEGA1UECBMK -UXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMRIwEAYDVQQLEwlTTUlNRSAwMDMxFDASBgNVBAMTC0luZm9ybWF0 -aW9uMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGNyeXB0c29mdC5jb20wXDANBgkqhkiG -9w0BAQEFAANLADBIAkEArecjie4Nh7ecMkRLlYFz3SKASy3FYLj+Hhhj79yJiSLf -lTx62z2aBqgI1in970EJke28rZj59iiQYm/n5wxNCwIDAQABoygwJjAkBglghkgB -hvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EA -UhXqiPTw+QvvztX4g0BhFl5V+c4t0YsxXAPGLRB8YdVcCkKX0f1ltraEpTnsRuz8 -4A3ZItobUHStkstOkOX6fQ== ------END CERTIFICATE----- - diff --git a/drivers/builtin_openssl2/crypto/pkcs7/t/server.pem b/drivers/builtin_openssl2/crypto/pkcs7/t/server.pem deleted file mode 100644 index 989baf8709..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/t/server.pem +++ /dev/null @@ -1,57 +0,0 @@ -issuer :/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=DEMONSTRATION AND TESTING/CN=DEMO ZERO VALUE CA -subject:/C=AU/SP=Queensland/L=Brisbane/O=Cryptsoft Pty Ltd/OU=SMIME 003/CN=Information/Email=info@cryptsoft.com -serial :047D - -Certificate: - Data: - Version: 3 (0x2) - Serial Number: 1149 (0x47d) - Signature Algorithm: md5withRSAEncryption - Issuer: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=DEMONSTRATION AND TESTING, CN=DEMO ZERO VALUE CA - Validity - Not Before: May 13 05:40:58 1998 GMT - Not After : May 12 05:40:58 2000 GMT - Subject: C=AU, SP=Queensland, L=Brisbane, O=Cryptsoft Pty Ltd, OU=SMIME 003, CN=Information/Email=info@cryptsoft.com - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - Modulus: - 00:ad:e7:23:89:ee:0d:87:b7:9c:32:44:4b:95:81: - 73:dd:22:80:4b:2d:c5:60:b8:fe:1e:18:63:ef:dc: - 89:89:22:df:95:3c:7a:db:3d:9a:06:a8:08:d6:29: - fd:ef:41:09:91:ed:bc:ad:98:f9:f6:28:90:62:6f: - e7:e7:0c:4d:0b - Exponent: 65537 (0x10001) - X509v3 extensions: - Netscape Comment: - Generated with SSLeay - Signature Algorithm: md5withRSAEncryption - 52:15:ea:88:f4:f0:f9:0b:ef:ce:d5:f8:83:40:61:16:5e:55: - f9:ce:2d:d1:8b:31:5c:03:c6:2d:10:7c:61:d5:5c:0a:42:97: - d1:fd:65:b6:b6:84:a5:39:ec:46:ec:fc:e0:0d:d9:22:da:1b: - 50:74:ad:92:cb:4e:90:e5:fa:7d - ------BEGIN CERTIFICATE----- -MIICTDCCAfagAwIBAgICBH0wDQYJKoZIhvcNAQEEBQAwgZIxCzAJBgNVBAYTAkFV -MRMwEQYDVQQIEwpRdWVlbnNsYW5kMREwDwYDVQQHEwhCcmlzYmFuZTEaMBgGA1UE -ChMRQ3J5cHRzb2Z0IFB0eSBMdGQxIjAgBgNVBAsTGURFTU9OU1RSQVRJT04gQU5E -IFRFU1RJTkcxGzAZBgNVBAMTEkRFTU8gWkVSTyBWQUxVRSBDQTAeFw05ODA1MTMw -NTQwNThaFw0wMDA1MTIwNTQwNThaMIGeMQswCQYDVQQGEwJBVTETMBEGA1UECBMK -UXVlZW5zbGFuZDERMA8GA1UEBxMIQnJpc2JhbmUxGjAYBgNVBAoTEUNyeXB0c29m -dCBQdHkgTHRkMRIwEAYDVQQLEwlTTUlNRSAwMDMxFDASBgNVBAMTC0luZm9ybWF0 -aW9uMSEwHwYJKoZIhvcNAQkBFhJpbmZvQGNyeXB0c29mdC5jb20wXDANBgkqhkiG -9w0BAQEFAANLADBIAkEArecjie4Nh7ecMkRLlYFz3SKASy3FYLj+Hhhj79yJiSLf -lTx62z2aBqgI1in970EJke28rZj59iiQYm/n5wxNCwIDAQABoygwJjAkBglghkgB -hvhCAQ0EFxYVR2VuZXJhdGVkIHdpdGggU1NMZWF5MA0GCSqGSIb3DQEBBAUAA0EA -UhXqiPTw+QvvztX4g0BhFl5V+c4t0YsxXAPGLRB8YdVcCkKX0f1ltraEpTnsRuz8 -4A3ZItobUHStkstOkOX6fQ== ------END CERTIFICATE----- - ------BEGIN RSA PRIVATE KEY----- -MIIBOgIBAAJBAK3nI4nuDYe3nDJES5WBc90igEstxWC4/h4YY+/ciYki35U8ets9 -mgaoCNYp/e9BCZHtvK2Y+fYokGJv5+cMTQsCAwEAAQJBAIHpvXvqEcOEoDRRHuIG -fkcB4jPHcr9KE9TpxabH6xs9beN6OJnkePXAHwaz5MnUgSnbpOKq+cw8miKjXwe/ -zVECIQDVLwncT2lRmXarEYHzb+q/0uaSvKhWKKt3kJasLNTrAwIhANDUc/ghut29 -p3jJYjurzUKuG774/5eLjPLsxPPIZzNZAiA/10hSq41UnGqHLEUIS9m2/EeEZe7b -bm567dfRU9OnVQIgDo8ROrZXSchEGbaog5J5r/Fle83uO8l93R3GqVxKXZkCIFfk -IPD5PIYQAyyod3hyKKza7ZP4CGY4oOfZetbkSGGG ------END RSA PRIVATE KEY----- diff --git a/drivers/builtin_openssl2/crypto/pkcs7/verify.c b/drivers/builtin_openssl2/crypto/pkcs7/verify.c deleted file mode 100644 index e29f9bb7e0..0000000000 --- a/drivers/builtin_openssl2/crypto/pkcs7/verify.c +++ /dev/null @@ -1,347 +0,0 @@ -/* apps/verify.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#include "apps.h" -#include -#include -#include -#include -#include - -#undef PROG -#define PROG verify_main - -static int MS_CALLBACK cb(int ok, X509_STORE_CTX *ctx); -static int check(X509_STORE *ctx, char *file, - STACK_OF(X509) *uchain, STACK_OF(X509) *tchain, - STACK_OF(X509_CRL) *crls, ENGINE *e); -static int v_verbose = 0, vflags = 0; - -int MAIN(int, char **); - -int MAIN(int argc, char **argv) -{ - ENGINE *e = NULL; - int i, ret = 1, badarg = 0; - char *CApath = NULL, *CAfile = NULL; - char *untfile = NULL, *trustfile = NULL, *crlfile = NULL; - STACK_OF(X509) *untrusted = NULL, *trusted = NULL; - STACK_OF(X509_CRL) *crls = NULL; - X509_STORE *cert_ctx = NULL; - X509_LOOKUP *lookup = NULL; - X509_VERIFY_PARAM *vpm = NULL; -#ifndef OPENSSL_NO_ENGINE - char *engine = NULL; -#endif - - cert_ctx = X509_STORE_new(); - if (cert_ctx == NULL) - goto end; - X509_STORE_set_verify_cb(cert_ctx, cb); - - ERR_load_crypto_strings(); - - apps_startup(); - - if (bio_err == NULL) - if ((bio_err = BIO_new(BIO_s_file())) != NULL) - BIO_set_fp(bio_err, stderr, BIO_NOCLOSE | BIO_FP_TEXT); - - if (!load_config(bio_err, NULL)) - goto end; - - argc--; - argv++; - for (;;) { - if (argc >= 1) { - if (strcmp(*argv, "-CApath") == 0) { - if (argc-- < 1) - goto end; - CApath = *(++argv); - } else if (strcmp(*argv, "-CAfile") == 0) { - if (argc-- < 1) - goto end; - CAfile = *(++argv); - } else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) { - if (badarg) - goto end; - continue; - } else if (strcmp(*argv, "-untrusted") == 0) { - if (argc-- < 1) - goto end; - untfile = *(++argv); - } else if (strcmp(*argv, "-trusted") == 0) { - if (argc-- < 1) - goto end; - trustfile = *(++argv); - } else if (strcmp(*argv, "-CRLfile") == 0) { - if (argc-- < 1) - goto end; - crlfile = *(++argv); - } -#ifndef OPENSSL_NO_ENGINE - else if (strcmp(*argv, "-engine") == 0) { - if (--argc < 1) - goto end; - engine = *(++argv); - } -#endif - else if (strcmp(*argv, "-help") == 0) - goto end; - else if (strcmp(*argv, "-verbose") == 0) - v_verbose = 1; - else if (argv[0][0] == '-') - goto end; - else - break; - argc--; - argv++; - } else - break; - } - -#ifndef OPENSSL_NO_ENGINE - e = setup_engine(bio_err, engine, 0); -#endif - - if (vpm) - X509_STORE_set1_param(cert_ctx, vpm); - - lookup = X509_STORE_add_lookup(cert_ctx, X509_LOOKUP_file()); - if (lookup == NULL) - abort(); - if (CAfile) { - i = X509_LOOKUP_load_file(lookup, CAfile, X509_FILETYPE_PEM); - if (!i) { - BIO_printf(bio_err, "Error loading file %s\n", CAfile); - ERR_print_errors(bio_err); - goto end; - } - } else - X509_LOOKUP_load_file(lookup, NULL, X509_FILETYPE_DEFAULT); - - lookup = X509_STORE_add_lookup(cert_ctx, X509_LOOKUP_hash_dir()); - if (lookup == NULL) - abort(); - if (CApath) { - i = X509_LOOKUP_add_dir(lookup, CApath, X509_FILETYPE_PEM); - if (!i) { - BIO_printf(bio_err, "Error loading directory %s\n", CApath); - ERR_print_errors(bio_err); - goto end; - } - } else - X509_LOOKUP_add_dir(lookup, NULL, X509_FILETYPE_DEFAULT); - - ERR_clear_error(); - - if (untfile) { - untrusted = load_certs(bio_err, untfile, FORMAT_PEM, - NULL, e, "untrusted certificates"); - if (!untrusted) - goto end; - } - - if (trustfile) { - trusted = load_certs(bio_err, trustfile, FORMAT_PEM, - NULL, e, "trusted certificates"); - if (!trusted) - goto end; - } - - if (crlfile) { - crls = load_crls(bio_err, crlfile, FORMAT_PEM, NULL, e, "other CRLs"); - if (!crls) - goto end; - } - - ret = 0; - if (argc < 1) { - if (1 != check(cert_ctx, NULL, untrusted, trusted, crls, e)) - ret = -1; - } else { - for (i = 0; i < argc; i++) - if (1 != check(cert_ctx, argv[i], untrusted, trusted, crls, e)) - ret = -1; - } - - end: - if (ret == 1) { - BIO_printf(bio_err, - "usage: verify [-verbose] [-CApath path] [-CAfile file] [-purpose purpose] [-crl_check]"); - BIO_printf(bio_err, " [-no_alt_chains] [-attime timestamp]"); -#ifndef OPENSSL_NO_ENGINE - BIO_printf(bio_err, " [-engine e]"); -#endif - BIO_printf(bio_err, " cert1 cert2 ...\n"); - - BIO_printf(bio_err, "recognized usages:\n"); - for (i = 0; i < X509_PURPOSE_get_count(); i++) { - X509_PURPOSE *ptmp; - ptmp = X509_PURPOSE_get0(i); - BIO_printf(bio_err, "\t%-10s\t%s\n", - X509_PURPOSE_get0_sname(ptmp), - X509_PURPOSE_get0_name(ptmp)); - } - } - if (vpm) - X509_VERIFY_PARAM_free(vpm); - if (cert_ctx != NULL) - X509_STORE_free(cert_ctx); - sk_X509_pop_free(untrusted, X509_free); - sk_X509_pop_free(trusted, X509_free); - sk_X509_CRL_pop_free(crls, X509_CRL_free); - apps_shutdown(); - OPENSSL_EXIT(ret < 0 ? 2 : ret); -} - -static int check(X509_STORE *ctx, char *file, - STACK_OF(X509) *uchain, STACK_OF(X509) *tchain, - STACK_OF(X509_CRL) *crls, ENGINE *e) -{ - X509 *x = NULL; - int i = 0, ret = 0; - X509_STORE_CTX *csc; - - x = load_cert(bio_err, file, FORMAT_PEM, NULL, e, "certificate file"); - if (x == NULL) - goto end; - fprintf(stdout, "%s: ", (file == NULL) ? "stdin" : file); - - csc = X509_STORE_CTX_new(); - if (csc == NULL) { - ERR_print_errors(bio_err); - goto end; - } - X509_STORE_set_flags(ctx, vflags); - if (!X509_STORE_CTX_init(csc, ctx, x, uchain)) { - ERR_print_errors(bio_err); - goto end; - } - if (tchain) - X509_STORE_CTX_trusted_stack(csc, tchain); - if (crls) - X509_STORE_CTX_set0_crls(csc, crls); - i = X509_verify_cert(csc); - X509_STORE_CTX_free(csc); - - ret = 0; - end: - if (i > 0) { - fprintf(stdout, "OK\n"); - ret = 1; - } else - ERR_print_errors(bio_err); - if (x != NULL) - X509_free(x); - - return (ret); -} - -static int MS_CALLBACK cb(int ok, X509_STORE_CTX *ctx) -{ - int cert_error = X509_STORE_CTX_get_error(ctx); - X509 *current_cert = X509_STORE_CTX_get_current_cert(ctx); - - if (!ok) { - if (current_cert) { - X509_NAME_print_ex_fp(stdout, - X509_get_subject_name(current_cert), - 0, XN_FLAG_ONELINE); - printf("\n"); - } - printf("%serror %d at %d depth lookup:%s\n", - X509_STORE_CTX_get0_parent_ctx(ctx) ? "[CRL path]" : "", - cert_error, - X509_STORE_CTX_get_error_depth(ctx), - X509_verify_cert_error_string(cert_error)); - switch (cert_error) { - case X509_V_ERR_NO_EXPLICIT_POLICY: - policies_print(NULL, ctx); - case X509_V_ERR_CERT_HAS_EXPIRED: - - /* - * since we are just checking the certificates, it is ok if they - * are self signed. But we should still warn the user. - */ - - case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: - /* Continue after extension errors too */ - case X509_V_ERR_INVALID_CA: - case X509_V_ERR_INVALID_NON_CA: - case X509_V_ERR_PATH_LENGTH_EXCEEDED: - case X509_V_ERR_INVALID_PURPOSE: - case X509_V_ERR_CRL_HAS_EXPIRED: - case X509_V_ERR_CRL_NOT_YET_VALID: - case X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION: - ok = 1; - - } - - return ok; - - } - if (cert_error == X509_V_OK && ok == 2) - policies_print(NULL, ctx); - if (!v_verbose) - ERR_clear_error(); - return (ok); -} diff --git a/drivers/builtin_openssl2/crypto/ppc_arch.h b/drivers/builtin_openssl2/crypto/ppc_arch.h new file mode 100644 index 0000000000..b50ec996a5 --- /dev/null +++ b/drivers/builtin_openssl2/crypto/ppc_arch.h @@ -0,0 +1,10 @@ +#ifndef __PPC_ARCH_H__ +# define __PPC_ARCH_H__ + +extern unsigned int OPENSSL_ppccap_P; + +# define PPC_FPU64 (1<<0) +# define PPC_ALTIVEC (1<<1) +# define PPC_CRYPTO207 (1<<2) + +#endif diff --git a/drivers/builtin_openssl2/crypto/ppccap.c b/drivers/builtin_openssl2/crypto/ppccap.c index 5242294310..74af4732b5 100644 --- a/drivers/builtin_openssl2/crypto/ppccap.c +++ b/drivers/builtin_openssl2/crypto/ppccap.c @@ -4,13 +4,15 @@ #include #include #include -#include +#if defined(__linux) || defined(_AIX) +# include +#endif +#include #include -#define PPC_FPU64 (1<<0) -#define PPC_ALTIVEC (1<<1) +#include "ppc_arch.h" -static int OPENSSL_ppccap_P = 0; +unsigned int OPENSSL_ppccap_P = 0; static sigset_t all_masked; @@ -25,7 +27,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); if (sizeof(size_t) == 4) { -# if (defined(__APPLE__) && defined(__MACH__)) +# if 1 || (defined(__APPLE__) && defined(__MACH__)) if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); # else @@ -55,6 +57,22 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, } #endif +void sha256_block_p8(void *ctx, const void *inp, size_t len); +void sha256_block_ppc(void *ctx, const void *inp, size_t len); +void sha256_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) : + sha256_block_ppc(ctx, inp, len); +} + +void sha512_block_p8(void *ctx, const void *inp, size_t len); +void sha512_block_ppc(void *ctx, const void *inp, size_t len); +void sha512_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha512_block_p8(ctx, inp, len) : + sha512_block_ppc(ctx, inp, len); +} + static sigjmp_buf ill_jmp; static void ill_handler(int sig) { @@ -63,6 +81,7 @@ static void ill_handler(int sig) void OPENSSL_ppc64_probe(void); void OPENSSL_altivec_probe(void); +void OPENSSL_crypto207_probe(void); void OPENSSL_cpuid_setup(void) { @@ -93,12 +112,15 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ppccap_P = 0; #if defined(_AIX) - if (sizeof(size_t) == 4 + if (sizeof(size_t) == 4) { + struct utsname uts; # if defined(_SC_AIX_KERNEL_BITMODE) - && sysconf(_SC_AIX_KERNEL_BITMODE) != 64 + if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64) + return; # endif - ) - return; + if (uname(&uts) != 0 || atoi(uts.version) < 6) + return; + } #endif memset(&ill_act, 0, sizeof(ill_act)); @@ -109,10 +131,14 @@ void OPENSSL_cpuid_setup(void) sigaction(SIGILL, &ill_act, &ill_oact); if (sizeof(size_t) == 4) { - if (sigsetjmp(ill_jmp, 1) == 0) { - OPENSSL_ppc64_probe(); - OPENSSL_ppccap_P |= PPC_FPU64; - } +#ifdef __linux + struct utsname uts; + if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0) +#endif + if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_ppc64_probe(); + OPENSSL_ppccap_P |= PPC_FPU64; + } } else { /* * Wanted code detecting POWER6 CPU and setting PPC_FPU64 @@ -122,6 +148,10 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(ill_jmp, 1) == 0) { OPENSSL_altivec_probe(); OPENSSL_ppccap_P |= PPC_ALTIVEC; + if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_crypto207_probe(); + OPENSSL_ppccap_P |= PPC_CRYPTO207; + } } sigaction(SIGILL, &ill_oact, NULL); diff --git a/drivers/builtin_openssl2/crypto/ppccpuid.pl b/drivers/builtin_openssl2/crypto/ppccpuid.pl deleted file mode 100755 index 4ba736a1d1..0000000000 --- a/drivers/builtin_openssl2/crypto/ppccpuid.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env perl - -$flavour = shift; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -if ($flavour=~/64/) { - $CMPLI="cmpldi"; - $SHRLI="srdi"; - $SIGNX="extsw"; -} else { - $CMPLI="cmplwi"; - $SHRLI="srwi"; - $SIGNX="mr"; -} - -$code=<<___; -.machine "any" -.text - -.globl .OPENSSL_ppc64_probe -.align 4 -.OPENSSL_ppc64_probe: - fcfid f1,f1 - extrdi r0,r0,32,0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.globl .OPENSSL_altivec_probe -.align 4 -.OPENSSL_altivec_probe: - .long 0x10000484 # vor v0,v0,v0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.globl .OPENSSL_wipe_cpu -.align 4 -.OPENSSL_wipe_cpu: - xor r0,r0,r0 - fmr f0,f31 - fmr f1,f31 - fmr f2,f31 - mr r3,r1 - fmr f3,f31 - xor r4,r4,r4 - fmr f4,f31 - xor r5,r5,r5 - fmr f5,f31 - xor r6,r6,r6 - fmr f6,f31 - xor r7,r7,r7 - fmr f7,f31 - xor r8,r8,r8 - fmr f8,f31 - xor r9,r9,r9 - fmr f9,f31 - xor r10,r10,r10 - fmr f10,f31 - xor r11,r11,r11 - fmr f11,f31 - xor r12,r12,r12 - fmr f12,f31 - fmr f13,f31 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.globl .OPENSSL_atomic_add -.align 4 -.OPENSSL_atomic_add: -Ladd: lwarx r5,0,r3 - add r0,r4,r5 - stwcx. r0,0,r3 - bne- Ladd - $SIGNX r3,r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 - -.globl .OPENSSL_rdtsc -.align 4 -.OPENSSL_rdtsc: - mftb r3 - mftbu r4 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.globl .OPENSSL_cleanse -.align 4 -.OPENSSL_cleanse: - $CMPLI r4,7 - li r0,0 - bge Lot - $CMPLI r4,0 - beqlr- -Little: mtctr r4 - stb r0,0(r3) - addi r3,r3,1 - bdnz \$-8 - blr -Lot: andi. r5,r3,3 - beq Laligned - stb r0,0(r3) - subi r4,r4,1 - addi r3,r3,1 - b Lot -Laligned: - $SHRLI r5,r4,2 - mtctr r5 - stw r0,0(r3) - addi r3,r3,4 - bdnz \$-8 - andi. r4,r4,3 - bne Little - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/pqueue/pq_test.c b/drivers/builtin_openssl2/crypto/pqueue/pq_test.c deleted file mode 100644 index 479ab22488..0000000000 --- a/drivers/builtin_openssl2/crypto/pqueue/pq_test.c +++ /dev/null @@ -1,94 +0,0 @@ -/* crypto/pqueue/pq_test.c */ -/* - * DTLS implementation written by Nagendra Modadugu - * (nagendra@cs.stanford.edu) for the OpenSSL project 2005. - */ -/* ==================================================================== - * Copyright (c) 1999-2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include "pqueue.h" - -int main(void) -{ - pitem *item; - pqueue pq; - - pq = pqueue_new(); - - item = pitem_new(3, NULL); - pqueue_insert(pq, item); - - item = pitem_new(1, NULL); - pqueue_insert(pq, item); - - item = pitem_new(2, NULL); - pqueue_insert(pq, item); - - item = pqueue_find(pq, 1); - fprintf(stderr, "found %ld\n", item->priority); - - item = pqueue_find(pq, 2); - fprintf(stderr, "found %ld\n", item->priority); - - item = pqueue_find(pq, 3); - fprintf(stderr, "found %ld\n", item ? item->priority : 0); - - pqueue_print(pq); - - for (item = pqueue_pop(pq); item != NULL; item = pqueue_pop(pq)) - pitem_free(item); - - pqueue_free(pq); - return 0; -} diff --git a/drivers/builtin_openssl2/crypto/rand/rand_win.c b/drivers/builtin_openssl2/crypto/rand/rand_win.c index 0c616c4c57..06670ae017 100644 --- a/drivers/builtin_openssl2/crypto/rand/rand_win.c +++ b/drivers/builtin_openssl2/crypto/rand/rand_win.c @@ -684,9 +684,7 @@ static void readscreen(void) { # if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) HDC hScrDC; /* screen DC */ - HDC hMemDC; /* memory DC */ HBITMAP hBitmap; /* handle for our bitmap */ - HBITMAP hOldBitmap; /* handle for previous bitmap */ BITMAP bm; /* bitmap properties */ unsigned int size; /* size of bitmap */ char *bmbits; /* contents of bitmap */ @@ -694,13 +692,13 @@ static void readscreen(void) int h; /* screen height */ int y; /* y-coordinate of screen lines to grab */ int n = 16; /* number of screen lines to grab at a time */ + BITMAPINFOHEADER bi; /* info about the bitmap */ if (check_winnt() && OPENSSL_isservice() > 0) return; - /* Create a screen DC and a memory DC compatible to screen DC */ - hScrDC = CreateDC(TEXT("DISPLAY"), NULL, NULL, NULL); - hMemDC = CreateCompatibleDC(hScrDC); + /* Get a reference to the screen DC */ + hScrDC = GetDC(NULL); /* Get screen resolution */ w = GetDeviceCaps(hScrDC, HORZRES); @@ -709,24 +707,31 @@ static void readscreen(void) /* Create a bitmap compatible with the screen DC */ hBitmap = CreateCompatibleBitmap(hScrDC, w, n); - /* Select new bitmap into memory DC */ - hOldBitmap = SelectObject(hMemDC, hBitmap); - /* Get bitmap properties */ GetObject(hBitmap, sizeof(BITMAP), (LPSTR) & bm); size = (unsigned int)bm.bmWidthBytes * bm.bmHeight * bm.bmPlanes; + bi.biSize = sizeof(BITMAPINFOHEADER); + bi.biWidth = bm.bmWidth; + bi.biHeight = bm.bmHeight; + bi.biPlanes = bm.bmPlanes; + bi.biBitCount = bm.bmBitsPixel; + bi.biCompression = BI_RGB; + bi.biSizeImage = 0; + bi.biXPelsPerMeter = 0; + bi.biYPelsPerMeter = 0; + bi.biClrUsed = 0; + bi.biClrImportant = 0; + bmbits = OPENSSL_malloc(size); if (bmbits) { /* Now go through the whole screen, repeatedly grabbing n lines */ for (y = 0; y < h - n; y += n) { unsigned char md[MD_DIGEST_LENGTH]; - /* Bitblt screen DC to memory DC */ - BitBlt(hMemDC, 0, 0, w, n, hScrDC, 0, y, SRCCOPY); - - /* Copy bitmap bits from memory DC to bmbits */ - GetBitmapBits(hBitmap, size, bmbits); + /* Copy the bits of the current line range into the buffer */ + GetDIBits(hScrDC, hBitmap, y, n, + bmbits, (BITMAPINFO *) & bi, DIB_RGB_COLORS); /* Get the hash of the bitmap */ MD(bmbits, size, md); @@ -738,13 +743,9 @@ static void readscreen(void) OPENSSL_free(bmbits); } - /* Select old bitmap back into memory DC */ - hBitmap = SelectObject(hMemDC, hOldBitmap); - /* Clean up */ DeleteObject(hBitmap); - DeleteDC(hMemDC); - DeleteDC(hScrDC); + ReleaseDC(NULL, hScrDC); # endif /* !OPENSSL_SYS_WINCE */ } diff --git a/drivers/builtin_openssl2/crypto/rand/randtest.c b/drivers/builtin_openssl2/crypto/rand/randtest.c deleted file mode 100644 index 91bcac9906..0000000000 --- a/drivers/builtin_openssl2/crypto/rand/randtest.c +++ /dev/null @@ -1,209 +0,0 @@ -/* crypto/rand/randtest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -/* some FIPS 140-1 random number test */ -/* some simple tests */ - -int main(int argc, char **argv) -{ - unsigned char buf[2500]; - int i, j, k, s, sign, nsign, err = 0; - unsigned long n1; - unsigned long n2[16]; - unsigned long runs[2][34]; - /* - * double d; - */ - long d; - - i = RAND_pseudo_bytes(buf, 2500); - if (i < 0) { - printf("init failed, the rand method is not properly installed\n"); - err++; - goto err; - } - - n1 = 0; - for (i = 0; i < 16; i++) - n2[i] = 0; - for (i = 0; i < 34; i++) - runs[0][i] = runs[1][i] = 0; - - /* test 1 and 2 */ - sign = 0; - nsign = 0; - for (i = 0; i < 2500; i++) { - j = buf[i]; - - n2[j & 0x0f]++; - n2[(j >> 4) & 0x0f]++; - - for (k = 0; k < 8; k++) { - s = (j & 0x01); - if (s == sign) - nsign++; - else { - if (nsign > 34) - nsign = 34; - if (nsign != 0) { - runs[sign][nsign - 1]++; - if (nsign > 6) - runs[sign][5]++; - } - sign = s; - nsign = 1; - } - - if (s) - n1++; - j >>= 1; - } - } - if (nsign > 34) - nsign = 34; - if (nsign != 0) - runs[sign][nsign - 1]++; - - /* test 1 */ - if (!((9654 < n1) && (n1 < 10346))) { - printf("test 1 failed, X=%lu\n", n1); - err++; - } - printf("test 1 done\n"); - - /* test 2 */ -#ifdef undef - d = 0; - for (i = 0; i < 16; i++) - d += n2[i] * n2[i]; - d = d * 16.0 / 5000.0 - 5000.0; - if (!((1.03 < d) && (d < 57.4))) { - printf("test 2 failed, X=%.2f\n", d); - err++; - } -#endif - d = 0; - for (i = 0; i < 16; i++) - d += n2[i] * n2[i]; - d = (d * 8) / 25 - 500000; - if (!((103 < d) && (d < 5740))) { - printf("test 2 failed, X=%ld.%02ld\n", d / 100L, d % 100L); - err++; - } - printf("test 2 done\n"); - - /* test 3 */ - for (i = 0; i < 2; i++) { - if (!((2267 < runs[i][0]) && (runs[i][0] < 2733))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 1, runs[i][0]); - err++; - } - if (!((1079 < runs[i][1]) && (runs[i][1] < 1421))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 2, runs[i][1]); - err++; - } - if (!((502 < runs[i][2]) && (runs[i][2] < 748))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 3, runs[i][2]); - err++; - } - if (!((223 < runs[i][3]) && (runs[i][3] < 402))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 4, runs[i][3]); - err++; - } - if (!((90 < runs[i][4]) && (runs[i][4] < 223))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 5, runs[i][4]); - err++; - } - if (!((90 < runs[i][5]) && (runs[i][5] < 223))) { - printf("test 3 failed, bit=%d run=%d num=%lu\n", - i, 6, runs[i][5]); - err++; - } - } - printf("test 3 done\n"); - - /* test 4 */ - if (runs[0][33] != 0) { - printf("test 4 failed, bit=%d run=%d num=%lu\n", 0, 34, runs[0][33]); - err++; - } - if (runs[1][33] != 0) { - printf("test 4 failed, bit=%d run=%d num=%lu\n", 1, 34, runs[1][33]); - err++; - } - printf("test 4 done\n"); - err: - err = ((err) ? 1 : 0); -#ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -#endif - EXIT(err); - return (err); -} diff --git a/drivers/builtin_openssl2/crypto/rc2/rc2test.c b/drivers/builtin_openssl2/crypto/rc2/rc2test.c deleted file mode 100644 index e61df342ea..0000000000 --- a/drivers/builtin_openssl2/crypto/rc2/rc2test.c +++ /dev/null @@ -1,274 +0,0 @@ -/* crypto/rc2/rc2test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * This has been a quickly hacked 'ideatest.c'. When I add tests for other - * RC2 modes, more of the code will be uncommented. - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_RC2 -int main(int argc, char *argv[]) -{ - printf("No RC2 support\n"); - return (0); -} -#else -# include - -static unsigned char RC2key[4][16] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, -}; - -static unsigned char RC2plain[4][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, -}; - -static unsigned char RC2cipher[4][8] = { - {0x1C, 0x19, 0x8A, 0x83, 0x8D, 0xF0, 0x28, 0xB7}, - {0x21, 0x82, 0x9C, 0x78, 0xA9, 0xF9, 0xC0, 0x74}, - {0x13, 0xDB, 0x35, 0x17, 0xD3, 0x21, 0x86, 0x9E}, - {0x50, 0xDC, 0x01, 0x62, 0xBD, 0x75, 0x7F, 0x31}, -}; - -/************/ -# ifdef undef -unsigned char k[16] = { - 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, - 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 -}; - -unsigned char in[8] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 }; -unsigned char c[8] = { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 }; - -unsigned char out[80]; - -char *text = "Hello to all people out there"; - -static unsigned char cfb_key[16] = { - 0xe1, 0xf0, 0xc3, 0xd2, 0xa5, 0xb4, 0x87, 0x96, - 0x69, 0x78, 0x4b, 0x5a, 0x2d, 0x3c, 0x0f, 0x1e, -}; -static unsigned char cfb_iv[80] = - { 0x34, 0x12, 0x78, 0x56, 0xab, 0x90, 0xef, 0xcd }; -static unsigned char cfb_buf1[40], cfb_buf2[40], cfb_tmp[8]; -# define CFB_TEST_SIZE 24 -static unsigned char plain[CFB_TEST_SIZE] = { - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, - 0x69, 0x6d, 0x65, 0x20, 0x66, 0x6f, - 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 -}; - -static unsigned char cfb_cipher64[CFB_TEST_SIZE] = { - 0x59, 0xD8, 0xE2, 0x65, 0x00, 0x58, 0x6C, 0x3F, - 0x2C, 0x17, 0x25, 0xD0, 0x1A, 0x38, 0xB7, 0x2A, - 0x39, 0x61, 0x37, 0xDC, 0x79, 0xFB, 0x9F, 0x45 -/*- 0xF9,0x78,0x32,0xB5,0x42,0x1A,0x6B,0x38, - 0x9A,0x44,0xD6,0x04,0x19,0x43,0xC4,0xD9, - 0x3D,0x1E,0xAE,0x47,0xFC,0xCF,0x29,0x0B,*/ -}; - -/* - * static int cfb64_test(unsigned char *cfb_cipher); - */ -static char *pt(unsigned char *p); -# endif - -int main(int argc, char *argv[]) -{ - int i, n, err = 0; - RC2_KEY key; - unsigned char buf[8], buf2[8]; - - for (n = 0; n < 4; n++) { - RC2_set_key(&key, 16, &(RC2key[n][0]), 0 /* or 1024 */ ); - - RC2_ecb_encrypt(&(RC2plain[n][0]), buf, &key, RC2_ENCRYPT); - if (memcmp(&(RC2cipher[n][0]), buf, 8) != 0) { - printf("ecb rc2 error encrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", RC2cipher[n][i]); - err = 20; - printf("\n"); - } - - RC2_ecb_encrypt(buf, buf2, &key, RC2_DECRYPT); - if (memcmp(&(RC2plain[n][0]), buf2, 8) != 0) { - printf("ecb RC2 error decrypting\n"); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", RC2plain[n][i]); - printf("\n"); - err = 3; - } - } - - if (err == 0) - printf("ecb RC2 ok\n"); -# ifdef undef - memcpy(iv, k, 8); - idea_cbc_encrypt((unsigned char *)text, out, strlen(text) + 1, &key, iv, - 1); - memcpy(iv, k, 8); - idea_cbc_encrypt(out, out, 8, &dkey, iv, 0); - idea_cbc_encrypt(&(out[8]), &(out[8]), strlen(text) + 1 - 8, &dkey, iv, - 0); - if (memcmp(text, out, strlen(text) + 1) != 0) { - printf("cbc idea bad\n"); - err = 4; - } else - printf("cbc idea ok\n"); - - printf("cfb64 idea "); - if (cfb64_test(cfb_cipher64)) { - printf("bad\n"); - err = 5; - } else - printf("ok\n"); -# endif - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EXIT(err); - return (err); -} - -# ifdef undef -static int cfb64_test(unsigned char *cfb_cipher) -{ - IDEA_KEY_SCHEDULE eks, dks; - int err = 0, i, n; - - idea_set_encrypt_key(cfb_key, &eks); - idea_set_decrypt_key(&eks, &dks); - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(plain, cfb_buf1, (long)12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - idea_cfb64_encrypt(&(plain[12]), &(cfb_buf1[12]), - (long)CFB_TEST_SIZE - 12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb64_encrypt encrypt error\n"); - for (i = 0; i < CFB_TEST_SIZE; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(cfb_buf1, cfb_buf2, (long)17, &eks, - cfb_tmp, &n, IDEA_DECRYPT); - idea_cfb64_encrypt(&(cfb_buf1[17]), &(cfb_buf2[17]), - (long)CFB_TEST_SIZE - 17, &dks, - cfb_tmp, &n, IDEA_DECRYPT); - if (memcmp(plain, cfb_buf2, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf2[i]))); - } - return (err); -} - -static char *pt(unsigned char *p) -{ - static char bufs[10][20]; - static int bnum = 0; - char *ret; - int i; - static char *f = "0123456789ABCDEF"; - - ret = &(bufs[bnum++][0]); - bnum %= 10; - for (i = 0; i < 8; i++) { - ret[i * 2] = f[(p[i] >> 4) & 0xf]; - ret[i * 2 + 1] = f[p[i] & 0xf]; - } - ret[16] = '\0'; - return (ret); -} - -# endif -#endif diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-586.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-586.pl deleted file mode 100644 index 5c9ac6ad28..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-586.pl +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# [Re]written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# At some point it became apparent that the original SSLeay RC4 -# assembler implementation performs suboptimally on latest IA-32 -# microarchitectures. After re-tuning performance has changed as -# following: -# -# Pentium -10% -# Pentium III +12% -# AMD +50%(*) -# P4 +250%(**) -# -# (*) This number is actually a trade-off:-) It's possible to -# achieve +72%, but at the cost of -48% off PIII performance. -# In other words code performing further 13% faster on AMD -# would perform almost 2 times slower on Intel PIII... -# For reference! This code delivers ~80% of rc4-amd64.pl -# performance on the same Opteron machine. -# (**) This number requires compressed key schedule set up by -# RC4_set_key [see commentary below for further details]. -# -# - -# May 2011 -# -# Optimize for Core2 and Westmere [and incidentally Opteron]. Current -# performance in cycles per processed byte (less is better) and -# improvement relative to previous version of this module is: -# -# Pentium 10.2 # original numbers -# Pentium III 7.8(*) -# Intel P4 7.5 -# -# Opteron 6.1/+20% # new MMX numbers -# Core2 5.3/+67%(**) -# Westmere 5.1/+94%(**) -# Sandy Bridge 5.0/+8% -# Atom 12.6/+6% -# -# (*) PIII can actually deliver 6.6 cycles per byte with MMX code, -# but this specific code performs poorly on Core2. And vice -# versa, below MMX/SSE code delivering 5.8/7.1 on Core2 performs -# poorly on PIII, at 8.0/14.5:-( As PIII is not a "hot" CPU -# [anymore], I chose to discard PIII-specific code path and opt -# for original IALU-only code, which is why MMX/SSE code path -# is guarded by SSE2 bit (see below), not MMX/SSE. -# (**) Performance vs. block size on Core2 and Westmere had a maximum -# at ... 64 bytes block size. And it was quite a maximum, 40-60% -# in comparison to largest 8KB block size. Above improvement -# coefficients are for the largest block size. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"rc4-586.pl"); - -$xx="eax"; -$yy="ebx"; -$tx="ecx"; -$ty="edx"; -$inp="esi"; -$out="ebp"; -$dat="edi"; - -sub RC4_loop { - my $i=shift; - my $func = ($i==0)?*mov:*or; - - &add (&LB($yy),&LB($tx)); - &mov ($ty,&DWP(0,$dat,$yy,4)); - &mov (&DWP(0,$dat,$yy,4),$tx); - &mov (&DWP(0,$dat,$xx,4),$ty); - &add ($ty,$tx); - &inc (&LB($xx)); - &and ($ty,0xff); - &ror ($out,8) if ($i!=0); - if ($i<3) { - &mov ($tx,&DWP(0,$dat,$xx,4)); - } else { - &mov ($tx,&wparam(3)); # reload [re-biased] out - } - &$func ($out,&DWP(0,$dat,$ty,4)); -} - -if ($alt=0) { - # >20% faster on Atom and Sandy Bridge[!], 8% faster on Opteron, - # but ~40% slower on Core2 and Westmere... Attempt to add movz - # brings down Opteron by 25%, Atom and Sandy Bridge by 15%, yet - # on Core2 with movz it's almost 20% slower than below alternative - # code... Yes, it's a total mess... - my @XX=($xx,$out); - $RC4_loop_mmx = sub { # SSE actually... - my $i=shift; - my $j=$i<=0?0:$i>>1; - my $mm=$i<=0?"mm0":"mm".($i&1); - - &add (&LB($yy),&LB($tx)); - &lea (@XX[1],&DWP(1,@XX[0])); - &pxor ("mm2","mm0") if ($i==0); - &psllq ("mm1",8) if ($i==0); - &and (@XX[1],0xff); - &pxor ("mm0","mm0") if ($i<=0); - &mov ($ty,&DWP(0,$dat,$yy,4)); - &mov (&DWP(0,$dat,$yy,4),$tx); - &pxor ("mm1","mm2") if ($i==0); - &mov (&DWP(0,$dat,$XX[0],4),$ty); - &add (&LB($ty),&LB($tx)); - &movd (@XX[0],"mm7") if ($i==0); - &mov ($tx,&DWP(0,$dat,@XX[1],4)); - &pxor ("mm1","mm1") if ($i==1); - &movq ("mm2",&QWP(0,$inp)) if ($i==1); - &movq (&QWP(-8,(@XX[0],$inp)),"mm1") if ($i==0); - &pinsrw ($mm,&DWP(0,$dat,$ty,4),$j); - - push (@XX,shift(@XX)) if ($i>=0); - } -} else { - # Using pinsrw here improves performane on Intel CPUs by 2-3%, but - # brings down AMD by 7%... - $RC4_loop_mmx = sub { - my $i=shift; - - &add (&LB($yy),&LB($tx)); - &psllq ("mm1",8*(($i-1)&7)) if (abs($i)!=1); - &mov ($ty,&DWP(0,$dat,$yy,4)); - &mov (&DWP(0,$dat,$yy,4),$tx); - &mov (&DWP(0,$dat,$xx,4),$ty); - &inc ($xx); - &add ($ty,$tx); - &movz ($xx,&LB($xx)); # (*) - &movz ($ty,&LB($ty)); # (*) - &pxor ("mm2",$i==1?"mm0":"mm1") if ($i>=0); - &movq ("mm0",&QWP(0,$inp)) if ($i<=0); - &movq (&QWP(-8,($out,$inp)),"mm2") if ($i==0); - &mov ($tx,&DWP(0,$dat,$xx,4)); - &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); - - # (*) This is the key to Core2 and Westmere performance. - # Whithout movz out-of-order execution logic confuses - # itself and fails to reorder loads and stores. Problem - # appears to be fixed in Sandy Bridge... - } -} - -&external_label("OPENSSL_ia32cap_P"); - -# void RC4(RC4_KEY *key,size_t len,const unsigned char *inp,unsigned char *out); -&function_begin("RC4"); - &mov ($dat,&wparam(0)); # load key schedule pointer - &mov ($ty, &wparam(1)); # load len - &mov ($inp,&wparam(2)); # load inp - &mov ($out,&wparam(3)); # load out - - &xor ($xx,$xx); # avoid partial register stalls - &xor ($yy,$yy); - - &cmp ($ty,0); # safety net - &je (&label("abort")); - - &mov (&LB($xx),&BP(0,$dat)); # load key->x - &mov (&LB($yy),&BP(4,$dat)); # load key->y - &add ($dat,8); - - &lea ($tx,&DWP(0,$inp,$ty)); - &sub ($out,$inp); # re-bias out - &mov (&wparam(1),$tx); # save input+len - - &inc (&LB($xx)); - - # detect compressed key schedule... - &cmp (&DWP(256,$dat),-1); - &je (&label("RC4_CHAR")); - - &mov ($tx,&DWP(0,$dat,$xx,4)); - - &and ($ty,-4); # how many 4-byte chunks? - &jz (&label("loop1")); - - &test ($ty,-8); - &mov (&wparam(3),$out); # $out as accumulator in these loops - &jz (&label("go4loop4")); - - &picmeup($out,"OPENSSL_ia32cap_P"); - &bt (&DWP(0,$out),26); # check SSE2 bit [could have been MMX] - &jnc (&label("go4loop4")); - - &mov ($out,&wparam(3)) if (!$alt); - &movd ("mm7",&wparam(3)) if ($alt); - &and ($ty,-8); - &lea ($ty,&DWP(-8,$inp,$ty)); - &mov (&DWP(-4,$dat),$ty); # save input+(len/8)*8-8 - - &$RC4_loop_mmx(-1); - &jmp(&label("loop_mmx_enter")); - - &set_label("loop_mmx",16); - &$RC4_loop_mmx(0); - &set_label("loop_mmx_enter"); - for ($i=1;$i<8;$i++) { &$RC4_loop_mmx($i); } - &mov ($ty,$yy); - &xor ($yy,$yy); # this is second key to Core2 - &mov (&LB($yy),&LB($ty)); # and Westmere performance... - &cmp ($inp,&DWP(-4,$dat)); - &lea ($inp,&DWP(8,$inp)); - &jb (&label("loop_mmx")); - - if ($alt) { - &movd ($out,"mm7"); - &pxor ("mm2","mm0"); - &psllq ("mm1",8); - &pxor ("mm1","mm2"); - &movq (&QWP(-8,$out,$inp),"mm1"); - } else { - &psllq ("mm1",56); - &pxor ("mm2","mm1"); - &movq (&QWP(-8,$out,$inp),"mm2"); - } - &emms (); - - &cmp ($inp,&wparam(1)); # compare to input+len - &je (&label("done")); - &jmp (&label("loop1")); - -&set_label("go4loop4",16); - &lea ($ty,&DWP(-4,$inp,$ty)); - &mov (&wparam(2),$ty); # save input+(len/4)*4-4 - - &set_label("loop4"); - for ($i=0;$i<4;$i++) { RC4_loop($i); } - &ror ($out,8); - &xor ($out,&DWP(0,$inp)); - &cmp ($inp,&wparam(2)); # compare to input+(len/4)*4-4 - &mov (&DWP(0,$tx,$inp),$out);# $tx holds re-biased out here - &lea ($inp,&DWP(4,$inp)); - &mov ($tx,&DWP(0,$dat,$xx,4)); - &jb (&label("loop4")); - - &cmp ($inp,&wparam(1)); # compare to input+len - &je (&label("done")); - &mov ($out,&wparam(3)); # restore $out - - &set_label("loop1",16); - &add (&LB($yy),&LB($tx)); - &mov ($ty,&DWP(0,$dat,$yy,4)); - &mov (&DWP(0,$dat,$yy,4),$tx); - &mov (&DWP(0,$dat,$xx,4),$ty); - &add ($ty,$tx); - &inc (&LB($xx)); - &and ($ty,0xff); - &mov ($ty,&DWP(0,$dat,$ty,4)); - &xor (&LB($ty),&BP(0,$inp)); - &lea ($inp,&DWP(1,$inp)); - &mov ($tx,&DWP(0,$dat,$xx,4)); - &cmp ($inp,&wparam(1)); # compare to input+len - &mov (&BP(-1,$out,$inp),&LB($ty)); - &jb (&label("loop1")); - - &jmp (&label("done")); - -# this is essentially Intel P4 specific codepath... -&set_label("RC4_CHAR",16); - &movz ($tx,&BP(0,$dat,$xx)); - # strangely enough unrolled loop performs over 20% slower... - &set_label("cloop1"); - &add (&LB($yy),&LB($tx)); - &movz ($ty,&BP(0,$dat,$yy)); - &mov (&BP(0,$dat,$yy),&LB($tx)); - &mov (&BP(0,$dat,$xx),&LB($ty)); - &add (&LB($ty),&LB($tx)); - &movz ($ty,&BP(0,$dat,$ty)); - &add (&LB($xx),1); - &xor (&LB($ty),&BP(0,$inp)); - &lea ($inp,&DWP(1,$inp)); - &movz ($tx,&BP(0,$dat,$xx)); - &cmp ($inp,&wparam(1)); - &mov (&BP(-1,$out,$inp),&LB($ty)); - &jb (&label("cloop1")); - -&set_label("done"); - &dec (&LB($xx)); - &mov (&DWP(-4,$dat),$yy); # save key->y - &mov (&BP(-8,$dat),&LB($xx)); # save key->x -&set_label("abort"); -&function_end("RC4"); - -######################################################################## - -$inp="esi"; -$out="edi"; -$idi="ebp"; -$ido="ecx"; -$idx="edx"; - -# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); -&function_begin("private_RC4_set_key"); - &mov ($out,&wparam(0)); # load key - &mov ($idi,&wparam(1)); # load len - &mov ($inp,&wparam(2)); # load data - &picmeup($idx,"OPENSSL_ia32cap_P"); - - &lea ($out,&DWP(2*4,$out)); # &key->data - &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end - &neg ($idi); - &xor ("eax","eax"); - &mov (&DWP(-4,$out),$idi); # borrow key->y - - &bt (&DWP(0,$idx),20); # check for bit#20 - &jc (&label("c1stloop")); - -&set_label("w1stloop",16); - &mov (&DWP(0,$out,"eax",4),"eax"); # key->data[i]=i; - &add (&LB("eax"),1); # i++; - &jnc (&label("w1stloop")); - - &xor ($ido,$ido); - &xor ($idx,$idx); - -&set_label("w2ndloop",16); - &mov ("eax",&DWP(0,$out,$ido,4)); - &add (&LB($idx),&BP(0,$inp,$idi)); - &add (&LB($idx),&LB("eax")); - &add ($idi,1); - &mov ("ebx",&DWP(0,$out,$idx,4)); - &jnz (&label("wnowrap")); - &mov ($idi,&DWP(-4,$out)); - &set_label("wnowrap"); - &mov (&DWP(0,$out,$idx,4),"eax"); - &mov (&DWP(0,$out,$ido,4),"ebx"); - &add (&LB($ido),1); - &jnc (&label("w2ndloop")); -&jmp (&label("exit")); - -# Unlike all other x86 [and x86_64] implementations, Intel P4 core -# [including EM64T] was found to perform poorly with above "32-bit" key -# schedule, a.k.a. RC4_INT. Performance improvement for IA-32 hand-coded -# assembler turned out to be 3.5x if re-coded for compressed 8-bit one, -# a.k.a. RC4_CHAR! It's however inappropriate to just switch to 8-bit -# schedule for x86[_64], because non-P4 implementations suffer from -# significant performance losses then, e.g. PIII exhibits >2x -# deterioration, and so does Opteron. In order to assure optimal -# all-round performance, we detect P4 at run-time and set up compressed -# key schedule, which is recognized by RC4 procedure. - -&set_label("c1stloop",16); - &mov (&BP(0,$out,"eax"),&LB("eax")); # key->data[i]=i; - &add (&LB("eax"),1); # i++; - &jnc (&label("c1stloop")); - - &xor ($ido,$ido); - &xor ($idx,$idx); - &xor ("ebx","ebx"); - -&set_label("c2ndloop",16); - &mov (&LB("eax"),&BP(0,$out,$ido)); - &add (&LB($idx),&BP(0,$inp,$idi)); - &add (&LB($idx),&LB("eax")); - &add ($idi,1); - &mov (&LB("ebx"),&BP(0,$out,$idx)); - &jnz (&label("cnowrap")); - &mov ($idi,&DWP(-4,$out)); - &set_label("cnowrap"); - &mov (&BP(0,$out,$idx),&LB("eax")); - &mov (&BP(0,$out,$ido),&LB("ebx")); - &add (&LB($ido),1); - &jnc (&label("c2ndloop")); - - &mov (&DWP(256,$out),-1); # mark schedule as compressed - -&set_label("exit"); - &xor ("eax","eax"); - &mov (&DWP(-8,$out),"eax"); # key->x=0; - &mov (&DWP(-4,$out),"eax"); # key->y=0; -&function_end("private_RC4_set_key"); - -# const char *RC4_options(void); -&function_begin_B("RC4_options"); - &call (&label("pic_point")); -&set_label("pic_point"); - &blindpop("eax"); - &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); - &picmeup("edx","OPENSSL_ia32cap_P"); - &mov ("edx",&DWP(0,"edx")); - &bt ("edx",20); - &jc (&label("1xchar")); - &bt ("edx",26); - &jnc (&label("ret")); - &add ("eax",25); - &ret (); -&set_label("1xchar"); - &add ("eax",12); -&set_label("ret"); - &ret (); -&set_label("opts",64); -&asciz ("rc4(4x,int)"); -&asciz ("rc4(1x,char)"); -&asciz ("rc4(8x,mmx)"); -&asciz ("RC4 for x86, CRYPTOGAMS by "); -&align (64); -&function_end_B("RC4_options"); - -&asm_finish(); - diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-ia64.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-ia64.pl deleted file mode 100644 index 49cd5b5e69..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-ia64.pl +++ /dev/null @@ -1,755 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by David Mosberger based on the -# Itanium optimized Crypto code which was released by HP Labs at -# http://www.hpl.hp.com/research/linux/crypto/. -# -# Copyright (c) 2005 Hewlett-Packard Development Company, L.P. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - - - -# This is a little helper program which generates a software-pipelined -# for RC4 encryption. The basic algorithm looks like this: -# -# for (counter = 0; counter < len; ++counter) -# { -# in = inp[counter]; -# SI = S[I]; -# J = (SI + J) & 0xff; -# SJ = S[J]; -# T = (SI + SJ) & 0xff; -# S[I] = SJ, S[J] = SI; -# ST = S[T]; -# outp[counter] = in ^ ST; -# I = (I + 1) & 0xff; -# } -# -# Pipelining this loop isn't easy, because the stores to the S[] array -# need to be observed in the right order. The loop generated by the -# code below has the following pipeline diagram: -# -# cycle -# | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 |16 |17 | -# iter -# 1: xxx LDI xxx xxx xxx LDJ xxx SWP xxx LDT xxx xxx -# 2: xxx LDI xxx xxx xxx LDJ xxx SWP xxx LDT xxx xxx -# 3: xxx LDI xxx xxx xxx LDJ xxx SWP xxx LDT xxx xxx -# -# where: -# LDI = load of S[I] -# LDJ = load of S[J] -# SWP = swap of S[I] and S[J] -# LDT = load of S[T] -# -# Note that in the above diagram, the major trouble-spot is that LDI -# of the 2nd iteration is performed BEFORE the SWP of the first -# iteration. Fortunately, this is easy to detect (I of the 1st -# iteration will be equal to J of the 2nd iteration) and when this -# happens, we simply forward the proper value from the 1st iteration -# to the 2nd one. The proper value in this case is simply the value -# of S[I] from the first iteration (thanks to the fact that SWP -# simply swaps the contents of S[I] and S[J]). -# -# Another potential trouble-spot is in cycle 7, where SWP of the 1st -# iteration issues at the same time as the LDI of the 3rd iteration. -# However, thanks to IA-64 execution semantics, this can be taken -# care of simply by placing LDI later in the instruction-group than -# SWP. IA-64 CPUs will automatically forward the value if they -# detect that the SWP and LDI are accessing the same memory-location. - -# The core-loop that can be pipelined then looks like this (annotated -# with McKinley/Madison issue port & latency numbers, assuming L1 -# cache hits for the most part): - -# operation: instruction: issue-ports: latency -# ------------------ ----------------------------- ------------- ------- - -# Data = *inp++ ld1 data = [inp], 1 M0-M1 1 cyc c0 -# shladd Iptr = I, KeyTable, 3 M0-M3, I0, I1 1 cyc -# I = (I + 1) & 0xff padd1 nextI = I, one M0-M3, I0, I1 3 cyc -# ;; -# SI = S[I] ld8 SI = [Iptr] M0-M1 1 cyc c1 * after SWAP! -# ;; -# cmp.eq.unc pBypass = I, J * after J is valid! -# J = SI + J add J = J, SI M0-M3, I0, I1 1 cyc c2 -# (pBypass) br.cond.spnt Bypass -# ;; -# --------------------------------------------------------------------------------------- -# J = J & 0xff zxt1 J = J I0, I1, 1 cyc c3 -# ;; -# shladd Jptr = J, KeyTable, 3 M0-M3, I0, I1 1 cyc c4 -# ;; -# SJ = S[J] ld8 SJ = [Jptr] M0-M1 1 cyc c5 -# ;; -# --------------------------------------------------------------------------------------- -# T = (SI + SJ) add T = SI, SJ M0-M3, I0, I1 1 cyc c6 -# ;; -# T = T & 0xff zxt1 T = T I0, I1 1 cyc -# S[I] = SJ st8 [Iptr] = SJ M2-M3 c7 -# S[J] = SI st8 [Jptr] = SI M2-M3 -# ;; -# shladd Tptr = T, KeyTable, 3 M0-M3, I0, I1 1 cyc c8 -# ;; -# --------------------------------------------------------------------------------------- -# T = S[T] ld8 T = [Tptr] M0-M1 1 cyc c9 -# ;; -# data ^= T xor data = data, T M0-M3, I0, I1 1 cyc c10 -# ;; -# *out++ = Data ^ T dep word = word, data, 8, POS I0, I1 1 cyc c11 -# ;; -# --------------------------------------------------------------------------------------- - -# There are several points worth making here: - -# - Note that due to the bypass/forwarding-path, the first two -# phases of the loop are strangly mingled together. In -# particular, note that the first stage of the pipeline is -# using the value of "J", as calculated by the second stage. -# - Each bundle-pair will have exactly 6 instructions. -# - Pipelined, the loop can execute in 3 cycles/iteration and -# 4 stages. However, McKinley/Madison can issue "st1" to -# the same bank at a rate of at most one per 4 cycles. Thus, -# instead of storing each byte, we accumulate them in a word -# and then write them back at once with a single "st8" (this -# implies that the setup code needs to ensure that the output -# buffer is properly aligned, if need be, by encoding the -# first few bytes separately). -# - There is no space for a "br.ctop" instruction. For this -# reason we can't use module-loop support in IA-64 and have -# to do a traditional, purely software-pipelined loop. -# - We can't replace any of the remaining "add/zxt1" pairs with -# "padd1" because the latency for that instruction is too high -# and would push the loop to the point where more bypasses -# would be needed, which we don't have space for. -# - The above loop runs at around 3.26 cycles/byte, or roughly -# 440 MByte/sec on a 1.5GHz Madison. This is well below the -# system bus bandwidth and hence with judicious use of -# "lfetch" this loop can run at (almost) peak speed even when -# the input and output data reside in memory. The -# max. latency that can be tolerated is (PREFETCH_DISTANCE * -# L2_LINE_SIZE * 3 cyc), or about 384 cycles assuming (at -# least) 1-ahead prefetching of 128 byte cache-lines. Note -# that we do NOT prefetch into L1, since that would only -# interfere with the S[] table values stored there. This is -# acceptable because there is a 10 cycle latency between -# load and first use of the input data. -# - We use a branch to out-of-line bypass-code of cycle-pressure: -# we calculate the next J, check for the need to activate the -# bypass path, and activate the bypass path ALL IN THE SAME -# CYCLE. If we didn't have these constraints, we could do -# the bypass with a simple conditional move instruction. -# Fortunately, the bypass paths get activated relatively -# infrequently, so the extra branches don't cost all that much -# (about 0.04 cycles/byte, measured on a 16396 byte file with -# random input data). -# - -$phases = 4; # number of stages/phases in the pipelined-loop -$unroll_count = 6; # number of times we unrolled it -$pComI = (1 << 0); -$pComJ = (1 << 1); -$pComT = (1 << 2); -$pOut = (1 << 3); - -$NData = 4; -$NIP = 3; -$NJP = 2; -$NI = 2; -$NSI = 3; -$NSJ = 2; -$NT = 2; -$NOutWord = 2; - -# -# $threshold is the minimum length before we attempt to use the -# big software-pipelined loop. It MUST be greater-or-equal -# to: -# PHASES * (UNROLL_COUNT + 1) + 7 -# -# The "+ 7" comes from the fact we may have to encode up to -# 7 bytes separately before the output pointer is aligned. -# -$threshold = (3 * ($phases * ($unroll_count + 1)) + 7); - -sub I { - local *code = shift; - local $format = shift; - $code .= sprintf ("\t\t".$format."\n", @_); -} - -sub P { - local *code = shift; - local $format = shift; - $code .= sprintf ($format."\n", @_); -} - -sub STOP { - local *code = shift; - $code .=<<___; - ;; -___ -} - -sub emit_body { - local *c = shift; - local *bypass = shift; - local ($iteration, $p) = @_; - - local $i0 = $iteration; - local $i1 = $iteration - 1; - local $i2 = $iteration - 2; - local $i3 = $iteration - 3; - local $iw0 = ($iteration - 3) / 8; - local $iw1 = ($iteration > 3) ? ($iteration - 4) / 8 : 1; - local $byte_num = ($iteration - 3) % 8; - local $label = $iteration + 1; - local $pAny = ($p & 0xf) == 0xf; - local $pByp = (($p & $pComI) && ($iteration > 0)); - - $c.=<<___; -////////////////////////////////////////////////// -___ - - if (($p & 0xf) == 0) { - $c.="#ifdef HOST_IS_BIG_ENDIAN\n"; - &I(\$c,"shr.u OutWord[%u] = OutWord[%u], 32;;", - $iw1 % $NOutWord, $iw1 % $NOutWord); - $c.="#endif\n"; - &I(\$c, "st4 [OutPtr] = OutWord[%u], 4", $iw1 % $NOutWord); - return; - } - - # Cycle 0 - &I(\$c, "{ .mmi") if ($pAny); - &I(\$c, "ld1 Data[%u] = [InPtr], 1", $i0 % $NData) if ($p & $pComI); - &I(\$c, "padd1 I[%u] = One, I[%u]", $i0 % $NI, $i1 % $NI)if ($p & $pComI); - &I(\$c, "zxt1 J = J") if ($p & $pComJ); - &I(\$c, "}") if ($pAny); - &I(\$c, "{ .mmi") if ($pAny); - &I(\$c, "LKEY T[%u] = [T[%u]]", $i1 % $NT, $i1 % $NT) if ($p & $pOut); - &I(\$c, "add T[%u] = SI[%u], SJ[%u]", - $i0 % $NT, $i2 % $NSI, $i1 % $NSJ) if ($p & $pComT); - &I(\$c, "KEYADDR(IPr[%u], I[%u])", $i0 % $NIP, $i1 % $NI) if ($p & $pComI); - &I(\$c, "}") if ($pAny); - &STOP(\$c); - - # Cycle 1 - &I(\$c, "{ .mmi") if ($pAny); - &I(\$c, "SKEY [IPr[%u]] = SJ[%u]", $i2 % $NIP, $i1%$NSJ)if ($p & $pComT); - &I(\$c, "SKEY [JP[%u]] = SI[%u]", $i1 % $NJP, $i2%$NSI) if ($p & $pComT); - &I(\$c, "zxt1 T[%u] = T[%u]", $i0 % $NT, $i0 % $NT) if ($p & $pComT); - &I(\$c, "}") if ($pAny); - &I(\$c, "{ .mmi") if ($pAny); - &I(\$c, "LKEY SI[%u] = [IPr[%u]]", $i0 % $NSI, $i0%$NIP)if ($p & $pComI); - &I(\$c, "KEYADDR(JP[%u], J)", $i0 % $NJP) if ($p & $pComJ); - &I(\$c, "xor Data[%u] = Data[%u], T[%u]", - $i3 % $NData, $i3 % $NData, $i1 % $NT) if ($p & $pOut); - &I(\$c, "}") if ($pAny); - &STOP(\$c); - - # Cycle 2 - &I(\$c, "{ .mmi") if ($pAny); - &I(\$c, "LKEY SJ[%u] = [JP[%u]]", $i0 % $NSJ, $i0%$NJP) if ($p & $pComJ); - &I(\$c, "cmp.eq pBypass, p0 = I[%u], J", $i1 % $NI) if ($pByp); - &I(\$c, "dep OutWord[%u] = Data[%u], OutWord[%u], BYTE_POS(%u), 8", - $iw0%$NOutWord, $i3%$NData, $iw1%$NOutWord, $byte_num) if ($p & $pOut); - &I(\$c, "}") if ($pAny); - &I(\$c, "{ .mmb") if ($pAny); - &I(\$c, "add J = J, SI[%u]", $i0 % $NSI) if ($p & $pComI); - &I(\$c, "KEYADDR(T[%u], T[%u])", $i0 % $NT, $i0 % $NT) if ($p & $pComT); - &P(\$c, "(pBypass)\tbr.cond.spnt.many .rc4Bypass%u",$label)if ($pByp); - &I(\$c, "}") if ($pAny); - &STOP(\$c); - - &P(\$c, ".rc4Resume%u:", $label) if ($pByp); - if ($byte_num == 0 && $iteration >= $phases) { - &I(\$c, "st8 [OutPtr] = OutWord[%u], 8", - $iw1 % $NOutWord) if ($p & $pOut); - if ($iteration == (1 + $unroll_count) * $phases - 1) { - if ($unroll_count == 6) { - &I(\$c, "mov OutWord[%u] = OutWord[%u]", - $iw1 % $NOutWord, $iw0 % $NOutWord); - } - &I(\$c, "lfetch.nt1 [InPrefetch], %u", - $unroll_count * $phases); - &I(\$c, "lfetch.excl.nt1 [OutPrefetch], %u", - $unroll_count * $phases); - &I(\$c, "br.cloop.sptk.few .rc4Loop"); - } - } - - if ($pByp) { - &P(\$bypass, ".rc4Bypass%u:", $label); - &I(\$bypass, "sub J = J, SI[%u]", $i0 % $NSI); - &I(\$bypass, "nop 0"); - &I(\$bypass, "nop 0"); - &I(\$bypass, ";;"); - &I(\$bypass, "add J = J, SI[%u]", $i1 % $NSI); - &I(\$bypass, "mov SI[%u] = SI[%u]", $i0 % $NSI, $i1 % $NSI); - &I(\$bypass, "br.sptk.many .rc4Resume%u\n", $label); - &I(\$bypass, ";;"); - } -} - -$code=<<___; -.ident \"rc4-ia64.s, version 3.0\" -.ident \"Copyright (c) 2005 Hewlett-Packard Development Company, L.P.\" - -#define LCSave r8 -#define PRSave r9 - -/* Inputs become invalid once rotation begins! */ - -#define StateTable in0 -#define DataLen in1 -#define InputBuffer in2 -#define OutputBuffer in3 - -#define KTable r14 -#define J r15 -#define InPtr r16 -#define OutPtr r17 -#define InPrefetch r18 -#define OutPrefetch r19 -#define One r20 -#define LoopCount r21 -#define Remainder r22 -#define IFinal r23 -#define EndPtr r24 - -#define tmp0 r25 -#define tmp1 r26 - -#define pBypass p6 -#define pDone p7 -#define pSmall p8 -#define pAligned p9 -#define pUnaligned p10 - -#define pComputeI pPhase[0] -#define pComputeJ pPhase[1] -#define pComputeT pPhase[2] -#define pOutput pPhase[3] - -#define RetVal r8 -#define L_OK p7 -#define L_NOK p8 - -#define _NINPUTS 4 -#define _NOUTPUT 0 - -#define _NROTATE 24 -#define _NLOCALS (_NROTATE - _NINPUTS - _NOUTPUT) - -#ifndef SZ -# define SZ 4 // this must be set to sizeof(RC4_INT) -#endif - -#if SZ == 1 -# define LKEY ld1 -# define SKEY st1 -# define KEYADDR(dst, i) add dst = i, KTable -#elif SZ == 2 -# define LKEY ld2 -# define SKEY st2 -# define KEYADDR(dst, i) shladd dst = i, 1, KTable -#elif SZ == 4 -# define LKEY ld4 -# define SKEY st4 -# define KEYADDR(dst, i) shladd dst = i, 2, KTable -#else -# define LKEY ld8 -# define SKEY st8 -# define KEYADDR(dst, i) shladd dst = i, 3, KTable -#endif - -#if defined(_HPUX_SOURCE) && !defined(_LP64) -# define ADDP addp4 -#else -# define ADDP add -#endif - -/* Define a macro for the bit number of the n-th byte: */ - -#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) -# define HOST_IS_BIG_ENDIAN -# define BYTE_POS(n) (56 - (8 * (n))) -#else -# define BYTE_POS(n) (8 * (n)) -#endif - -/* - We must perform the first phase of the pipeline explicitly since - we will always load from the stable the first time. The br.cexit - will never be taken since regardless of the number of bytes because - the epilogue count is 4. -*/ -/* MODSCHED_RC4 macro was split to _PROLOGUE and _LOOP, because HP-UX - assembler failed on original macro with syntax error. */ -#define MODSCHED_RC4_PROLOGUE \\ - { \\ - ld1 Data[0] = [InPtr], 1; \\ - add IFinal = 1, I[1]; \\ - KEYADDR(IPr[0], I[1]); \\ - } ;; \\ - { \\ - LKEY SI[0] = [IPr[0]]; \\ - mov pr.rot = 0x10000; \\ - mov ar.ec = 4; \\ - } ;; \\ - { \\ - add J = J, SI[0]; \\ - zxt1 I[0] = IFinal; \\ - br.cexit.spnt.few .+16; /* never taken */ \\ - } ;; -#define MODSCHED_RC4_LOOP(label) \\ -label: \\ - { .mmi; \\ - (pComputeI) ld1 Data[0] = [InPtr], 1; \\ - (pComputeI) add IFinal = 1, I[1]; \\ - (pComputeJ) zxt1 J = J; \\ - }{ .mmi; \\ - (pOutput) LKEY T[1] = [T[1]]; \\ - (pComputeT) add T[0] = SI[2], SJ[1]; \\ - (pComputeI) KEYADDR(IPr[0], I[1]); \\ - } ;; \\ - { .mmi; \\ - (pComputeT) SKEY [IPr[2]] = SJ[1]; \\ - (pComputeT) SKEY [JP[1]] = SI[2]; \\ - (pComputeT) zxt1 T[0] = T[0]; \\ - }{ .mmi; \\ - (pComputeI) LKEY SI[0] = [IPr[0]]; \\ - (pComputeJ) KEYADDR(JP[0], J); \\ - (pComputeI) cmp.eq.unc pBypass, p0 = I[1], J; \\ - } ;; \\ - { .mmi; \\ - (pComputeJ) LKEY SJ[0] = [JP[0]]; \\ - (pOutput) xor Data[3] = Data[3], T[1]; \\ - nop 0x0; \\ - }{ .mmi; \\ - (pComputeT) KEYADDR(T[0], T[0]); \\ - (pBypass) mov SI[0] = SI[1]; \\ - (pComputeI) zxt1 I[0] = IFinal; \\ - } ;; \\ - { .mmb; \\ - (pOutput) st1 [OutPtr] = Data[3], 1; \\ - (pComputeI) add J = J, SI[0]; \\ - br.ctop.sptk.few label; \\ - } ;; - - .text - - .align 32 - - .type RC4, \@function - .global RC4 - - .proc RC4 - .prologue - -RC4: - { - .mmi - alloc r2 = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE - - .rotr Data[4], I[2], IPr[3], SI[3], JP[2], SJ[2], T[2], \\ - OutWord[2] - .rotp pPhase[4] - - ADDP InPrefetch = 0, InputBuffer - ADDP KTable = 0, StateTable - } - { - .mmi - ADDP InPtr = 0, InputBuffer - ADDP OutPtr = 0, OutputBuffer - mov RetVal = r0 - } - ;; - { - .mmi - lfetch.nt1 [InPrefetch], 0x80 - ADDP OutPrefetch = 0, OutputBuffer - } - { // Return 0 if the input length is nonsensical - .mib - ADDP StateTable = 0, StateTable - cmp.ge.unc L_NOK, L_OK = r0, DataLen - (L_NOK) br.ret.sptk.few rp - } - ;; - { - .mib - cmp.eq.or L_NOK, L_OK = r0, InPtr - cmp.eq.or L_NOK, L_OK = r0, OutPtr - nop 0x0 - } - { - .mib - cmp.eq.or L_NOK, L_OK = r0, StateTable - nop 0x0 - (L_NOK) br.ret.sptk.few rp - } - ;; - LKEY I[1] = [KTable], SZ -/* Prefetch the state-table. It contains 256 elements of size SZ */ - -#if SZ == 1 - ADDP tmp0 = 1*128, StateTable -#elif SZ == 2 - ADDP tmp0 = 3*128, StateTable - ADDP tmp1 = 2*128, StateTable -#elif SZ == 4 - ADDP tmp0 = 7*128, StateTable - ADDP tmp1 = 6*128, StateTable -#elif SZ == 8 - ADDP tmp0 = 15*128, StateTable - ADDP tmp1 = 14*128, StateTable -#endif - ;; -#if SZ >= 8 - lfetch.fault.nt1 [tmp0], -256 // 15 - lfetch.fault.nt1 [tmp1], -256;; - lfetch.fault.nt1 [tmp0], -256 // 13 - lfetch.fault.nt1 [tmp1], -256;; - lfetch.fault.nt1 [tmp0], -256 // 11 - lfetch.fault.nt1 [tmp1], -256;; - lfetch.fault.nt1 [tmp0], -256 // 9 - lfetch.fault.nt1 [tmp1], -256;; -#endif -#if SZ >= 4 - lfetch.fault.nt1 [tmp0], -256 // 7 - lfetch.fault.nt1 [tmp1], -256;; - lfetch.fault.nt1 [tmp0], -256 // 5 - lfetch.fault.nt1 [tmp1], -256;; -#endif -#if SZ >= 2 - lfetch.fault.nt1 [tmp0], -256 // 3 - lfetch.fault.nt1 [tmp1], -256;; -#endif - { - .mii - lfetch.fault.nt1 [tmp0] // 1 - add I[1]=1,I[1];; - zxt1 I[1]=I[1] - } - { - .mmi - lfetch.nt1 [InPrefetch], 0x80 - lfetch.excl.nt1 [OutPrefetch], 0x80 - .save pr, PRSave - mov PRSave = pr - } ;; - { - .mmi - lfetch.excl.nt1 [OutPrefetch], 0x80 - LKEY J = [KTable], SZ - ADDP EndPtr = DataLen, InPtr - } ;; - { - .mmi - ADDP EndPtr = -1, EndPtr // Make it point to - // last data byte. - mov One = 1 - .save ar.lc, LCSave - mov LCSave = ar.lc - .body - } ;; - { - .mmb - sub Remainder = 0, OutPtr - cmp.gtu pSmall, p0 = $threshold, DataLen -(pSmall) br.cond.dpnt .rc4Remainder // Data too small for - // big loop. - } ;; - { - .mmi - and Remainder = 0x7, Remainder - ;; - cmp.eq pAligned, pUnaligned = Remainder, r0 - nop 0x0 - } ;; - { - .mmb -.pred.rel "mutex",pUnaligned,pAligned -(pUnaligned) add Remainder = -1, Remainder -(pAligned) sub Remainder = EndPtr, InPtr -(pAligned) br.cond.dptk.many .rc4Aligned - } ;; - { - .mmi - nop 0x0 - nop 0x0 - mov.i ar.lc = Remainder - } - -/* Do the initial few bytes via the compact, modulo-scheduled loop - until the output pointer is 8-byte-aligned. */ - - MODSCHED_RC4_PROLOGUE - MODSCHED_RC4_LOOP(.RC4AlignLoop) - - { - .mib - sub Remainder = EndPtr, InPtr - zxt1 IFinal = IFinal - clrrrb // Clear CFM.rrb.pr so - ;; // next "mov pr.rot = N" - // does the right thing. - } - { - .mmi - mov I[1] = IFinal - nop 0x0 - nop 0x0 - } ;; - - -.rc4Aligned: - -/* - Unrolled loop count = (Remainder - ($unroll_count+1)*$phases)/($unroll_count*$phases) - */ - - { - .mlx - add LoopCount = 1 - ($unroll_count + 1)*$phases, Remainder - movl Remainder = 0xaaaaaaaaaaaaaaab - } ;; - { - .mmi - setf.sig f6 = LoopCount // M2, M3 6 cyc - setf.sig f7 = Remainder // M2, M3 6 cyc - nop 0x0 - } ;; - { - .mfb - nop 0x0 - xmpy.hu f6 = f6, f7 - nop 0x0 - } ;; - { - .mmi - getf.sig LoopCount = f6;; // M2 5 cyc - nop 0x0 - shr.u LoopCount = LoopCount, 4 - } ;; - { - .mmi - nop 0x0 - nop 0x0 - mov.i ar.lc = LoopCount - } ;; - -/* Now comes the unrolled loop: */ - -.rc4Prologue: -___ - -$iteration = 0; - -# Generate the prologue: -$predicates = 1; -for ($i = 0; $i < $phases; ++$i) { - &emit_body (\$code, \$bypass, $iteration++, $predicates); - $predicates = ($predicates << 1) | 1; -} - -$code.=<<___; -.rc4Loop: -___ - -# Generate the body: -for ($i = 0; $i < $unroll_count*$phases; ++$i) { - &emit_body (\$code, \$bypass, $iteration++, $predicates); -} - -$code.=<<___; -.rc4Epilogue: -___ - -# Generate the epilogue: -for ($i = 0; $i < $phases; ++$i) { - $predicates <<= 1; - &emit_body (\$code, \$bypass, $iteration++, $predicates); -} - -$code.=<<___; - { - .mmi - lfetch.nt1 [EndPtr] // fetch line with last byte - mov IFinal = I[1] - nop 0x0 - } - -.rc4Remainder: - { - .mmi - sub Remainder = EndPtr, InPtr // Calculate - // # of bytes - // left - 1 - nop 0x0 - nop 0x0 - } ;; - { - .mib - cmp.eq pDone, p0 = -1, Remainder // done already? - mov.i ar.lc = Remainder -(pDone) br.cond.dptk.few .rc4Complete - } - -/* Do the remaining bytes via the compact, modulo-scheduled loop */ - - MODSCHED_RC4_PROLOGUE - MODSCHED_RC4_LOOP(.RC4RestLoop) - -.rc4Complete: - { - .mmi - add KTable = -SZ, KTable - add IFinal = -1, IFinal - mov ar.lc = LCSave - } ;; - { - .mii - SKEY [KTable] = J,-SZ - zxt1 IFinal = IFinal - mov pr = PRSave, 0x1FFFF - } ;; - { - .mib - SKEY [KTable] = IFinal - add RetVal = 1, r0 - br.ret.sptk.few rp - } ;; -___ - -# Last but not least, emit the code for the bypass-code of the unrolled loop: - -$code.=$bypass; - -$code.=<<___; - .endp RC4 -___ - -print $code; diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-md5-x86_64.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-md5-x86_64.pl deleted file mode 100644 index 272fa91e1a..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-md5-x86_64.pl +++ /dev/null @@ -1,632 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# June 2011 -# -# This is RC4+MD5 "stitch" implementation. The idea, as spelled in -# http://download.intel.com/design/intarch/papers/323686.pdf, is that -# since both algorithms exhibit instruction-level parallelism, ILP, -# below theoretical maximum, interleaving them would allow to utilize -# processor resources better and achieve better performance. RC4 -# instruction sequence is virtually identical to rc4-x86_64.pl, which -# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin -# and Jim Guilford of Intel. MD5 is fresh implementation aiming to -# minimize register usage, which was used as "main thread" with RC4 -# weaved into it, one RC4 round per one MD5 round. In addition to the -# stiched subroutine the script can generate standalone replacement -# md5_block_asm_data_order and RC4. Below are performance numbers in -# cycles per processed byte, less is better, for these the standalone -# subroutines, sum of them, and stitched one: -# -# RC4 MD5 RC4+MD5 stitch gain -# Opteron 6.5(*) 5.4 11.9 7.0 +70%(*) -# Core2 6.5 5.8 12.3 7.7 +60% -# Westmere 4.3 5.2 9.5 7.0 +36% -# Sandy Bridge 4.2 5.5 9.7 6.8 +43% -# Atom 9.3 6.5 15.8 11.1 +42% -# -# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement -# is +53%... - -my ($rc4,$md5)=(1,1); # what to generate? -my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(), - # but its result is discarded. Idea here is - # to be able to use 'openssl speed rc4' for - # benchmarking the stitched subroutine... - -my $flavour = shift; -my $output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs); - -if ($rc4 && !$md5) { - ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx"); - $func="RC4"; $nargs=4; -} elsif ($md5 && !$rc4) { - ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx"); - $func="md5_block_asm_data_order"; $nargs=3; -} else { - ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); - $func="rc4_md5_enc"; $nargs=6; - # void rc4_md5_enc( - # RC4_KEY *key, # - # const void *in0, # RC4 input - # void *out, # RC4 output - # MD5_CTX *ctx, # - # const void *inp, # MD5 input - # size_t len); # number of 64-byte blocks -} - -my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee, - 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501, - 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be, - 0x6b901122,0xfd987193,0xa679438e,0x49b40821, - - 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa, - 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8, - 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed, - 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a, - - 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c, - 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70, - 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05, - 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665, - - 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039, - 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1, - 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1, - 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391 ); - -my @V=("%r8d","%r9d","%r10d","%r11d"); # MD5 registers -my $tmp="%r12d"; - -my @XX=("%rbp","%rsi"); # RC4 registers -my @TX=("%rax","%rbx"); -my $YY="%rcx"; -my $TY="%rdx"; - -my $MOD=32; # 16, 32 or 64 - -$code.=<<___; -.text -.align 16 - -.globl $func -.type $func,\@function,$nargs -$func: - cmp \$0,$len - je .Labort - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - sub \$40,%rsp -.Lbody: -___ -if ($rc4) { -$code.=<<___; -$D#md5# mov $ctx,%r11 # reassign arguments - mov $len,%r12 - mov $in0,%r13 - mov $out,%r14 -$D#md5# mov $inp,%r15 -___ - $ctx="%r11" if ($md5); # reassign arguments - $len="%r12"; - $in0="%r13"; - $out="%r14"; - $inp="%r15" if ($md5); - $inp=$in0 if (!$md5); -$code.=<<___; - xor $XX[0],$XX[0] - xor $YY,$YY - - lea 8($dat),$dat - mov -8($dat),$XX[0]#b - mov -4($dat),$YY#b - - inc $XX[0]#b - sub $in0,$out - movl ($dat,$XX[0],4),$TX[0]#d -___ -$code.=<<___ if (!$md5); - xor $TX[1],$TX[1] - test \$-128,$len - jz .Loop1 - sub $XX[0],$TX[1] - and \$`$MOD-1`,$TX[1] - jz .Loop${MOD}_is_hot - sub $TX[1],$len -.Loop${MOD}_warmup: - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl $TY#d,($dat,$XX[0],4) - add $TY#b,$TX[0]#b - inc $XX[0]#b - movl ($dat,$TX[0],4),$TY#d - movl ($dat,$XX[0],4),$TX[0]#d - xorb ($in0),$TY#b - movb $TY#b,($out,$in0) - lea 1($in0),$in0 - dec $TX[1] - jnz .Loop${MOD}_warmup - - mov $YY,$TX[1] - xor $YY,$YY - mov $TX[1]#b,$YY#b - -.Loop${MOD}_is_hot: - mov $len,32(%rsp) # save original $len - shr \$6,$len # number of 64-byte blocks -___ - if ($D && !$md5) { # stitch in dummy MD5 - $md5=1; - $ctx="%r11"; - $inp="%r15"; - $code.=<<___; - mov %rsp,$ctx - mov $in0,$inp -___ - } -} -$code.=<<___; -#rc4# add $TX[0]#b,$YY#b -#rc4# lea ($dat,$XX[0],4),$XX[1] - shl \$6,$len - add $inp,$len # pointer to the end of input - mov $len,16(%rsp) - -#md5# mov $ctx,24(%rsp) # save pointer to MD5_CTX -#md5# mov 0*4($ctx),$V[0] # load current hash value from MD5_CTX -#md5# mov 1*4($ctx),$V[1] -#md5# mov 2*4($ctx),$V[2] -#md5# mov 3*4($ctx),$V[3] - jmp .Loop - -.align 16 -.Loop: -#md5# mov $V[0],0*4(%rsp) # put aside current hash value -#md5# mov $V[1],1*4(%rsp) -#md5# mov $V[2],2*4(%rsp) -#md5# mov $V[3],$tmp # forward reference -#md5# mov $V[3],3*4(%rsp) -___ - -sub R0 { - my ($i,$a,$b,$c,$d)=@_; - my @rot0=(7,12,17,22); - my $j=$i%16; - my $k=$i%$MOD; - my $xmm="%xmm".($j&1); - $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15); - $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); - $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); - $code.=<<___; -#rc4# movl ($dat,$YY,4),$TY#d -#md5# xor $c,$tmp -#rc4# movl $TX[0]#d,($dat,$YY,4) -#md5# and $b,$tmp -#md5# add 4*`$j`($inp),$a -#rc4# add $TY#b,$TX[0]#b -#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d -#md5# add \$$K[$i],$a -#md5# xor $d,$tmp -#rc4# movz $TX[0]#b,$TX[0]#d -#rc4# movl $TY#d,4*$k($XX[1]) -#md5# add $tmp,$a -#rc4# add $TX[1]#b,$YY#b -#md5# rol \$$rot0[$j%4],$a -#md5# mov `$j==15?"$b":"$c"`,$tmp # forward reference -#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n -#md5# add $b,$a -___ - $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); - mov $YY,$XX[1] - xor $YY,$YY # keyword to partial register - mov $XX[1]#b,$YY#b - lea ($dat,$XX[0],4),$XX[1] -___ - $code.=<<___ if ($rc4 && $j==15); - psllq \$8,%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm1,%xmm2 -___ -} -sub R1 { - my ($i,$a,$b,$c,$d)=@_; - my @rot1=(5,9,14,20); - my $j=$i%16; - my $k=$i%$MOD; - my $xmm="%xmm".($j&1); - $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15); - $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); - $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); - $code.=<<___; -#rc4# movl ($dat,$YY,4),$TY#d -#md5# xor $b,$tmp -#rc4# movl $TX[0]#d,($dat,$YY,4) -#md5# and $d,$tmp -#md5# add 4*`((1+5*$j)%16)`($inp),$a -#rc4# add $TY#b,$TX[0]#b -#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d -#md5# add \$$K[$i],$a -#md5# xor $c,$tmp -#rc4# movz $TX[0]#b,$TX[0]#d -#rc4# movl $TY#d,4*$k($XX[1]) -#md5# add $tmp,$a -#rc4# add $TX[1]#b,$YY#b -#md5# rol \$$rot1[$j%4],$a -#md5# mov `$j==15?"$c":"$b"`,$tmp # forward reference -#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n -#md5# add $b,$a -___ - $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); - mov $YY,$XX[1] - xor $YY,$YY # keyword to partial register - mov $XX[1]#b,$YY#b - lea ($dat,$XX[0],4),$XX[1] -___ - $code.=<<___ if ($rc4 && $j==15); - psllq \$8,%xmm1 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 -___ -} -sub R2 { - my ($i,$a,$b,$c,$d)=@_; - my @rot2=(4,11,16,23); - my $j=$i%16; - my $k=$i%$MOD; - my $xmm="%xmm".($j&1); - $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15); - $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); - $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); - $code.=<<___; -#rc4# movl ($dat,$YY,4),$TY#d -#md5# xor $c,$tmp -#rc4# movl $TX[0]#d,($dat,$YY,4) -#md5# xor $b,$tmp -#md5# add 4*`((5+3*$j)%16)`($inp),$a -#rc4# add $TY#b,$TX[0]#b -#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d -#md5# add \$$K[$i],$a -#rc4# movz $TX[0]#b,$TX[0]#d -#md5# add $tmp,$a -#rc4# movl $TY#d,4*$k($XX[1]) -#rc4# add $TX[1]#b,$YY#b -#md5# rol \$$rot2[$j%4],$a -#md5# mov `$j==15?"\\\$-1":"$c"`,$tmp # forward reference -#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n -#md5# add $b,$a -___ - $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1); - mov $YY,$XX[1] - xor $YY,$YY # keyword to partial register - mov $XX[1]#b,$YY#b - lea ($dat,$XX[0],4),$XX[1] -___ - $code.=<<___ if ($rc4 && $j==15); - psllq \$8,%xmm1 - pxor %xmm0,%xmm4 - pxor %xmm1,%xmm4 -___ -} -sub R3 { - my ($i,$a,$b,$c,$d)=@_; - my @rot3=(6,10,15,21); - my $j=$i%16; - my $k=$i%$MOD; - my $xmm="%xmm".($j&1); - $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15); - $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1); - $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1); - $code.=<<___; -#rc4# movl ($dat,$YY,4),$TY#d -#md5# xor $d,$tmp -#rc4# movl $TX[0]#d,($dat,$YY,4) -#md5# or $b,$tmp -#md5# add 4*`((7*$j)%16)`($inp),$a -#rc4# add $TY#b,$TX[0]#b -#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d -#md5# add \$$K[$i],$a -#rc4# movz $TX[0]#b,$TX[0]#d -#md5# xor $c,$tmp -#rc4# movl $TY#d,4*$k($XX[1]) -#md5# add $tmp,$a -#rc4# add $TX[1]#b,$YY#b -#md5# rol \$$rot3[$j%4],$a -#md5# mov \$-1,$tmp # forward reference -#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n -#md5# add $b,$a -___ - $code.=<<___ if ($rc4 && $j==15); - mov $XX[0],$XX[1] - xor $XX[0],$XX[0] # keyword to partial register - mov $XX[1]#b,$XX[0]#b - mov $YY,$XX[1] - xor $YY,$YY # keyword to partial register - mov $XX[1]#b,$YY#b - lea ($dat,$XX[0],4),$XX[1] - psllq \$8,%xmm1 - pxor %xmm0,%xmm5 - pxor %xmm1,%xmm5 -___ -} - -my $i=0; -for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } -for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } -for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } -for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); } - -$code.=<<___; -#md5# add 0*4(%rsp),$V[0] # accumulate hash value -#md5# add 1*4(%rsp),$V[1] -#md5# add 2*4(%rsp),$V[2] -#md5# add 3*4(%rsp),$V[3] - -#rc4# movdqu %xmm2,($out,$in0) # write RC4 output -#rc4# movdqu %xmm3,16($out,$in0) -#rc4# movdqu %xmm4,32($out,$in0) -#rc4# movdqu %xmm5,48($out,$in0) -#md5# lea 64($inp),$inp -#rc4# lea 64($in0),$in0 - cmp 16(%rsp),$inp # are we done? - jb .Loop - -#md5# mov 24(%rsp),$len # restore pointer to MD5_CTX -#rc4# sub $TX[0]#b,$YY#b # correct $YY -#md5# mov $V[0],0*4($len) # write MD5_CTX -#md5# mov $V[1],1*4($len) -#md5# mov $V[2],2*4($len) -#md5# mov $V[3],3*4($len) -___ -$code.=<<___ if ($rc4 && (!$md5 || $D)); - mov 32(%rsp),$len # restore original $len - and \$63,$len # remaining bytes - jnz .Loop1 - jmp .Ldone - -.align 16 -.Loop1: - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl $TY#d,($dat,$XX[0],4) - add $TY#b,$TX[0]#b - inc $XX[0]#b - movl ($dat,$TX[0],4),$TY#d - movl ($dat,$XX[0],4),$TX[0]#d - xorb ($in0),$TY#b - movb $TY#b,($out,$in0) - lea 1($in0),$in0 - dec $len - jnz .Loop1 - -.Ldone: -___ -$code.=<<___; -#rc4# sub \$1,$XX[0]#b -#rc4# movl $XX[0]#d,-8($dat) -#rc4# movl $YY#d,-4($dat) - - mov 40(%rsp),%r15 - mov 48(%rsp),%r14 - mov 56(%rsp),%r13 - mov 64(%rsp),%r12 - mov 72(%rsp),%rbp - mov 80(%rsp),%rbx - lea 88(%rsp),%rsp -.Lepilogue: -.Labort: - ret -.size $func,.-$func -___ - -if ($rc4 && $D) { # sole purpose of this section is to provide - # option to use the generated module as drop-in - # replacement for rc4-x86_64.pl for debugging - # and testing purposes... -my ($idx,$ido)=("%r8","%r9"); -my ($dat,$len,$inp)=("%rdi","%rsi","%rdx"); - -$code.=<<___; -.globl RC4_set_key -.type RC4_set_key,\@function,3 -.align 16 -RC4_set_key: - lea 8($dat),$dat - lea ($inp,$len),$inp - neg $len - mov $len,%rcx - xor %eax,%eax - xor $ido,$ido - xor %r10,%r10 - xor %r11,%r11 - jmp .Lw1stloop - -.align 16 -.Lw1stloop: - mov %eax,($dat,%rax,4) - add \$1,%al - jnc .Lw1stloop - - xor $ido,$ido - xor $idx,$idx -.align 16 -.Lw2ndloop: - mov ($dat,$ido,4),%r10d - add ($inp,$len,1),$idx#b - add %r10b,$idx#b - add \$1,$len - mov ($dat,$idx,4),%r11d - cmovz %rcx,$len - mov %r10d,($dat,$idx,4) - mov %r11d,($dat,$ido,4) - add \$1,$ido#b - jnc .Lw2ndloop - - xor %eax,%eax - mov %eax,-8($dat) - mov %eax,-4($dat) - ret -.size RC4_set_key,.-RC4_set_key - -.globl RC4_options -.type RC4_options,\@abi-omnipotent -.align 16 -RC4_options: - lea .Lopts(%rip),%rax - ret -.align 64 -.Lopts: -.asciz "rc4(64x,int)" -.align 64 -.size RC4_options,.-RC4_options -___ -} -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -my $rec="%rcx"; -my $frame="%rdx"; -my $context="%r8"; -my $disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lbody(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lbody - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue - - mov 40(%rax),%r15 - mov 48(%rax),%r14 - mov 56(%rax),%r13 - mov 64(%rax),%r12 - mov 72(%rax),%rbp - mov 80(%rax),%rbx - lea 88(%rax),%rax - - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R12 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_$func - .rva .LSEH_end_$func - .rva .LSEH_info_$func - -.section .xdata -.align 8 -.LSEH_info_$func: - .byte 9,0,0,0 - .rva se_handler -___ -} - -sub reg_part { -my ($reg,$conv)=@_; - if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } - elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } - elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } - elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } - return $reg; -} - -$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/pinsrw\s+\$0,/movd /gm; - -$code =~ s/#md5#//gm if ($md5); -$code =~ s/#rc4#//gm if ($rc4); - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-parisc.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-parisc.pl deleted file mode 100644 index ad7e65651c..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-parisc.pl +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# RC4 for PA-RISC. - -# June 2009. -# -# Performance is 33% better than gcc 3.2 generated code on PA-7100LC. -# For reference, [4x] unrolled loop is >40% faster than folded one. -# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement -# is believed to be not sufficient to justify the effort... -# -# Special thanks to polarhome.com for providing HP-UX account. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; -} else { - $LEVEL ="1.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; -} - -$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker - # [+ argument transfer] -$SZ=1; # defaults to RC4_CHAR -if (open CONF,"<${dir}../../opensslconf.h") { - while() { - if (m/#\s*define\s+RC4_INT\s+(.*)/) { - $SZ = ($1=~/char$/) ? 1 : 4; - last; - } - } - close CONF; -} - -if ($SZ==1) { # RC4_CHAR - $LD="ldb"; - $LDX="ldbx"; - $MKX="addl"; - $ST="stb"; -} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC) - $LD="ldw"; - $LDX="ldwx,s"; - $MKX="sh2addl"; - $ST="stw"; -} - -$key="%r26"; -$len="%r25"; -$inp="%r24"; -$out="%r23"; - -@XX=("%r19","%r20"); -@TX=("%r21","%r22"); -$YY="%r28"; -$TY="%r29"; - -$acc="%r1"; -$ix="%r2"; -$iy="%r3"; -$dat0="%r4"; -$dat1="%r5"; -$rem="%r6"; -$mask="%r31"; - -sub unrolledloopbody { -for ($i=0;$i<4;$i++) { -$code.=<<___; - ldo 1($XX[0]),$XX[1] - `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)` - and $mask,$XX[1],$XX[1] - $LDX $YY($key),$TY - $MKX $YY,$key,$ix - $LDX $XX[1]($key),$TX[1] - $MKX $XX[0],$key,$iy - $ST $TX[0],0($ix) - comclr,<> $XX[1],$YY,%r0 ; conditional - copy $TX[0],$TX[1] ; move - `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)` - $ST $TY,0($iy) - addl $TX[0],$TY,$TY - addl $TX[1],$YY,$YY - and $mask,$TY,$TY - and $mask,$YY,$YY -___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers -} } - -sub foldedloop { -my ($label,$count)=@_; -$code.=<<___; -$label - $MKX $YY,$key,$iy - $LDX $YY($key),$TY - $MKX $XX[0],$key,$ix - $ST $TX[0],0($iy) - ldo 1($XX[0]),$XX[0] - $ST $TY,0($ix) - addl $TX[0],$TY,$TY - ldbx $inp($out),$dat1 - and $mask,$TY,$TY - and $mask,$XX[0],$XX[0] - $LDX $TY($key),$acc - $LDX $XX[0]($key),$TX[0] - ldo 1($out),$out - xor $dat1,$acc,$acc - addl $TX[0],$YY,$YY - stb $acc,-1($out) - addib,<> -1,$count,$label ; $count is always small - and $mask,$YY,$YY -___ -} - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR -RC4 - .PROC - .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - - cmpib,*= 0,$len,L\$abort - sub $inp,$out,$inp ; distance between $inp and $out - - $LD `0*$SZ`($key),$XX[0] - $LD `1*$SZ`($key),$YY - ldo `2*$SZ`($key),$key - - ldi 0xff,$mask - ldi 3,$dat0 - - ldo 1($XX[0]),$XX[0] ; warm up loop - and $mask,$XX[0],$XX[0] - $LDX $XX[0]($key),$TX[0] - addl $TX[0],$YY,$YY - cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother? - and $mask,$YY,$YY - - and,<> $out,$dat0,$rem ; is $out aligned? - b L\$alignedout - subi 4,$rem,$rem - sub $len,$rem,$len -___ -&foldedloop("L\$alignout",$rem); # process till $out is aligned - -$code.=<<___; -L\$alignedout ; $len is at least 4 here - and,<> $inp,$dat0,$acc ; is $inp aligned? - b L\$oop4 - sub $inp,$acc,$rem ; align $inp - - sh3addl $acc,%r0,$acc - subi 32,$acc,$acc - mtctl $acc,%cr11 ; load %sar with vshd align factor - ldwx $rem($out),$dat0 - ldo 4($rem),$rem -L\$oop4misalignedinp -___ -&unrolledloopbody(); -$code.=<<___; - $LDX $TY($key),$ix - ldwx $rem($out),$dat1 - ldo -4($len),$len - or $ix,$acc,$acc ; last piece, no need to dep - vshd $dat0,$dat1,$iy ; align data - copy $dat1,$dat0 - xor $iy,$acc,$acc - stw $acc,0($out) - cmpib,*<< 3,$len,L\$oop4misalignedinp - ldo 4($out),$out - cmpib,*= 0,$len,L\$done - nop - b L\$oop1 - nop - - .ALIGN 8 -L\$oop4 -___ -&unrolledloopbody(); -$code.=<<___; - $LDX $TY($key),$ix - ldwx $inp($out),$dat0 - ldo -4($len),$len - or $ix,$acc,$acc ; last piece, no need to dep - xor $dat0,$acc,$acc - stw $acc,0($out) - cmpib,*<< 3,$len,L\$oop4 - ldo 4($out),$out - cmpib,*= 0,$len,L\$done - nop -___ -&foldedloop("L\$oop1",$len); -$code.=<<___; -L\$done - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 - ldo -1($XX[0]),$XX[0] ; chill out loop - sub $YY,$TX[0],$YY - and $mask,$XX[0],$XX[0] - and $mask,$YY,$YY - $ST $XX[0],`-2*$SZ`($key) - $ST $YY,`-1*$SZ`($key) - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 -L\$abort - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND -___ - -$code.=<<___; - - .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR - .ALIGN 8 -private_RC4_set_key - .PROC - .CALLINFO NO_CALLS - .ENTRY - $ST %r0,`0*$SZ`($key) - $ST %r0,`1*$SZ`($key) - ldo `2*$SZ`($key),$key - copy %r0,@XX[0] -L\$1st - $ST @XX[0],0($key) - ldo 1(@XX[0]),@XX[0] - bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256 - ldo $SZ($key),$key - - ldo `-256*$SZ`($key),$key ; rewind $key - addl $len,$inp,$inp ; $inp to point at the end - sub %r0,$len,%r23 ; inverse index - copy %r0,@XX[0] - copy %r0,@XX[1] - ldi 0xff,$mask - -L\$2nd - $LDX @XX[0]($key),@TX[0] - ldbx %r23($inp),@TX[1] - addi,nuv 1,%r23,%r23 ; increment and conditional - sub %r0,$len,%r23 ; inverse index - addl @TX[0],@XX[1],@XX[1] - addl @TX[1],@XX[1],@XX[1] - and $mask,@XX[1],@XX[1] - $MKX @XX[0],$key,$TY - $LDX @XX[1]($key),@TX[1] - $MKX @XX[1],$key,$YY - ldo 1(@XX[0]),@XX[0] - $ST @TX[0],0($YY) - bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256 - $ST @TX[1],0($TY) - - bv,n (%r2) - .EXIT - nop - .PROCEND - - .EXPORT RC4_options,ENTRY - .ALIGN 8 -RC4_options - .PROC - .CALLINFO NO_CALLS - .ENTRY - blr %r0,%r28 - ldi 3,%r1 -L\$pic - andcm %r28,%r1,%r28 - bv (%r2) - .EXIT - ldo L\$opts-L\$pic(%r28),%r28 - .PROCEND - .ALIGN 8 -L\$opts - .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)" - .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by " -___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4); -$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-s390x.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-s390x.pl deleted file mode 100644 index 7528ece13c..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-s390x.pl +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# February 2009 -# -# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to -# "cluster" Address Generation Interlocks, so that one pipeline stall -# resolves several dependencies. - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. On z990 it was measured to perform -# 50% better than code generated by gcc 4.3. - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$rp="%r14"; -$sp="%r15"; -$code=<<___; -.text - -___ - -# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out) -{ -$acc="%r0"; -$cnt="%r1"; -$key="%r2"; -$len="%r3"; -$inp="%r4"; -$out="%r5"; - -@XX=("%r6","%r7"); -@TX=("%r8","%r9"); -$YY="%r10"; -$TY="%r11"; - -$code.=<<___; -.globl RC4 -.type RC4,\@function -.align 64 -RC4: - stm${g} %r6,%r11,6*$SIZE_T($sp) -___ -$code.=<<___ if ($flavour =~ /3[12]/); - llgfr $len,$len -___ -$code.=<<___; - llgc $XX[0],0($key) - llgc $YY,1($key) - la $XX[0],1($XX[0]) - nill $XX[0],0xff - srlg $cnt,$len,3 - ltgr $cnt,$cnt - llgc $TX[0],2($XX[0],$key) - jz .Lshort - j .Loop8 - -.align 64 -.Loop8: -___ -for ($i=0;$i<8;$i++) { -$code.=<<___; - la $YY,0($YY,$TX[0]) # $i - nill $YY,255 - la $XX[1],1($XX[0]) - nill $XX[1],255 -___ -$code.=<<___ if ($i==1); - llgc $acc,2($TY,$key) -___ -$code.=<<___ if ($i>1); - sllg $acc,$acc,8 - ic $acc,2($TY,$key) -___ -$code.=<<___; - llgc $TY,2($YY,$key) - stc $TX[0],2($YY,$key) - llgc $TX[1],2($XX[1],$key) - stc $TY,2($XX[0],$key) - cr $XX[1],$YY - jne .Lcmov$i - la $TX[1],0($TX[0]) -.Lcmov$i: - la $TY,0($TY,$TX[0]) - nill $TY,255 -___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers -} - -$code.=<<___; - lg $TX[1],0($inp) - sllg $acc,$acc,8 - la $inp,8($inp) - ic $acc,2($TY,$key) - xgr $acc,$TX[1] - stg $acc,0($out) - la $out,8($out) - brctg $cnt,.Loop8 - -.Lshort: - lghi $acc,7 - ngr $len,$acc - jz .Lexit - j .Loop1 - -.align 16 -.Loop1: - la $YY,0($YY,$TX[0]) - nill $YY,255 - llgc $TY,2($YY,$key) - stc $TX[0],2($YY,$key) - stc $TY,2($XX[0],$key) - ar $TY,$TX[0] - ahi $XX[0],1 - nill $TY,255 - nill $XX[0],255 - llgc $acc,0($inp) - la $inp,1($inp) - llgc $TY,2($TY,$key) - llgc $TX[0],2($XX[0],$key) - xr $acc,$TY - stc $acc,0($out) - la $out,1($out) - brct $len,.Loop1 - -.Lexit: - ahi $XX[0],-1 - stc $XX[0],0($key) - stc $YY,1($key) - lm${g} %r6,%r11,6*$SIZE_T($sp) - br $rp -.size RC4,.-RC4 -.string "RC4 for s390x, CRYPTOGAMS by " - -___ -} - -# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp) -{ -$cnt="%r0"; -$idx="%r1"; -$key="%r2"; -$len="%r3"; -$inp="%r4"; -$acc="%r5"; -$dat="%r6"; -$ikey="%r7"; -$iinp="%r8"; - -$code.=<<___; -.globl private_RC4_set_key -.type private_RC4_set_key,\@function -.align 64 -private_RC4_set_key: - stm${g} %r6,%r8,6*$SIZE_T($sp) - lhi $cnt,256 - la $idx,0(%r0) - sth $idx,0($key) -.align 4 -.L1stloop: - stc $idx,2($idx,$key) - la $idx,1($idx) - brct $cnt,.L1stloop - - lghi $ikey,-256 - lr $cnt,$len - la $iinp,0(%r0) - la $idx,0(%r0) -.align 16 -.L2ndloop: - llgc $acc,2+256($ikey,$key) - llgc $dat,0($iinp,$inp) - la $idx,0($idx,$acc) - la $ikey,1($ikey) - la $idx,0($idx,$dat) - nill $idx,255 - la $iinp,1($iinp) - tml $ikey,255 - llgc $dat,2($idx,$key) - stc $dat,2+256-1($ikey,$key) - stc $acc,2($idx,$key) - jz .Ldone - brct $cnt,.L2ndloop - lr $cnt,$len - la $iinp,0(%r0) - j .L2ndloop -.Ldone: - lm${g} %r6,%r8,6*$SIZE_T($sp) - br $rp -.size private_RC4_set_key,.-private_RC4_set_key - -___ -} - -# const char *RC4_options() -$code.=<<___; -.globl RC4_options -.type RC4_options,\@function -.align 16 -RC4_options: - larl %r2,.Loptions - br %r14 -.size RC4_options,.-RC4_options -.section .rodata -.Loptions: -.align 8 -.string "rc4(8x,char)" -___ - -print $code; -close STDOUT; # force flush diff --git a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-x86_64.pl b/drivers/builtin_openssl2/crypto/rc4/asm/rc4-x86_64.pl deleted file mode 100755 index 20722d3e72..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/asm/rc4-x86_64.pl +++ /dev/null @@ -1,677 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# July 2004 -# -# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in -# "hand-coded assembler"] doesn't stand for the whole improvement -# coefficient. It turned out that eliminating RC4_CHAR from config -# line results in ~40% improvement (yes, even for C implementation). -# Presumably it has everything to do with AMD cache architecture and -# RAW or whatever penalties. Once again! The module *requires* config -# line *without* RC4_CHAR! As for coding "secret," I bet on partial -# register arithmetics. For example instead of 'inc %r8; and $255,%r8' -# I simply 'inc %r8b'. Even though optimization manual discourages -# to operate on partial registers, it turned out to be the best bet. -# At least for AMD... How IA32E would perform remains to be seen... - -# November 2004 -# -# As was shown by Marc Bevand reordering of couple of load operations -# results in even higher performance gain of 3.3x:-) At least on -# Opteron... For reference, 1x in this case is RC4_CHAR C-code -# compiled with gcc 3.3.2, which performs at ~54MBps per 1GHz clock. -# Latter means that if you want to *estimate* what to expect from -# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz. - -# November 2004 -# -# Intel P4 EM64T core was found to run the AMD64 code really slow... -# The only way to achieve comparable performance on P4 was to keep -# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to -# compose blended code, which would perform even within 30% marginal -# on either AMD and Intel platforms, I implement both cases. See -# rc4_skey.c for further details... - -# April 2005 -# -# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing -# those with add/sub results in 50% performance improvement of folded -# loop... - -# May 2005 -# -# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T -# performance by >30% [unlike P4 32-bit case that is]. But this is -# provided that loads are reordered even more aggressively! Both code -# pathes, AMD64 and EM64T, reorder loads in essentially same manner -# as my IA-64 implementation. On Opteron this resulted in modest 5% -# improvement [I had to test it], while final Intel P4 performance -# achieves respectful 432MBps on 2.8GHz processor now. For reference. -# If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than -# RC4_INT code-path. While if executed on Opteron, it's only 25% -# slower than the RC4_INT one [meaning that if CPU µ-arch detection -# is not implemented, then this final RC4_CHAR code-path should be -# preferred, as it provides better *all-round* performance]. - -# March 2007 -# -# Intel Core2 was observed to perform poorly on both code paths:-( It -# apparently suffers from some kind of partial register stall, which -# occurs in 64-bit mode only [as virtually identical 32-bit loop was -# observed to outperform 64-bit one by almost 50%]. Adding two movzb to -# cloop1 boosts its performance by 80%! This loop appears to be optimal -# fit for Core2 and therefore the code was modified to skip cloop8 on -# this CPU. - -# May 2010 -# -# Intel Westmere was observed to perform suboptimally. Adding yet -# another movzb to cloop1 improved performance by almost 50%! Core2 -# performance is improved too, but nominally... - -# May 2011 -# -# The only code path that was not modified is P4-specific one. Non-P4 -# Intel code path optimization is heavily based on submission by Maxim -# Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used -# some of the ideas even in attempt to optmize the original RC4_INT -# code path... Current performance in cycles per processed byte (less -# is better) and improvement coefficients relative to previous -# version of this module are: -# -# Opteron 5.3/+0%(*) -# P4 6.5 -# Core2 6.2/+15%(**) -# Westmere 4.2/+60% -# Sandy Bridge 4.2/+120% -# Atom 9.3/+80% -# -# (*) But corresponding loop has less instructions, which should have -# positive effect on upcoming Bulldozer, which has one less ALU. -# For reference, Intel code runs at 6.8 cpb rate on Opteron. -# (**) Note that Core2 result is ~15% lower than corresponding result -# for 32-bit code, meaning that it's possible to improve it, -# but more than likely at the cost of the others (see rc4-586.pl -# to get the idea)... - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$dat="%rdi"; # arg1 -$len="%rsi"; # arg2 -$inp="%rdx"; # arg3 -$out="%rcx"; # arg4 - -{ -$code=<<___; -.text -.extern OPENSSL_ia32cap_P - -.globl RC4 -.type RC4,\@function,4 -.align 16 -RC4: or $len,$len - jne .Lentry - ret -.Lentry: - push %rbx - push %r12 - push %r13 -.Lprologue: - mov $len,%r11 - mov $inp,%r12 - mov $out,%r13 -___ -my $len="%r11"; # reassign input arguments -my $inp="%r12"; -my $out="%r13"; - -my @XX=("%r10","%rsi"); -my @TX=("%rax","%rbx"); -my $YY="%rcx"; -my $TY="%rdx"; - -$code.=<<___; - xor $XX[0],$XX[0] - xor $YY,$YY - - lea 8($dat),$dat - mov -8($dat),$XX[0]#b - mov -4($dat),$YY#b - cmpl \$-1,256($dat) - je .LRC4_CHAR - mov OPENSSL_ia32cap_P(%rip),%r8d - xor $TX[1],$TX[1] - inc $XX[0]#b - sub $XX[0],$TX[1] - sub $inp,$out - movl ($dat,$XX[0],4),$TX[0]#d - test \$-16,$len - jz .Lloop1 - bt \$30,%r8d # Intel CPU? - jc .Lintel - and \$7,$TX[1] - lea 1($XX[0]),$XX[1] - jz .Loop8 - sub $TX[1],$len -.Loop8_warmup: - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl $TY#d,($dat,$XX[0],4) - add $TY#b,$TX[0]#b - inc $XX[0]#b - movl ($dat,$TX[0],4),$TY#d - movl ($dat,$XX[0],4),$TX[0]#d - xorb ($inp),$TY#b - movb $TY#b,($out,$inp) - lea 1($inp),$inp - dec $TX[1] - jnz .Loop8_warmup - - lea 1($XX[0]),$XX[1] - jmp .Loop8 -.align 16 -.Loop8: -___ -for ($i=0;$i<8;$i++) { -$code.=<<___ if ($i==7); - add \$8,$XX[1]#b -___ -$code.=<<___; - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl `4*($i==7?-1:$i)`($dat,$XX[1],4),$TX[1]#d - ror \$8,%r8 # ror is redundant when $i=0 - movl $TY#d,4*$i($dat,$XX[0],4) - add $TX[0]#b,$TY#b - movb ($dat,$TY,4),%r8b -___ -push(@TX,shift(@TX)); #push(@XX,shift(@XX)); # "rotate" registers -} -$code.=<<___; - add \$8,$XX[0]#b - ror \$8,%r8 - sub \$8,$len - - xor ($inp),%r8 - mov %r8,($out,$inp) - lea 8($inp),$inp - - test \$-8,$len - jnz .Loop8 - cmp \$0,$len - jne .Lloop1 - jmp .Lexit - -.align 16 -.Lintel: - test \$-32,$len - jz .Lloop1 - and \$15,$TX[1] - jz .Loop16_is_hot - sub $TX[1],$len -.Loop16_warmup: - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl $TY#d,($dat,$XX[0],4) - add $TY#b,$TX[0]#b - inc $XX[0]#b - movl ($dat,$TX[0],4),$TY#d - movl ($dat,$XX[0],4),$TX[0]#d - xorb ($inp),$TY#b - movb $TY#b,($out,$inp) - lea 1($inp),$inp - dec $TX[1] - jnz .Loop16_warmup - - mov $YY,$TX[1] - xor $YY,$YY - mov $TX[1]#b,$YY#b - -.Loop16_is_hot: - lea ($dat,$XX[0],4),$XX[1] -___ -sub RC4_loop { - my $i=shift; - my $j=$i<0?0:$i; - my $xmm="%xmm".($j&1); - - $code.=" add \$16,$XX[0]#b\n" if ($i==15); - $code.=" movdqu ($inp),%xmm2\n" if ($i==15); - $code.=" add $TX[0]#b,$YY#b\n" if ($i<=0); - $code.=" movl ($dat,$YY,4),$TY#d\n"; - $code.=" pxor %xmm0,%xmm2\n" if ($i==0); - $code.=" psllq \$8,%xmm1\n" if ($i==0); - $code.=" pxor $xmm,$xmm\n" if ($i<=1); - $code.=" movl $TX[0]#d,($dat,$YY,4)\n"; - $code.=" add $TY#b,$TX[0]#b\n"; - $code.=" movl `4*($j+1)`($XX[1]),$TX[1]#d\n" if ($i<15); - $code.=" movz $TX[0]#b,$TX[0]#d\n"; - $code.=" movl $TY#d,4*$j($XX[1])\n"; - $code.=" pxor %xmm1,%xmm2\n" if ($i==0); - $code.=" lea ($dat,$XX[0],4),$XX[1]\n" if ($i==15); - $code.=" add $TX[1]#b,$YY#b\n" if ($i<15); - $code.=" pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n"; - $code.=" movdqu %xmm2,($out,$inp)\n" if ($i==0); - $code.=" lea 16($inp),$inp\n" if ($i==0); - $code.=" movl ($XX[1]),$TX[1]#d\n" if ($i==15); -} - RC4_loop(-1); -$code.=<<___; - jmp .Loop16_enter -.align 16 -.Loop16: -___ - -for ($i=0;$i<16;$i++) { - $code.=".Loop16_enter:\n" if ($i==1); - RC4_loop($i); - push(@TX,shift(@TX)); # "rotate" registers -} -$code.=<<___; - mov $YY,$TX[1] - xor $YY,$YY # keyword to partial register - sub \$16,$len - mov $TX[1]#b,$YY#b - test \$-16,$len - jnz .Loop16 - - psllq \$8,%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm1,%xmm2 - movdqu %xmm2,($out,$inp) - lea 16($inp),$inp - - cmp \$0,$len - jne .Lloop1 - jmp .Lexit - -.align 16 -.Lloop1: - add $TX[0]#b,$YY#b - movl ($dat,$YY,4),$TY#d - movl $TX[0]#d,($dat,$YY,4) - movl $TY#d,($dat,$XX[0],4) - add $TY#b,$TX[0]#b - inc $XX[0]#b - movl ($dat,$TX[0],4),$TY#d - movl ($dat,$XX[0],4),$TX[0]#d - xorb ($inp),$TY#b - movb $TY#b,($out,$inp) - lea 1($inp),$inp - dec $len - jnz .Lloop1 - jmp .Lexit - -.align 16 -.LRC4_CHAR: - add \$1,$XX[0]#b - movzb ($dat,$XX[0]),$TX[0]#d - test \$-8,$len - jz .Lcloop1 - jmp .Lcloop8 -.align 16 -.Lcloop8: - mov ($inp),%r8d - mov 4($inp),%r9d -___ -# unroll 2x4-wise, because 64-bit rotates kill Intel P4... -for ($i=0;$i<4;$i++) { -$code.=<<___; - add $TX[0]#b,$YY#b - lea 1($XX[0]),$XX[1] - movzb ($dat,$YY),$TY#d - movzb $XX[1]#b,$XX[1]#d - movzb ($dat,$XX[1]),$TX[1]#d - movb $TX[0]#b,($dat,$YY) - cmp $XX[1],$YY - movb $TY#b,($dat,$XX[0]) - jne .Lcmov$i # Intel cmov is sloooow... - mov $TX[0],$TX[1] -.Lcmov$i: - add $TX[0]#b,$TY#b - xor ($dat,$TY),%r8b - ror \$8,%r8d -___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers -} -for ($i=4;$i<8;$i++) { -$code.=<<___; - add $TX[0]#b,$YY#b - lea 1($XX[0]),$XX[1] - movzb ($dat,$YY),$TY#d - movzb $XX[1]#b,$XX[1]#d - movzb ($dat,$XX[1]),$TX[1]#d - movb $TX[0]#b,($dat,$YY) - cmp $XX[1],$YY - movb $TY#b,($dat,$XX[0]) - jne .Lcmov$i # Intel cmov is sloooow... - mov $TX[0],$TX[1] -.Lcmov$i: - add $TX[0]#b,$TY#b - xor ($dat,$TY),%r9b - ror \$8,%r9d -___ -push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers -} -$code.=<<___; - lea -8($len),$len - mov %r8d,($out) - lea 8($inp),$inp - mov %r9d,4($out) - lea 8($out),$out - - test \$-8,$len - jnz .Lcloop8 - cmp \$0,$len - jne .Lcloop1 - jmp .Lexit -___ -$code.=<<___; -.align 16 -.Lcloop1: - add $TX[0]#b,$YY#b - movzb $YY#b,$YY#d - movzb ($dat,$YY),$TY#d - movb $TX[0]#b,($dat,$YY) - movb $TY#b,($dat,$XX[0]) - add $TX[0]#b,$TY#b - add \$1,$XX[0]#b - movzb $TY#b,$TY#d - movzb $XX[0]#b,$XX[0]#d - movzb ($dat,$TY),$TY#d - movzb ($dat,$XX[0]),$TX[0]#d - xorb ($inp),$TY#b - lea 1($inp),$inp - movb $TY#b,($out) - lea 1($out),$out - sub \$1,$len - jnz .Lcloop1 - jmp .Lexit - -.align 16 -.Lexit: - sub \$1,$XX[0]#b - movl $XX[0]#d,-8($dat) - movl $YY#d,-4($dat) - - mov (%rsp),%r13 - mov 8(%rsp),%r12 - mov 16(%rsp),%rbx - add \$24,%rsp -.Lepilogue: - ret -.size RC4,.-RC4 -___ -} - -$idx="%r8"; -$ido="%r9"; - -$code.=<<___; -.globl private_RC4_set_key -.type private_RC4_set_key,\@function,3 -.align 16 -private_RC4_set_key: - lea 8($dat),$dat - lea ($inp,$len),$inp - neg $len - mov $len,%rcx - xor %eax,%eax - xor $ido,$ido - xor %r10,%r10 - xor %r11,%r11 - - mov OPENSSL_ia32cap_P(%rip),$idx#d - bt \$20,$idx#d # RC4_CHAR? - jc .Lc1stloop - jmp .Lw1stloop - -.align 16 -.Lw1stloop: - mov %eax,($dat,%rax,4) - add \$1,%al - jnc .Lw1stloop - - xor $ido,$ido - xor $idx,$idx -.align 16 -.Lw2ndloop: - mov ($dat,$ido,4),%r10d - add ($inp,$len,1),$idx#b - add %r10b,$idx#b - add \$1,$len - mov ($dat,$idx,4),%r11d - cmovz %rcx,$len - mov %r10d,($dat,$idx,4) - mov %r11d,($dat,$ido,4) - add \$1,$ido#b - jnc .Lw2ndloop - jmp .Lexit_key - -.align 16 -.Lc1stloop: - mov %al,($dat,%rax) - add \$1,%al - jnc .Lc1stloop - - xor $ido,$ido - xor $idx,$idx -.align 16 -.Lc2ndloop: - mov ($dat,$ido),%r10b - add ($inp,$len),$idx#b - add %r10b,$idx#b - add \$1,$len - mov ($dat,$idx),%r11b - jnz .Lcnowrap - mov %rcx,$len -.Lcnowrap: - mov %r10b,($dat,$idx) - mov %r11b,($dat,$ido) - add \$1,$ido#b - jnc .Lc2ndloop - movl \$-1,256($dat) - -.align 16 -.Lexit_key: - xor %eax,%eax - mov %eax,-8($dat) - mov %eax,-4($dat) - ret -.size private_RC4_set_key,.-private_RC4_set_key - -.globl RC4_options -.type RC4_options,\@abi-omnipotent -.align 16 -RC4_options: - lea .Lopts(%rip),%rax - mov OPENSSL_ia32cap_P(%rip),%edx - bt \$20,%edx - jc .L8xchar - bt \$30,%edx - jnc .Ldone - add \$25,%rax - ret -.L8xchar: - add \$12,%rax -.Ldone: - ret -.align 64 -.Lopts: -.asciz "rc4(8x,int)" -.asciz "rc4(8x,char)" -.asciz "rc4(16x,int)" -.asciz "RC4 for x86_64, CRYPTOGAMS by " -.align 64 -.size RC4_options,.-RC4_options -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type stream_se_handler,\@abi-omnipotent -.align 16 -stream_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->RipRsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - lea 24(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%r12 - mov -24(%rax),%r13 - mov %rbx,144($context) # restore context->Rbx - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - jmp .Lcommon_seh_exit -.size stream_se_handler,.-stream_se_handler - -.type key_se_handler,\@abi-omnipotent -.align 16 -key_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 152($context),%rax # pull context->Rsp - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - -.Lcommon_seh_exit: - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size key_se_handler,.-key_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_RC4 - .rva .LSEH_end_RC4 - .rva .LSEH_info_RC4 - - .rva .LSEH_begin_private_RC4_set_key - .rva .LSEH_end_private_RC4_set_key - .rva .LSEH_info_private_RC4_set_key - -.section .xdata -.align 8 -.LSEH_info_RC4: - .byte 9,0,0,0 - .rva stream_se_handler -.LSEH_info_private_RC4_set_key: - .byte 9,0,0,0 - .rva key_se_handler -___ -} - -sub reg_part { -my ($reg,$conv)=@_; - if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } - elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } - elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } - elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } - return $reg; -} - -$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; -$code =~ s/\`([^\`]*)\`/eval $1/gem; - -print $code; - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/rc4/rc4_enc.c b/drivers/builtin_openssl2/crypto/rc4/rc4_enc.c index 6ebd54d46c..0f0a2487a7 100644 --- a/drivers/builtin_openssl2/crypto/rc4/rc4_enc.c +++ b/drivers/builtin_openssl2/crypto/rc4/rc4_enc.c @@ -79,7 +79,7 @@ void RC4(RC4_KEY *key, size_t len, const unsigned char *indata, y = key->y; d = key->data; -#if defined(RC4_CHUNK) +#if defined(RC4_CHUNK) && !defined(PEDANTIC) /*- * The original reason for implementing this(*) was the fact that * pre-21164a Alpha CPUs don't have byte load/store instructions diff --git a/drivers/builtin_openssl2/crypto/rc4/rc4test.c b/drivers/builtin_openssl2/crypto/rc4/rc4test.c deleted file mode 100644 index e2bfbfa1fc..0000000000 --- a/drivers/builtin_openssl2/crypto/rc4/rc4test.c +++ /dev/null @@ -1,235 +0,0 @@ -/* crypto/rc4/rc4test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_RC4 -int main(int argc, char *argv[]) -{ - printf("No RC4 support\n"); - return (0); -} -#else -# include -# include - -static unsigned char keys[7][30] = { - {8, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}, - {8, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}, - {8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {4, 0xef, 0x01, 0x23, 0x45}, - {8, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}, - {4, 0xef, 0x01, 0x23, 0x45}, -}; - -static unsigned char data_len[7] = { 8, 8, 8, 20, 28, 10 }; - -static unsigned char data[7][30] = { - {0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff}, - {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, - 0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, - 0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, - 0x12, 0x34, 0x56, 0x78, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, - {0}, -}; - -static unsigned char output[7][30] = { - {0x75, 0xb7, 0x87, 0x80, 0x99, 0xe0, 0xc5, 0x96, 0x00}, - {0x74, 0x94, 0xc2, 0xe7, 0x10, 0x4b, 0x08, 0x79, 0x00}, - {0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a, 0x00}, - {0xd6, 0xa1, 0x41, 0xa7, 0xec, 0x3c, 0x38, 0xdf, - 0xbd, 0x61, 0x5a, 0x11, 0x62, 0xe1, 0xc7, 0xba, - 0x36, 0xb6, 0x78, 0x58, 0x00}, - {0x66, 0xa0, 0x94, 0x9f, 0x8a, 0xf7, 0xd6, 0x89, - 0x1f, 0x7f, 0x83, 0x2b, 0xa8, 0x33, 0xc0, 0x0c, - 0x89, 0x2e, 0xbe, 0x30, 0x14, 0x3c, 0xe2, 0x87, - 0x40, 0x01, 0x1e, 0xcf, 0x00}, - {0xd6, 0xa1, 0x41, 0xa7, 0xec, 0x3c, 0x38, 0xdf, 0xbd, 0x61, 0x00}, - {0}, -}; - -int main(int argc, char *argv[]) -{ - int i, err = 0; - int j; - unsigned char *p; - RC4_KEY key; - unsigned char obuf[512]; - -# if !defined(OPENSSL_PIC) - void OPENSSL_cpuid_setup(void); - - OPENSSL_cpuid_setup(); -# endif - - for (i = 0; i < 6; i++) { - RC4_set_key(&key, keys[i][0], &(keys[i][1])); - memset(obuf, 0x00, sizeof(obuf)); - RC4(&key, data_len[i], &(data[i][0]), obuf); - if (memcmp(obuf, output[i], data_len[i] + 1) != 0) { - printf("error calculating RC4\n"); - printf("output:"); - for (j = 0; j < data_len[i] + 1; j++) - printf(" %02x", obuf[j]); - printf("\n"); - printf("expect:"); - p = &(output[i][0]); - for (j = 0; j < data_len[i] + 1; j++) - printf(" %02x", *(p++)); - printf("\n"); - err++; - } else - printf("test %d ok\n", i); - } - printf("test end processing "); - for (i = 0; i < data_len[3]; i++) { - RC4_set_key(&key, keys[3][0], &(keys[3][1])); - memset(obuf, 0x00, sizeof(obuf)); - RC4(&key, i, &(data[3][0]), obuf); - if ((memcmp(obuf, output[3], i) != 0) || (obuf[i] != 0)) { - printf("error in RC4 length processing\n"); - printf("output:"); - for (j = 0; j < i + 1; j++) - printf(" %02x", obuf[j]); - printf("\n"); - printf("expect:"); - p = &(output[3][0]); - for (j = 0; j < i; j++) - printf(" %02x", *(p++)); - printf(" 00\n"); - err++; - } else { - printf("."); - fflush(stdout); - } - } - printf("done\n"); - printf("test multi-call "); - for (i = 0; i < data_len[3]; i++) { - RC4_set_key(&key, keys[3][0], &(keys[3][1])); - memset(obuf, 0x00, sizeof(obuf)); - RC4(&key, i, &(data[3][0]), obuf); - RC4(&key, data_len[3] - i, &(data[3][i]), &(obuf[i])); - if (memcmp(obuf, output[3], data_len[3] + 1) != 0) { - printf("error in RC4 multi-call processing\n"); - printf("output:"); - for (j = 0; j < data_len[3] + 1; j++) - printf(" %02x", obuf[j]); - printf("\n"); - printf("expect:"); - p = &(output[3][0]); - for (j = 0; j < data_len[3] + 1; j++) - printf(" %02x", *(p++)); - err++; - } else { - printf("."); - fflush(stdout); - } - } - printf("done\n"); - printf("bulk test "); - { - unsigned char buf[513]; - SHA_CTX c; - unsigned char md[SHA_DIGEST_LENGTH]; - static unsigned char expected[] = { - 0xa4, 0x7b, 0xcc, 0x00, 0x3d, 0xd0, 0xbd, 0xe1, 0xac, 0x5f, - 0x12, 0x1e, 0x45, 0xbc, 0xfb, 0x1a, 0xa1, 0xf2, 0x7f, 0xc5 - }; - - RC4_set_key(&key, keys[0][0], &(keys[3][1])); - memset(buf, '\0', sizeof(buf)); - SHA1_Init(&c); - for (i = 0; i < 2571; i++) { - RC4(&key, sizeof(buf), buf, buf); - SHA1_Update(&c, buf, sizeof(buf)); - } - SHA1_Final(md, &c); - - if (memcmp(md, expected, sizeof(md))) { - printf("error in RC4 bulk test\n"); - printf("output:"); - for (j = 0; j < (int)sizeof(md); j++) - printf(" %02x", md[j]); - printf("\n"); - printf("expect:"); - for (j = 0; j < (int)sizeof(md); j++) - printf(" %02x", expected[j]); - printf("\n"); - err++; - } else - printf("ok\n"); - } -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EXIT(err); - return (0); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/rc5/asm/rc5-586.pl b/drivers/builtin_openssl2/crypto/rc5/asm/rc5-586.pl deleted file mode 100644 index 61ac6effc6..0000000000 --- a/drivers/builtin_openssl2/crypto/rc5/asm/rc5-586.pl +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/local/bin/perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; -require "cbc.pl"; - -&asm_init($ARGV[0],"rc5-586.pl"); - -$RC5_MAX_ROUNDS=16; -$RC5_32_OFF=($RC5_MAX_ROUNDS+2)*4; -$A="edi"; -$B="esi"; -$S="ebp"; -$tmp1="eax"; -$r="ebx"; -$tmpc="ecx"; -$tmp4="edx"; - -&RC5_32_encrypt("RC5_32_encrypt",1); -&RC5_32_encrypt("RC5_32_decrypt",0); -&cbc("RC5_32_cbc_encrypt","RC5_32_encrypt","RC5_32_decrypt",0,4,5,3,-1,-1); -&asm_finish(); - -sub RC5_32_encrypt - { - local($name,$enc)=@_; - - &function_begin_B($name,""); - - &comment(""); - - &push("ebp"); - &push("esi"); - &push("edi"); - &mov($tmp4,&wparam(0)); - &mov($S,&wparam(1)); - - &comment("Load the 2 words"); - &mov($A,&DWP(0,$tmp4,"",0)); - &mov($B,&DWP(4,$tmp4,"",0)); - - &push($r); - &mov($r, &DWP(0,$S,"",0)); - - # encrypting part - - if ($enc) - { - &add($A, &DWP(4+0,$S,"",0)); - &add($B, &DWP(4+4,$S,"",0)); - - for ($i=0; $i<$RC5_MAX_ROUNDS; $i++) - { - &xor($A, $B); - &mov($tmp1, &DWP(12+$i*8,$S,"",0)); - &mov($tmpc, $B); - &rotl($A, &LB("ecx")); - &add($A, $tmp1); - - &xor($B, $A); - &mov($tmp1, &DWP(16+$i*8,$S,"",0)); - &mov($tmpc, $A); - &rotl($B, &LB("ecx")); - &add($B, $tmp1); - if (($i == 7) || ($i == 11)) - { - &cmp($r, $i+1); - &je(&label("rc5_exit")); - } - } - } - else - { - &cmp($r, 12); - &je(&label("rc5_dec_12")); - &cmp($r, 8); - &je(&label("rc5_dec_8")); - for ($i=$RC5_MAX_ROUNDS; $i > 0; $i--) - { - &set_label("rc5_dec_$i") if ($i == 12) || ($i == 8); - &mov($tmp1, &DWP($i*8+8,$S,"",0)); - &sub($B, $tmp1); - &mov($tmpc, $A); - &rotr($B, &LB("ecx")); - &xor($B, $A); - - &mov($tmp1, &DWP($i*8+4,$S,"",0)); - &sub($A, $tmp1); - &mov($tmpc, $B); - &rotr($A, &LB("ecx")); - &xor($A, $B); - } - &sub($B, &DWP(4+4,$S,"",0)); - &sub($A, &DWP(4+0,$S,"",0)); - } - - &set_label("rc5_exit"); - &mov(&DWP(0,$tmp4,"",0),$A); - &mov(&DWP(4,$tmp4,"",0),$B); - - &pop("ebx"); - &pop("edi"); - &pop("esi"); - &pop("ebp"); - &ret(); - &function_end_B($name); - } - - diff --git a/drivers/builtin_openssl2/crypto/rc5/rc5_locl.h b/drivers/builtin_openssl2/crypto/rc5/rc5_locl.h index 1e83f19fec..ee757e6477 100644 --- a/drivers/builtin_openssl2/crypto/rc5/rc5_locl.h +++ b/drivers/builtin_openssl2/crypto/rc5/rc5_locl.h @@ -146,9 +146,12 @@ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ *((c)++)=(unsigned char)(((l) )&0xff)) -#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC) +#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) # define ROTATE_l32(a,n) _lrotl(a,n) # define ROTATE_r32(a,n) _lrotr(a,n) +#elif defined(__ICC) +# define ROTATE_l32(a,n) _rotl(a,n) +# define ROTATE_r32(a,n) _rotr(a,n) #elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE_l32(a,n) ({ register unsigned int ret; \ diff --git a/drivers/builtin_openssl2/crypto/rc5/rc5test.c b/drivers/builtin_openssl2/crypto/rc5/rc5test.c deleted file mode 100644 index b29a436cec..0000000000 --- a/drivers/builtin_openssl2/crypto/rc5/rc5test.c +++ /dev/null @@ -1,381 +0,0 @@ -/* crypto/rc5/rc5test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -/* - * This has been a quickly hacked 'ideatest.c'. When I add tests for other - * RC5 modes, more of the code will be uncommented. - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_RC5 -int main(int argc, char *argv[]) -{ - printf("No RC5 support\n"); - return (0); -} -#else -# include - -static unsigned char RC5key[5][16] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x91, 0x5f, 0x46, 0x19, 0xbe, 0x41, 0xb2, 0x51, - 0x63, 0x55, 0xa5, 0x01, 0x10, 0xa9, 0xce, 0x91}, - {0x78, 0x33, 0x48, 0xe7, 0x5a, 0xeb, 0x0f, 0x2f, - 0xd7, 0xb1, 0x69, 0xbb, 0x8d, 0xc1, 0x67, 0x87}, - {0xdc, 0x49, 0xdb, 0x13, 0x75, 0xa5, 0x58, 0x4f, - 0x64, 0x85, 0xb4, 0x13, 0xb5, 0xf1, 0x2b, 0xaf}, - {0x52, 0x69, 0xf1, 0x49, 0xd4, 0x1b, 0xa0, 0x15, - 0x24, 0x97, 0x57, 0x4d, 0x7f, 0x15, 0x31, 0x25}, -}; - -static unsigned char RC5plain[5][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x21, 0xA5, 0xDB, 0xEE, 0x15, 0x4B, 0x8F, 0x6D}, - {0xF7, 0xC0, 0x13, 0xAC, 0x5B, 0x2B, 0x89, 0x52}, - {0x2F, 0x42, 0xB3, 0xB7, 0x03, 0x69, 0xFC, 0x92}, - {0x65, 0xC1, 0x78, 0xB2, 0x84, 0xD1, 0x97, 0xCC}, -}; - -static unsigned char RC5cipher[5][8] = { - {0x21, 0xA5, 0xDB, 0xEE, 0x15, 0x4B, 0x8F, 0x6D}, - {0xF7, 0xC0, 0x13, 0xAC, 0x5B, 0x2B, 0x89, 0x52}, - {0x2F, 0x42, 0xB3, 0xB7, 0x03, 0x69, 0xFC, 0x92}, - {0x65, 0xC1, 0x78, 0xB2, 0x84, 0xD1, 0x97, 0xCC}, - {0xEB, 0x44, 0xE4, 0x15, 0xDA, 0x31, 0x98, 0x24}, -}; - -# define RC5_CBC_NUM 27 -static unsigned char rc5_cbc_cipher[RC5_CBC_NUM][8] = { - {0x7a, 0x7b, 0xba, 0x4d, 0x79, 0x11, 0x1d, 0x1e}, - {0x79, 0x7b, 0xba, 0x4d, 0x78, 0x11, 0x1d, 0x1e}, - {0x7a, 0x7b, 0xba, 0x4d, 0x79, 0x11, 0x1d, 0x1f}, - {0x7a, 0x7b, 0xba, 0x4d, 0x79, 0x11, 0x1d, 0x1f}, - {0x8b, 0x9d, 0xed, 0x91, 0xce, 0x77, 0x94, 0xa6}, - {0x2f, 0x75, 0x9f, 0xe7, 0xad, 0x86, 0xa3, 0x78}, - {0xdc, 0xa2, 0x69, 0x4b, 0xf4, 0x0e, 0x07, 0x88}, - {0xdc, 0xa2, 0x69, 0x4b, 0xf4, 0x0e, 0x07, 0x88}, - {0xdc, 0xfe, 0x09, 0x85, 0x77, 0xec, 0xa5, 0xff}, - {0x96, 0x46, 0xfb, 0x77, 0x63, 0x8f, 0x9c, 0xa8}, - {0xb2, 0xb3, 0x20, 0x9d, 0xb6, 0x59, 0x4d, 0xa4}, - {0x54, 0x5f, 0x7f, 0x32, 0xa5, 0xfc, 0x38, 0x36}, - {0x82, 0x85, 0xe7, 0xc1, 0xb5, 0xbc, 0x74, 0x02}, - {0xfc, 0x58, 0x6f, 0x92, 0xf7, 0x08, 0x09, 0x34}, - {0xcf, 0x27, 0x0e, 0xf9, 0x71, 0x7f, 0xf7, 0xc4}, - {0xe4, 0x93, 0xf1, 0xc1, 0xbb, 0x4d, 0x6e, 0x8c}, - {0x5c, 0x4c, 0x04, 0x1e, 0x0f, 0x21, 0x7a, 0xc3}, - {0x92, 0x1f, 0x12, 0x48, 0x53, 0x73, 0xb4, 0xf7}, - {0x5b, 0xa0, 0xca, 0x6b, 0xbe, 0x7f, 0x5f, 0xad}, - {0xc5, 0x33, 0x77, 0x1c, 0xd0, 0x11, 0x0e, 0x63}, - {0x29, 0x4d, 0xdb, 0x46, 0xb3, 0x27, 0x8d, 0x60}, - {0xda, 0xd6, 0xbd, 0xa9, 0xdf, 0xe8, 0xf7, 0xe8}, - {0x97, 0xe0, 0x78, 0x78, 0x37, 0xed, 0x31, 0x7f}, - {0x78, 0x75, 0xdb, 0xf6, 0x73, 0x8c, 0x64, 0x78}, - {0x8f, 0x34, 0xc3, 0xc6, 0x81, 0xc9, 0x96, 0x95}, - {0x7c, 0xb3, 0xf1, 0xdf, 0x34, 0xf9, 0x48, 0x11}, - {0x7f, 0xd1, 0xa0, 0x23, 0xa5, 0xbb, 0xa2, 0x17}, -}; - -static unsigned char rc5_cbc_key[RC5_CBC_NUM][17] = { - {1, 0x00}, - {1, 0x00}, - {1, 0x00}, - {1, 0x00}, - {1, 0x00}, - {1, 0x11}, - {1, 0x00}, - {4, 0x00, 0x00, 0x00, 0x00}, - {1, 0x00}, - {1, 0x00}, - {1, 0x00}, - {1, 0x00}, - {4, 0x01, 0x02, 0x03, 0x04}, - {4, 0x01, 0x02, 0x03, 0x04}, - {4, 0x01, 0x02, 0x03, 0x04}, - {8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {8, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {16, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {5, 0x01, 0x02, 0x03, 0x04, 0x05}, - {5, 0x01, 0x02, 0x03, 0x04, 0x05}, - {5, 0x01, 0x02, 0x03, 0x04, 0x05}, - {5, 0x01, 0x02, 0x03, 0x04, 0x05}, - {5, 0x01, 0x02, 0x03, 0x04, 0x05}, -}; - -static unsigned char rc5_cbc_plain[RC5_CBC_NUM][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x01}, -}; - -static int rc5_cbc_rounds[RC5_CBC_NUM] = { - 0, 0, 0, 0, 0, 1, 2, 2, - 8, 8, 12, 16, 8, 12, 16, 12, - 8, 12, 16, 8, 12, 16, 12, 8, - 8, 8, 8, -}; - -static unsigned char rc5_cbc_iv[RC5_CBC_NUM][8] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x78, 0x75, 0xdb, 0xf6, 0x73, 0x8c, 0x64, 0x78}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x7c, 0xb3, 0xf1, 0xdf, 0x34, 0xf9, 0x48, 0x11}, -}; - -int main(int argc, char *argv[]) -{ - int i, n, err = 0; - RC5_32_KEY key; - unsigned char buf[8], buf2[8], ivb[8]; - - for (n = 0; n < 5; n++) { - RC5_32_set_key(&key, 16, &(RC5key[n][0]), 12); - - RC5_32_ecb_encrypt(&(RC5plain[n][0]), buf, &key, RC5_ENCRYPT); - if (memcmp(&(RC5cipher[n][0]), buf, 8) != 0) { - printf("ecb RC5 error encrypting (%d)\n", n + 1); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", RC5cipher[n][i]); - err = 20; - printf("\n"); - } - - RC5_32_ecb_encrypt(buf, buf2, &key, RC5_DECRYPT); - if (memcmp(&(RC5plain[n][0]), buf2, 8) != 0) { - printf("ecb RC5 error decrypting (%d)\n", n + 1); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf2[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", RC5plain[n][i]); - printf("\n"); - err = 3; - } - } - if (err == 0) - printf("ecb RC5 ok\n"); - - for (n = 0; n < RC5_CBC_NUM; n++) { - i = rc5_cbc_rounds[n]; - if (i < 8) - continue; - - RC5_32_set_key(&key, rc5_cbc_key[n][0], &(rc5_cbc_key[n][1]), i); - - memcpy(ivb, &(rc5_cbc_iv[n][0]), 8); - RC5_32_cbc_encrypt(&(rc5_cbc_plain[n][0]), buf, 8, - &key, &(ivb[0]), RC5_ENCRYPT); - - if (memcmp(&(rc5_cbc_cipher[n][0]), buf, 8) != 0) { - printf("cbc RC5 error encrypting (%d)\n", n + 1); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", rc5_cbc_cipher[n][i]); - err = 30; - printf("\n"); - } - - memcpy(ivb, &(rc5_cbc_iv[n][0]), 8); - RC5_32_cbc_encrypt(buf, buf2, 8, &key, &(ivb[0]), RC5_DECRYPT); - if (memcmp(&(rc5_cbc_plain[n][0]), buf2, 8) != 0) { - printf("cbc RC5 error decrypting (%d)\n", n + 1); - printf("got :"); - for (i = 0; i < 8; i++) - printf("%02X ", buf2[i]); - printf("\n"); - printf("expected:"); - for (i = 0; i < 8; i++) - printf("%02X ", rc5_cbc_plain[n][i]); - printf("\n"); - err = 3; - } - } - if (err == 0) - printf("cbc RC5 ok\n"); - - EXIT(err); - return (err); -} - -# ifdef undef -static int cfb64_test(unsigned char *cfb_cipher) -{ - IDEA_KEY_SCHEDULE eks, dks; - int err = 0, i, n; - - idea_set_encrypt_key(cfb_key, &eks); - idea_set_decrypt_key(&eks, &dks); - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(plain, cfb_buf1, (long)12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - idea_cfb64_encrypt(&(plain[12]), &(cfb_buf1[12]), - (long)CFB_TEST_SIZE - 12, &eks, - cfb_tmp, &n, IDEA_ENCRYPT); - if (memcmp(cfb_cipher, cfb_buf1, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb64_encrypt encrypt error\n"); - for (i = 0; i < CFB_TEST_SIZE; i += 8) - printf("%s\n", pt(&(cfb_buf1[i]))); - } - memcpy(cfb_tmp, cfb_iv, 8); - n = 0; - idea_cfb64_encrypt(cfb_buf1, cfb_buf2, (long)17, &eks, - cfb_tmp, &n, IDEA_DECRYPT); - idea_cfb64_encrypt(&(cfb_buf1[17]), &(cfb_buf2[17]), - (long)CFB_TEST_SIZE - 17, &dks, - cfb_tmp, &n, IDEA_DECRYPT); - if (memcmp(plain, cfb_buf2, CFB_TEST_SIZE) != 0) { - err = 1; - printf("idea_cfb_encrypt decrypt error\n"); - for (i = 0; i < 24; i += 8) - printf("%s\n", pt(&(cfb_buf2[i]))); - } - return (err); -} - -static char *pt(unsigned char *p) -{ - static char bufs[10][20]; - static int bnum = 0; - char *ret; - int i; - static char *f = "0123456789ABCDEF"; - - ret = &(bufs[bnum++][0]); - bnum %= 10; - for (i = 0; i < 8; i++) { - ret[i * 2] = f[(p[i] >> 4) & 0xf]; - ret[i * 2 + 1] = f[p[i] & 0xf]; - } - ret[16] = '\0'; - return (ret); -} - -# endif -#endif diff --git a/drivers/builtin_openssl2/crypto/ripemd/asm/rmd-586.pl b/drivers/builtin_openssl2/crypto/ripemd/asm/rmd-586.pl deleted file mode 100644 index e8b2bc2db2..0000000000 --- a/drivers/builtin_openssl2/crypto/ripemd/asm/rmd-586.pl +++ /dev/null @@ -1,591 +0,0 @@ -#!/usr/local/bin/perl - -# Normal is the -# ripemd160_block_asm_data_order(RIPEMD160_CTX *c, ULONG *X,int blocks); - -$normal=0; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$A="ecx"; -$B="esi"; -$C="edi"; -$D="ebx"; -$E="ebp"; -$tmp1="eax"; -$tmp2="edx"; - -$KL1=0x5A827999; -$KL2=0x6ED9EBA1; -$KL3=0x8F1BBCDC; -$KL4=0xA953FD4E; -$KR0=0x50A28BE6; -$KR1=0x5C4DD124; -$KR2=0x6D703EF3; -$KR3=0x7A6D76E9; - - -@wl=( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, - 7, 4,13, 1,10, 6,15, 3,12, 0, 9, 5, 2,14,11, 8, - 3,10,14, 4, 9,15, 8, 1, 2, 7, 0, 6,13,11, 5,12, - 1, 9,11,10, 0, 8,12, 4,13, 3, 7,15,14, 5, 6, 2, - 4, 0, 5, 9, 7,12, 2,10,14, 1, 3, 8,11, 6,15,13, - ); - -@wr=( 5,14, 7, 0, 9, 2,11, 4,13, 6,15, 8, 1,10, 3,12, - 6,11, 3, 7, 0,13, 5,10,14,15, 8,12, 4, 9, 1, 2, - 15, 5, 1, 3, 7,14, 6, 9,11, 8,12, 2,10, 0, 4,13, - 8, 6, 4, 1, 3,11,15, 0, 5,12, 2,13, 9, 7,10,14, - 12,15,10, 4, 1, 5, 8, 7, 6, 2,13,14, 0, 3, 9,11, - ); - -@sl=( 11,14,15,12, 5, 8, 7, 9,11,13,14,15, 6, 7, 9, 8, - 7, 6, 8,13,11, 9, 7,15, 7,12,15, 9,11, 7,13,12, - 11,13, 6, 7,14, 9,13,15,14, 8,13, 6, 5,12, 7, 5, - 11,12,14,15,14,15, 9, 8, 9,14, 5, 6, 8, 6, 5,12, - 9,15, 5,11, 6, 8,13,12, 5,12,13,14,11, 8, 5, 6, - ); - -@sr=( 8, 9, 9,11,13,15,15, 5, 7, 7, 8,11,14,14,12, 6, - 9,13,15, 7,12, 8, 9,11, 7, 7,12, 7, 6,15,13,11, - 9, 7,15,11, 8, 6, 6,14,12,13, 5,14,13,13, 7, 5, - 15, 5, 8,11,14,14, 6,14, 6, 9,12, 9,12, 5,15, 8, - 8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11, - ); - -&ripemd160_block("ripemd160_block_asm_data_order"); -&asm_finish(); - -sub Xv - { - local($n)=@_; - return(&swtmp($n)); - # tmp on stack - } - -sub Np - { - local($p)=@_; - local(%n)=($A,$E,$B,$A,$C,$B,$D,$C,$E,$D); - return($n{$p}); - } - -sub RIP1 - { - local($a,$b,$c,$d,$e,$pos,$s,$o,$pos2)=@_; - - &comment($p++); - if ($p & 1) - { - #&mov($tmp1, $c) if $o == -1; - &xor($tmp1, $d) if $o == -1; - &mov($tmp2, &Xv($pos)); - &xor($tmp1, $b); - &add($a, $tmp2); - &rotl($c, 10); - &add($a, $tmp1); - &mov($tmp1, &Np($c)); # NEXT - # XXX - &rotl($a, $s); - &add($a, $e); - } - else - { - &xor($tmp1, $d); - &mov($tmp2, &Xv($pos)); - &xor($tmp1, $b); - &add($a, $tmp1); - &mov($tmp1, &Np($c)) if $o <= 0; - &mov($tmp1, -1) if $o == 1; - # XXX if $o == 2; - &rotl($c, 10); - &add($a, $tmp2); - &xor($tmp1, &Np($d)) if $o <= 0; - &mov($tmp2, &Xv($pos2)) if $o == 1; - &mov($tmp2, &wparam(0)) if $o == 2; - &rotl($a, $s); - &add($a, $e); - } - } - -sub RIP2 - { - local($a,$b,$c,$d,$e,$pos,$pos2,$s,$K,$o)=@_; - -# XXXXXX - &comment($p++); - if ($p & 1) - { -# &mov($tmp2, &Xv($pos)) if $o < -1; -# &mov($tmp1, -1) if $o < -1; - - &add($a, $tmp2); - &mov($tmp2, $c); - &sub($tmp1, $b); - &and($tmp2, $b); - &and($tmp1, $d); - &or($tmp2, $tmp1); - &mov($tmp1, &Xv($pos2)) if $o <= 0; # XXXXXXXXXXXXXX - # XXX - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2,1)); - &mov($tmp2, -1) if $o <= 0; - # XXX - &rotl($a, $s); - &add($a, $e); - } - else - { - # XXX - &add($a, $tmp1); - &mov($tmp1, $c); - &sub($tmp2, $b); - &and($tmp1, $b); - &and($tmp2, $d); - if ($o != 2) - { - &or($tmp1, $tmp2); - &mov($tmp2, &Xv($pos2)) if $o <= 0; - &mov($tmp2, -1) if $o == 1; - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp1,1)); - &mov($tmp1, -1) if $o <= 0; - &sub($tmp2, &Np($c)) if $o == 1; - } else { - &or($tmp2, $tmp1); - &mov($tmp1, &Np($c)); - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2,1)); - &xor($tmp1, &Np($d)); - } - &rotl($a, $s); - &add($a, $e); - } - } - -sub RIP3 - { - local($a,$b,$c,$d,$e,$pos,$s,$K,$o,$pos2)=@_; - - &comment($p++); - if ($p & 1) - { -# &mov($tmp2, -1) if $o < -1; -# &sub($tmp2, $c) if $o < -1; - &mov($tmp1, &Xv($pos)); - &or($tmp2, $b); - &add($a, $tmp1); - &xor($tmp2, $d); - &mov($tmp1, -1) if $o <= 0; # NEXT - # XXX - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2,1)); - &sub($tmp1, &Np($c)) if $o <= 0; # NEXT - # XXX - &rotl($a, $s); - &add($a, $e); - } - else - { - &mov($tmp2, &Xv($pos)); - &or($tmp1, $b); - &add($a, $tmp2); - &xor($tmp1, $d); - &mov($tmp2, -1) if $o <= 0; # NEXT - &mov($tmp2, -1) if $o == 1; - &mov($tmp2, &Xv($pos2)) if $o == 2; - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp1,1)); - &sub($tmp2, &Np($c)) if $o <= 0; # NEXT - &mov($tmp1, &Np($d)) if $o == 1; - &mov($tmp1, -1) if $o == 2; - &rotl($a, $s); - &add($a, $e); - } - } - -sub RIP4 - { - local($a,$b,$c,$d,$e,$pos,$s,$K,$o)=@_; - - &comment($p++); - if ($p & 1) - { -# &mov($tmp2, -1) if $o == -2; -# &mov($tmp1, $d) if $o == -2; - &sub($tmp2, $d); - &and($tmp1, $b); - &and($tmp2, $c); - &or($tmp2, $tmp1); - &mov($tmp1, &Xv($pos)); - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2)); - &mov($tmp2, -1) unless $o > 0; # NEXT - # XXX - &add($a, $tmp1); - &mov($tmp1, &Np($d)) unless $o > 0; # NEXT - # XXX - &rotl($a, $s); - &add($a, $e); - } - else - { - &sub($tmp2, $d); - &and($tmp1, $b); - &and($tmp2, $c); - &or($tmp2, $tmp1); - &mov($tmp1, &Xv($pos)); - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2)); - &mov($tmp2, -1) if $o == 0; # NEXT - &mov($tmp2, -1) if $o == 1; - &mov($tmp2, -1) if $o == 2; - # XXX - &add($a, $tmp1); - &mov($tmp1, &Np($d)) if $o == 0; # NEXT - &sub($tmp2, &Np($d)) if $o == 1; - &sub($tmp2, &Np($c)) if $o == 2; - # XXX - &rotl($a, $s); - &add($a, $e); - } - } - -sub RIP5 - { - local($a,$b,$c,$d,$e,$pos,$s,$K,$o)=@_; - - &comment($p++); - if ($p & 1) - { - &mov($tmp2, -1) if $o == -2; - &sub($tmp2, $d) if $o == -2; - &mov($tmp1, &Xv($pos)); - &or($tmp2, $c); - &add($a, $tmp1); - &xor($tmp2, $b); - &mov($tmp1, -1) if $o <= 0; - # XXX - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp2,1)); - &sub($tmp1, &Np($d)) if $o <= 0; - # XXX - &rotl($a, $s); - &add($a, $e); - } - else - { - &mov($tmp2, &Xv($pos)); - &or($tmp1, $c); - &add($a, $tmp2); - &xor($tmp1, $b); - &mov($tmp2, -1) if $o <= 0; - &mov($tmp2, &wparam(0)) if $o == 1; # Middle code - &mov($tmp2, -1) if $o == 2; - &rotl($c, 10); - &lea($a, &DWP($K,$a,$tmp1,1)); - &sub($tmp2, &Np($d)) if $o <= 0; - &mov(&swtmp(16), $A) if $o == 1; - &mov($tmp1, &Np($d)) if $o == 2; - &rotl($a, $s); - &add($a, $e); - } - } - -sub ripemd160_block - { - local($name)=@_; - - &function_begin_B($name,"",3); - - # parameter 1 is the RIPEMD160_CTX structure. - # A 0 - # B 4 - # C 8 - # D 12 - # E 16 - - &mov($tmp2, &wparam(0)); - &mov($tmp1, &wparam(1)); - &push("esi"); - &mov($A, &DWP( 0,$tmp2,"",0)); - &push("edi"); - &mov($B, &DWP( 4,$tmp2,"",0)); - &push("ebp"); - &mov($C, &DWP( 8,$tmp2,"",0)); - &push("ebx"); - &stack_push(16+5+6); - # Special comment about the figure of 6. - # Idea is to pad the current frame so - # that the top of the stack gets fairly - # aligned. Well, as you realize it would - # always depend on how the frame below is - # aligned. The good news are that gcc-2.95 - # and later does keep first argument at - # least double-wise aligned. - # - - &set_label("start") unless $normal; - &comment(""); - - # &mov($tmp1, &wparam(1)); # Done at end of loop - # &mov($tmp2, &wparam(0)); # Done at end of loop - - for ($z=0; $z<16; $z+=2) - { - &mov($D, &DWP( $z*4,$tmp1,"",0)); - &mov($E, &DWP( ($z+1)*4,$tmp1,"",0)); - &mov(&swtmp($z), $D); - &mov(&swtmp($z+1), $E); - } - &mov($tmp1, $C); - &mov($D, &DWP(12,$tmp2,"",0)); - &mov($E, &DWP(16,$tmp2,"",0)); - - &RIP1($A,$B,$C,$D,$E,$wl[ 0],$sl[ 0],-1); - &RIP1($E,$A,$B,$C,$D,$wl[ 1],$sl[ 1],0); - &RIP1($D,$E,$A,$B,$C,$wl[ 2],$sl[ 2],0); - &RIP1($C,$D,$E,$A,$B,$wl[ 3],$sl[ 3],0); - &RIP1($B,$C,$D,$E,$A,$wl[ 4],$sl[ 4],0); - &RIP1($A,$B,$C,$D,$E,$wl[ 5],$sl[ 5],0); - &RIP1($E,$A,$B,$C,$D,$wl[ 6],$sl[ 6],0); - &RIP1($D,$E,$A,$B,$C,$wl[ 7],$sl[ 7],0); - &RIP1($C,$D,$E,$A,$B,$wl[ 8],$sl[ 8],0); - &RIP1($B,$C,$D,$E,$A,$wl[ 9],$sl[ 9],0); - &RIP1($A,$B,$C,$D,$E,$wl[10],$sl[10],0); - &RIP1($E,$A,$B,$C,$D,$wl[11],$sl[11],0); - &RIP1($D,$E,$A,$B,$C,$wl[12],$sl[12],0); - &RIP1($C,$D,$E,$A,$B,$wl[13],$sl[13],0); - &RIP1($B,$C,$D,$E,$A,$wl[14],$sl[14],0); - &RIP1($A,$B,$C,$D,$E,$wl[15],$sl[15],1,$wl[16]); - - &RIP2($E,$A,$B,$C,$D,$wl[16],$wl[17],$sl[16],$KL1,-1); - &RIP2($D,$E,$A,$B,$C,$wl[17],$wl[18],$sl[17],$KL1,0); - &RIP2($C,$D,$E,$A,$B,$wl[18],$wl[19],$sl[18],$KL1,0); - &RIP2($B,$C,$D,$E,$A,$wl[19],$wl[20],$sl[19],$KL1,0); - &RIP2($A,$B,$C,$D,$E,$wl[20],$wl[21],$sl[20],$KL1,0); - &RIP2($E,$A,$B,$C,$D,$wl[21],$wl[22],$sl[21],$KL1,0); - &RIP2($D,$E,$A,$B,$C,$wl[22],$wl[23],$sl[22],$KL1,0); - &RIP2($C,$D,$E,$A,$B,$wl[23],$wl[24],$sl[23],$KL1,0); - &RIP2($B,$C,$D,$E,$A,$wl[24],$wl[25],$sl[24],$KL1,0); - &RIP2($A,$B,$C,$D,$E,$wl[25],$wl[26],$sl[25],$KL1,0); - &RIP2($E,$A,$B,$C,$D,$wl[26],$wl[27],$sl[26],$KL1,0); - &RIP2($D,$E,$A,$B,$C,$wl[27],$wl[28],$sl[27],$KL1,0); - &RIP2($C,$D,$E,$A,$B,$wl[28],$wl[29],$sl[28],$KL1,0); - &RIP2($B,$C,$D,$E,$A,$wl[29],$wl[30],$sl[29],$KL1,0); - &RIP2($A,$B,$C,$D,$E,$wl[30],$wl[31],$sl[30],$KL1,0); - &RIP2($E,$A,$B,$C,$D,$wl[31],$wl[32],$sl[31],$KL1,1); - - &RIP3($D,$E,$A,$B,$C,$wl[32],$sl[32],$KL2,-1); - &RIP3($C,$D,$E,$A,$B,$wl[33],$sl[33],$KL2,0); - &RIP3($B,$C,$D,$E,$A,$wl[34],$sl[34],$KL2,0); - &RIP3($A,$B,$C,$D,$E,$wl[35],$sl[35],$KL2,0); - &RIP3($E,$A,$B,$C,$D,$wl[36],$sl[36],$KL2,0); - &RIP3($D,$E,$A,$B,$C,$wl[37],$sl[37],$KL2,0); - &RIP3($C,$D,$E,$A,$B,$wl[38],$sl[38],$KL2,0); - &RIP3($B,$C,$D,$E,$A,$wl[39],$sl[39],$KL2,0); - &RIP3($A,$B,$C,$D,$E,$wl[40],$sl[40],$KL2,0); - &RIP3($E,$A,$B,$C,$D,$wl[41],$sl[41],$KL2,0); - &RIP3($D,$E,$A,$B,$C,$wl[42],$sl[42],$KL2,0); - &RIP3($C,$D,$E,$A,$B,$wl[43],$sl[43],$KL2,0); - &RIP3($B,$C,$D,$E,$A,$wl[44],$sl[44],$KL2,0); - &RIP3($A,$B,$C,$D,$E,$wl[45],$sl[45],$KL2,0); - &RIP3($E,$A,$B,$C,$D,$wl[46],$sl[46],$KL2,0); - &RIP3($D,$E,$A,$B,$C,$wl[47],$sl[47],$KL2,1); - - &RIP4($C,$D,$E,$A,$B,$wl[48],$sl[48],$KL3,-1); - &RIP4($B,$C,$D,$E,$A,$wl[49],$sl[49],$KL3,0); - &RIP4($A,$B,$C,$D,$E,$wl[50],$sl[50],$KL3,0); - &RIP4($E,$A,$B,$C,$D,$wl[51],$sl[51],$KL3,0); - &RIP4($D,$E,$A,$B,$C,$wl[52],$sl[52],$KL3,0); - &RIP4($C,$D,$E,$A,$B,$wl[53],$sl[53],$KL3,0); - &RIP4($B,$C,$D,$E,$A,$wl[54],$sl[54],$KL3,0); - &RIP4($A,$B,$C,$D,$E,$wl[55],$sl[55],$KL3,0); - &RIP4($E,$A,$B,$C,$D,$wl[56],$sl[56],$KL3,0); - &RIP4($D,$E,$A,$B,$C,$wl[57],$sl[57],$KL3,0); - &RIP4($C,$D,$E,$A,$B,$wl[58],$sl[58],$KL3,0); - &RIP4($B,$C,$D,$E,$A,$wl[59],$sl[59],$KL3,0); - &RIP4($A,$B,$C,$D,$E,$wl[60],$sl[60],$KL3,0); - &RIP4($E,$A,$B,$C,$D,$wl[61],$sl[61],$KL3,0); - &RIP4($D,$E,$A,$B,$C,$wl[62],$sl[62],$KL3,0); - &RIP4($C,$D,$E,$A,$B,$wl[63],$sl[63],$KL3,1); - - &RIP5($B,$C,$D,$E,$A,$wl[64],$sl[64],$KL4,-1); - &RIP5($A,$B,$C,$D,$E,$wl[65],$sl[65],$KL4,0); - &RIP5($E,$A,$B,$C,$D,$wl[66],$sl[66],$KL4,0); - &RIP5($D,$E,$A,$B,$C,$wl[67],$sl[67],$KL4,0); - &RIP5($C,$D,$E,$A,$B,$wl[68],$sl[68],$KL4,0); - &RIP5($B,$C,$D,$E,$A,$wl[69],$sl[69],$KL4,0); - &RIP5($A,$B,$C,$D,$E,$wl[70],$sl[70],$KL4,0); - &RIP5($E,$A,$B,$C,$D,$wl[71],$sl[71],$KL4,0); - &RIP5($D,$E,$A,$B,$C,$wl[72],$sl[72],$KL4,0); - &RIP5($C,$D,$E,$A,$B,$wl[73],$sl[73],$KL4,0); - &RIP5($B,$C,$D,$E,$A,$wl[74],$sl[74],$KL4,0); - &RIP5($A,$B,$C,$D,$E,$wl[75],$sl[75],$KL4,0); - &RIP5($E,$A,$B,$C,$D,$wl[76],$sl[76],$KL4,0); - &RIP5($D,$E,$A,$B,$C,$wl[77],$sl[77],$KL4,0); - &RIP5($C,$D,$E,$A,$B,$wl[78],$sl[78],$KL4,0); - &RIP5($B,$C,$D,$E,$A,$wl[79],$sl[79],$KL4,1); - - # &mov($tmp2, &wparam(0)); # moved into last RIP5 - # &mov(&swtmp(16), $A); - &mov($A, &DWP( 0,$tmp2,"",0)); - &mov(&swtmp(16+1), $B); - &mov(&swtmp(16+2), $C); - &mov($B, &DWP( 4,$tmp2,"",0)); - &mov(&swtmp(16+3), $D); - &mov($C, &DWP( 8,$tmp2,"",0)); - &mov(&swtmp(16+4), $E); - &mov($D, &DWP(12,$tmp2,"",0)); - &mov($E, &DWP(16,$tmp2,"",0)); - - &RIP5($A,$B,$C,$D,$E,$wr[ 0],$sr[ 0],$KR0,-2); - &RIP5($E,$A,$B,$C,$D,$wr[ 1],$sr[ 1],$KR0,0); - &RIP5($D,$E,$A,$B,$C,$wr[ 2],$sr[ 2],$KR0,0); - &RIP5($C,$D,$E,$A,$B,$wr[ 3],$sr[ 3],$KR0,0); - &RIP5($B,$C,$D,$E,$A,$wr[ 4],$sr[ 4],$KR0,0); - &RIP5($A,$B,$C,$D,$E,$wr[ 5],$sr[ 5],$KR0,0); - &RIP5($E,$A,$B,$C,$D,$wr[ 6],$sr[ 6],$KR0,0); - &RIP5($D,$E,$A,$B,$C,$wr[ 7],$sr[ 7],$KR0,0); - &RIP5($C,$D,$E,$A,$B,$wr[ 8],$sr[ 8],$KR0,0); - &RIP5($B,$C,$D,$E,$A,$wr[ 9],$sr[ 9],$KR0,0); - &RIP5($A,$B,$C,$D,$E,$wr[10],$sr[10],$KR0,0); - &RIP5($E,$A,$B,$C,$D,$wr[11],$sr[11],$KR0,0); - &RIP5($D,$E,$A,$B,$C,$wr[12],$sr[12],$KR0,0); - &RIP5($C,$D,$E,$A,$B,$wr[13],$sr[13],$KR0,0); - &RIP5($B,$C,$D,$E,$A,$wr[14],$sr[14],$KR0,0); - &RIP5($A,$B,$C,$D,$E,$wr[15],$sr[15],$KR0,2); - - &RIP4($E,$A,$B,$C,$D,$wr[16],$sr[16],$KR1,-2); - &RIP4($D,$E,$A,$B,$C,$wr[17],$sr[17],$KR1,0); - &RIP4($C,$D,$E,$A,$B,$wr[18],$sr[18],$KR1,0); - &RIP4($B,$C,$D,$E,$A,$wr[19],$sr[19],$KR1,0); - &RIP4($A,$B,$C,$D,$E,$wr[20],$sr[20],$KR1,0); - &RIP4($E,$A,$B,$C,$D,$wr[21],$sr[21],$KR1,0); - &RIP4($D,$E,$A,$B,$C,$wr[22],$sr[22],$KR1,0); - &RIP4($C,$D,$E,$A,$B,$wr[23],$sr[23],$KR1,0); - &RIP4($B,$C,$D,$E,$A,$wr[24],$sr[24],$KR1,0); - &RIP4($A,$B,$C,$D,$E,$wr[25],$sr[25],$KR1,0); - &RIP4($E,$A,$B,$C,$D,$wr[26],$sr[26],$KR1,0); - &RIP4($D,$E,$A,$B,$C,$wr[27],$sr[27],$KR1,0); - &RIP4($C,$D,$E,$A,$B,$wr[28],$sr[28],$KR1,0); - &RIP4($B,$C,$D,$E,$A,$wr[29],$sr[29],$KR1,0); - &RIP4($A,$B,$C,$D,$E,$wr[30],$sr[30],$KR1,0); - &RIP4($E,$A,$B,$C,$D,$wr[31],$sr[31],$KR1,2); - - &RIP3($D,$E,$A,$B,$C,$wr[32],$sr[32],$KR2,-2); - &RIP3($C,$D,$E,$A,$B,$wr[33],$sr[33],$KR2,0); - &RIP3($B,$C,$D,$E,$A,$wr[34],$sr[34],$KR2,0); - &RIP3($A,$B,$C,$D,$E,$wr[35],$sr[35],$KR2,0); - &RIP3($E,$A,$B,$C,$D,$wr[36],$sr[36],$KR2,0); - &RIP3($D,$E,$A,$B,$C,$wr[37],$sr[37],$KR2,0); - &RIP3($C,$D,$E,$A,$B,$wr[38],$sr[38],$KR2,0); - &RIP3($B,$C,$D,$E,$A,$wr[39],$sr[39],$KR2,0); - &RIP3($A,$B,$C,$D,$E,$wr[40],$sr[40],$KR2,0); - &RIP3($E,$A,$B,$C,$D,$wr[41],$sr[41],$KR2,0); - &RIP3($D,$E,$A,$B,$C,$wr[42],$sr[42],$KR2,0); - &RIP3($C,$D,$E,$A,$B,$wr[43],$sr[43],$KR2,0); - &RIP3($B,$C,$D,$E,$A,$wr[44],$sr[44],$KR2,0); - &RIP3($A,$B,$C,$D,$E,$wr[45],$sr[45],$KR2,0); - &RIP3($E,$A,$B,$C,$D,$wr[46],$sr[46],$KR2,0); - &RIP3($D,$E,$A,$B,$C,$wr[47],$sr[47],$KR2,2,$wr[48]); - - &RIP2($C,$D,$E,$A,$B,$wr[48],$wr[49],$sr[48],$KR3,-2); - &RIP2($B,$C,$D,$E,$A,$wr[49],$wr[50],$sr[49],$KR3,0); - &RIP2($A,$B,$C,$D,$E,$wr[50],$wr[51],$sr[50],$KR3,0); - &RIP2($E,$A,$B,$C,$D,$wr[51],$wr[52],$sr[51],$KR3,0); - &RIP2($D,$E,$A,$B,$C,$wr[52],$wr[53],$sr[52],$KR3,0); - &RIP2($C,$D,$E,$A,$B,$wr[53],$wr[54],$sr[53],$KR3,0); - &RIP2($B,$C,$D,$E,$A,$wr[54],$wr[55],$sr[54],$KR3,0); - &RIP2($A,$B,$C,$D,$E,$wr[55],$wr[56],$sr[55],$KR3,0); - &RIP2($E,$A,$B,$C,$D,$wr[56],$wr[57],$sr[56],$KR3,0); - &RIP2($D,$E,$A,$B,$C,$wr[57],$wr[58],$sr[57],$KR3,0); - &RIP2($C,$D,$E,$A,$B,$wr[58],$wr[59],$sr[58],$KR3,0); - &RIP2($B,$C,$D,$E,$A,$wr[59],$wr[60],$sr[59],$KR3,0); - &RIP2($A,$B,$C,$D,$E,$wr[60],$wr[61],$sr[60],$KR3,0); - &RIP2($E,$A,$B,$C,$D,$wr[61],$wr[62],$sr[61],$KR3,0); - &RIP2($D,$E,$A,$B,$C,$wr[62],$wr[63],$sr[62],$KR3,0); - &RIP2($C,$D,$E,$A,$B,$wr[63],$wr[64],$sr[63],$KR3,2); - - &RIP1($B,$C,$D,$E,$A,$wr[64],$sr[64],-2); - &RIP1($A,$B,$C,$D,$E,$wr[65],$sr[65],0); - &RIP1($E,$A,$B,$C,$D,$wr[66],$sr[66],0); - &RIP1($D,$E,$A,$B,$C,$wr[67],$sr[67],0); - &RIP1($C,$D,$E,$A,$B,$wr[68],$sr[68],0); - &RIP1($B,$C,$D,$E,$A,$wr[69],$sr[69],0); - &RIP1($A,$B,$C,$D,$E,$wr[70],$sr[70],0); - &RIP1($E,$A,$B,$C,$D,$wr[71],$sr[71],0); - &RIP1($D,$E,$A,$B,$C,$wr[72],$sr[72],0); - &RIP1($C,$D,$E,$A,$B,$wr[73],$sr[73],0); - &RIP1($B,$C,$D,$E,$A,$wr[74],$sr[74],0); - &RIP1($A,$B,$C,$D,$E,$wr[75],$sr[75],0); - &RIP1($E,$A,$B,$C,$D,$wr[76],$sr[76],0); - &RIP1($D,$E,$A,$B,$C,$wr[77],$sr[77],0); - &RIP1($C,$D,$E,$A,$B,$wr[78],$sr[78],0); - &RIP1($B,$C,$D,$E,$A,$wr[79],$sr[79],2); - - # &mov($tmp2, &wparam(0)); # Moved into last round - - &mov($tmp1, &DWP( 4,$tmp2,"",0)); # ctx->B - &add($D, $tmp1); - &mov($tmp1, &swtmp(16+2)); # $c - &add($D, $tmp1); - - &mov($tmp1, &DWP( 8,$tmp2,"",0)); # ctx->C - &add($E, $tmp1); - &mov($tmp1, &swtmp(16+3)); # $d - &add($E, $tmp1); - - &mov($tmp1, &DWP(12,$tmp2,"",0)); # ctx->D - &add($A, $tmp1); - &mov($tmp1, &swtmp(16+4)); # $e - &add($A, $tmp1); - - - &mov($tmp1, &DWP(16,$tmp2,"",0)); # ctx->E - &add($B, $tmp1); - &mov($tmp1, &swtmp(16+0)); # $a - &add($B, $tmp1); - - &mov($tmp1, &DWP( 0,$tmp2,"",0)); # ctx->A - &add($C, $tmp1); - &mov($tmp1, &swtmp(16+1)); # $b - &add($C, $tmp1); - - &mov($tmp1, &wparam(2)); - - &mov(&DWP( 0,$tmp2,"",0), $D); - &mov(&DWP( 4,$tmp2,"",0), $E); - &mov(&DWP( 8,$tmp2,"",0), $A); - &sub($tmp1,1); - &mov(&DWP(12,$tmp2,"",0), $B); - &mov(&DWP(16,$tmp2,"",0), $C); - - &jle(&label("get_out")); - - &mov(&wparam(2),$tmp1); - &mov($C, $A); - &mov($tmp1, &wparam(1)); - &mov($A, $D); - &add($tmp1, 64); - &mov($B, $E); - &mov(&wparam(1),$tmp1); - - &jmp(&label("start")); - - &set_label("get_out"); - - &stack_pop(16+5+6); - - &pop("ebx"); - &pop("ebp"); - &pop("edi"); - &pop("esi"); - &ret(); - &function_end_B($name); - } - diff --git a/drivers/builtin_openssl2/crypto/ripemd/rmdtest.c b/drivers/builtin_openssl2/crypto/ripemd/rmdtest.c deleted file mode 100644 index 95f6f46ab1..0000000000 --- a/drivers/builtin_openssl2/crypto/ripemd/rmdtest.c +++ /dev/null @@ -1,143 +0,0 @@ -/* crypto/ripemd/rmdtest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_RIPEMD -int main(int argc, char *argv[]) -{ - printf("No ripemd support\n"); - return (0); -} -#else -# include -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -static char *test[] = { - "", - "a", - "abc", - "message digest", - "abcdefghijklmnopqrstuvwxyz", - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - "12345678901234567890123456789012345678901234567890123456789012345678901234567890", - NULL, -}; - -static char *ret[] = { - "9c1185a5c5e9fc54612808977ee8f548b2258d31", - "0bdc9d2d256b3ee9daae347be6f4dc835a467ffe", - "8eb208f7e05d987a9b044a8e98c6b087f15a0bfc", - "5d0689ef49d2fae572b881b123a85ffa21595f36", - "f71c27109c692c1b56bbdceb5b9d2865b3708dbc", - "12a053384a9c0c88e405a06c27dcf49ada62eb2b", - "b0e20b6e3116640286ed3a87a5713079b21f5189", - "9b752e45573d4b39f4dbd3323cab82bf63326bfb", -}; - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - char *p; - unsigned char md[RIPEMD160_DIGEST_LENGTH]; - - P = test; - R = ret; - i = 1; - while (*P != NULL) { -# ifdef CHARSET_EBCDIC - ebcdic2ascii((char *)*P, (char *)*P, strlen((char *)*P)); -# endif - EVP_Digest(&(P[0][0]), strlen((char *)*P), md, NULL, EVP_ripemd160(), - NULL); - p = pt(md); - if (strcmp(p, (char *)*R) != 0) { - printf("error calculating RIPEMD160 on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } - EXIT(err); - return (0); -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < RIPEMD160_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_ameth.c b/drivers/builtin_openssl2/crypto/rsa/rsa_ameth.c index c7f1148a1d..4e0621827c 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_ameth.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_ameth.c @@ -68,6 +68,11 @@ #endif #include "asn1_locl.h" +static int rsa_cms_sign(CMS_SignerInfo *si); +static int rsa_cms_verify(CMS_SignerInfo *si); +static int rsa_cms_decrypt(CMS_RecipientInfo *ri); +static int rsa_cms_encrypt(CMS_RecipientInfo *ri); + static int rsa_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) { unsigned char *penc = NULL; @@ -258,6 +263,23 @@ static int rsa_priv_print(BIO *bp, const EVP_PKEY *pkey, int indent, return do_rsa_print(bp, pkey->pkey.rsa, indent, 1); } +/* Given an MGF1 Algorithm ID decode to an Algorithm Identifier */ +static X509_ALGOR *rsa_mgf1_decode(X509_ALGOR *alg) +{ + const unsigned char *p; + int plen; + if (alg == NULL || alg->parameter == NULL) + return NULL; + if (OBJ_obj2nid(alg->algorithm) != NID_mgf1) + return NULL; + if (alg->parameter->type != V_ASN1_SEQUENCE) + return NULL; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + return d2i_X509_ALGOR(NULL, &p, plen); +} + static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg, X509_ALGOR **pmaskHash) { @@ -276,15 +298,7 @@ static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg, if (!pss) return NULL; - if (pss->maskGenAlgorithm) { - ASN1_TYPE *param = pss->maskGenAlgorithm->parameter; - if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) == NID_mgf1 - && param && param->type == V_ASN1_SEQUENCE) { - p = param->value.sequence->data; - plen = param->value.sequence->length; - *pmaskHash = d2i_X509_ALGOR(NULL, &p, plen); - } - } + *pmaskHash = rsa_mgf1_decode(pss->maskGenAlgorithm); return pss; } @@ -401,17 +415,25 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) #ifndef OPENSSL_NO_CMS case ASN1_PKEY_CTRL_CMS_SIGN: if (arg1 == 0) - CMS_SignerInfo_get0_algs(arg2, NULL, NULL, NULL, &alg); + return rsa_cms_sign(arg2); + else if (arg1 == 1) + return rsa_cms_verify(arg2); break; case ASN1_PKEY_CTRL_CMS_ENVELOPE: if (arg1 == 0) - CMS_RecipientInfo_ktri_get0_algs(arg2, NULL, NULL, &alg); + return rsa_cms_encrypt(arg2); + else if (arg1 == 1) + return rsa_cms_decrypt(arg2); break; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_TRANS; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 1; default: @@ -426,59 +448,166 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) } +/* allocate and set algorithm ID from EVP_MD, default SHA1 */ +static int rsa_md_to_algor(X509_ALGOR **palg, const EVP_MD *md) +{ + if (EVP_MD_type(md) == NID_sha1) + return 1; + *palg = X509_ALGOR_new(); + if (!*palg) + return 0; + X509_ALGOR_set_md(*palg, md); + return 1; +} + +/* Allocate and set MGF1 algorithm ID from EVP_MD */ +static int rsa_md_to_mgf1(X509_ALGOR **palg, const EVP_MD *mgf1md) +{ + X509_ALGOR *algtmp = NULL; + ASN1_STRING *stmp = NULL; + *palg = NULL; + if (EVP_MD_type(mgf1md) == NID_sha1) + return 1; + /* need to embed algorithm ID inside another */ + if (!rsa_md_to_algor(&algtmp, mgf1md)) + goto err; + if (!ASN1_item_pack(algtmp, ASN1_ITEM_rptr(X509_ALGOR), &stmp)) + goto err; + *palg = X509_ALGOR_new(); + if (!*palg) + goto err; + X509_ALGOR_set0(*palg, OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp); + stmp = NULL; + err: + if (stmp) + ASN1_STRING_free(stmp); + if (algtmp) + X509_ALGOR_free(algtmp); + if (*palg) + return 1; + return 0; +} + +/* convert algorithm ID to EVP_MD, default SHA1 */ +static const EVP_MD *rsa_algor_to_md(X509_ALGOR *alg) +{ + const EVP_MD *md; + if (!alg) + return EVP_sha1(); + md = EVP_get_digestbyobj(alg->algorithm); + if (md == NULL) + RSAerr(RSA_F_RSA_ALGOR_TO_MD, RSA_R_UNKNOWN_DIGEST); + return md; +} + +/* convert MGF1 algorithm ID to EVP_MD, default SHA1 */ +static const EVP_MD *rsa_mgf1_to_md(X509_ALGOR *alg, X509_ALGOR *maskHash) +{ + const EVP_MD *md; + if (!alg) + return EVP_sha1(); + /* Check mask and lookup mask hash algorithm */ + if (OBJ_obj2nid(alg->algorithm) != NID_mgf1) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_ALGORITHM); + return NULL; + } + if (!maskHash) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_PARAMETER); + return NULL; + } + md = EVP_get_digestbyobj(maskHash->algorithm); + if (md == NULL) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNKNOWN_MASK_DIGEST); + return NULL; + } + return md; +} + /* - * Customised RSA item verification routine. This is called when a signature - * is encountered requiring special handling. We currently only handle PSS. + * Convert EVP_PKEY_CTX is PSS mode into corresponding algorithm parameter, + * suitable for setting an AlgorithmIdentifier. */ -static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, - X509_ALGOR *sigalg, ASN1_BIT_STRING *sig, - EVP_PKEY *pkey) +static ASN1_STRING *rsa_ctx_to_pss(EVP_PKEY_CTX *pkctx) +{ + const EVP_MD *sigmd, *mgf1md; + RSA_PSS_PARAMS *pss = NULL; + ASN1_STRING *os = NULL; + EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx); + int saltlen, rv = 0; + if (EVP_PKEY_CTX_get_signature_md(pkctx, &sigmd) <= 0) + goto err; + if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) + goto err; + if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen)) + goto err; + if (saltlen == -1) + saltlen = EVP_MD_size(sigmd); + else if (saltlen == -2) { + saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2; + if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) + saltlen--; + } + pss = RSA_PSS_PARAMS_new(); + if (!pss) + goto err; + if (saltlen != 20) { + pss->saltLength = ASN1_INTEGER_new(); + if (!pss->saltLength) + goto err; + if (!ASN1_INTEGER_set(pss->saltLength, saltlen)) + goto err; + } + if (!rsa_md_to_algor(&pss->hashAlgorithm, sigmd)) + goto err; + if (!rsa_md_to_mgf1(&pss->maskGenAlgorithm, mgf1md)) + goto err; + /* Finally create string with pss parameter encoding. */ + if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os)) + goto err; + rv = 1; + err: + if (pss) + RSA_PSS_PARAMS_free(pss); + if (rv) + return os; + if (os) + ASN1_STRING_free(os); + return NULL; +} + +/* + * From PSS AlgorithmIdentifier set public key parameters. If pkey isn't NULL + * then the EVP_MD_CTX is setup and initalised. If it is NULL parameters are + * passed to pkctx instead. + */ + +static int rsa_pss_to_ctx(EVP_MD_CTX *ctx, EVP_PKEY_CTX *pkctx, + X509_ALGOR *sigalg, EVP_PKEY *pkey) { int rv = -1; int saltlen; const EVP_MD *mgf1md = NULL, *md = NULL; RSA_PSS_PARAMS *pss; X509_ALGOR *maskHash; - EVP_PKEY_CTX *pkctx; /* Sanity check: make sure it is PSS */ if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); return -1; } /* Decode PSS parameters */ pss = rsa_pss_decode(sigalg, &maskHash); if (pss == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_PSS_PARAMETERS); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_PSS_PARAMETERS); goto err; } - /* Check mask and lookup mask hash algorithm */ - if (pss->maskGenAlgorithm) { - if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_ALGORITHM); - goto err; - } - if (!maskHash) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_PARAMETER); - goto err; - } - mgf1md = EVP_get_digestbyobj(maskHash->algorithm); - if (mgf1md == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_MASK_DIGEST); - goto err; - } - } else - mgf1md = EVP_sha1(); - - if (pss->hashAlgorithm) { - md = EVP_get_digestbyobj(pss->hashAlgorithm->algorithm); - if (md == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_PSS_DIGEST); - goto err; - } - } else - md = EVP_sha1(); + mgf1md = rsa_mgf1_to_md(pss->maskGenAlgorithm, maskHash); + if (!mgf1md) + goto err; + md = rsa_algor_to_md(pss->hashAlgorithm); + if (!md) + goto err; if (pss->saltLength) { saltlen = ASN1_INTEGER_get(pss->saltLength); @@ -488,7 +617,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, * routines will trap other invalid values anyway. */ if (saltlen < 0) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_SALT_LENGTH); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_SALT_LENGTH); goto err; } } else @@ -499,14 +628,24 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, * PKCS#1 says we should reject any other value anyway. */ if (pss->trailerField && ASN1_INTEGER_get(pss->trailerField) != 1) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_TRAILER); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_TRAILER); goto err; } /* We have all parameters now set up context */ - if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey)) - goto err; + if (pkey) { + if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey)) + goto err; + } else { + const EVP_MD *checkmd; + if (EVP_PKEY_CTX_get_signature_md(pkctx, &checkmd) <= 0) + goto err; + if (EVP_MD_type(md) != EVP_MD_type(checkmd)) { + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_DIGEST_DOES_NOT_MATCH); + goto err; + } + } if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0) goto err; @@ -517,7 +656,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0) goto err; /* Carry on */ - rv = 2; + rv = 1; err: RSA_PSS_PARAMS_free(pss); @@ -526,6 +665,71 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, return rv; } +static int rsa_cms_verify(CMS_SignerInfo *si) +{ + int nid, nid2; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si); + CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg); + nid = OBJ_obj2nid(alg->algorithm); + if (nid == NID_rsaEncryption) + return 1; + if (nid == NID_rsassaPss) + return rsa_pss_to_ctx(NULL, pkctx, alg, NULL); + /* Workaround for some implementation that use a signature OID */ + if (OBJ_find_sigid_algs(nid, NULL, &nid2)) { + if (nid2 == NID_rsaEncryption) + return 1; + } + return 0; +} + +/* + * Customised RSA item verification routine. This is called when a signature + * is encountered requiring special handling. We currently only handle PSS. + */ + +static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *sigalg, ASN1_BIT_STRING *sig, + EVP_PKEY *pkey) +{ + /* Sanity check: make sure it is PSS */ + if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); + return -1; + } + if (rsa_pss_to_ctx(ctx, NULL, sigalg, pkey) > 0) { + /* Carry on */ + return 2; + } + return -1; +} + +static int rsa_cms_sign(CMS_SignerInfo *si) +{ + int pad_mode = RSA_PKCS1_PADDING; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si); + ASN1_STRING *os = NULL; + CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg); + if (pkctx) { + if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0) + return 0; + } + if (pad_mode == RSA_PKCS1_PADDING) { + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0); + return 1; + } + /* We don't support it */ + if (pad_mode != RSA_PKCS1_PSS_PADDING) + return 0; + os = rsa_ctx_to_pss(pkctx); + if (!os) + return 0; + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os); + return 1; +} + static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, X509_ALGOR *alg1, X509_ALGOR *alg2, ASN1_BIT_STRING *sig) @@ -537,78 +741,184 @@ static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, if (pad_mode == RSA_PKCS1_PADDING) return 2; if (pad_mode == RSA_PKCS1_PSS_PADDING) { - const EVP_MD *sigmd, *mgf1md; - RSA_PSS_PARAMS *pss = NULL; - X509_ALGOR *mgf1alg = NULL; - ASN1_STRING *os1 = NULL, *os2 = NULL; - EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx); - int saltlen, rv = 0; - sigmd = EVP_MD_CTX_md(ctx); - if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) - goto err; - if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen)) - goto err; - if (saltlen == -1) - saltlen = EVP_MD_size(sigmd); - else if (saltlen == -2) { - saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2; - if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) - saltlen--; - } - pss = RSA_PSS_PARAMS_new(); - if (!pss) - goto err; - if (saltlen != 20) { - pss->saltLength = ASN1_INTEGER_new(); - if (!pss->saltLength) - goto err; - if (!ASN1_INTEGER_set(pss->saltLength, saltlen)) - goto err; - } - if (EVP_MD_type(sigmd) != NID_sha1) { - pss->hashAlgorithm = X509_ALGOR_new(); - if (!pss->hashAlgorithm) - goto err; - X509_ALGOR_set_md(pss->hashAlgorithm, sigmd); - } - if (EVP_MD_type(mgf1md) != NID_sha1) { - ASN1_STRING *stmp = NULL; - /* need to embed algorithm ID inside another */ - mgf1alg = X509_ALGOR_new(); - X509_ALGOR_set_md(mgf1alg, mgf1md); - if (!ASN1_item_pack(mgf1alg, ASN1_ITEM_rptr(X509_ALGOR), &stmp)) - goto err; - pss->maskGenAlgorithm = X509_ALGOR_new(); - if (!pss->maskGenAlgorithm) - goto err; - X509_ALGOR_set0(pss->maskGenAlgorithm, - OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp); - } - /* Finally create string with pss parameter encoding. */ - if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os1)) - goto err; + ASN1_STRING *os1 = NULL; + os1 = rsa_ctx_to_pss(pkctx); + if (!os1) + return 0; + /* Duplicate parameters if we have to */ if (alg2) { - os2 = ASN1_STRING_dup(os1); - if (!os2) - goto err; + ASN1_STRING *os2 = ASN1_STRING_dup(os1); + if (!os2) { + ASN1_STRING_free(os1); + return 0; + } X509_ALGOR_set0(alg2, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os2); } X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os1); - os1 = os2 = NULL; - rv = 3; + return 3; + } + return 2; +} + +static RSA_OAEP_PARAMS *rsa_oaep_decode(const X509_ALGOR *alg, + X509_ALGOR **pmaskHash) +{ + const unsigned char *p; + int plen; + RSA_OAEP_PARAMS *pss; + + *pmaskHash = NULL; + + if (!alg->parameter || alg->parameter->type != V_ASN1_SEQUENCE) + return NULL; + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + pss = d2i_RSA_OAEP_PARAMS(NULL, &p, plen); + + if (!pss) + return NULL; + + *pmaskHash = rsa_mgf1_decode(pss->maskGenFunc); + + return pss; +} + +static int rsa_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pkctx; + X509_ALGOR *cmsalg; + int nid; + int rv = -1; + unsigned char *label = NULL; + int labellen = 0; + const EVP_MD *mgf1md = NULL, *md = NULL; + RSA_OAEP_PARAMS *oaep; + X509_ALGOR *maskHash; + pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pkctx) + return 0; + if (!CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &cmsalg)) + return -1; + nid = OBJ_obj2nid(cmsalg->algorithm); + if (nid == NID_rsaEncryption) + return 1; + if (nid != NID_rsaesOaep) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_ENCRYPTION_TYPE); + return -1; + } + /* Decode OAEP parameters */ + oaep = rsa_oaep_decode(cmsalg, &maskHash); + + if (oaep == NULL) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_OAEP_PARAMETERS); + goto err; + } + + mgf1md = rsa_mgf1_to_md(oaep->maskGenFunc, maskHash); + if (!mgf1md) + goto err; + md = rsa_algor_to_md(oaep->hashFunc); + if (!md) + goto err; + + if (oaep->pSourceFunc) { + X509_ALGOR *plab = oaep->pSourceFunc; + if (OBJ_obj2nid(plab->algorithm) != NID_pSpecified) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_LABEL_SOURCE); + goto err; + } + if (plab->parameter->type != V_ASN1_OCTET_STRING) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_LABEL); + goto err; + } + + label = plab->parameter->value.octet_string->data; + /* Stop label being freed when OAEP parameters are freed */ + plab->parameter->value.octet_string->data = NULL; + labellen = plab->parameter->value.octet_string->length; + } + + if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_OAEP_PADDING) <= 0) + goto err; + if (EVP_PKEY_CTX_set_rsa_oaep_md(pkctx, md) <= 0) + goto err; + if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0) + goto err; + if (EVP_PKEY_CTX_set0_rsa_oaep_label(pkctx, label, labellen) <= 0) + goto err; + /* Carry on */ + rv = 1; + err: - if (mgf1alg) - X509_ALGOR_free(mgf1alg); - if (pss) - RSA_PSS_PARAMS_free(pss); - if (os1) - ASN1_STRING_free(os1); - return rv; + RSA_OAEP_PARAMS_free(oaep); + if (maskHash) + X509_ALGOR_free(maskHash); + return rv; +} +static int rsa_cms_encrypt(CMS_RecipientInfo *ri) +{ + const EVP_MD *md, *mgf1md; + RSA_OAEP_PARAMS *oaep = NULL; + ASN1_STRING *os = NULL; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + int pad_mode = RSA_PKCS1_PADDING, rv = 0, labellen; + unsigned char *label; + CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &alg); + if (pkctx) { + if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0) + return 0; } - return 2; + if (pad_mode == RSA_PKCS1_PADDING) { + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0); + return 1; + } + /* Not supported */ + if (pad_mode != RSA_PKCS1_OAEP_PADDING) + return 0; + if (EVP_PKEY_CTX_get_rsa_oaep_md(pkctx, &md) <= 0) + goto err; + if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) + goto err; + labellen = EVP_PKEY_CTX_get0_rsa_oaep_label(pkctx, &label); + if (labellen < 0) + goto err; + oaep = RSA_OAEP_PARAMS_new(); + if (!oaep) + goto err; + if (!rsa_md_to_algor(&oaep->hashFunc, md)) + goto err; + if (!rsa_md_to_mgf1(&oaep->maskGenFunc, mgf1md)) + goto err; + if (labellen > 0) { + ASN1_OCTET_STRING *los = ASN1_OCTET_STRING_new(); + oaep->pSourceFunc = X509_ALGOR_new(); + if (!oaep->pSourceFunc) + goto err; + if (!los) + goto err; + if (!ASN1_OCTET_STRING_set(los, label, labellen)) { + ASN1_OCTET_STRING_free(los); + goto err; + } + X509_ALGOR_set0(oaep->pSourceFunc, OBJ_nid2obj(NID_pSpecified), + V_ASN1_OCTET_STRING, los); + } + /* create string with pss parameter encoding. */ + if (!ASN1_item_pack(oaep, ASN1_ITEM_rptr(RSA_OAEP_PARAMS), &os)) + goto err; + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaesOaep), V_ASN1_SEQUENCE, os); + os = NULL; + rv = 1; + err: + if (oaep) + RSA_OAEP_PARAMS_free(oaep); + if (os) + ASN1_STRING_free(os); + return rv; } const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = { diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_asn1.c b/drivers/builtin_openssl2/crypto/rsa/rsa_asn1.c index 3d82c1d0c7..aff8b583fa 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_asn1.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_asn1.c @@ -108,6 +108,14 @@ ASN1_SEQUENCE(RSA_PSS_PARAMS) = { IMPLEMENT_ASN1_FUNCTIONS(RSA_PSS_PARAMS) +ASN1_SEQUENCE(RSA_OAEP_PARAMS) = { + ASN1_EXP_OPT(RSA_OAEP_PARAMS, hashFunc, X509_ALGOR, 0), + ASN1_EXP_OPT(RSA_OAEP_PARAMS, maskGenFunc, X509_ALGOR, 1), + ASN1_EXP_OPT(RSA_OAEP_PARAMS, pSourceFunc, X509_ALGOR, 2), +} ASN1_SEQUENCE_END(RSA_OAEP_PARAMS) + +IMPLEMENT_ASN1_FUNCTIONS(RSA_OAEP_PARAMS) + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPrivateKey, RSAPrivateKey) IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPublicKey, RSAPublicKey) diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_err.c b/drivers/builtin_openssl2/crypto/rsa/rsa_err.c index 25b3fa743d..0bab05efcf 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_err.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_err.c @@ -1,6 +1,6 @@ /* crypto/rsa/rsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -80,8 +80,10 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_PKEY_RSA_SIGN), "PKEY_RSA_SIGN"}, {ERR_FUNC(RSA_F_PKEY_RSA_VERIFY), "PKEY_RSA_VERIFY"}, {ERR_FUNC(RSA_F_PKEY_RSA_VERIFYRECOVER), "PKEY_RSA_VERIFYRECOVER"}, + {ERR_FUNC(RSA_F_RSA_ALGOR_TO_MD), "RSA_ALGOR_TO_MD"}, {ERR_FUNC(RSA_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"}, {ERR_FUNC(RSA_F_RSA_CHECK_KEY), "RSA_check_key"}, + {ERR_FUNC(RSA_F_RSA_CMS_DECRYPT), "RSA_CMS_DECRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"}, @@ -90,6 +92,7 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_GENERATE_KEY_EX), "RSA_generate_key_ex"}, {ERR_FUNC(RSA_F_RSA_ITEM_VERIFY), "RSA_ITEM_VERIFY"}, {ERR_FUNC(RSA_F_RSA_MEMORY_LOCK), "RSA_memory_lock"}, + {ERR_FUNC(RSA_F_RSA_MGF1_TO_MD), "RSA_MGF1_TO_MD"}, {ERR_FUNC(RSA_F_RSA_NEW_METHOD), "RSA_new_method"}, {ERR_FUNC(RSA_F_RSA_NULL), "RSA_NULL"}, {ERR_FUNC(RSA_F_RSA_NULL_MOD_EXP), "RSA_NULL_MOD_EXP"}, @@ -100,6 +103,8 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PADDING_ADD_NONE), "RSA_padding_add_none"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP), "RSA_padding_add_PKCS1_OAEP"}, + {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1), + "RSA_padding_add_PKCS1_OAEP_mgf1"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS), "RSA_padding_add_PKCS1_PSS"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1), "RSA_padding_add_PKCS1_PSS_mgf1"}, @@ -112,6 +117,8 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_NONE), "RSA_padding_check_none"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP), "RSA_padding_check_PKCS1_OAEP"}, + {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1), + "RSA_padding_check_PKCS1_OAEP_mgf1"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1), "RSA_padding_check_PKCS1_type_1"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2), @@ -124,6 +131,7 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PRIVATE_ENCRYPT), "RSA_private_encrypt"}, {ERR_FUNC(RSA_F_RSA_PRIV_DECODE), "RSA_PRIV_DECODE"}, {ERR_FUNC(RSA_F_RSA_PRIV_ENCODE), "RSA_PRIV_ENCODE"}, + {ERR_FUNC(RSA_F_RSA_PSS_TO_CTX), "RSA_PSS_TO_CTX"}, {ERR_FUNC(RSA_F_RSA_PUBLIC_DECRYPT), "RSA_public_decrypt"}, {ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"}, {ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"}, @@ -157,6 +165,7 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_DATA_TOO_SMALL), "data too small"}, {ERR_REASON(RSA_R_DATA_TOO_SMALL_FOR_KEY_SIZE), "data too small for key size"}, + {ERR_REASON(RSA_R_DIGEST_DOES_NOT_MATCH), "digest does not match"}, {ERR_REASON(RSA_R_DIGEST_TOO_BIG_FOR_RSA_KEY), "digest too big for rsa key"}, {ERR_REASON(RSA_R_DMP1_NOT_CONGRUENT_TO_D), "dmp1 not congruent to d"}, @@ -165,11 +174,14 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_FIRST_OCTET_INVALID), "first octet invalid"}, {ERR_REASON(RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE), "illegal or unsupported padding mode"}, + {ERR_REASON(RSA_R_INVALID_DIGEST), "invalid digest"}, {ERR_REASON(RSA_R_INVALID_DIGEST_LENGTH), "invalid digest length"}, {ERR_REASON(RSA_R_INVALID_HEADER), "invalid header"}, {ERR_REASON(RSA_R_INVALID_KEYBITS), "invalid keybits"}, + {ERR_REASON(RSA_R_INVALID_LABEL), "invalid label"}, {ERR_REASON(RSA_R_INVALID_MESSAGE_LENGTH), "invalid message length"}, {ERR_REASON(RSA_R_INVALID_MGF1_MD), "invalid mgf1 md"}, + {ERR_REASON(RSA_R_INVALID_OAEP_PARAMETERS), "invalid oaep parameters"}, {ERR_REASON(RSA_R_INVALID_PADDING), "invalid padding"}, {ERR_REASON(RSA_R_INVALID_PADDING_MODE), "invalid padding mode"}, {ERR_REASON(RSA_R_INVALID_PSS_PARAMETERS), "invalid pss parameters"}, @@ -203,9 +215,13 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD), "the asn1 object identifier is not known for this md"}, {ERR_REASON(RSA_R_UNKNOWN_ALGORITHM_TYPE), "unknown algorithm type"}, + {ERR_REASON(RSA_R_UNKNOWN_DIGEST), "unknown digest"}, {ERR_REASON(RSA_R_UNKNOWN_MASK_DIGEST), "unknown mask digest"}, {ERR_REASON(RSA_R_UNKNOWN_PADDING_TYPE), "unknown padding type"}, {ERR_REASON(RSA_R_UNKNOWN_PSS_DIGEST), "unknown pss digest"}, + {ERR_REASON(RSA_R_UNSUPPORTED_ENCRYPTION_TYPE), + "unsupported encryption type"}, + {ERR_REASON(RSA_R_UNSUPPORTED_LABEL_SOURCE), "unsupported label source"}, {ERR_REASON(RSA_R_UNSUPPORTED_MASK_ALGORITHM), "unsupported mask algorithm"}, {ERR_REASON(RSA_R_UNSUPPORTED_MASK_PARAMETER), diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_oaep.c b/drivers/builtin_openssl2/crypto/rsa/rsa_oaep.c index 499835f814..9c2a943cf7 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_oaep.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_oaep.c @@ -28,39 +28,53 @@ # include # include -static int MGF1(unsigned char *mask, long len, - const unsigned char *seed, long seedlen); - int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, const unsigned char *from, int flen, const unsigned char *param, int plen) +{ + return RSA_padding_add_PKCS1_OAEP_mgf1(to, tlen, from, flen, + param, plen, NULL, NULL); +} + +int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + const unsigned char *param, int plen, + const EVP_MD *md, const EVP_MD *mgf1md) { int i, emlen = tlen - 1; unsigned char *db, *seed; - unsigned char *dbmask, seedmask[SHA_DIGEST_LENGTH]; + unsigned char *dbmask, seedmask[EVP_MAX_MD_SIZE]; + int mdlen; + + if (md == NULL) + md = EVP_sha1(); + if (mgf1md == NULL) + mgf1md = md; - if (flen > emlen - 2 * SHA_DIGEST_LENGTH - 1) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, + mdlen = EVP_MD_size(md); + + if (flen > emlen - 2 * mdlen - 1) { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); return 0; } - if (emlen < 2 * SHA_DIGEST_LENGTH + 1) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, RSA_R_KEY_SIZE_TOO_SMALL); + if (emlen < 2 * mdlen + 1) { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, + RSA_R_KEY_SIZE_TOO_SMALL); return 0; } to[0] = 0; seed = to + 1; - db = to + SHA_DIGEST_LENGTH + 1; + db = to + mdlen + 1; - if (!EVP_Digest((void *)param, plen, db, NULL, EVP_sha1(), NULL)) + if (!EVP_Digest((void *)param, plen, db, NULL, md, NULL)) return 0; - memset(db + SHA_DIGEST_LENGTH, 0, - emlen - flen - 2 * SHA_DIGEST_LENGTH - 1); - db[emlen - flen - SHA_DIGEST_LENGTH - 1] = 0x01; - memcpy(db + emlen - flen - SHA_DIGEST_LENGTH, from, (unsigned int)flen); - if (RAND_bytes(seed, SHA_DIGEST_LENGTH) <= 0) + memset(db + mdlen, 0, emlen - flen - 2 * mdlen - 1); + db[emlen - flen - mdlen - 1] = 0x01; + memcpy(db + emlen - flen - mdlen, from, (unsigned int)flen); + if (RAND_bytes(seed, mdlen) <= 0) return 0; # ifdef PKCS_TESTVECT memcpy(seed, @@ -68,20 +82,20 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, 20); # endif - dbmask = OPENSSL_malloc(emlen - SHA_DIGEST_LENGTH); + dbmask = OPENSSL_malloc(emlen - mdlen); if (dbmask == NULL) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE); return 0; } - if (MGF1(dbmask, emlen - SHA_DIGEST_LENGTH, seed, SHA_DIGEST_LENGTH) < 0) + if (PKCS1_MGF1(dbmask, emlen - mdlen, seed, mdlen, mgf1md) < 0) return 0; - for (i = 0; i < emlen - SHA_DIGEST_LENGTH; i++) + for (i = 0; i < emlen - mdlen; i++) db[i] ^= dbmask[i]; - if (MGF1(seedmask, SHA_DIGEST_LENGTH, db, emlen - SHA_DIGEST_LENGTH) < 0) + if (PKCS1_MGF1(seedmask, mdlen, db, emlen - mdlen, mgf1md) < 0) return 0; - for (i = 0; i < SHA_DIGEST_LENGTH; i++) + for (i = 0; i < mdlen; i++) seed[i] ^= seedmask[i]; OPENSSL_free(dbmask); @@ -91,6 +105,16 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, const unsigned char *from, int flen, int num, const unsigned char *param, int plen) +{ + return RSA_padding_check_PKCS1_OAEP_mgf1(to, tlen, from, flen, num, + param, plen, NULL, NULL); +} + +int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + int num, const unsigned char *param, + int plen, const EVP_MD *md, + const EVP_MD *mgf1md) { int i, dblen, mlen = -1, one_index = 0, msg_index; unsigned int good, found_one_byte; @@ -101,26 +125,33 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, */ unsigned char *db = NULL, *em = NULL, seed[EVP_MAX_MD_SIZE], phash[EVP_MAX_MD_SIZE]; + int mdlen; + + if (md == NULL) + md = EVP_sha1(); + if (mgf1md == NULL) + mgf1md = md; + + mdlen = EVP_MD_size(md); if (tlen <= 0 || flen <= 0) return -1; - /* * |num| is the length of the modulus; |flen| is the length of the * encoded message. Therefore, for any |from| that was obtained by * decrypting a ciphertext, we must have |flen| <= |num|. Similarly, - * num < 2 * SHA_DIGEST_LENGTH + 2 must hold for the modulus - * irrespective of the ciphertext, see PKCS #1 v2.2, section 7.1.2. + * num < 2 * mdlen + 2 must hold for the modulus irrespective of + * the ciphertext, see PKCS #1 v2.2, section 7.1.2. * This does not leak any side-channel information. */ - if (num < flen || num < 2 * SHA_DIGEST_LENGTH + 2) + if (num < flen || num < 2 * mdlen + 2) goto decoding_err; - dblen = num - SHA_DIGEST_LENGTH - 1; + dblen = num - mdlen - 1; db = OPENSSL_malloc(dblen); em = OPENSSL_malloc(num); if (db == NULL || em == NULL) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE); goto cleanup; } @@ -143,26 +174,25 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, good = constant_time_is_zero(em[0]); maskedseed = em + 1; - maskeddb = em + 1 + SHA_DIGEST_LENGTH; + maskeddb = em + 1 + mdlen; - if (MGF1(seed, SHA_DIGEST_LENGTH, maskeddb, dblen)) + if (PKCS1_MGF1(seed, mdlen, maskeddb, dblen, mgf1md)) goto cleanup; - for (i = 0; i < SHA_DIGEST_LENGTH; i++) + for (i = 0; i < mdlen; i++) seed[i] ^= maskedseed[i]; - if (MGF1(db, dblen, seed, SHA_DIGEST_LENGTH)) + if (PKCS1_MGF1(db, dblen, seed, mdlen, mgf1md)) goto cleanup; for (i = 0; i < dblen; i++) db[i] ^= maskeddb[i]; - if (!EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL)) + if (!EVP_Digest((void *)param, plen, phash, NULL, md, NULL)) goto cleanup; - good &= - constant_time_is_zero(CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH)); + good &= constant_time_is_zero(CRYPTO_memcmp(db, phash, mdlen)); found_one_byte = 0; - for (i = SHA_DIGEST_LENGTH; i < dblen; i++) { + for (i = mdlen; i < dblen; i++) { /* * Padding consists of a number of 0-bytes, followed by a 1. */ @@ -188,7 +218,7 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, mlen = dblen - msg_index; if (tlen < mlen) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_DATA_TOO_LARGE); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE); mlen = -1; } else { memcpy(to, db + msg_index, mlen); @@ -200,7 +230,8 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, * To avoid chosen ciphertext attacks, the error message should not * reveal which kind of decoding error happened. */ - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_OAEP_DECODING_ERROR); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, + RSA_R_OAEP_DECODING_ERROR); cleanup: if (db != NULL) OPENSSL_free(db); @@ -249,9 +280,4 @@ int PKCS1_MGF1(unsigned char *mask, long len, return rv; } -static int MGF1(unsigned char *mask, long len, const unsigned char *seed, - long seedlen) -{ - return PKCS1_MGF1(mask, len, seed, seedlen, EVP_sha1()); -} #endif diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_pmeth.c b/drivers/builtin_openssl2/crypto/rsa/rsa_pmeth.c index 6a7c67cdb8..203635595f 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_pmeth.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_pmeth.c @@ -64,6 +64,7 @@ #include #include #include +#include #ifndef OPENSSL_NO_CMS # include #endif @@ -87,10 +88,13 @@ typedef struct { const EVP_MD *md; /* message digest for MGF1 */ const EVP_MD *mgf1md; - /* PSS/OAEP salt length */ + /* PSS salt length */ int saltlen; /* Temp buffer */ unsigned char *tbuf; + /* OAEP label */ + unsigned char *oaep_label; + size_t oaep_labellen; } RSA_PKEY_CTX; static int pkey_rsa_init(EVP_PKEY_CTX *ctx) @@ -108,6 +112,9 @@ static int pkey_rsa_init(EVP_PKEY_CTX *ctx) rctx->saltlen = -2; + rctx->oaep_label = NULL; + rctx->oaep_labellen = 0; + ctx->data = rctx; ctx->keygen_info = rctx->gentmp; ctx->keygen_info_count = 2; @@ -130,6 +137,15 @@ static int pkey_rsa_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) } dctx->pad_mode = sctx->pad_mode; dctx->md = sctx->md; + dctx->mgf1md = sctx->mgf1md; + if (sctx->oaep_label) { + if (dctx->oaep_label) + OPENSSL_free(dctx->oaep_label); + dctx->oaep_label = BUF_memdup(sctx->oaep_label, sctx->oaep_labellen); + if (!dctx->oaep_label) + return 0; + dctx->oaep_labellen = sctx->oaep_labellen; + } return 1; } @@ -151,6 +167,8 @@ static void pkey_rsa_cleanup(EVP_PKEY_CTX *ctx) BN_free(rctx->pub_exp); if (rctx->tbuf) OPENSSL_free(rctx->tbuf); + if (rctx->oaep_label) + OPENSSL_free(rctx->oaep_label); OPENSSL_free(rctx); } } @@ -173,10 +191,18 @@ static int pkey_fips_check_ctx(EVP_PKEY_CTX *ctx) rv = 0; if (!(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) && rv) return -1; - if (rctx->md && !(rctx->md->flags & EVP_MD_FLAG_FIPS)) - return rv; - if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) - return rv; + if (rctx->md) { + const EVP_MD *fmd; + fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->md)); + if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS)) + return rv; + } + if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) { + const EVP_MD *fmd; + fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->mgf1md)); + if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS)) + return rv; + } return 1; } #endif @@ -388,8 +414,21 @@ static int pkey_rsa_encrypt(EVP_PKEY_CTX *ctx, { int ret; RSA_PKEY_CTX *rctx = ctx->data; - ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa, - rctx->pad_mode); + if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) { + int klen = RSA_size(ctx->pkey->pkey.rsa); + if (!setup_tbuf(rctx, ctx)) + return -1; + if (!RSA_padding_add_PKCS1_OAEP_mgf1(rctx->tbuf, klen, + in, inlen, + rctx->oaep_label, + rctx->oaep_labellen, + rctx->md, rctx->mgf1md)) + return -1; + ret = RSA_public_encrypt(klen, rctx->tbuf, out, + ctx->pkey->pkey.rsa, RSA_NO_PADDING); + } else + ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa, + rctx->pad_mode); if (ret < 0) return ret; *outlen = ret; @@ -402,8 +441,26 @@ static int pkey_rsa_decrypt(EVP_PKEY_CTX *ctx, { int ret; RSA_PKEY_CTX *rctx = ctx->data; - ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa, - rctx->pad_mode); + if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) { + int i; + if (!setup_tbuf(rctx, ctx)) + return -1; + ret = RSA_private_decrypt(inlen, in, rctx->tbuf, + ctx->pkey->pkey.rsa, RSA_NO_PADDING); + if (ret <= 0) + return ret; + for (i = 0; i < ret; i++) { + if (rctx->tbuf[i]) + break; + } + ret = RSA_padding_check_PKCS1_OAEP_mgf1(out, ret, rctx->tbuf + i, + ret - i, ret, + rctx->oaep_label, + rctx->oaep_labellen, + rctx->md, rctx->mgf1md); + } else + ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa, + rctx->pad_mode); if (ret < 0) return ret; *outlen = ret; @@ -490,18 +547,36 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) case EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP: if (!p2) return -2; + BN_free(rctx->pub_exp); rctx->pub_exp = p2; return 1; + case EVP_PKEY_CTRL_RSA_OAEP_MD: + case EVP_PKEY_CTRL_GET_RSA_OAEP_MD: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + if (type == EVP_PKEY_CTRL_GET_RSA_OAEP_MD) + *(const EVP_MD **)p2 = rctx->md; + else + rctx->md = p2; + return 1; + case EVP_PKEY_CTRL_MD: if (!check_padding_md(p2, rctx->pad_mode)) return 0; rctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = rctx->md; + return 1; + case EVP_PKEY_CTRL_RSA_MGF1_MD: case EVP_PKEY_CTRL_GET_RSA_MGF1_MD: - if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) { + if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING + && rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_MGF1_MD); return -2; } @@ -514,6 +589,30 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) rctx->mgf1md = p2; return 1; + case EVP_PKEY_CTRL_RSA_OAEP_LABEL: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + if (rctx->oaep_label) + OPENSSL_free(rctx->oaep_label); + if (p2 && p1 > 0) { + rctx->oaep_label = p2; + rctx->oaep_labellen = p1; + } else { + rctx->oaep_label = NULL; + rctx->oaep_labellen = 0; + } + return 1; + + case EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + *(unsigned char **)p2 = rctx->oaep_label; + return rctx->oaep_labellen; + case EVP_PKEY_CTRL_DIGESTINIT: case EVP_PKEY_CTRL_PKCS7_ENCRYPT: case EVP_PKEY_CTRL_PKCS7_DECRYPT: @@ -521,16 +620,6 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) return 1; #ifndef OPENSSL_NO_CMS case EVP_PKEY_CTRL_CMS_DECRYPT: - { - X509_ALGOR *alg = NULL; - ASN1_OBJECT *encalg = NULL; - if (p2) - CMS_RecipientInfo_ktri_get0_algs(p2, NULL, NULL, &alg); - if (alg) - X509_ALGOR_get0(&encalg, NULL, NULL, alg); - if (encalg && OBJ_obj2nid(encalg) == NID_rsaesOaep) - rctx->pad_mode = RSA_PKCS1_OAEP_PADDING; - } case EVP_PKEY_CTRL_CMS_ENCRYPT: case EVP_PKEY_CTRL_CMS_SIGN: return 1; @@ -599,6 +688,36 @@ static int pkey_rsa_ctrl_str(EVP_PKEY_CTX *ctx, return ret; } + if (!strcmp(type, "rsa_mgf1_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md); + } + + if (!strcmp(type, "rsa_oaep_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_rsa_oaep_md(ctx, md); + } + if (!strcmp(type, "rsa_oaep_label")) { + unsigned char *lab; + long lablen; + int ret; + lab = string_to_hex(value, &lablen); + if (!lab) + return 0; + ret = EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, lab, lablen); + if (ret <= 0) + OPENSSL_free(lab); + return ret; + } + return -2; } diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_sign.c b/drivers/builtin_openssl2/crypto/rsa/rsa_sign.c index 41c827f453..82ca8324df 100644 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_sign.c +++ b/drivers/builtin_openssl2/crypto/rsa/rsa_sign.c @@ -260,19 +260,8 @@ int int_rsa_verify(int dtype, const unsigned char *m, OBJ_nid2ln(dtype)); #endif if (sigtype != dtype) { - if (((dtype == NID_md5) && - (sigtype == NID_md5WithRSAEncryption)) || - ((dtype == NID_md2) && - (sigtype == NID_md2WithRSAEncryption))) { - /* ok, we will let it through */ -#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16) - fprintf(stderr, - "signature has problems, re-make with post SSLeay045\n"); -#endif - } else { - RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH); - goto err; - } + RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH); + goto err; } if (rm) { const EVP_MD *md; diff --git a/drivers/builtin_openssl2/crypto/rsa/rsa_test.c b/drivers/builtin_openssl2/crypto/rsa/rsa_test.c deleted file mode 100644 index 85c7440b8c..0000000000 --- a/drivers/builtin_openssl2/crypto/rsa/rsa_test.c +++ /dev/null @@ -1,339 +0,0 @@ -/* test vectors from p1ovect1.txt */ - -#include -#include - -#include "e_os.h" - -#include -#include -#include -#include -#ifdef OPENSSL_NO_RSA -int main(int argc, char *argv[]) -{ - printf("No RSA support\n"); - return (0); -} -#else -# include - -# define SetKey \ - key->n = BN_bin2bn(n, sizeof(n)-1, key->n); \ - key->e = BN_bin2bn(e, sizeof(e)-1, key->e); \ - key->d = BN_bin2bn(d, sizeof(d)-1, key->d); \ - key->p = BN_bin2bn(p, sizeof(p)-1, key->p); \ - key->q = BN_bin2bn(q, sizeof(q)-1, key->q); \ - key->dmp1 = BN_bin2bn(dmp1, sizeof(dmp1)-1, key->dmp1); \ - key->dmq1 = BN_bin2bn(dmq1, sizeof(dmq1)-1, key->dmq1); \ - key->iqmp = BN_bin2bn(iqmp, sizeof(iqmp)-1, key->iqmp); \ - memcpy(c, ctext_ex, sizeof(ctext_ex) - 1); \ - return (sizeof(ctext_ex) - 1); - -static int key1(RSA *key, unsigned char *c) -{ - static unsigned char n[] = - "\x00\xAA\x36\xAB\xCE\x88\xAC\xFD\xFF\x55\x52\x3C\x7F\xC4\x52\x3F" - "\x90\xEF\xA0\x0D\xF3\x77\x4A\x25\x9F\x2E\x62\xB4\xC5\xD9\x9C\xB5" - "\xAD\xB3\x00\xA0\x28\x5E\x53\x01\x93\x0E\x0C\x70\xFB\x68\x76\x93" - "\x9C\xE6\x16\xCE\x62\x4A\x11\xE0\x08\x6D\x34\x1E\xBC\xAC\xA0\xA1" - "\xF5"; - - static unsigned char e[] = "\x11"; - - static unsigned char d[] = - "\x0A\x03\x37\x48\x62\x64\x87\x69\x5F\x5F\x30\xBC\x38\xB9\x8B\x44" - "\xC2\xCD\x2D\xFF\x43\x40\x98\xCD\x20\xD8\xA1\x38\xD0\x90\xBF\x64" - "\x79\x7C\x3F\xA7\xA2\xCD\xCB\x3C\xD1\xE0\xBD\xBA\x26\x54\xB4\xF9" - "\xDF\x8E\x8A\xE5\x9D\x73\x3D\x9F\x33\xB3\x01\x62\x4A\xFD\x1D\x51"; - - static unsigned char p[] = - "\x00\xD8\x40\xB4\x16\x66\xB4\x2E\x92\xEA\x0D\xA3\xB4\x32\x04\xB5" - "\xCF\xCE\x33\x52\x52\x4D\x04\x16\xA5\xA4\x41\xE7\x00\xAF\x46\x12" - "\x0D"; - - static unsigned char q[] = - "\x00\xC9\x7F\xB1\xF0\x27\xF4\x53\xF6\x34\x12\x33\xEA\xAA\xD1\xD9" - "\x35\x3F\x6C\x42\xD0\x88\x66\xB1\xD0\x5A\x0F\x20\x35\x02\x8B\x9D" - "\x89"; - - static unsigned char dmp1[] = - "\x59\x0B\x95\x72\xA2\xC2\xA9\xC4\x06\x05\x9D\xC2\xAB\x2F\x1D\xAF" - "\xEB\x7E\x8B\x4F\x10\xA7\x54\x9E\x8E\xED\xF5\xB4\xFC\xE0\x9E\x05"; - - static unsigned char dmq1[] = - "\x00\x8E\x3C\x05\x21\xFE\x15\xE0\xEA\x06\xA3\x6F\xF0\xF1\x0C\x99" - "\x52\xC3\x5B\x7A\x75\x14\xFD\x32\x38\xB8\x0A\xAD\x52\x98\x62\x8D" - "\x51"; - - static unsigned char iqmp[] = - "\x36\x3F\xF7\x18\x9D\xA8\xE9\x0B\x1D\x34\x1F\x71\xD0\x9B\x76\xA8" - "\xA9\x43\xE1\x1D\x10\xB2\x4D\x24\x9F\x2D\xEA\xFE\xF8\x0C\x18\x26"; - - static unsigned char ctext_ex[] = - "\x1b\x8f\x05\xf9\xca\x1a\x79\x52\x6e\x53\xf3\xcc\x51\x4f\xdb\x89" - "\x2b\xfb\x91\x93\x23\x1e\x78\xb9\x92\xe6\x8d\x50\xa4\x80\xcb\x52" - "\x33\x89\x5c\x74\x95\x8d\x5d\x02\xab\x8c\x0f\xd0\x40\xeb\x58\x44" - "\xb0\x05\xc3\x9e\xd8\x27\x4a\x9d\xbf\xa8\x06\x71\x40\x94\x39\xd2"; - - SetKey; -} - -static int key2(RSA *key, unsigned char *c) -{ - static unsigned char n[] = - "\x00\xA3\x07\x9A\x90\xDF\x0D\xFD\x72\xAC\x09\x0C\xCC\x2A\x78\xB8" - "\x74\x13\x13\x3E\x40\x75\x9C\x98\xFA\xF8\x20\x4F\x35\x8A\x0B\x26" - "\x3C\x67\x70\xE7\x83\xA9\x3B\x69\x71\xB7\x37\x79\xD2\x71\x7B\xE8" - "\x34\x77\xCF"; - - static unsigned char e[] = "\x3"; - - static unsigned char d[] = - "\x6C\xAF\xBC\x60\x94\xB3\xFE\x4C\x72\xB0\xB3\x32\xC6\xFB\x25\xA2" - "\xB7\x62\x29\x80\x4E\x68\x65\xFC\xA4\x5A\x74\xDF\x0F\x8F\xB8\x41" - "\x3B\x52\xC0\xD0\xE5\x3D\x9B\x59\x0F\xF1\x9B\xE7\x9F\x49\xDD\x21" - "\xE5\xEB"; - - static unsigned char p[] = - "\x00\xCF\x20\x35\x02\x8B\x9D\x86\x98\x40\xB4\x16\x66\xB4\x2E\x92" - "\xEA\x0D\xA3\xB4\x32\x04\xB5\xCF\xCE\x91"; - - static unsigned char q[] = - "\x00\xC9\x7F\xB1\xF0\x27\xF4\x53\xF6\x34\x12\x33\xEA\xAA\xD1\xD9" - "\x35\x3F\x6C\x42\xD0\x88\x66\xB1\xD0\x5F"; - - static unsigned char dmp1[] = - "\x00\x8A\x15\x78\xAC\x5D\x13\xAF\x10\x2B\x22\xB9\x99\xCD\x74\x61" - "\xF1\x5E\x6D\x22\xCC\x03\x23\xDF\xDF\x0B"; - - static unsigned char dmq1[] = - "\x00\x86\x55\x21\x4A\xC5\x4D\x8D\x4E\xCD\x61\x77\xF1\xC7\x36\x90" - "\xCE\x2A\x48\x2C\x8B\x05\x99\xCB\xE0\x3F"; - - static unsigned char iqmp[] = - "\x00\x83\xEF\xEF\xB8\xA9\xA4\x0D\x1D\xB6\xED\x98\xAD\x84\xED\x13" - "\x35\xDC\xC1\x08\xF3\x22\xD0\x57\xCF\x8D"; - - static unsigned char ctext_ex[] = - "\x14\xbd\xdd\x28\xc9\x83\x35\x19\x23\x80\xe8\xe5\x49\xb1\x58\x2a" - "\x8b\x40\xb4\x48\x6d\x03\xa6\xa5\x31\x1f\x1f\xd5\xf0\xa1\x80\xe4" - "\x17\x53\x03\x29\xa9\x34\x90\x74\xb1\x52\x13\x54\x29\x08\x24\x52" - "\x62\x51"; - - SetKey; -} - -static int key3(RSA *key, unsigned char *c) -{ - static unsigned char n[] = - "\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71" - "\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5" - "\x1F\xB8\xDF\xBA\xAF\x03\x5C\x02\xAB\x61\xEA\x48\xCE\xEB\x6F\xCD" - "\x48\x76\xED\x52\x0D\x60\xE1\xEC\x46\x19\x71\x9D\x8A\x5B\x8B\x80" - "\x7F\xAF\xB8\xE0\xA3\xDF\xC7\x37\x72\x3E\xE6\xB4\xB7\xD9\x3A\x25" - "\x84\xEE\x6A\x64\x9D\x06\x09\x53\x74\x88\x34\xB2\x45\x45\x98\x39" - "\x4E\xE0\xAA\xB1\x2D\x7B\x61\xA5\x1F\x52\x7A\x9A\x41\xF6\xC1\x68" - "\x7F\xE2\x53\x72\x98\xCA\x2A\x8F\x59\x46\xF8\xE5\xFD\x09\x1D\xBD" - "\xCB"; - - static unsigned char e[] = "\x11"; - - static unsigned char d[] = - "\x00\xA5\xDA\xFC\x53\x41\xFA\xF2\x89\xC4\xB9\x88\xDB\x30\xC1\xCD" - "\xF8\x3F\x31\x25\x1E\x06\x68\xB4\x27\x84\x81\x38\x01\x57\x96\x41" - "\xB2\x94\x10\xB3\xC7\x99\x8D\x6B\xC4\x65\x74\x5E\x5C\x39\x26\x69" - "\xD6\x87\x0D\xA2\xC0\x82\xA9\x39\xE3\x7F\xDC\xB8\x2E\xC9\x3E\xDA" - "\xC9\x7F\xF3\xAD\x59\x50\xAC\xCF\xBC\x11\x1C\x76\xF1\xA9\x52\x94" - "\x44\xE5\x6A\xAF\x68\xC5\x6C\x09\x2C\xD3\x8D\xC3\xBE\xF5\xD2\x0A" - "\x93\x99\x26\xED\x4F\x74\xA1\x3E\xDD\xFB\xE1\xA1\xCE\xCC\x48\x94" - "\xAF\x94\x28\xC2\xB7\xB8\x88\x3F\xE4\x46\x3A\x4B\xC8\x5B\x1C\xB3" - "\xC1"; - - static unsigned char p[] = - "\x00\xEE\xCF\xAE\x81\xB1\xB9\xB3\xC9\x08\x81\x0B\x10\xA1\xB5\x60" - "\x01\x99\xEB\x9F\x44\xAE\xF4\xFD\xA4\x93\xB8\x1A\x9E\x3D\x84\xF6" - "\x32\x12\x4E\xF0\x23\x6E\x5D\x1E\x3B\x7E\x28\xFA\xE7\xAA\x04\x0A" - "\x2D\x5B\x25\x21\x76\x45\x9D\x1F\x39\x75\x41\xBA\x2A\x58\xFB\x65" - "\x99"; - - static unsigned char q[] = - "\x00\xC9\x7F\xB1\xF0\x27\xF4\x53\xF6\x34\x12\x33\xEA\xAA\xD1\xD9" - "\x35\x3F\x6C\x42\xD0\x88\x66\xB1\xD0\x5A\x0F\x20\x35\x02\x8B\x9D" - "\x86\x98\x40\xB4\x16\x66\xB4\x2E\x92\xEA\x0D\xA3\xB4\x32\x04\xB5" - "\xCF\xCE\x33\x52\x52\x4D\x04\x16\xA5\xA4\x41\xE7\x00\xAF\x46\x15" - "\x03"; - - static unsigned char dmp1[] = - "\x54\x49\x4C\xA6\x3E\xBA\x03\x37\xE4\xE2\x40\x23\xFC\xD6\x9A\x5A" - "\xEB\x07\xDD\xDC\x01\x83\xA4\xD0\xAC\x9B\x54\xB0\x51\xF2\xB1\x3E" - "\xD9\x49\x09\x75\xEA\xB7\x74\x14\xFF\x59\xC1\xF7\x69\x2E\x9A\x2E" - "\x20\x2B\x38\xFC\x91\x0A\x47\x41\x74\xAD\xC9\x3C\x1F\x67\xC9\x81"; - - static unsigned char dmq1[] = - "\x47\x1E\x02\x90\xFF\x0A\xF0\x75\x03\x51\xB7\xF8\x78\x86\x4C\xA9" - "\x61\xAD\xBD\x3A\x8A\x7E\x99\x1C\x5C\x05\x56\xA9\x4C\x31\x46\xA7" - "\xF9\x80\x3F\x8F\x6F\x8A\xE3\x42\xE9\x31\xFD\x8A\xE4\x7A\x22\x0D" - "\x1B\x99\xA4\x95\x84\x98\x07\xFE\x39\xF9\x24\x5A\x98\x36\xDA\x3D"; - - static unsigned char iqmp[] = - "\x00\xB0\x6C\x4F\xDA\xBB\x63\x01\x19\x8D\x26\x5B\xDB\xAE\x94\x23" - "\xB3\x80\xF2\x71\xF7\x34\x53\x88\x50\x93\x07\x7F\xCD\x39\xE2\x11" - "\x9F\xC9\x86\x32\x15\x4F\x58\x83\xB1\x67\xA9\x67\xBF\x40\x2B\x4E" - "\x9E\x2E\x0F\x96\x56\xE6\x98\xEA\x36\x66\xED\xFB\x25\x79\x80\x39" - "\xF7"; - - static unsigned char ctext_ex[] = - "\xb8\x24\x6b\x56\xa6\xed\x58\x81\xae\xb5\x85\xd9\xa2\x5b\x2a\xd7" - "\x90\xc4\x17\xe0\x80\x68\x1b\xf1\xac\x2b\xc3\xde\xb6\x9d\x8b\xce" - "\xf0\xc4\x36\x6f\xec\x40\x0a\xf0\x52\xa7\x2e\x9b\x0e\xff\xb5\xb3" - "\xf2\xf1\x92\xdb\xea\xca\x03\xc1\x27\x40\x05\x71\x13\xbf\x1f\x06" - "\x69\xac\x22\xe9\xf3\xa7\x85\x2e\x3c\x15\xd9\x13\xca\xb0\xb8\x86" - "\x3a\x95\xc9\x92\x94\xce\x86\x74\x21\x49\x54\x61\x03\x46\xf4\xd4" - "\x74\xb2\x6f\x7c\x48\xb4\x2e\xe6\x8e\x1f\x57\x2a\x1f\xc4\x02\x6a" - "\xc4\x56\xb4\xf5\x9f\x7b\x62\x1e\xa1\xb9\xd8\x8f\x64\x20\x2f\xb1"; - - SetKey; -} - -static int pad_unknown(void) -{ - unsigned long l; - while ((l = ERR_get_error()) != 0) - if (ERR_GET_REASON(l) == RSA_R_UNKNOWN_PADDING_TYPE) - return (1); - return (0); -} - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -int main(int argc, char *argv[]) -{ - int err = 0; - int v; - RSA *key; - unsigned char ptext[256]; - unsigned char ctext[256]; - static unsigned char ptext_ex[] = "\x54\x85\x9b\x34\x2c\x49\xea\x2a"; - unsigned char ctext_ex[256]; - int plen; - int clen = 0; - int num; - int n; - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - RAND_seed(rnd_seed, sizeof rnd_seed); /* or OAEP may fail */ - - plen = sizeof(ptext_ex) - 1; - - for (v = 0; v < 6; v++) { - key = RSA_new(); - switch (v % 3) { - case 0: - clen = key1(key, ctext_ex); - break; - case 1: - clen = key2(key, ctext_ex); - break; - case 2: - clen = key3(key, ctext_ex); - break; - } - if (v / 3 >= 1) - key->flags |= RSA_FLAG_NO_CONSTTIME; - - num = RSA_public_encrypt(plen, ptext_ex, ctext, key, - RSA_PKCS1_PADDING); - if (num != clen) { - printf("PKCS#1 v1.5 encryption failed!\n"); - err = 1; - goto oaep; - } - - num = RSA_private_decrypt(num, ctext, ptext, key, RSA_PKCS1_PADDING); - if (num != plen || memcmp(ptext, ptext_ex, num) != 0) { - printf("PKCS#1 v1.5 decryption failed!\n"); - err = 1; - } else - printf("PKCS #1 v1.5 encryption/decryption ok\n"); - - oaep: - ERR_clear_error(); - num = RSA_public_encrypt(plen, ptext_ex, ctext, key, - RSA_PKCS1_OAEP_PADDING); - if (num == -1 && pad_unknown()) { - printf("No OAEP support\n"); - goto next; - } - if (num != clen) { - printf("OAEP encryption failed!\n"); - err = 1; - goto next; - } - - num = RSA_private_decrypt(num, ctext, ptext, key, - RSA_PKCS1_OAEP_PADDING); - if (num != plen || memcmp(ptext, ptext_ex, num) != 0) { - printf("OAEP decryption (encrypted data) failed!\n"); - err = 1; - } else if (memcmp(ctext, ctext_ex, num) == 0) - printf("OAEP test vector %d passed!\n", v); - - /* - * Different ciphertexts (rsa_oaep.c without -DPKCS_TESTVECT). Try - * decrypting ctext_ex - */ - - num = RSA_private_decrypt(clen, ctext_ex, ptext, key, - RSA_PKCS1_OAEP_PADDING); - - if (num != plen || memcmp(ptext, ptext_ex, num) != 0) { - printf("OAEP decryption (test vector data) failed!\n"); - err = 1; - } else - printf("OAEP encryption/decryption ok\n"); - - /* Try decrypting corrupted ciphertexts. */ - for (n = 0; n < clen; ++n) { - ctext[n] ^= 1; - num = RSA_private_decrypt(clen, ctext, ptext, key, - RSA_PKCS1_OAEP_PADDING); - if (num > 0) { - printf("Corrupt data decrypted!\n"); - err = 1; - break; - } - ctext[n] ^= 1; - } - - /* Test truncated ciphertexts, as well as negative length. */ - for (n = -1; n < clen; ++n) { - num = RSA_private_decrypt(n, ctext, ptext, key, - RSA_PKCS1_OAEP_PADDING); - if (num > 0) { - printf("Truncated data decrypted!\n"); - err = 1; - break; - } - } - - next: - RSA_free(key); - } - - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - - CRYPTO_mem_leaks_fp(stderr); - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - return err; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/s390xcpuid.S b/drivers/builtin_openssl2/crypto/s390xcpuid.S deleted file mode 100644 index 06815347e6..0000000000 --- a/drivers/builtin_openssl2/crypto/s390xcpuid.S +++ /dev/null @@ -1,99 +0,0 @@ -.text - -.globl OPENSSL_s390x_facilities -.type OPENSSL_s390x_facilities,@function -.align 16 -OPENSSL_s390x_facilities: - lghi %r0,0 - larl %r2,OPENSSL_s390xcap_P - stg %r0,8(%r2) - .long 0xb2b02000 # stfle 0(%r2) - brc 8,.Ldone - lghi %r0,1 - .long 0xb2b02000 # stfle 0(%r2) -.Ldone: - lg %r2,0(%r2) - br %r14 -.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities - -.globl OPENSSL_rdtsc -.type OPENSSL_rdtsc,@function -.align 16 -OPENSSL_rdtsc: - stck 16(%r15) - lg %r2,16(%r15) - br %r14 -.size OPENSSL_rdtsc,.-OPENSSL_rdtsc - -.globl OPENSSL_atomic_add -.type OPENSSL_atomic_add,@function -.align 16 -OPENSSL_atomic_add: - l %r1,0(%r2) -.Lspin: lr %r0,%r1 - ar %r0,%r3 - cs %r1,%r0,0(%r2) - brc 4,.Lspin - lgfr %r2,%r0 # OpenSSL expects the new value - br %r14 -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add - -.globl OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,@function -.align 16 -OPENSSL_wipe_cpu: - xgr %r0,%r0 - xgr %r1,%r1 - lgr %r2,%r15 - xgr %r3,%r3 - xgr %r4,%r4 - lzdr %f0 - lzdr %f1 - lzdr %f2 - lzdr %f3 - lzdr %f4 - lzdr %f5 - lzdr %f6 - lzdr %f7 - br %r14 -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu - -.globl OPENSSL_cleanse -.type OPENSSL_cleanse,@function -.align 16 -OPENSSL_cleanse: -#if !defined(__s390x__) && !defined(__s390x) - llgfr %r3,%r3 -#endif - lghi %r4,15 - lghi %r0,0 - clgr %r3,%r4 - jh .Lot - clgr %r3,%r0 - bcr 8,%r14 -.Little: - stc %r0,0(%r2) - la %r2,1(%r2) - brctg %r3,.Little - br %r14 -.align 4 -.Lot: tmll %r2,7 - jz .Laligned - stc %r0,0(%r2) - la %r2,1(%r2) - brctg %r3,.Lot -.Laligned: - srlg %r4,%r3,3 -.Loop: stg %r0,0(%r2) - la %r2,8(%r2) - brctg %r4,.Loop - lghi %r4,7 - ngr %r3,%r4 - jnz .Little - br %r14 -.size OPENSSL_cleanse,.-OPENSSL_cleanse - -.section .init - brasl %r14,OPENSSL_cpuid_setup - -.comm OPENSSL_s390xcap_P,16,8 diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-586.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-586.pl deleted file mode 100644 index 2b119ffa46..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-586.pl +++ /dev/null @@ -1,1229 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# [Re]written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# "[Re]written" was achieved in two major overhauls. In 2004 BODY_* -# functions were re-implemented to address P4 performance issue [see -# commentary below], and in 2006 the rest was rewritten in order to -# gain freedom to liberate licensing terms. - -# January, September 2004. -# -# It was noted that Intel IA-32 C compiler generates code which -# performs ~30% *faster* on P4 CPU than original *hand-coded* -# SHA1 assembler implementation. To address this problem (and -# prove that humans are still better than machines:-), the -# original code was overhauled, which resulted in following -# performance changes: -# -# compared with original compared with Intel cc -# assembler impl. generated code -# Pentium -16% +48% -# PIII/AMD +8% +16% -# P4 +85%(!) +45% -# -# As you can see Pentium came out as looser:-( Yet I reckoned that -# improvement on P4 outweights the loss and incorporate this -# re-tuned code to 0.9.7 and later. -# ---------------------------------------------------------------- -# - -# August 2009. -# -# George Spelvin has tipped that F_40_59(b,c,d) can be rewritten as -# '(c&d) + (b&(c^d))', which allows to accumulate partial results -# and lighten "pressure" on scratch registers. This resulted in -# >12% performance improvement on contemporary AMD cores (with no -# degradation on other CPUs:-). Also, the code was revised to maximize -# "distance" between instructions producing input to 'lea' instruction -# and the 'lea' instruction itself, which is essential for Intel Atom -# core and resulted in ~15% improvement. - -# October 2010. -# -# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it -# is to offload message schedule denoted by Wt in NIST specification, -# or Xupdate in OpenSSL source, to SIMD unit. The idea is not novel, -# and in SSE2 context was first explored by Dean Gaudet in 2004, see -# http://arctic.org/~dean/crypto/sha1.html. Since then several things -# have changed that made it interesting again: -# -# a) XMM units became faster and wider; -# b) instruction set became more versatile; -# c) an important observation was made by Max Locktykhin, which made -# it possible to reduce amount of instructions required to perform -# the operation in question, for further details see -# http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/. - -# April 2011. -# -# Add AVX code path, probably most controversial... The thing is that -# switch to AVX alone improves performance by as little as 4% in -# comparison to SSSE3 code path. But below result doesn't look like -# 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as -# pair of µ-ops, and it's the additional µ-ops, two per round, that -# make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded -# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with -# equivalent 'sh[rl]d' that is responsible for the impressive 5.1 -# cycles per processed byte. But 'sh[rl]d' is not something that used -# to be fast, nor does it appear to be fast in upcoming Bulldozer -# [according to its optimization manual]. Which is why AVX code path -# is guarded by *both* AVX and synthetic bit denoting Intel CPUs. -# One can argue that it's unfair to AMD, but without 'sh[rl]d' it -# makes no sense to keep the AVX code path. If somebody feels that -# strongly, it's probably more appropriate to discuss possibility of -# using vector rotate XOP on AMD... - -###################################################################### -# Current performance is summarized in following table. Numbers are -# CPU clock cycles spent to process single byte (less is better). -# -# x86 SSSE3 AVX -# Pentium 15.7 - -# PIII 11.5 - -# P4 10.6 - -# AMD K8 7.1 - -# Core2 7.3 6.1/+20% - -# Atom 12.5 9.5(*)/+32% - -# Westmere 7.3 5.6/+30% - -# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+70% -# -# (*) Loop is 1056 instructions long and expected result is ~8.25. -# It remains mystery [to me] why ILP is limited to 1.7. -# -# (**) As per above comment, the result is for AVX *plus* sh[rl]d. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386"); - -$xmm=$ymm=0; -for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); } - -$ymm=1 if ($xmm && - `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` - =~ /GNU assembler version ([2-9]\.[0-9]+)/ && - $1>=2.19); # first version supporting AVX - -$ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32n" && - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && - $1>=2.03); # first version supporting AVX - -&external_label("OPENSSL_ia32cap_P") if ($xmm); - - -$A="eax"; -$B="ebx"; -$C="ecx"; -$D="edx"; -$E="edi"; -$T="esi"; -$tmp1="ebp"; - -@V=($A,$B,$C,$D,$E,$T); - -$alt=0; # 1 denotes alternative IALU implementation, which performs - # 8% *worse* on P4, same on Westmere and Atom, 2% better on - # Sandy Bridge... - -sub BODY_00_15 - { - local($n,$a,$b,$c,$d,$e,$f)=@_; - - &comment("00_15 $n"); - - &mov($f,$c); # f to hold F_00_19(b,c,d) - if ($n==0) { &mov($tmp1,$a); } - else { &mov($a,$tmp1); } - &rotl($tmp1,5); # tmp1=ROTATE(a,5) - &xor($f,$d); - &add($tmp1,$e); # tmp1+=e; - &mov($e,&swtmp($n%16)); # e becomes volatile and is loaded - # with xi, also note that e becomes - # f in next round... - &and($f,$b); - &rotr($b,2); # b=ROTATE(b,30) - &xor($f,$d); # f holds F_00_19(b,c,d) - &lea($tmp1,&DWP(0x5a827999,$tmp1,$e)); # tmp1+=K_00_19+xi - - if ($n==15) { &mov($e,&swtmp(($n+1)%16));# pre-fetch f for next round - &add($f,$tmp1); } # f+=tmp1 - else { &add($tmp1,$f); } # f becomes a in next round - &mov($tmp1,$a) if ($alt && $n==15); - } - -sub BODY_16_19 - { - local($n,$a,$b,$c,$d,$e,$f)=@_; - - &comment("16_19 $n"); - -if ($alt) { - &xor($c,$d); - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &and($tmp1,$c); # tmp1 to hold F_00_19(b,c,d), b&=c^d - &xor($f,&swtmp(($n+8)%16)); - &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &add($e,$tmp1); # e+=F_00_19(b,c,d) - &xor($c,$d); # restore $c - &mov($tmp1,$a); # b in next round - &rotr($b,$n==16?2:7); # b=ROTATE(b,30) - &mov(&swtmp($n%16),$f); # xi=f - &rotl($a,5); # ROTATE(a,5) - &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e - &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round - &add($f,$a); # f+=ROTATE(a,5) -} else { - &mov($tmp1,$c); # tmp1 to hold F_00_19(b,c,d) - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &xor($tmp1,$d); - &xor($f,&swtmp(($n+8)%16)); - &and($tmp1,$b); - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &xor($tmp1,$d); # tmp1=F_00_19(b,c,d) - &add($e,$tmp1); # e+=F_00_19(b,c,d) - &mov($tmp1,$a); - &rotr($b,2); # b=ROTATE(b,30) - &mov(&swtmp($n%16),$f); # xi=f - &rotl($tmp1,5); # ROTATE(a,5) - &lea($f,&DWP(0x5a827999,$f,$e));# f+=F_00_19(b,c,d)+e - &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round - &add($f,$tmp1); # f+=ROTATE(a,5) -} - } - -sub BODY_20_39 - { - local($n,$a,$b,$c,$d,$e,$f)=@_; - local $K=($n<40)?0x6ed9eba1:0xca62c1d6; - - &comment("20_39 $n"); - -if ($alt) { - &xor($tmp1,$c); # tmp1 to hold F_20_39(b,c,d), b^=c - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) - &xor($f,&swtmp(($n+8)%16)); - &add($e,$tmp1); # e+=F_20_39(b,c,d) - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &mov($tmp1,$a); # b in next round - &rotr($b,7); # b=ROTATE(b,30) - &mov(&swtmp($n%16),$f) if($n<77);# xi=f - &rotl($a,5); # ROTATE(a,5) - &xor($b,$c) if($n==39);# warm up for BODY_40_59 - &and($tmp1,$b) if($n==39); - &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY - &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round - &add($f,$a); # f+=ROTATE(a,5) - &rotr($a,5) if ($n==79); -} else { - &mov($tmp1,$b); # tmp1 to hold F_20_39(b,c,d) - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &xor($tmp1,$c); - &xor($f,&swtmp(($n+8)%16)); - &xor($tmp1,$d); # tmp1 holds F_20_39(b,c,d) - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &add($e,$tmp1); # e+=F_20_39(b,c,d) - &rotr($b,2); # b=ROTATE(b,30) - &mov($tmp1,$a); - &rotl($tmp1,5); # ROTATE(a,5) - &mov(&swtmp($n%16),$f) if($n<77);# xi=f - &lea($f,&DWP($K,$f,$e)); # f+=e+K_XX_YY - &mov($e,&swtmp(($n+1)%16)) if($n<79);# pre-fetch f for next round - &add($f,$tmp1); # f+=ROTATE(a,5) -} - } - -sub BODY_40_59 - { - local($n,$a,$b,$c,$d,$e,$f)=@_; - - &comment("40_59 $n"); - -if ($alt) { - &add($e,$tmp1); # e+=b&(c^d) - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &mov($tmp1,$d); - &xor($f,&swtmp(($n+8)%16)); - &xor($c,$d); # restore $c - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &and($tmp1,$c); - &rotr($b,7); # b=ROTATE(b,30) - &add($e,$tmp1); # e+=c&d - &mov($tmp1,$a); # b in next round - &mov(&swtmp($n%16),$f); # xi=f - &rotl($a,5); # ROTATE(a,5) - &xor($b,$c) if ($n<59); - &and($tmp1,$b) if ($n<59);# tmp1 to hold F_40_59(b,c,d) - &lea($f,&DWP(0x8f1bbcdc,$f,$e));# f+=K_40_59+e+(b&(c^d)) - &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round - &add($f,$a); # f+=ROTATE(a,5) -} else { - &mov($tmp1,$c); # tmp1 to hold F_40_59(b,c,d) - &xor($f,&swtmp(($n+2)%16)); # f to hold Xupdate(xi,xa,xb,xc,xd) - &xor($tmp1,$d); - &xor($f,&swtmp(($n+8)%16)); - &and($tmp1,$b); - &xor($f,&swtmp(($n+13)%16)); # f holds xa^xb^xc^xd - &rotl($f,1); # f=ROTATE(f,1) - &add($tmp1,$e); # b&(c^d)+=e - &rotr($b,2); # b=ROTATE(b,30) - &mov($e,$a); # e becomes volatile - &rotl($e,5); # ROTATE(a,5) - &mov(&swtmp($n%16),$f); # xi=f - &lea($f,&DWP(0x8f1bbcdc,$f,$tmp1));# f+=K_40_59+e+(b&(c^d)) - &mov($tmp1,$c); - &add($f,$e); # f+=ROTATE(a,5) - &and($tmp1,$d); - &mov($e,&swtmp(($n+1)%16)); # pre-fetch f for next round - &add($f,$tmp1); # f+=c&d -} - } - -&function_begin("sha1_block_data_order"); -if ($xmm) { - &static_label("ssse3_shortcut"); - &static_label("avx_shortcut") if ($ymm); - &static_label("K_XX_XX"); - - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); - - &mov ($A,&DWP(0,$T)); - &mov ($D,&DWP(4,$T)); - &test ($D,1<<9); # check SSSE3 bit - &jz (&label("x86")); - &test ($A,1<<24); # check FXSR bit - &jz (&label("x86")); - if ($ymm) { - &and ($D,1<<28); # mask AVX bit - &and ($A,1<<30); # mask "Intel CPU" bit - &or ($A,$D); - &cmp ($A,1<<28|1<<30); - &je (&label("avx_shortcut")); - } - &jmp (&label("ssse3_shortcut")); - &set_label("x86",16); -} - &mov($tmp1,&wparam(0)); # SHA_CTX *c - &mov($T,&wparam(1)); # const void *input - &mov($A,&wparam(2)); # size_t num - &stack_push(16+3); # allocate X[16] - &shl($A,6); - &add($A,$T); - &mov(&wparam(2),$A); # pointer beyond the end of input - &mov($E,&DWP(16,$tmp1));# pre-load E - &jmp(&label("loop")); - -&set_label("loop",16); - - # copy input chunk to X, but reversing byte order! - for ($i=0; $i<16; $i+=4) - { - &mov($A,&DWP(4*($i+0),$T)); - &mov($B,&DWP(4*($i+1),$T)); - &mov($C,&DWP(4*($i+2),$T)); - &mov($D,&DWP(4*($i+3),$T)); - &bswap($A); - &bswap($B); - &bswap($C); - &bswap($D); - &mov(&swtmp($i+0),$A); - &mov(&swtmp($i+1),$B); - &mov(&swtmp($i+2),$C); - &mov(&swtmp($i+3),$D); - } - &mov(&wparam(1),$T); # redundant in 1st spin - - &mov($A,&DWP(0,$tmp1)); # load SHA_CTX - &mov($B,&DWP(4,$tmp1)); - &mov($C,&DWP(8,$tmp1)); - &mov($D,&DWP(12,$tmp1)); - # E is pre-loaded - - for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } - for(;$i<20;$i++) { &BODY_16_19($i,@V); unshift(@V,pop(@V)); } - for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } - for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } - for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } - - (($V[5] eq $D) and ($V[0] eq $E)) or die; # double-check - - &mov($tmp1,&wparam(0)); # re-load SHA_CTX* - &mov($D,&wparam(1)); # D is last "T" and is discarded - - &add($E,&DWP(0,$tmp1)); # E is last "A"... - &add($T,&DWP(4,$tmp1)); - &add($A,&DWP(8,$tmp1)); - &add($B,&DWP(12,$tmp1)); - &add($C,&DWP(16,$tmp1)); - - &mov(&DWP(0,$tmp1),$E); # update SHA_CTX - &add($D,64); # advance input pointer - &mov(&DWP(4,$tmp1),$T); - &cmp($D,&wparam(2)); # have we reached the end yet? - &mov(&DWP(8,$tmp1),$A); - &mov($E,$C); # C is last "E" which needs to be "pre-loaded" - &mov(&DWP(12,$tmp1),$B); - &mov($T,$D); # input pointer - &mov(&DWP(16,$tmp1),$C); - &jb(&label("loop")); - - &stack_pop(16+3); -&function_end("sha1_block_data_order"); - -if ($xmm) { -###################################################################### -# The SSSE3 implementation. -# -# %xmm[0-7] are used as ring @X[] buffer containing quadruples of last -# 32 elements of the message schedule or Xupdate outputs. First 4 -# quadruples are simply byte-swapped input, next 4 are calculated -# according to method originally suggested by Dean Gaudet (modulo -# being implemented in SSSE3). Once 8 quadruples or 32 elements are -# collected, it switches to routine proposed by Max Locktyukhin. -# -# Calculations inevitably require temporary reqisters, and there are -# no %xmm registers left to spare. For this reason part of the ring -# buffer, X[2..4] to be specific, is offloaded to 3 quadriples ring -# buffer on the stack. Keep in mind that X[2] is alias X[-6], X[3] - -# X[-5], and X[4] - X[-4]... -# -# Another notable optimization is aggressive stack frame compression -# aiming to minimize amount of 9-byte instructions... -# -# Yet another notable optimization is "jumping" $B variable. It means -# that there is no register permanently allocated for $B value. This -# allowed to eliminate one instruction from body_20_39... -# -my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded -my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 -my @V=($A,$B,$C,$D,$E); -my $j=0; # hash round -my @T=($T,$tmp1); -my $inp; - -my $_rol=sub { &rol(@_) }; -my $_ror=sub { &ror(@_) }; - -&function_begin("_sha1_block_data_order_ssse3"); - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("ssse3_shortcut"); - - &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 - &movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39 - &movdqa (@X[5],&QWP(32,$tmp1)); # K_40_59 - &movdqa (@X[6],&QWP(48,$tmp1)); # K_60_79 - &movdqa (@X[2],&QWP(64,$tmp1)); # pbswap mask - - &mov ($E,&wparam(0)); # load argument block - &mov ($inp=@T[1],&wparam(1)); - &mov ($D,&wparam(2)); - &mov (@T[0],"esp"); - - # stack frame layout - # - # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area - # X[4]+K X[5]+K X[6]+K X[7]+K - # X[8]+K X[9]+K X[10]+K X[11]+K - # X[12]+K X[13]+K X[14]+K X[15]+K - # - # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area - # X[4] X[5] X[6] X[7] - # X[8] X[9] X[10] X[11] # even borrowed for K_00_19 - # - # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants - # K_40_59 K_40_59 K_40_59 K_40_59 - # K_60_79 K_60_79 K_60_79 K_60_79 - # K_00_19 K_00_19 K_00_19 K_00_19 - # pbswap mask - # - # +192 ctx # argument block - # +196 inp - # +200 end - # +204 esp - &sub ("esp",208); - &and ("esp",-64); - - &movdqa (&QWP(112+0,"esp"),@X[4]); # copy constants - &movdqa (&QWP(112+16,"esp"),@X[5]); - &movdqa (&QWP(112+32,"esp"),@X[6]); - &shl ($D,6); # len*64 - &movdqa (&QWP(112+48,"esp"),@X[3]); - &add ($D,$inp); # end of input - &movdqa (&QWP(112+64,"esp"),@X[2]); - &add ($inp,64); - &mov (&DWP(192+0,"esp"),$E); # save argument block - &mov (&DWP(192+4,"esp"),$inp); - &mov (&DWP(192+8,"esp"),$D); - &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp - - &mov ($A,&DWP(0,$E)); # load context - &mov ($B,&DWP(4,$E)); - &mov ($C,&DWP(8,$E)); - &mov ($D,&DWP(12,$E)); - &mov ($E,&DWP(16,$E)); - &mov (@T[0],$B); # magic seed - - &movdqu (@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3] - &movdqu (@X[-3&7],&QWP(-48,$inp)); - &movdqu (@X[-2&7],&QWP(-32,$inp)); - &movdqu (@X[-1&7],&QWP(-16,$inp)); - &pshufb (@X[-4&7],@X[2]); # byte swap - &pshufb (@X[-3&7],@X[2]); - &pshufb (@X[-2&7],@X[2]); - &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot - &pshufb (@X[-1&7],@X[2]); - &paddd (@X[-4&7],@X[3]); # add K_00_19 - &paddd (@X[-3&7],@X[3]); - &paddd (@X[-2&7],@X[3]); - &movdqa (&QWP(0,"esp"),@X[-4&7]); # X[]+K xfer to IALU - &psubd (@X[-4&7],@X[3]); # restore X[] - &movdqa (&QWP(0+16,"esp"),@X[-3&7]); - &psubd (@X[-3&7],@X[3]); - &movdqa (&QWP(0+32,"esp"),@X[-2&7]); - &psubd (@X[-2&7],@X[3]); - &movdqa (@X[0],@X[-3&7]); - &jmp (&label("loop")); - -###################################################################### -# SSE instruction sequence is first broken to groups of indepentent -# instructions, independent in respect to their inputs and shifter -# (not all architectures have more than one). Then IALU instructions -# are "knitted in" between the SSE groups. Distance is maintained for -# SSE latency of 2 in hope that it fits better upcoming AMD Bulldozer -# [which allegedly also implements SSSE3]... -# -# Temporary registers usage. X[2] is volatile at the entry and at the -# end is restored from backtrace ring buffer. X[3] is expected to -# contain current K_XX_XX constant and is used to caclulate X[-1]+K -# from previous round, it becomes volatile the moment the value is -# saved to stack for transfer to IALU. X[4] becomes volatile whenever -# X[-4] is accumulated and offloaded to backtrace ring buffer, at the -# end it is loaded with next K_XX_XX [which becomes X[3] in next -# round]... -# -sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" - &movdqa (@X[2],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - - &paddd (@X[3],@X[-1&7]); - &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - &psrldq (@X[2],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[2],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (@X[4],@X[0]); - &movdqa (@X[2],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pslldq (@X[4],12); # "X[0]"<<96, extract one dword - &paddd (@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@X[2],31); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@X[3],@X[4]); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@X[4],30); - &por (@X[0],@X[2]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - - &pslld (@X[3],2); - &pxor (@X[0],@X[4]); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@X[3]); # "X[0]"^=("X[0]"<<96)<<<2 - &movdqa (@X[1],@X[-2&7]) if ($Xi<7); - eval(shift(@insns)); - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xupdate_ssse3_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &movdqa (@X[2],@X[-1&7]) if ($Xi==8); - eval(shift(@insns)); # body_20_39 - &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@X[2],@X[-2&7],8); # compose "X[-6]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - if ($Xi%5) { - &movdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX... - } else { # ... or load next one - &movdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp")); - } - &paddd (@X[3],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pxor (@X[0],@X[2]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &movdqa (@X[2],@X[0]); - &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pslld (@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &psrld (@X[2],30); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &por (@X[0],@X[2]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - &movdqa (@X[3],@X[0]) if ($Xi<19); - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xuplast_ssse3_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &paddd (@X[3],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &mov ($inp=@T[1],&DWP(192+4,"esp")); - &cmp ($inp,&DWP(192+8,"esp")); - &je (&label("done")); - - &movdqa (@X[3],&QWP(112+48,"esp")); # K_00_19 - &movdqa (@X[2],&QWP(112+64,"esp")); # pbswap mask - &movdqu (@X[-4&7],&QWP(0,$inp)); # load input - &movdqu (@X[-3&7],&QWP(16,$inp)); - &movdqu (@X[-2&7],&QWP(32,$inp)); - &movdqu (@X[-1&7],&QWP(48,$inp)); - &add ($inp,64); - &pshufb (@X[-4&7],@X[2]); # byte swap - &mov (&DWP(192+4,"esp"),$inp); - &movdqa (&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot - - $Xi=0; -} - -sub Xloop_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &pshufb (@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &paddd (@X[($Xi-4)&7],@X[3]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (&QWP(0+16*$Xi,"esp"),@X[($Xi-4)&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - &psubd (@X[($Xi-4)&7],@X[3]); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -sub body_00_19 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer - '&xor ($c,$d);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', - '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} - -sub body_20_39 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} - -sub body_40_59 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} - -&set_label("loop",16); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - - &mov (@T[1],&DWP(192,"esp")); # update context - &add ($A,&DWP(0,@T[1])); - &add (@T[0],&DWP(4,@T[1])); # $b - &add ($C,&DWP(8,@T[1])); - &mov (&DWP(0,@T[1]),$A); - &add ($D,&DWP(12,@T[1])); - &mov (&DWP(4,@T[1]),@T[0]); - &add ($E,&DWP(16,@T[1])); - &mov (&DWP(8,@T[1]),$C); - &mov ($B,@T[0]); - &mov (&DWP(12,@T[1]),$D); - &mov (&DWP(16,@T[1]),$E); - &movdqa (@X[0],@X[-3&7]); - - &jmp (&label("loop")); - -&set_label("done",16); $j=$saved_j; @V=@saved_V; - - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - - &mov (@T[1],&DWP(192,"esp")); # update context - &add ($A,&DWP(0,@T[1])); - &mov ("esp",&DWP(192+12,"esp")); # restore %esp - &add (@T[0],&DWP(4,@T[1])); # $b - &add ($C,&DWP(8,@T[1])); - &mov (&DWP(0,@T[1]),$A); - &add ($D,&DWP(12,@T[1])); - &mov (&DWP(4,@T[1]),@T[0]); - &add ($E,&DWP(16,@T[1])); - &mov (&DWP(8,@T[1]),$C); - &mov (&DWP(12,@T[1]),$D); - &mov (&DWP(16,@T[1]),$E); - -&function_end("_sha1_block_data_order_ssse3"); - -if ($ymm) { -my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded -my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 -my @V=($A,$B,$C,$D,$E); -my $j=0; # hash round -my @T=($T,$tmp1); -my $inp; - -my $_rol=sub { &shld(@_[0],@_) }; -my $_ror=sub { &shrd(@_[0],@_) }; - -&function_begin("_sha1_block_data_order_avx"); - &call (&label("pic_point")); # make it PIC! - &set_label("pic_point"); - &blindpop($tmp1); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("avx_shortcut"); - &vzeroall(); - - &vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19 - &vmovdqa(@X[4],&QWP(16,$tmp1)); # K_20_39 - &vmovdqa(@X[5],&QWP(32,$tmp1)); # K_40_59 - &vmovdqa(@X[6],&QWP(48,$tmp1)); # K_60_79 - &vmovdqa(@X[2],&QWP(64,$tmp1)); # pbswap mask - - &mov ($E,&wparam(0)); # load argument block - &mov ($inp=@T[1],&wparam(1)); - &mov ($D,&wparam(2)); - &mov (@T[0],"esp"); - - # stack frame layout - # - # +0 X[0]+K X[1]+K X[2]+K X[3]+K # XMM->IALU xfer area - # X[4]+K X[5]+K X[6]+K X[7]+K - # X[8]+K X[9]+K X[10]+K X[11]+K - # X[12]+K X[13]+K X[14]+K X[15]+K - # - # +64 X[0] X[1] X[2] X[3] # XMM->XMM backtrace area - # X[4] X[5] X[6] X[7] - # X[8] X[9] X[10] X[11] # even borrowed for K_00_19 - # - # +112 K_20_39 K_20_39 K_20_39 K_20_39 # constants - # K_40_59 K_40_59 K_40_59 K_40_59 - # K_60_79 K_60_79 K_60_79 K_60_79 - # K_00_19 K_00_19 K_00_19 K_00_19 - # pbswap mask - # - # +192 ctx # argument block - # +196 inp - # +200 end - # +204 esp - &sub ("esp",208); - &and ("esp",-64); - - &vmovdqa(&QWP(112+0,"esp"),@X[4]); # copy constants - &vmovdqa(&QWP(112+16,"esp"),@X[5]); - &vmovdqa(&QWP(112+32,"esp"),@X[6]); - &shl ($D,6); # len*64 - &vmovdqa(&QWP(112+48,"esp"),@X[3]); - &add ($D,$inp); # end of input - &vmovdqa(&QWP(112+64,"esp"),@X[2]); - &add ($inp,64); - &mov (&DWP(192+0,"esp"),$E); # save argument block - &mov (&DWP(192+4,"esp"),$inp); - &mov (&DWP(192+8,"esp"),$D); - &mov (&DWP(192+12,"esp"),@T[0]); # save original %esp - - &mov ($A,&DWP(0,$E)); # load context - &mov ($B,&DWP(4,$E)); - &mov ($C,&DWP(8,$E)); - &mov ($D,&DWP(12,$E)); - &mov ($E,&DWP(16,$E)); - &mov (@T[0],$B); # magic seed - - &vmovdqu(@X[-4&7],&QWP(-64,$inp)); # load input to %xmm[0-3] - &vmovdqu(@X[-3&7],&QWP(-48,$inp)); - &vmovdqu(@X[-2&7],&QWP(-32,$inp)); - &vmovdqu(@X[-1&7],&QWP(-16,$inp)); - &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap - &vpshufb(@X[-3&7],@X[-3&7],@X[2]); - &vpshufb(@X[-2&7],@X[-2&7],@X[2]); - &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot - &vpshufb(@X[-1&7],@X[-1&7],@X[2]); - &vpaddd (@X[0],@X[-4&7],@X[3]); # add K_00_19 - &vpaddd (@X[1],@X[-3&7],@X[3]); - &vpaddd (@X[2],@X[-2&7],@X[3]); - &vmovdqa(&QWP(0,"esp"),@X[0]); # X[]+K xfer to IALU - &vmovdqa(&QWP(0+16,"esp"),@X[1]); - &vmovdqa(&QWP(0+32,"esp"),@X[2]); - &jmp (&label("loop")); - -sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpaddd (@X[3],@X[3],@X[-1&7]); - &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - &vpsrldq(@X[2],@X[-1&7],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[2],@X[2],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@X[2],@X[0],31); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslldq(@X[4],@X[0],12); # "X[0]"<<96, extract one dword - &vpaddd (@X[0],@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@X[3],@X[4],30); - &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslld (@X[4],@X[4],2); - &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - &vpxor (@X[0],@X[0],@X[3]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@X[4]); # "X[0]"^=("X[0]"<<96)<<<2 - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xupdate_avx_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &vpalignr(@X[2],@X[-1&7],@X[-2&7],8); # compose "X[-6]" - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - &vmovdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); - if ($Xi%5) { - &vmovdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX... - } else { # ... or load next one - &vmovdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp")); - } - &vpaddd (@X[3],@X[3],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@X[2]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpsrld (@X[2],@X[0],30); - &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpslld (@X[0],@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpor (@X[0],@X[0],@X[2]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &vmovdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if($Xi<19); # restore X[] from backtrace buffer - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] -} - -sub Xuplast_avx_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &vpaddd (@X[3],@X[3],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vmovdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &mov ($inp=@T[1],&DWP(192+4,"esp")); - &cmp ($inp,&DWP(192+8,"esp")); - &je (&label("done")); - - &vmovdqa(@X[3],&QWP(112+48,"esp")); # K_00_19 - &vmovdqa(@X[2],&QWP(112+64,"esp")); # pbswap mask - &vmovdqu(@X[-4&7],&QWP(0,$inp)); # load input - &vmovdqu(@X[-3&7],&QWP(16,$inp)); - &vmovdqu(@X[-2&7],&QWP(32,$inp)); - &vmovdqu(@X[-1&7],&QWP(48,$inp)); - &add ($inp,64); - &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap - &mov (&DWP(192+4,"esp"),$inp); - &vmovdqa(&QWP(112-16,"esp"),@X[3]); # borrow last backtrace slot - - $Xi=0; -} - -sub Xloop_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpshufb (@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@X[3]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (&QWP(0+16*$Xi,"esp"),@X[$Xi&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -&set_label("loop",16); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_32_79(\&body_00_19); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_20_39); - &Xuplast_avx_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - - &mov (@T[1],&DWP(192,"esp")); # update context - &add ($A,&DWP(0,@T[1])); - &add (@T[0],&DWP(4,@T[1])); # $b - &add ($C,&DWP(8,@T[1])); - &mov (&DWP(0,@T[1]),$A); - &add ($D,&DWP(12,@T[1])); - &mov (&DWP(4,@T[1]),@T[0]); - &add ($E,&DWP(16,@T[1])); - &mov (&DWP(8,@T[1]),$C); - &mov ($B,@T[0]); - &mov (&DWP(12,@T[1]),$D); - &mov (&DWP(16,@T[1]),$E); - - &jmp (&label("loop")); - -&set_label("done",16); $j=$saved_j; @V=@saved_V; - - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - - &vzeroall(); - - &mov (@T[1],&DWP(192,"esp")); # update context - &add ($A,&DWP(0,@T[1])); - &mov ("esp",&DWP(192+12,"esp")); # restore %esp - &add (@T[0],&DWP(4,@T[1])); # $b - &add ($C,&DWP(8,@T[1])); - &mov (&DWP(0,@T[1]),$A); - &add ($D,&DWP(12,@T[1])); - &mov (&DWP(4,@T[1]),@T[0]); - &add ($E,&DWP(16,@T[1])); - &mov (&DWP(8,@T[1]),$C); - &mov (&DWP(12,@T[1]),$D); - &mov (&DWP(16,@T[1]),$E); -&function_end("_sha1_block_data_order_avx"); -} -&set_label("K_XX_XX",64); -&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 -&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 -&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 -&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 -&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask -} -&asciz("SHA1 block transform for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-alpha.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-alpha.pl deleted file mode 100644 index 6c4b9251fd..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-alpha.pl +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA1 block procedure for Alpha. - -# On 21264 performance is 33% better than code generated by vendor -# compiler, and 75% better than GCC [3.4], and in absolute terms is -# 8.7 cycles per processed byte. Implementation features vectorized -# byte swap, but not Xupdate. - -@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", - "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); -$ctx="a0"; # $16 -$inp="a1"; -$num="a2"; -$A="a3"; -$B="a4"; # 20 -$C="a5"; -$D="t8"; -$E="t9"; @V=($A,$B,$C,$D,$E); -$t0="t10"; # 24 -$t1="t11"; -$t2="ra"; -$t3="t12"; -$K="AT"; # 28 - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i==0); - ldq_u @X[0],0+0($inp) - ldq_u @X[1],0+7($inp) -___ -$code.=<<___ if (!($i&1) && $i<14); - ldq_u @X[$i+2],($i+2)*4+0($inp) - ldq_u @X[$i+3],($i+2)*4+7($inp) -___ -$code.=<<___ if (!($i&1) && $i<15); - extql @X[$i],$inp,@X[$i] - extqh @X[$i+1],$inp,@X[$i+1] - - or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched - - srl @X[$i],24,$t0 # vectorized byte swap - srl @X[$i],8,$t2 - - sll @X[$i],8,$t3 - sll @X[$i],24,@X[$i] - zapnot $t0,0x11,$t0 - zapnot $t2,0x22,$t2 - - zapnot @X[$i],0x88,@X[$i] - or $t0,$t2,$t0 - zapnot $t3,0x44,$t3 - sll $a,5,$t1 - - or @X[$i],$t0,@X[$i] - addl $K,$e,$e - and $b,$c,$t2 - zapnot $a,0xf,$a - - or @X[$i],$t3,@X[$i] - srl $a,27,$t0 - bic $d,$b,$t3 - sll $b,30,$b - - extll @X[$i],4,@X[$i+1] # extract upper half - or $t2,$t3,$t2 - addl @X[$i],$e,$e - - addl $t1,$e,$e - srl $b,32,$t3 - zapnot @X[$i],0xf,@X[$i] - - addl $t0,$e,$e - addl $t2,$e,$e - or $t3,$b,$b -___ -$code.=<<___ if (($i&1) && $i<15); - sll $a,5,$t1 - addl $K,$e,$e - and $b,$c,$t2 - zapnot $a,0xf,$a - - srl $a,27,$t0 - addl @X[$i%16],$e,$e - bic $d,$b,$t3 - sll $b,30,$b - - or $t2,$t3,$t2 - addl $t1,$e,$e - srl $b,32,$t3 - zapnot @X[$i],0xf,@X[$i] - - addl $t0,$e,$e - addl $t2,$e,$e - or $t3,$b,$b -___ -$code.=<<___ if ($i>=15); # with forward Xupdate - sll $a,5,$t1 - addl $K,$e,$e - and $b,$c,$t2 - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] - - zapnot $a,0xf,$a - addl @X[$i%16],$e,$e - bic $d,$b,$t3 - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - - srl $a,27,$t0 - addl $t1,$e,$e - or $t2,$t3,$t2 - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - - sll $b,30,$b - addl $t0,$e,$e - srl @X[$j%16],31,$t1 - - addl $t2,$e,$e - srl $b,32,$t3 - addl @X[$j%16],@X[$j%16],@X[$j%16] - - or $t3,$b,$b - zapnot @X[$i%16],0xf,@X[$i%16] - or $t1,@X[$j%16],@X[$j%16] -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i<79); # with forward Xupdate - sll $a,5,$t1 - addl $K,$e,$e - zapnot $a,0xf,$a - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] - - sll $b,30,$t3 - addl $t1,$e,$e - xor $b,$c,$t2 - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - - srl $b,2,$b - addl @X[$i%16],$e,$e - xor $d,$t2,$t2 - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - - srl @X[$j%16],31,$t1 - addl $t2,$e,$e - srl $a,27,$t0 - addl @X[$j%16],@X[$j%16],@X[$j%16] - - or $t3,$b,$b - addl $t0,$e,$e - or $t1,@X[$j%16],@X[$j%16] -___ -$code.=<<___ if ($i<77); - zapnot @X[$i%16],0xf,@X[$i%16] -___ -$code.=<<___ if ($i==79); # with context fetch - sll $a,5,$t1 - addl $K,$e,$e - zapnot $a,0xf,$a - ldl @X[0],0($ctx) - - sll $b,30,$t3 - addl $t1,$e,$e - xor $b,$c,$t2 - ldl @X[1],4($ctx) - - srl $b,2,$b - addl @X[$i%16],$e,$e - xor $d,$t2,$t2 - ldl @X[2],8($ctx) - - srl $a,27,$t0 - addl $t2,$e,$e - ldl @X[3],12($ctx) - - or $t3,$b,$b - addl $t0,$e,$e - ldl @X[4],16($ctx) -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___; # with forward Xupdate - sll $a,5,$t1 - addl $K,$e,$e - zapnot $a,0xf,$a - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] - - srl $a,27,$t0 - and $b,$c,$t2 - and $b,$d,$t3 - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - - sll $b,30,$b - addl $t1,$e,$e - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - - srl @X[$j%16],31,$t1 - addl $t0,$e,$e - or $t2,$t3,$t2 - and $c,$d,$t3 - - or $t2,$t3,$t2 - srl $b,32,$t3 - addl @X[$i%16],$e,$e - addl @X[$j%16],@X[$j%16],@X[$j%16] - - or $t3,$b,$b - addl $t2,$e,$e - or $t1,@X[$j%16],@X[$j%16] - zapnot @X[$i%16],0xf,@X[$i%16] -___ -} - -$code=<<___; -#ifdef __linux__ -#include -#else -#include -#include -#endif - -.text - -.set noat -.set noreorder -.globl sha1_block_data_order -.align 5 -.ent sha1_block_data_order -sha1_block_data_order: - lda sp,-64(sp) - stq ra,0(sp) - stq s0,8(sp) - stq s1,16(sp) - stq s2,24(sp) - stq s3,32(sp) - stq s4,40(sp) - stq s5,48(sp) - stq fp,56(sp) - .mask 0x0400fe00,-64 - .frame sp,64,ra - .prologue 0 - - ldl $A,0($ctx) - ldl $B,4($ctx) - sll $num,6,$num - ldl $C,8($ctx) - ldl $D,12($ctx) - ldl $E,16($ctx) - addq $inp,$num,$num - -.Lloop: - .set noreorder - ldah $K,23170(zero) - zapnot $B,0xf,$B - lda $K,31129($K) # K_00_19 -___ -for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } - -$code.=<<___; - ldah $K,28378(zero) - lda $K,-5215($K) # K_20_39 -___ -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } - -$code.=<<___; - ldah $K,-28900(zero) - lda $K,-17188($K) # K_40_59 -___ -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } - -$code.=<<___; - ldah $K,-13725(zero) - lda $K,-15914($K) # K_60_79 -___ -for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } - -$code.=<<___; - addl @X[0],$A,$A - addl @X[1],$B,$B - addl @X[2],$C,$C - addl @X[3],$D,$D - addl @X[4],$E,$E - stl $A,0($ctx) - stl $B,4($ctx) - addq $inp,64,$inp - stl $C,8($ctx) - stl $D,12($ctx) - stl $E,16($ctx) - cmpult $inp,$num,$t1 - bne $t1,.Lloop - - .set noreorder - ldq ra,0(sp) - ldq s0,8(sp) - ldq s1,16(sp) - ldq s2,24(sp) - ldq s3,32(sp) - ldq s4,40(sp) - ldq s5,48(sp) - ldq fp,56(sp) - lda sp,64(sp) - ret (ra) -.end sha1_block_data_order -.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by " -.align 2 -___ -$output=shift and open STDOUT,">$output"; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-armv4-large.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-armv4-large.pl deleted file mode 100644 index 33da3e0e3c..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-armv4-large.pl +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# sha1_block procedure for ARMv4. -# -# January 2007. - -# Size/performance trade-off -# ==================================================================== -# impl size in bytes comp cycles[*] measured performance -# ==================================================================== -# thumb 304 3212 4420 -# armv4-small 392/+29% 1958/+64% 2250/+96% -# armv4-compact 740/+89% 1552/+26% 1840/+22% -# armv4-large 1420/+92% 1307/+19% 1370/+34%[***] -# full unroll ~5100/+260% ~1260/+4% ~1300/+5% -# ==================================================================== -# thumb = same as 'small' but in Thumb instructions[**] and -# with recurring code in two private functions; -# small = detached Xload/update, loops are folded; -# compact = detached Xload/update, 5x unroll; -# large = interleaved Xload/update, 5x unroll; -# full unroll = interleaved Xload/update, full unroll, estimated[!]; -# -# [*] Manually counted instructions in "grand" loop body. Measured -# performance is affected by prologue and epilogue overhead, -# i-cache availability, branch penalties, etc. -# [**] While each Thumb instruction is twice smaller, they are not as -# diverse as ARM ones: e.g., there are only two arithmetic -# instructions with 3 arguments, no [fixed] rotate, addressing -# modes are limited. As result it takes more instructions to do -# the same job in Thumb, therefore the code is never twice as -# small and always slower. -# [***] which is also ~35% better than compiler generated code. Dual- -# issue Cortex A8 core was measured to process input block in -# ~990 cycles. - -# August 2010. -# -# Rescheduling for dual-issue pipeline resulted in 13% improvement on -# Cortex A8 core and in absolute terms ~870 cycles per input block -# [or 13.6 cycles per byte]. - -# February 2011. -# -# Profiler-assisted and platform-specific optimization resulted in 10% -# improvement on Cortex A8 core and 12.2 cycles per byte. - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$ctx="r0"; -$inp="r1"; -$len="r2"; -$a="r3"; -$b="r4"; -$c="r5"; -$d="r6"; -$e="r7"; -$K="r8"; -$t0="r9"; -$t1="r10"; -$t2="r11"; -$t3="r12"; -$Xi="r14"; -@V=($a,$b,$c,$d,$e); - -sub Xupdate { -my ($a,$b,$c,$d,$e,$opt1,$opt2)=@_; -$code.=<<___; - ldr $t0,[$Xi,#15*4] - ldr $t1,[$Xi,#13*4] - ldr $t2,[$Xi,#7*4] - add $e,$K,$e,ror#2 @ E+=K_xx_xx - ldr $t3,[$Xi,#2*4] - eor $t0,$t0,$t1 - eor $t2,$t2,$t3 @ 1 cycle stall - eor $t1,$c,$d @ F_xx_xx - mov $t0,$t0,ror#31 - add $e,$e,$a,ror#27 @ E+=ROR(A,27) - eor $t0,$t0,$t2,ror#31 - str $t0,[$Xi,#-4]! - $opt1 @ F_xx_xx - $opt2 @ F_xx_xx - add $e,$e,$t0 @ E+=X[i] -___ -} - -sub BODY_00_15 { -my ($a,$b,$c,$d,$e)=@_; -$code.=<<___; -#if __ARM_ARCH__<7 - ldrb $t1,[$inp,#2] - ldrb $t0,[$inp,#3] - ldrb $t2,[$inp,#1] - add $e,$K,$e,ror#2 @ E+=K_00_19 - ldrb $t3,[$inp],#4 - orr $t0,$t0,$t1,lsl#8 - eor $t1,$c,$d @ F_xx_xx - orr $t0,$t0,$t2,lsl#16 - add $e,$e,$a,ror#27 @ E+=ROR(A,27) - orr $t0,$t0,$t3,lsl#24 -#else - ldr $t0,[$inp],#4 @ handles unaligned - add $e,$K,$e,ror#2 @ E+=K_00_19 - eor $t1,$c,$d @ F_xx_xx - add $e,$e,$a,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev $t0,$t0 @ byte swap -#endif -#endif - and $t1,$b,$t1,ror#2 - add $e,$e,$t0 @ E+=X[i] - eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) - str $t0,[$Xi,#-4]! - add $e,$e,$t1 @ E+=F_00_19(B,C,D) -___ -} - -sub BODY_16_19 { -my ($a,$b,$c,$d,$e)=@_; - &Xupdate(@_,"and $t1,$b,$t1,ror#2"); -$code.=<<___; - eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) - add $e,$e,$t1 @ E+=F_00_19(B,C,D) -___ -} - -sub BODY_20_39 { -my ($a,$b,$c,$d,$e)=@_; - &Xupdate(@_,"eor $t1,$b,$t1,ror#2"); -$code.=<<___; - add $e,$e,$t1 @ E+=F_20_39(B,C,D) -___ -} - -sub BODY_40_59 { -my ($a,$b,$c,$d,$e)=@_; - &Xupdate(@_,"and $t1,$b,$t1,ror#2","and $t2,$c,$d"); -$code.=<<___; - add $e,$e,$t1 @ E+=F_40_59(B,C,D) - add $e,$e,$t2,ror#2 -___ -} - -$code=<<___; -#include "arm_arch.h" - -.text - -.global sha1_block_data_order -.type sha1_block_data_order,%function - -.align 2 -sha1_block_data_order: - stmdb sp!,{r4-r12,lr} - add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp - ldmia $ctx,{$a,$b,$c,$d,$e} -.Lloop: - ldr $K,.LK_00_19 - mov $Xi,sp - sub sp,sp,#15*4 - mov $c,$c,ror#30 - mov $d,$d,ror#30 - mov $e,$e,ror#30 @ [6] -.L_00_15: -___ -for($i=0;$i<5;$i++) { - &BODY_00_15(@V); unshift(@V,pop(@V)); -} -$code.=<<___; - teq $Xi,sp - bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#25*4 -___ - &BODY_00_15(@V); unshift(@V,pop(@V)); - &BODY_16_19(@V); unshift(@V,pop(@V)); - &BODY_16_19(@V); unshift(@V,pop(@V)); - &BODY_16_19(@V); unshift(@V,pop(@V)); - &BODY_16_19(@V); unshift(@V,pop(@V)); -$code.=<<___; - - ldr $K,.LK_20_39 @ [+15+16*4] - cmn sp,#0 @ [+3], clear carry to denote 20_39 -.L_20_39_or_60_79: -___ -for($i=0;$i<5;$i++) { - &BODY_20_39(@V); unshift(@V,pop(@V)); -} -$code.=<<___; - teq $Xi,sp @ preserve carry - bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] - bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes - - ldr $K,.LK_40_59 - sub sp,sp,#20*4 @ [+2] -.L_40_59: -___ -for($i=0;$i<5;$i++) { - &BODY_40_59(@V); unshift(@V,pop(@V)); -} -$code.=<<___; - teq $Xi,sp - bne .L_40_59 @ [+((12+5)*5+2)*4] - - ldr $K,.LK_60_79 - sub sp,sp,#20*4 - cmp sp,#0 @ set carry to denote 60_79 - b .L_20_39_or_60_79 @ [+4], spare 300 bytes -.L_done: - add sp,sp,#80*4 @ "deallocate" stack frame - ldmia $ctx,{$K,$t0,$t1,$t2,$t3} - add $a,$K,$a - add $b,$t0,$b - add $c,$t1,$c,ror#2 - add $d,$t2,$d,ror#2 - add $e,$t3,$e,ror#2 - stmia $ctx,{$a,$b,$c,$d,$e} - teq $inp,$len - bne .Lloop @ [+18], total 1307 - -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-ia64.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-ia64.pl deleted file mode 100644 index 02d35d1614..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-ia64.pl +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# Eternal question is what's wrong with compiler generated code? The -# trick is that it's possible to reduce the number of shifts required -# to perform rotations by maintaining copy of 32-bit value in upper -# bits of 64-bit register. Just follow mux2 and shrp instructions... -# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which -# is >50% better than HP C and >2x better than gcc. - -$code=<<___; -.ident \"sha1-ia64.s, version 1.3\" -.ident \"IA-64 ISA artwork by Andy Polyakov \" -.explicit - -___ - - -if ($^O eq "hpux") { - $ADDP="addp4"; - for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } -} else { $ADDP="add"; } - -#$human=1; -if ($human) { # useful for visual code auditing... - ($A,$B,$C,$D,$E) = ("A","B","C","D","E"); - ($h0,$h1,$h2,$h3,$h4) = ("h0","h1","h2","h3","h4"); - ($K_00_19, $K_20_39, $K_40_59, $K_60_79) = - ( "K_00_19","K_20_39","K_40_59","K_60_79" ); - @X= ( "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", - "X8", "X9","X10","X11","X12","X13","X14","X15" ); -} -else { - ($A,$B,$C,$D,$E) = ("loc0","loc1","loc2","loc3","loc4"); - ($h0,$h1,$h2,$h3,$h4) = ("loc5","loc6","loc7","loc8","loc9"); - ($K_00_19, $K_20_39, $K_40_59, $K_60_79) = - ( "r14", "r15", "loc10", "loc11" ); - @X= ( "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" ); -} - -sub BODY_00_15 { -local *code=shift; -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -my $Xn=@X[$j%16]; - -$code.=<<___ if ($i==0); -{ .mmi; ld1 $X[$i]=[inp],2 // MSB - ld1 tmp2=[tmp3],2 };; -{ .mmi; ld1 tmp0=[inp],2 - ld1 tmp4=[tmp3],2 // LSB - dep $X[$i]=$X[$i],tmp2,8,8 };; -___ -if ($i<15) { - $code.=<<___; -{ .mmi; ld1 $Xn=[inp],2 // forward Xload - nop.m 0x0 - dep tmp1=tmp0,tmp4,8,8 };; -{ .mmi; ld1 tmp2=[tmp3],2 // forward Xload - and tmp4=$c,$b - dep $X[$i]=$X[$i],tmp1,16,16} //;; -{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19 - andcm tmp1=$d,$b - dep.z tmp5=$a,5,27 };; // a<<5 -{ .mmi; add $e=$e,$X[$i] // e+=Xload - or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - extr.u tmp1=$a,27,5 };; // a>>27 -{ .mmi; ld1 tmp0=[inp],2 // forward Xload - add $e=$e,tmp4 // e+=F_00_19(b,c,d) - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; ld1 tmp4=[tmp3],2 // forward Xload - or tmp5=tmp1,tmp5 // ROTATE(a,5) - mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5) - dep $Xn=$Xn,tmp2,8,8 // forward Xload - mux2 $X[$i]=$X[$i],0x44 } //;; - -___ - } -else { - $code.=<<___; -{ .mii; and tmp3=$c,$b - dep tmp1=tmp0,tmp4,8,8;; - dep $X[$i]=$X[$i],tmp1,16,16} //;; -{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19 - andcm tmp1=$d,$b - dep.z tmp5=$a,5,27 };; // a<<5 -{ .mmi; add $e=$e,$X[$i] // e+=Xupdate - or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate - xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate - nop.i 0 };; -{ .mmi; add $e=$e,tmp4 // e+=F_00_19(b,c,d) - xor $Xn=$Xn,tmp3 // forward Xupdate - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) - mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) - shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) - mux2 $X[$i]=$X[$i],0x44 };; - -___ - } -} - -sub BODY_16_19 { -local *code=shift; -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -my $Xn=@X[$j%16]; - -$code.=<<___; -{ .mib; add $e=$e,$K_00_19 // e+=K_00_19 - dep.z tmp5=$a,5,27 } // a<<5 -{ .mib; andcm tmp1=$d,$b - and tmp0=$c,$b };; -{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate - or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)|(~b&d) - extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate - xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate - nop.i 0 };; -{ .mmi; add $e=$e,tmp0 // f+=F_00_19(b,c,d) - xor $Xn=$Xn,tmp3 // forward Xupdate - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) - mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) - shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) - nop.i 0 };; - -___ -} - -sub BODY_20_39 { -local *code=shift; -my ($i,$a,$b,$c,$d,$e,$Konst)=@_; - $Konst = $K_20_39 if (!defined($Konst)); -my $j=$i+1; -my $Xn=@X[$j%16]; - -if ($i<79) { -$code.=<<___; -{ .mib; add $e=$e,$Konst // e+=K_XX_XX - dep.z tmp5=$a,5,27 } // a<<5 -{ .mib; xor tmp0=$c,$b - xor $Xn=$Xn,$X[($j+2)%16] };; // forward Xupdate -{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate - extr.u tmp1=$a,27,5 } // a>>27 -{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d - xor $Xn=$Xn,$X[($j+8)%16] };; // forward Xupdate -{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d) - xor $Xn=$Xn,$X[($j+13)%16] // forward Xupdate - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5) - mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5) - shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) - nop.i 0 };; - -___ -} -else { -$code.=<<___; -{ .mib; add $e=$e,$Konst // e+=K_60_79 - dep.z tmp5=$a,5,27 } // a<<5 -{ .mib; xor tmp0=$c,$b - add $h1=$h1,$a };; // wrap up -{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate - extr.u tmp1=$a,27,5 } // a>>27 -{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d - add $h3=$h3,$c };; // wrap up -{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d) - or tmp1=tmp1,tmp5 // ROTATE(a,5) - shrp $b=tmp6,tmp6,2 };; // b=ROTATE(b,30) ;;? -{ .mmi; add $e=$e,tmp1 // e+=ROTATE(a,5) - add tmp3=1,inp // used in unaligned codepath - add $h4=$h4,$d };; // wrap up - -___ -} -} - -sub BODY_40_59 { -local *code=shift; -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -my $Xn=@X[$j%16]; - -$code.=<<___; -{ .mib; add $e=$e,$K_40_59 // e+=K_40_59 - dep.z tmp5=$a,5,27 } // a<<5 -{ .mib; and tmp1=$c,$d - xor tmp0=$c,$d };; -{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate - add tmp5=tmp5,tmp1 // a<<5+(c&d) - extr.u tmp1=$a,27,5 } // a>>27 -{ .mmi; and tmp0=tmp0,$b - xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate - xor tmp3=$X[($j+8)%16],$X[($j+13)%16] };; // forward Xupdate -{ .mmi; add $e=$e,tmp0 // e+=b&(c^d) - add tmp5=tmp5,tmp1 // ROTATE(a,5)+(c&d) - shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30) -{ .mmi; xor $Xn=$Xn,tmp3 - mux2 tmp6=$a,0x44 };; // see b in next iteration -{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)+(c&d) - shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1) - nop.i 0x0 };; - -___ -} -sub BODY_60_79 { &BODY_20_39(@_,$K_60_79); } - -$code.=<<___; -.text - -tmp0=r8; -tmp1=r9; -tmp2=r10; -tmp3=r11; -ctx=r32; // in0 -inp=r33; // in1 - -// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num); -.global sha1_block_data_order# -.proc sha1_block_data_order# -.align 32 -sha1_block_data_order: - .prologue -{ .mmi; alloc tmp1=ar.pfs,3,14,0,0 - $ADDP tmp0=4,ctx - .save ar.lc,r3 - mov r3=ar.lc } -{ .mmi; $ADDP ctx=0,ctx - $ADDP inp=0,inp - mov r2=pr };; -tmp4=in2; -tmp5=loc12; -tmp6=loc13; - .body -{ .mlx; ld4 $h0=[ctx],8 - movl $K_00_19=0x5a827999 } -{ .mlx; ld4 $h1=[tmp0],8 - movl $K_20_39=0x6ed9eba1 };; -{ .mlx; ld4 $h2=[ctx],8 - movl $K_40_59=0x8f1bbcdc } -{ .mlx; ld4 $h3=[tmp0] - movl $K_60_79=0xca62c1d6 };; -{ .mmi; ld4 $h4=[ctx],-16 - add in2=-1,in2 // adjust num for ar.lc - mov ar.ec=1 };; -{ .mmi; nop.m 0 - add tmp3=1,inp - mov ar.lc=in2 };; // brp.loop.imp: too far - -.Ldtop: -{ .mmi; mov $A=$h0 - mov $B=$h1 - mux2 tmp6=$h1,0x44 } -{ .mmi; mov $C=$h2 - mov $D=$h3 - mov $E=$h4 };; - -___ - -{ my $i; - my @V=($A,$B,$C,$D,$E); - - for($i=0;$i<16;$i++) { &BODY_00_15(\$code,$i,@V); unshift(@V,pop(@V)); } - for(;$i<20;$i++) { &BODY_16_19(\$code,$i,@V); unshift(@V,pop(@V)); } - for(;$i<40;$i++) { &BODY_20_39(\$code,$i,@V); unshift(@V,pop(@V)); } - for(;$i<60;$i++) { &BODY_40_59(\$code,$i,@V); unshift(@V,pop(@V)); } - for(;$i<80;$i++) { &BODY_60_79(\$code,$i,@V); unshift(@V,pop(@V)); } - - (($V[0] eq $A) and ($V[4] eq $E)) or die; # double-check -} - -$code.=<<___; -{ .mmb; add $h0=$h0,$A - add $h2=$h2,$C - br.ctop.dptk.many .Ldtop };; -.Ldend: -{ .mmi; add tmp0=4,ctx - mov ar.lc=r3 };; -{ .mmi; st4 [ctx]=$h0,8 - st4 [tmp0]=$h1,8 };; -{ .mmi; st4 [ctx]=$h2,8 - st4 [tmp0]=$h3 };; -{ .mib; st4 [ctx]=$h4,-16 - mov pr=r2,0x1ffff - br.ret.sptk.many b0 };; -.endp sha1_block_data_order# -stringz "SHA1 block transform for IA64, CRYPTOGAMS by " -___ - -$output=shift and open STDOUT,">$output"; -print $code; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-mips.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-mips.pl deleted file mode 100644 index 197bc6b50e..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-mips.pl +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA1 block procedure for MIPS. - -# Performance improvement is 30% on unaligned input. The "secret" is -# to deploy lwl/lwr pair to load unaligned input. One could have -# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32- -# compatible subroutine. There is room for minor optimization on -# little-endian platforms... - -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 - -if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 - $REG_S="sd"; - $REG_L="ld"; - $PTR_SLL="dsll"; # incidentally works even on n32 - $SZREG=8; -} else { - $PTR_ADD="add"; - $PTR_SUB="sub"; - $REG_S="sw"; - $REG_L="lw"; - $PTR_SLL="sll"; - $SZREG=4; -} -# -# -# -###################################################################### - -$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); - -for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } -open STDOUT,">$output"; - -if (!defined($big_endian)) - { $big_endian=(unpack('L',pack('N',1))==1); } - -# offsets of the Most and Least Significant Bytes -$MSB=$big_endian?0:3; -$LSB=3&~$MSB; - -@X=map("\$$_",(8..23)); # a4-a7,s0-s11 - -$ctx=$a0; -$inp=$a1; -$num=$a2; -$A="\$1"; -$B="\$2"; -$C="\$3"; -$D="\$7"; -$E="\$24"; @V=($A,$B,$C,$D,$E); -$t0="\$25"; -$t1=$num; # $num is offloaded to stack -$t2="\$30"; # fp -$K="\$31"; # ra - -sub BODY_00_14 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if (!$big_endian); - srl $t0,@X[$i],24 # byte swap($i) - srl $t1,@X[$i],8 - andi $t2,@X[$i],0xFF00 - sll @X[$i],@X[$i],24 - andi $t1,0xFF00 - sll $t2,$t2,8 - or @X[$i],$t0 - or $t1,$t2 - or @X[$i],$t1 -___ -$code.=<<___; - lwl @X[$j],$j*4+$MSB($inp) - sll $t0,$a,5 # $i - addu $e,$K - lwr @X[$j],$j*4+$LSB($inp) - srl $t1,$a,27 - addu $e,$t0 - xor $t0,$c,$d - addu $e,$t1 - sll $t2,$b,30 - and $t0,$b - srl $b,$b,2 - xor $t0,$d - addu $e,@X[$i] - or $b,$t2 - addu $e,$t0 -___ -} - -sub BODY_15_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; - -$code.=<<___ if (!$big_endian && $i==15); - srl $t0,@X[$i],24 # byte swap($i) - srl $t1,@X[$i],8 - andi $t2,@X[$i],0xFF00 - sll @X[$i],@X[$i],24 - andi $t1,0xFF00 - sll $t2,$t2,8 - or @X[$i],$t0 - or @X[$i],$t1 - or @X[$i],$t2 -___ -$code.=<<___; - xor @X[$j%16],@X[($j+2)%16] - sll $t0,$a,5 # $i - addu $e,$K - srl $t1,$a,27 - addu $e,$t0 - xor @X[$j%16],@X[($j+8)%16] - xor $t0,$c,$d - addu $e,$t1 - xor @X[$j%16],@X[($j+13)%16] - sll $t2,$b,30 - and $t0,$b - srl $t1,@X[$j%16],31 - addu @X[$j%16],@X[$j%16] - srl $b,$b,2 - xor $t0,$d - or @X[$j%16],$t1 - addu $e,@X[$i%16] - or $b,$t2 - addu $e,$t0 -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i<79); - xor @X[$j%16],@X[($j+2)%16] - sll $t0,$a,5 # $i - addu $e,$K - srl $t1,$a,27 - addu $e,$t0 - xor @X[$j%16],@X[($j+8)%16] - xor $t0,$c,$d - addu $e,$t1 - xor @X[$j%16],@X[($j+13)%16] - sll $t2,$b,30 - xor $t0,$b - srl $t1,@X[$j%16],31 - addu @X[$j%16],@X[$j%16] - srl $b,$b,2 - addu $e,@X[$i%16] - or @X[$j%16],$t1 - or $b,$t2 - addu $e,$t0 -___ -$code.=<<___ if ($i==79); - lw @X[0],0($ctx) - sll $t0,$a,5 # $i - addu $e,$K - lw @X[1],4($ctx) - srl $t1,$a,27 - addu $e,$t0 - lw @X[2],8($ctx) - xor $t0,$c,$d - addu $e,$t1 - lw @X[3],12($ctx) - sll $t2,$b,30 - xor $t0,$b - lw @X[4],16($ctx) - srl $b,$b,2 - addu $e,@X[$i%16] - or $b,$t2 - addu $e,$t0 -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i<79); - xor @X[$j%16],@X[($j+2)%16] - sll $t0,$a,5 # $i - addu $e,$K - srl $t1,$a,27 - addu $e,$t0 - xor @X[$j%16],@X[($j+8)%16] - and $t0,$c,$d - addu $e,$t1 - xor @X[$j%16],@X[($j+13)%16] - sll $t2,$b,30 - addu $e,$t0 - srl $t1,@X[$j%16],31 - xor $t0,$c,$d - addu @X[$j%16],@X[$j%16] - and $t0,$b - srl $b,$b,2 - or @X[$j%16],$t1 - addu $e,@X[$i%16] - or $b,$t2 - addu $e,$t0 -___ -} - -$FRAMESIZE=16; # large enough to accomodate NUBI saved registers -$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; - -$code=<<___; -#ifdef OPENSSL_FIPSCANISTER -# include -#endif - -.text - -.set noat -.set noreorder -.align 5 -.globl sha1_block_data_order -.ent sha1_block_data_order -sha1_block_data_order: - .frame $sp,$FRAMESIZE*$SZREG,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder - $PTR_SUB $sp,$FRAMESIZE*$SZREG - $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp) - $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp) - $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp) - $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp) - $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp) - $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp) - $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp) - $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp) - $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp) - $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp) - $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp) - $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp) - $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp) - $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp) -___ -$code.=<<___; - $PTR_SLL $num,6 - $PTR_ADD $num,$inp - $REG_S $num,0($sp) - lw $A,0($ctx) - lw $B,4($ctx) - lw $C,8($ctx) - lw $D,12($ctx) - b .Loop - lw $E,16($ctx) -.align 4 -.Loop: - .set reorder - lwl @X[0],$MSB($inp) - lui $K,0x5a82 - lwr @X[0],$LSB($inp) - ori $K,0x7999 # K_00_19 -___ -for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); } -for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - lui $K,0x6ed9 - ori $K,0xeba1 # K_20_39 -___ -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - lui $K,0x8f1b - ori $K,0xbcdc # K_40_59 -___ -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - lui $K,0xca62 - ori $K,0xc1d6 # K_60_79 -___ -for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - $PTR_ADD $inp,64 - $REG_L $num,0($sp) - - addu $A,$X[0] - addu $B,$X[1] - sw $A,0($ctx) - addu $C,$X[2] - addu $D,$X[3] - sw $B,4($ctx) - addu $E,$X[4] - sw $C,8($ctx) - sw $D,12($ctx) - sw $E,16($ctx) - .set noreorder - bne $inp,$num,.Loop - nop - - .set noreorder - $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp) - $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp) - $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp) - $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp) - $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp) - $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp) - $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp) - $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp) - $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp) - $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp) - $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp) - $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp) - $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp) - $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE*$SZREG -.end sha1_block_data_order -.rdata -.asciiz "SHA1 for MIPS, CRYPTOGAMS by " -___ -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-parisc.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-parisc.pl deleted file mode 100644 index 6e5a328a6f..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-parisc.pl +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA1 block procedure for PA-RISC. - -# June 2009. -# -# On PA-7100LC performance is >30% better than gcc 3.2 generated code -# for aligned input and >50% better for unaligned. Compared to vendor -# compiler on PA-8600 it's almost 60% faster in 64-bit build and just -# few percent faster in 32-bit one (this for aligned input, data for -# unaligned input is not available). -# -# Special thanks to polarhome.com for providing HP-UX account. - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; -} else { - $LEVEL ="1.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; -} - -$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker - # [+ argument transfer] -$ctx="%r26"; # arg0 -$inp="%r25"; # arg1 -$num="%r24"; # arg2 - -$t0="%r28"; -$t1="%r29"; -$K="%r31"; - -@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", - "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0); - -@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23"); - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i<15); - addl $K,$e,$e ; $i - shd $a,$a,27,$t1 - addl @X[$i],$e,$e - and $c,$b,$t0 - addl $t1,$e,$e - andcm $d,$b,$t1 - shd $b,$b,2,$b - or $t1,$t0,$t0 - addl $t0,$e,$e -___ -$code.=<<___ if ($i>=15); # with forward Xupdate - addl $K,$e,$e ; $i - shd $a,$a,27,$t1 - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] - addl @X[$i%16],$e,$e - and $c,$b,$t0 - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - addl $t1,$e,$e - andcm $d,$b,$t1 - shd $b,$b,2,$b - or $t1,$t0,$t0 - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - add $t0,$e,$e - shd @X[$j%16],@X[$j%16],31,@X[$j%16] -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i<79); - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i - addl $K,$e,$e - shd $a,$a,27,$t1 - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - addl @X[$i%16],$e,$e - xor $b,$c,$t0 - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - addl $t1,$e,$e - shd $b,$b,2,$b - xor $d,$t0,$t0 - shd @X[$j%16],@X[$j%16],31,@X[$j%16] - addl $t0,$e,$e -___ -$code.=<<___ if ($i==79); # with context load - ldw 0($ctx),@X[0] ; $i - addl $K,$e,$e - shd $a,$a,27,$t1 - ldw 4($ctx),@X[1] - addl @X[$i%16],$e,$e - xor $b,$c,$t0 - ldw 8($ctx),@X[2] - addl $t1,$e,$e - shd $b,$b,2,$b - xor $d,$t0,$t0 - ldw 12($ctx),@X[3] - addl $t0,$e,$e - ldw 16($ctx),@X[4] -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___; - shd $a,$a,27,$t1 ; $i - addl $K,$e,$e - xor @X[($j+2)%16],@X[$j%16],@X[$j%16] - xor $d,$c,$t0 - addl @X[$i%16],$e,$e - xor @X[($j+8)%16],@X[$j%16],@X[$j%16] - and $b,$t0,$t0 - addl $t1,$e,$e - shd $b,$b,2,$b - xor @X[($j+13)%16],@X[$j%16],@X[$j%16] - addl $t0,$e,$e - and $d,$c,$t1 - shd @X[$j%16],@X[$j%16],31,@X[$j%16] - addl $t1,$e,$e -___ -} - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR -sha1_block_data_order - .PROC - .CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) - $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) - $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) - $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) - $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) - $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) - - ldw 0($ctx),$A - ldw 4($ctx),$B - ldw 8($ctx),$C - ldw 12($ctx),$D - ldw 16($ctx),$E - - extru $inp,31,2,$t0 ; t0=inp&3; - sh3addl $t0,%r0,$t0 ; t0*=8; - subi 32,$t0,$t0 ; t0=32-t0; - mtctl $t0,%cr11 ; %sar=t0; - -L\$oop - ldi 3,$t0 - andcm $inp,$t0,$t0 ; 64-bit neutral -___ - for ($i=0;$i<15;$i++) { # load input block - $code.="\tldw `4*$i`($t0),@X[$i]\n"; } -$code.=<<___; - cmpb,*= $inp,$t0,L\$aligned - ldw 60($t0),@X[15] - ldw 64($t0),@X[16] -___ - for ($i=0;$i<16;$i++) { # align input - $code.="\tvshd @X[$i],@X[$i+1],@X[$i]\n"; } -$code.=<<___; -L\$aligned - ldil L'0x5a827000,$K ; K_00_19 - ldo 0x999($K),$K -___ -for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - ldil L'0x6ed9e000,$K ; K_20_39 - ldo 0xba1($K),$K -___ - -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - ldil L'0x8f1bb000,$K ; K_40_59 - ldo 0xcdc($K),$K -___ - -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - ldil L'0xca62c000,$K ; K_60_79 - ldo 0x1d6($K),$K -___ -for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } - -$code.=<<___; - addl @X[0],$A,$A - addl @X[1],$B,$B - addl @X[2],$C,$C - addl @X[3],$D,$D - addl @X[4],$E,$E - stw $A,0($ctx) - stw $B,4($ctx) - stw $C,8($ctx) - stw $D,12($ctx) - stw $E,16($ctx) - addib,*<> -1,$num,L\$oop - ldo 64($inp),$inp - - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 - $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 - $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 - $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 - $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 - $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - .STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/,\*/,/gm if ($SIZE_T==4); -$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8); -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-ppc.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-ppc.pl deleted file mode 100755 index 2140dd2f8d..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-ppc.pl +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# I let hardware handle unaligned input(*), except on page boundaries -# (see below for details). Otherwise straightforward implementation -# with X vector in register bank. The module is big-endian [which is -# not big deal as there're no little-endian targets left around]. -# -# (*) this means that this module is inappropriate for PPC403? Does -# anybody know if pre-POWER3 can sustain unaligned load? - -# -m64 -m32 -# ---------------------------------- -# PPC970,gcc-4.0.0 +76% +59% -# Power6,xlc-7 +68% +33% - -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $LRSAVE =2*$SIZE_T; - $UCMP ="cmpld"; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $LRSAVE =$SIZE_T; - $UCMP ="cmplw"; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=24*$SIZE_T+64; -$LOCALS=6*$SIZE_T; - -$K ="r0"; -$sp ="r1"; -$toc="r2"; -$ctx="r3"; -$inp="r4"; -$num="r5"; -$t0 ="r15"; -$t1 ="r6"; - -$A ="r7"; -$B ="r8"; -$C ="r9"; -$D ="r10"; -$E ="r11"; -$T ="r12"; - -@V=($A,$B,$C,$D,$E,$T); -@X=("r16","r17","r18","r19","r20","r21","r22","r23", - "r24","r25","r26","r27","r28","r29","r30","r31"); - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e,$f)=@_; -my $j=$i+1; -$code.=<<___ if ($i==0); - lwz @X[$i],`$i*4`($inp) -___ -$code.=<<___ if ($i<15); - lwz @X[$j],`$j*4`($inp) - add $f,$K,$e - rotlwi $e,$a,5 - add $f,$f,@X[$i] - and $t0,$c,$b - add $f,$f,$e - andc $t1,$d,$b - rotlwi $b,$b,30 - or $t0,$t0,$t1 - add $f,$f,$t0 -___ -$code.=<<___ if ($i>=15); - add $f,$K,$e - rotlwi $e,$a,5 - xor @X[$j%16],@X[$j%16],@X[($j+2)%16] - add $f,$f,@X[$i%16] - and $t0,$c,$b - xor @X[$j%16],@X[$j%16],@X[($j+8)%16] - add $f,$f,$e - andc $t1,$d,$b - rotlwi $b,$b,30 - or $t0,$t0,$t1 - xor @X[$j%16],@X[$j%16],@X[($j+13)%16] - add $f,$f,$t0 - rotlwi @X[$j%16],@X[$j%16],1 -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e,$f)=@_; -my $j=$i+1; -$code.=<<___ if ($i<79); - add $f,$K,$e - rotlwi $e,$a,5 - xor @X[$j%16],@X[$j%16],@X[($j+2)%16] - add $f,$f,@X[$i%16] - xor $t0,$b,$c - xor @X[$j%16],@X[$j%16],@X[($j+8)%16] - add $f,$f,$e - rotlwi $b,$b,30 - xor $t0,$t0,$d - xor @X[$j%16],@X[$j%16],@X[($j+13)%16] - add $f,$f,$t0 - rotlwi @X[$j%16],@X[$j%16],1 -___ -$code.=<<___ if ($i==79); - add $f,$K,$e - rotlwi $e,$a,5 - lwz r16,0($ctx) - add $f,$f,@X[$i%16] - xor $t0,$b,$c - lwz r17,4($ctx) - add $f,$f,$e - rotlwi $b,$b,30 - lwz r18,8($ctx) - xor $t0,$t0,$d - lwz r19,12($ctx) - add $f,$f,$t0 - lwz r20,16($ctx) -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e,$f)=@_; -my $j=$i+1; -$code.=<<___; - add $f,$K,$e - rotlwi $e,$a,5 - xor @X[$j%16],@X[$j%16],@X[($j+2)%16] - add $f,$f,@X[$i%16] - and $t0,$b,$c - xor @X[$j%16],@X[$j%16],@X[($j+8)%16] - add $f,$f,$e - or $t1,$b,$c - rotlwi $b,$b,30 - xor @X[$j%16],@X[$j%16],@X[($j+13)%16] - and $t1,$t1,$d - or $t0,$t0,$t1 - rotlwi @X[$j%16],@X[$j%16],1 - add $f,$f,$t0 -___ -} - -$code=<<___; -.machine "any" -.text - -.globl .sha1_block_data_order -.align 4 -.sha1_block_data_order: - $STU $sp,-$FRAME($sp) - mflr r0 - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - $PUSH r0,`$FRAME+$LRSAVE`($sp) - lwz $A,0($ctx) - lwz $B,4($ctx) - lwz $C,8($ctx) - lwz $D,12($ctx) - lwz $E,16($ctx) - andi. r0,$inp,3 - bne Lunaligned -Laligned: - mtctr $num - bl Lsha1_block_private - b Ldone - -; PowerPC specification allows an implementation to be ill-behaved -; upon unaligned access which crosses page boundary. "Better safe -; than sorry" principle makes me treat it specially. But I don't -; look for particular offending word, but rather for 64-byte input -; block which crosses the boundary. Once found that block is aligned -; and hashed separately... -.align 4 -Lunaligned: - subfic $t1,$inp,4096 - andi. $t1,$t1,4095 ; distance to closest page boundary - srwi. $t1,$t1,6 ; t1/=64 - beq Lcross_page - $UCMP $num,$t1 - ble- Laligned ; didn't cross the page boundary - mtctr $t1 - subfc $num,$t1,$num - bl Lsha1_block_private -Lcross_page: - li $t1,16 - mtctr $t1 - addi r20,$sp,$LOCALS ; spot within the frame -Lmemcpy: - lbz r16,0($inp) - lbz r17,1($inp) - lbz r18,2($inp) - lbz r19,3($inp) - addi $inp,$inp,4 - stb r16,0(r20) - stb r17,1(r20) - stb r18,2(r20) - stb r19,3(r20) - addi r20,r20,4 - bdnz Lmemcpy - - $PUSH $inp,`$FRAME-$SIZE_T*18`($sp) - li $t1,1 - addi $inp,$sp,$LOCALS - mtctr $t1 - bl Lsha1_block_private - $POP $inp,`$FRAME-$SIZE_T*18`($sp) - addic. $num,$num,-1 - bne- Lunaligned - -Ldone: - $POP r0,`$FRAME+$LRSAVE`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,18,3,0 - .long 0 -___ - -# This is private block function, which uses tailored calling -# interface, namely upon entry SHA_CTX is pre-loaded to given -# registers and counter register contains amount of chunks to -# digest... -$code.=<<___; -.align 4 -Lsha1_block_private: -___ -$code.=<<___; # load K_00_19 - lis $K,0x5a82 - ori $K,$K,0x7999 -___ -for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; # load K_20_39 - lis $K,0x6ed9 - ori $K,$K,0xeba1 -___ -for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; # load K_40_59 - lis $K,0x8f1b - ori $K,$K,0xbcdc -___ -for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; # load K_60_79 - lis $K,0xca62 - ori $K,$K,0xc1d6 -___ -for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - add r16,r16,$E - add r17,r17,$T - add r18,r18,$A - add r19,r19,$B - add r20,r20,$C - stw r16,0($ctx) - mr $A,r16 - stw r17,4($ctx) - mr $B,r17 - stw r18,8($ctx) - mr $C,r18 - stw r19,12($ctx) - mr $D,r19 - stw r20,16($ctx) - mr $E,r20 - addi $inp,$inp,`16*4` - bdnz- Lsha1_block_private - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -___ -$code.=<<___; -.asciz "SHA1 block transform for PPC, CRYPTOGAMS by " -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-s390x.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-s390x.pl deleted file mode 100644 index 9193dda45e..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-s390x.pl +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA1 block procedure for s390x. - -# April 2007. -# -# Performance is >30% better than gcc 3.3 generated code. But the real -# twist is that SHA1 hardware support is detected and utilized. In -# which case performance can reach further >4.5x for larger chunks. - -# January 2009. -# -# Optimize Xupdate for amount of memory references and reschedule -# instructions to favour dual-issue z10 pipeline. On z10 hardware is -# "only" ~2.3x faster than software. - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. - -$kimdfunc=1; # magic function code for kimd instruction - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$K_00_39="%r0"; $K=$K_00_39; -$K_40_79="%r1"; -$ctx="%r2"; $prefetch="%r2"; -$inp="%r3"; -$len="%r4"; - -$A="%r5"; -$B="%r6"; -$C="%r7"; -$D="%r8"; -$E="%r9"; @V=($A,$B,$C,$D,$E); -$t0="%r10"; -$t1="%r11"; -@X=("%r12","%r13","%r14"); -$sp="%r15"; - -$stdframe=16*$SIZE_T+4*8; -$frame=$stdframe+16*4; - -sub Xupdate { -my $i=shift; - -$code.=<<___ if ($i==15); - lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up - lr $X[0],$X[2] -___ -return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle -$code.=<<___ if ($i<16); - lg $X[0],`$i*4`($inp) ### Xload($i) - rllg $X[1],$X[0],32 -___ -$code.=<<___ if ($i>=16); - xgr $X[0],$prefetch ### Xupdate($i) - lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) - xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) - xgr $X[0],$prefetch - rll $X[0],$X[0],1 - rllg $X[1],$X[0],32 - rll $X[1],$X[1],1 - rllg $X[0],$X[1],32 - lr $X[2],$X[1] # feedback -___ -$code.=<<___ if ($i<=70); - stg $X[0],`$stdframe+4*($i%16)`($sp) -___ -unshift(@X,pop(@X)); -} - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi=$X[1]; - - &Xupdate($i); -$code.=<<___; - alr $e,$K ### $i - rll $t1,$a,5 - lr $t0,$d - xr $t0,$c - alr $e,$t1 - nr $t0,$b - alr $e,$xi - xr $t0,$d - rll $b,$b,30 - alr $e,$t0 -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi=$X[1]; - - &Xupdate($i); -$code.=<<___; - alr $e,$K ### $i - rll $t1,$a,5 - lr $t0,$b - alr $e,$t1 - xr $t0,$c - alr $e,$xi - xr $t0,$d - rll $b,$b,30 - alr $e,$t0 -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi=$X[1]; - - &Xupdate($i); -$code.=<<___; - alr $e,$K ### $i - rll $t1,$a,5 - lr $t0,$b - alr $e,$t1 - or $t0,$c - lr $t1,$b - nr $t0,$d - nr $t1,$c - alr $e,$xi - or $t0,$t1 - rll $b,$b,30 - alr $e,$t0 -___ -} - -$code.=<<___; -.text -.align 64 -.type Ktable,\@object -Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 - .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0 -.size Ktable,.-Ktable -.globl sha1_block_data_order -.type sha1_block_data_order,\@function -sha1_block_data_order: -___ -$code.=<<___ if ($kimdfunc); - larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security assist - jz .Lsoftware - lghi %r0,0 - la %r1,`2*$SIZE_T`($sp) - .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,`2*$SIZE_T`($sp) - tmhh %r0,`0x8000>>$kimdfunc` - jz .Lsoftware - lghi %r0,$kimdfunc - lgr %r1,$ctx - lgr %r2,$inp - sllg %r3,$len,6 - .long 0xb93e0002 # kimd %r0,%r2 - brc 1,.-4 # pay attention to "partial completion" - br %r14 -.align 16 -.Lsoftware: -___ -$code.=<<___; - lghi %r1,-$frame - st${g} $ctx,`2*$SIZE_T`($sp) - stm${g} %r6,%r15,`6*$SIZE_T`($sp) - lgr %r0,$sp - la $sp,0(%r1,$sp) - st${g} %r0,0($sp) - - larl $t0,Ktable - llgf $A,0($ctx) - llgf $B,4($ctx) - llgf $C,8($ctx) - llgf $D,12($ctx) - llgf $E,16($ctx) - - lg $K_00_39,0($t0) - lg $K_40_79,8($t0) - -.Lloop: - rllg $K_00_39,$K_00_39,32 -___ -for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - rllg $K_00_39,$K_00_39,32 -___ -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; $K=$K_40_79; - rllg $K_40_79,$K_40_79,32 -___ -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - rllg $K_40_79,$K_40_79,32 -___ -for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - - l${g} $ctx,`$frame+2*$SIZE_T`($sp) - la $inp,64($inp) - al $A,0($ctx) - al $B,4($ctx) - al $C,8($ctx) - al $D,12($ctx) - al $E,16($ctx) - st $A,0($ctx) - st $B,4($ctx) - st $C,8($ctx) - st $D,12($ctx) - st $E,16($ctx) - brct${g} $len,.Lloop - - lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) - br %r14 -.size sha1_block_data_order,.-sha1_block_data_order -.string "SHA1 block transform for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,16,8 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9.pl deleted file mode 100644 index 5c161cecd6..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9.pl +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# Performance improvement is not really impressive on pre-T1 CPU: +8% -# over Sun C and +25% over gcc [3.3]. While on T1, a.k.a. Niagara, it -# turned to be 40% faster than 64-bit code generated by Sun C 5.8 and -# >2x than 64-bit code generated by gcc 3.4. And there is a gimmick. -# X[16] vector is packed to 8 64-bit registers and as result nothing -# is spilled on stack. In addition input data is loaded in compact -# instruction sequence, thus minimizing the window when the code is -# subject to [inter-thread] cache-thrashing hazard. The goal is to -# ensure scalability on UltraSPARC T1, or rather to avoid decay when -# amount of active threads exceeds the number of physical cores. - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } - -$output=shift; -open STDOUT,">$output"; - -@X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7"); -$rot1m="%g2"; -$tmp64="%g3"; -$Xi="%g4"; -$A="%l0"; -$B="%l1"; -$C="%l2"; -$D="%l3"; -$E="%l4"; -@V=($A,$B,$C,$D,$E); -$K_00_19="%l5"; -$K_20_39="%l6"; -$K_40_59="%l7"; -$K_60_79="%g5"; -@K=($K_00_19,$K_20_39,$K_40_59,$K_60_79); - -$ctx="%i0"; -$inp="%i1"; -$len="%i2"; -$tmp0="%i3"; -$tmp1="%i4"; -$tmp2="%i5"; - -sub BODY_00_15 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi=($i&1)?@X[($i/2)%8]:$Xi; - -$code.=<<___; - sll $a,5,$tmp0 !! $i - add @K[$i/20],$e,$e - srl $a,27,$tmp1 - add $tmp0,$e,$e - and $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - andn $d,$b,$tmp1 - srl $b,2,$b - or $tmp1,$tmp0,$tmp1 - or $tmp2,$b,$b - add $xi,$e,$e -___ -if ($i&1 && $i<15) { - $code.= - " srlx @X[(($i+1)/2)%8],32,$Xi\n"; -} -$code.=<<___; - add $tmp1,$e,$e -___ -} - -sub Xupdate { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i/2; - -if ($i&1) { -$code.=<<___; - sll $a,5,$tmp0 !! $i - add @K[$i/20],$e,$e - srl $a,27,$tmp1 -___ -} else { -$code.=<<___; - sllx @X[($j+6)%8],32,$Xi ! Xupdate($i) - xor @X[($j+1)%8],@X[$j%8],@X[$j%8] - srlx @X[($j+7)%8],32,$tmp1 - xor @X[($j+4)%8],@X[$j%8],@X[$j%8] - sll $a,5,$tmp0 !! $i - or $tmp1,$Xi,$Xi - add @K[$i/20],$e,$e !! - xor $Xi,@X[$j%8],@X[$j%8] - srlx @X[$j%8],31,$Xi - add @X[$j%8],@X[$j%8],@X[$j%8] - and $Xi,$rot1m,$Xi - andn @X[$j%8],$rot1m,@X[$j%8] - srl $a,27,$tmp1 !! - or $Xi,@X[$j%8],@X[$j%8] -___ -} -} - -sub BODY_16_19 { -my ($i,$a,$b,$c,$d,$e)=@_; - - &Xupdate(@_); - if ($i&1) { - $xi=@X[($i/2)%8]; - } else { - $xi=$Xi; - $code.="\tsrlx @X[($i/2)%8],32,$xi\n"; - } -$code.=<<___; - add $tmp0,$e,$e !! - and $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - add $xi,$e,$e - andn $d,$b,$tmp1 - srl $b,2,$b - or $tmp1,$tmp0,$tmp1 - or $tmp2,$b,$b - add $tmp1,$e,$e -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi; - &Xupdate(@_); - if ($i&1) { - $xi=@X[($i/2)%8]; - } else { - $xi=$Xi; - $code.="\tsrlx @X[($i/2)%8],32,$xi\n"; - } -$code.=<<___; - add $tmp0,$e,$e !! - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $xi,$e,$e -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $xi; - &Xupdate(@_); - if ($i&1) { - $xi=@X[($i/2)%8]; - } else { - $xi=$Xi; - $code.="\tsrlx @X[($i/2)%8],32,$xi\n"; - } -$code.=<<___; - add $tmp0,$e,$e !! - and $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - or $c,$b,$tmp1 - srl $b,2,$b - and $d,$tmp1,$tmp1 - add $xi,$e,$e - or $tmp1,$tmp0,$tmp1 - or $tmp2,$b,$b - add $tmp1,$e,$e -___ -} - -$code.=<<___ if ($bits==64); -.register %g2,#scratch -.register %g3,#scratch -___ -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 32 -.globl sha1_block_data_order -sha1_block_data_order: - save %sp,-$frame,%sp - sllx $len,6,$len - add $inp,$len,$len - - or %g0,1,$rot1m - sllx $rot1m,32,$rot1m - or $rot1m,1,$rot1m - - ld [$ctx+0],$A - ld [$ctx+4],$B - ld [$ctx+8],$C - ld [$ctx+12],$D - ld [$ctx+16],$E - andn $inp,7,$tmp0 - - sethi %hi(0x5a827999),$K_00_19 - or $K_00_19,%lo(0x5a827999),$K_00_19 - sethi %hi(0x6ed9eba1),$K_20_39 - or $K_20_39,%lo(0x6ed9eba1),$K_20_39 - sethi %hi(0x8f1bbcdc),$K_40_59 - or $K_40_59,%lo(0x8f1bbcdc),$K_40_59 - sethi %hi(0xca62c1d6),$K_60_79 - or $K_60_79,%lo(0xca62c1d6),$K_60_79 - -.Lloop: - ldx [$tmp0+0],@X[0] - ldx [$tmp0+16],@X[2] - ldx [$tmp0+32],@X[4] - ldx [$tmp0+48],@X[6] - and $inp,7,$tmp1 - ldx [$tmp0+8],@X[1] - sll $tmp1,3,$tmp1 - ldx [$tmp0+24],@X[3] - subcc %g0,$tmp1,$tmp2 ! should be 64-$tmp1, but -$tmp1 works too - ldx [$tmp0+40],@X[5] - bz,pt %icc,.Laligned - ldx [$tmp0+56],@X[7] - - sllx @X[0],$tmp1,@X[0] - ldx [$tmp0+64],$tmp64 -___ -for($i=0;$i<7;$i++) -{ $code.=<<___; - srlx @X[$i+1],$tmp2,$Xi - sllx @X[$i+1],$tmp1,@X[$i+1] - or $Xi,@X[$i],@X[$i] -___ -} -$code.=<<___; - srlx $tmp64,$tmp2,$tmp64 - or $tmp64,@X[7],@X[7] -.Laligned: - srlx @X[0],32,$Xi -___ -for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } -for (;$i<20;$i++) { &BODY_16_19($i,@V); unshift(@V,pop(@V)); } -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - - ld [$ctx+0],@X[0] - ld [$ctx+4],@X[1] - ld [$ctx+8],@X[2] - ld [$ctx+12],@X[3] - add $inp,64,$inp - ld [$ctx+16],@X[4] - cmp $inp,$len - - add $A,@X[0],$A - st $A,[$ctx+0] - add $B,@X[1],$B - st $B,[$ctx+4] - add $C,@X[2],$C - st $C,[$ctx+8] - add $D,@X[3],$D - st $D,[$ctx+12] - add $E,@X[4],$E - st $E,[$ctx+16] - - bne `$bits==64?"%xcc":"%icc"`,.Lloop - andn $inp,7,$tmp0 - - ret - restore -.type sha1_block_data_order,#function -.size sha1_block_data_order,(.-sha1_block_data_order) -.asciz "SHA1 block transform for SPARCv9, CRYPTOGAMS by " -.align 4 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9a.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9a.pl deleted file mode 100644 index e65291bbd9..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-sparcv9a.pl +++ /dev/null @@ -1,601 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# January 2009 -# -# Provided that UltraSPARC VIS instructions are pipe-lined(*) and -# pairable(*) with IALU ones, offloading of Xupdate to the UltraSPARC -# Graphic Unit would make it possible to achieve higher instruction- -# level parallelism, ILP, and thus higher performance. It should be -# explicitly noted that ILP is the keyword, and it means that this -# code would be unsuitable for cores like UltraSPARC-Tx. The idea is -# not really novel, Sun had VIS-powered implementation for a while. -# Unlike Sun's implementation this one can process multiple unaligned -# input blocks, and as such works as drop-in replacement for OpenSSL -# sha1_block_data_order. Performance improvement was measured to be -# 40% over pure IALU sha1-sparcv9.pl on UltraSPARC-IIi, but 12% on -# UltraSPARC-III. See below for discussion... -# -# The module does not present direct interest for OpenSSL, because -# it doesn't provide better performance on contemporary SPARCv9 CPUs, -# UltraSPARC-Tx and SPARC64-V[II] to be specific. Those who feel they -# absolutely must score on UltraSPARC-I-IV can simply replace -# crypto/sha/asm/sha1-sparcv9.pl with this module. -# -# (*) "Pipe-lined" means that even if it takes several cycles to -# complete, next instruction using same functional unit [but not -# depending on the result of the current instruction] can start -# execution without having to wait for the unit. "Pairable" -# means that two [or more] independent instructions can be -# issued at the very same time. - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } - -$output=shift; -open STDOUT,">$output"; - -$ctx="%i0"; -$inp="%i1"; -$len="%i2"; -$tmp0="%i3"; -$tmp1="%i4"; -$tmp2="%i5"; -$tmp3="%g5"; - -$base="%g1"; -$align="%g4"; -$Xfer="%o5"; -$nXfer=$tmp3; -$Xi="%o7"; - -$A="%l0"; -$B="%l1"; -$C="%l2"; -$D="%l3"; -$E="%l4"; -@V=($A,$B,$C,$D,$E); - -$Actx="%o0"; -$Bctx="%o1"; -$Cctx="%o2"; -$Dctx="%o3"; -$Ectx="%o4"; - -$fmul="%f32"; -$VK_00_19="%f34"; -$VK_20_39="%f36"; -$VK_40_59="%f38"; -$VK_60_79="%f40"; -@VK=($VK_00_19,$VK_20_39,$VK_40_59,$VK_60_79); -@X=("%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", - "%f8", "%f9","%f10","%f11","%f12","%f13","%f14","%f15","%f16"); - -# This is reference 2x-parallelized VIS-powered Xupdate procedure. It -# covers even K_NN_MM addition... -sub Xupdate { -my ($i)=@_; -my $K=@VK[($i+16)/20]; -my $j=($i+16)%16; - -# [ provided that GSR.alignaddr_offset is 5, $mul contains -# 0x100ULL<<32|0x100 value and K_NN_MM are pre-loaded to -# chosen registers... ] -$code.=<<___; - fxors @X[($j+13)%16],@X[$j],@X[$j] !-1/-1/-1:X[0]^=X[13] - fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14] - fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9] - fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9] - faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24 - fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1 - fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1 - ![fxors %f15,%f2,%f2] - for %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp - ![fxors %f0,%f3,%f3] !10/17/12:X[0] dependency - fpadd32 $K,@X[$j],%f20 - std %f20,[$Xfer+`4*$j`] -___ -# The numbers delimited with slash are the earliest possible dispatch -# cycles for given instruction assuming 1 cycle latency for simple VIS -# instructions, such as on UltraSPARC-I&II, 3 cycles latency, such as -# on UltraSPARC-III&IV, and 2 cycles latency(*), respectively. Being -# 2x-parallelized the procedure is "worth" 5, 8.5 or 6 ticks per SHA1 -# round. As [long as] FPU/VIS instructions are perfectly pairable with -# IALU ones, the round timing is defined by the maximum between VIS -# and IALU timings. The latter varies from round to round and averages -# out at 6.25 ticks. This means that USI&II should operate at IALU -# rate, while USIII&IV - at VIS rate. This explains why performance -# improvement varies among processors. Well, given that pure IALU -# sha1-sparcv9.pl module exhibits virtually uniform performance of -# ~9.3 cycles per SHA1 round. Timings mentioned above are theoretical -# lower limits. Real-life performance was measured to be 6.6 cycles -# per SHA1 round on USIIi and 8.3 on USIII. The latter is lower than -# half-round VIS timing, because there are 16 Xupdate-free rounds, -# which "push down" average theoretical timing to 8 cycles... - -# (*) SPARC64-V[II] was originally believed to have 2 cycles VIS -# latency. Well, it might have, but it doesn't have dedicated -# VIS-unit. Instead, VIS instructions are executed by other -# functional units, ones used here - by IALU. This doesn't -# improve effective ILP... -} - -# The reference Xupdate procedure is then "strained" over *pairs* of -# BODY_NN_MM and kind of modulo-scheduled in respect to X[n]^=X[n+13] -# and K_NN_MM addition. It's "running" 15 rounds ahead, which leaves -# plenty of room to amortize for read-after-write hazard, as well as -# to fetch and align input for the next spin. The VIS instructions are -# scheduled for latency of 2 cycles, because there are not enough IALU -# instructions to schedule for latency of 3, while scheduling for 1 -# would give no gain on USI&II anyway. - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i&~1; -my $k=($j+16+2)%16; # ahead reference -my $l=($j+16-2)%16; # behind reference -my $K=@VK[($j+16-2)/20]; - -$j=($j+16)%16; - -$code.=<<___ if (!($i&1)); - sll $a,5,$tmp0 !! $i - and $c,$b,$tmp3 - ld [$Xfer+`4*($i%16)`],$Xi - fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14] - srl $a,27,$tmp1 - add $tmp0,$e,$e - fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9] - sll $b,30,$tmp2 - add $tmp1,$e,$e - andn $d,$b,$tmp1 - add $Xi,$e,$e - fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9] - srl $b,2,$b - or $tmp1,$tmp3,$tmp1 - or $tmp2,$b,$b - add $tmp1,$e,$e - faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24 -___ -$code.=<<___ if ($i&1); - sll $a,5,$tmp0 !! $i - and $c,$b,$tmp3 - ld [$Xfer+`4*($i%16)`],$Xi - fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1 - srl $a,27,$tmp1 - add $tmp0,$e,$e - fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1 - sll $b,30,$tmp2 - add $tmp1,$e,$e - fpadd32 $K,@X[$l],%f20 ! - andn $d,$b,$tmp1 - add $Xi,$e,$e - fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13] - srl $b,2,$b - or $tmp1,$tmp3,$tmp1 - fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp - or $tmp2,$b,$b - add $tmp1,$e,$e -___ -$code.=<<___ if ($i&1 && $i>=2); - std %f20,[$Xfer+`4*$l`] ! -___ -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i&~1; -my $k=($j+16+2)%16; # ahead reference -my $l=($j+16-2)%16; # behind reference -my $K=@VK[($j+16-2)/20]; - -$j=($j+16)%16; - -$code.=<<___ if (!($i&1) && $i<64); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14] - srl $a,27,$tmp1 - add $tmp0,$e,$e - fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9] - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9] - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e - faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24 -___ -$code.=<<___ if ($i&1 && $i<64); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1 - srl $a,27,$tmp1 - add $tmp0,$e,$e - fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1 - xor $c,$b,$tmp0 - add $tmp1,$e,$e - fpadd32 $K,@X[$l],%f20 ! - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13] - srl $b,2,$b - add $tmp1,$e,$e - fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp - or $tmp2,$b,$b - add $Xi,$e,$e - std %f20,[$Xfer+`4*$l`] ! -___ -$code.=<<___ if ($i==64); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - fpadd32 $K,@X[$l],%f20 - srl $a,27,$tmp1 - add $tmp0,$e,$e - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - std %f20,[$Xfer+`4*$l`] - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e -___ -$code.=<<___ if ($i>64); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - srl $a,27,$tmp1 - add $tmp0,$e,$e - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e -___ -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i&~1; -my $k=($j+16+2)%16; # ahead reference -my $l=($j+16-2)%16; # behind reference -my $K=@VK[($j+16-2)/20]; - -$j=($j+16)%16; - -$code.=<<___ if (!($i&1)); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - fxors @X[($j+14)%16],@X[$j+1],@X[$j+1]! 0/ 0/ 0:X[1]^=X[14] - srl $a,27,$tmp1 - add $tmp0,$e,$e - fxor @X[($j+2)%16],@X[($j+8)%16],%f18! 1/ 1/ 1:Tmp=X[2,3]^X[8,9] - and $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - or $c,$b,$tmp1 - fxor %f18,@X[$j],@X[$j] ! 2/ 4/ 3:X[0,1]^=X[2,3]^X[8,9] - srl $b,2,$b - and $d,$tmp1,$tmp1 - add $Xi,$e,$e - or $tmp1,$tmp0,$tmp1 - faligndata @X[$j],@X[$j],%f18 ! 3/ 7/ 5:Tmp=X[0,1]>>>24 - or $tmp2,$b,$b - add $tmp1,$e,$e - fpadd32 @X[$j],@X[$j],@X[$j] ! 4/ 8/ 6:X[0,1]<<=1 -___ -$code.=<<___ if ($i&1); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - srl $a,27,$tmp1 - add $tmp0,$e,$e - fmul8ulx16 %f18,$fmul,%f18 ! 5/10/ 7:Tmp>>=7, Tmp&=1 - and $c,$b,$tmp0 - add $tmp1,$e,$e - fpadd32 $K,@X[$l],%f20 ! - sll $b,30,$tmp2 - or $c,$b,$tmp1 - fxors @X[($k+13)%16],@X[$k],@X[$k] !-1/-1/-1:X[0]^=X[13] - srl $b,2,$b - and $d,$tmp1,$tmp1 - fxor %f18,@X[$j],@X[$j] ! 8/14/10:X[0,1]|=Tmp - add $Xi,$e,$e - or $tmp1,$tmp0,$tmp1 - or $tmp2,$b,$b - add $tmp1,$e,$e - std %f20,[$Xfer+`4*$l`] ! -___ -} - -# If there is more data to process, then we pre-fetch the data for -# next iteration in last ten rounds... -sub BODY_70_79 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i&~1; -my $m=($i%8)*2; - -$j=($j+16)%16; - -$code.=<<___ if ($i==70); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - srl $a,27,$tmp1 - add $tmp0,$e,$e - ldd [$inp+64],@X[0] - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e - - and $inp,-64,$nXfer - inc 64,$inp - and $nXfer,255,$nXfer - alignaddr %g0,$align,%g0 - add $base,$nXfer,$nXfer -___ -$code.=<<___ if ($i==71); - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - srl $a,27,$tmp1 - add $tmp0,$e,$e - xor $c,$b,$tmp0 - add $tmp1,$e,$e - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e -___ -$code.=<<___ if ($i>=72); - faligndata @X[$m],@X[$m+2],@X[$m] - sll $a,5,$tmp0 !! $i - ld [$Xfer+`4*($i%16)`],$Xi - srl $a,27,$tmp1 - add $tmp0,$e,$e - xor $c,$b,$tmp0 - add $tmp1,$e,$e - fpadd32 $VK_00_19,@X[$m],%f20 - sll $b,30,$tmp2 - xor $d,$tmp0,$tmp1 - srl $b,2,$b - add $tmp1,$e,$e - or $tmp2,$b,$b - add $Xi,$e,$e -___ -$code.=<<___ if ($i<77); - ldd [$inp+`8*($i+1-70)`],@X[2*($i+1-70)] -___ -$code.=<<___ if ($i==77); # redundant if $inp was aligned - add $align,63,$tmp0 - and $tmp0,-8,$tmp0 - ldd [$inp+$tmp0],@X[16] -___ -$code.=<<___ if ($i>=72); - std %f20,[$nXfer+`4*$m`] -___ -} - -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 64 -vis_const: -.long 0x5a827999,0x5a827999 ! K_00_19 -.long 0x6ed9eba1,0x6ed9eba1 ! K_20_39 -.long 0x8f1bbcdc,0x8f1bbcdc ! K_40_59 -.long 0xca62c1d6,0xca62c1d6 ! K_60_79 -.long 0x00000100,0x00000100 -.align 64 -.type vis_const,#object -.size vis_const,(.-vis_const) - -.globl sha1_block_data_order -sha1_block_data_order: - save %sp,-$frame,%sp - add %fp,$bias-256,$base - -1: call .+8 - add %o7,vis_const-1b,$tmp0 - - ldd [$tmp0+0],$VK_00_19 - ldd [$tmp0+8],$VK_20_39 - ldd [$tmp0+16],$VK_40_59 - ldd [$tmp0+24],$VK_60_79 - ldd [$tmp0+32],$fmul - - ld [$ctx+0],$Actx - and $base,-256,$base - ld [$ctx+4],$Bctx - sub $base,$bias+$frame,%sp - ld [$ctx+8],$Cctx - and $inp,7,$align - ld [$ctx+12],$Dctx - and $inp,-8,$inp - ld [$ctx+16],$Ectx - - ! X[16] is maintained in FP register bank - alignaddr %g0,$align,%g0 - ldd [$inp+0],@X[0] - sub $inp,-64,$Xfer - ldd [$inp+8],@X[2] - and $Xfer,-64,$Xfer - ldd [$inp+16],@X[4] - and $Xfer,255,$Xfer - ldd [$inp+24],@X[6] - add $base,$Xfer,$Xfer - ldd [$inp+32],@X[8] - ldd [$inp+40],@X[10] - ldd [$inp+48],@X[12] - brz,pt $align,.Laligned - ldd [$inp+56],@X[14] - - ldd [$inp+64],@X[16] - faligndata @X[0],@X[2],@X[0] - faligndata @X[2],@X[4],@X[2] - faligndata @X[4],@X[6],@X[4] - faligndata @X[6],@X[8],@X[6] - faligndata @X[8],@X[10],@X[8] - faligndata @X[10],@X[12],@X[10] - faligndata @X[12],@X[14],@X[12] - faligndata @X[14],@X[16],@X[14] - -.Laligned: - mov 5,$tmp0 - dec 1,$len - alignaddr %g0,$tmp0,%g0 - fpadd32 $VK_00_19,@X[0],%f16 - fpadd32 $VK_00_19,@X[2],%f18 - fpadd32 $VK_00_19,@X[4],%f20 - fpadd32 $VK_00_19,@X[6],%f22 - fpadd32 $VK_00_19,@X[8],%f24 - fpadd32 $VK_00_19,@X[10],%f26 - fpadd32 $VK_00_19,@X[12],%f28 - fpadd32 $VK_00_19,@X[14],%f30 - std %f16,[$Xfer+0] - mov $Actx,$A - std %f18,[$Xfer+8] - mov $Bctx,$B - std %f20,[$Xfer+16] - mov $Cctx,$C - std %f22,[$Xfer+24] - mov $Dctx,$D - std %f24,[$Xfer+32] - mov $Ectx,$E - std %f26,[$Xfer+40] - fxors @X[13],@X[0],@X[0] - std %f28,[$Xfer+48] - ba .Loop - std %f30,[$Xfer+56] -.align 32 -.Loop: -___ -for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -for (;$i<70;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - tst $len - bz,pn `$bits==32?"%icc":"%xcc"`,.Ltail - nop -___ -for (;$i<80;$i++) { &BODY_70_79($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - add $A,$Actx,$Actx - add $B,$Bctx,$Bctx - add $C,$Cctx,$Cctx - add $D,$Dctx,$Dctx - add $E,$Ectx,$Ectx - mov 5,$tmp0 - fxors @X[13],@X[0],@X[0] - mov $Actx,$A - mov $Bctx,$B - mov $Cctx,$C - mov $Dctx,$D - mov $Ectx,$E - alignaddr %g0,$tmp0,%g0 - dec 1,$len - ba .Loop - mov $nXfer,$Xfer - -.align 32 -.Ltail: -___ -for($i=70;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - add $A,$Actx,$Actx - add $B,$Bctx,$Bctx - add $C,$Cctx,$Cctx - add $D,$Dctx,$Dctx - add $E,$Ectx,$Ectx - - st $Actx,[$ctx+0] - st $Bctx,[$ctx+4] - st $Cctx,[$ctx+8] - st $Dctx,[$ctx+12] - st $Ectx,[$ctx+16] - - ret - restore -.type sha1_block_data_order,#function -.size sha1_block_data_order,(.-sha1_block_data_order) -.asciz "SHA1 block transform for SPARCv9a, CRYPTOGAMS by " -.align 4 -___ - -# Purpose of these subroutines is to explicitly encode VIS instructions, -# so that one can compile the module without having to specify VIS -# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. -# Idea is to reserve for option to produce "universal" binary and let -# programmer detect if current CPU is VIS capable at run-time. -sub unvis { -my ($mnemonic,$rs1,$rs2,$rd)=@_; -my ($ref,$opf); -my %visopf = ( "fmul8ulx16" => 0x037, - "faligndata" => 0x048, - "fpadd32" => 0x052, - "fxor" => 0x06c, - "fxors" => 0x06d ); - - $ref = "$mnemonic\t$rs1,$rs2,$rd"; - - if ($opf=$visopf{$mnemonic}) { - foreach ($rs1,$rs2,$rd) { - return $ref if (!/%f([0-9]{1,2})/); - $_=$1; - if ($1>=32) { - return $ref if ($1&1); - # re-encode for upper double register addressing - $_=($1|$1>>5)&31; - } - } - - return sprintf ".word\t0x%08x !%s", - 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, - $ref; - } else { - return $ref; - } -} -sub unalignaddr { -my ($mnemonic,$rs1,$rs2,$rd)=@_; -my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); -my $ref="$mnemonic\t$rs1,$rs2,$rd"; - - foreach ($rs1,$rs2,$rd) { - if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } - else { return $ref; } - } - return sprintf ".word\t0x%08x !%s", - 0x81b00300|$rd<<25|$rs1<<14|$rs2, - $ref; -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),(%f[0-9]{1,2}),(%f[0-9]{1,2})/ - &unvis($1,$2,$3,$4) - /gem; -$code =~ s/\b(alignaddr)\s+(%[goli][0-7]),(%[goli][0-7]),(%[goli][0-7])/ - &unalignaddr($1,$2,$3,$4) - /gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-thumb.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-thumb.pl deleted file mode 100644 index 7c9ea9b029..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-thumb.pl +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# sha1_block for Thumb. -# -# January 2007. -# -# The code does not present direct interest to OpenSSL, because of low -# performance. Its purpose is to establish _size_ benchmark. Pretty -# useless one I must say, because 30% or 88 bytes larger ARMv4 code -# [avialable on demand] is almost _twice_ as fast. It should also be -# noted that in-lining of .Lcommon and .Lrotate improves performance -# by over 40%, while code increases by only 10% or 32 bytes. But once -# again, the goal was to establish _size_ benchmark, not performance. - -$output=shift; -open STDOUT,">$output"; - -$inline=0; -#$cheat_on_binutils=1; - -$t0="r0"; -$t1="r1"; -$t2="r2"; -$a="r3"; -$b="r4"; -$c="r5"; -$d="r6"; -$e="r7"; -$K="r8"; # "upper" registers can be used in add/sub and mov insns -$ctx="r9"; -$inp="r10"; -$len="r11"; -$Xi="r12"; - -sub common { -<<___; - sub $t0,#4 - ldr $t1,[$t0] - add $e,$K @ E+=K_xx_xx - lsl $t2,$a,#5 - add $t2,$e - lsr $e,$a,#27 - add $t2,$e @ E+=ROR(A,27) - add $t2,$t1 @ E+=X[i] -___ -} -sub rotate { -<<___; - mov $e,$d @ E=D - mov $d,$c @ D=C - lsl $c,$b,#30 - lsr $b,$b,#2 - orr $c,$b @ C=ROR(B,2) - mov $b,$a @ B=A - add $a,$t2,$t1 @ A=E+F_xx_xx(B,C,D) -___ -} - -sub BODY_00_19 { -$code.=$inline?&common():"\tbl .Lcommon\n"; -$code.=<<___; - mov $t1,$c - eor $t1,$d - and $t1,$b - eor $t1,$d @ F_00_19(B,C,D) -___ -$code.=$inline?&rotate():"\tbl .Lrotate\n"; -} - -sub BODY_20_39 { -$code.=$inline?&common():"\tbl .Lcommon\n"; -$code.=<<___; - mov $t1,$b - eor $t1,$c - eor $t1,$d @ F_20_39(B,C,D) -___ -$code.=$inline?&rotate():"\tbl .Lrotate\n"; -} - -sub BODY_40_59 { -$code.=$inline?&common():"\tbl .Lcommon\n"; -$code.=<<___; - mov $t1,$b - and $t1,$c - mov $e,$b - orr $e,$c - and $e,$d - orr $t1,$e @ F_40_59(B,C,D) -___ -$code.=$inline?&rotate():"\tbl .Lrotate\n"; -} - -$code=<<___; -.text -.code 16 - -.global sha1_block_data_order -.type sha1_block_data_order,%function - -.align 2 -sha1_block_data_order: -___ -if ($cheat_on_binutils) { -$code.=<<___; -.code 32 - add r3,pc,#1 - bx r3 @ switch to Thumb ISA -.code 16 -___ -} -$code.=<<___; - push {r4-r7} - mov r3,r8 - mov r4,r9 - mov r5,r10 - mov r6,r11 - mov r7,r12 - push {r3-r7,lr} - lsl r2,#6 - mov $ctx,r0 @ save context - mov $inp,r1 @ save inp - mov $len,r2 @ save len - add $len,$inp @ $len to point at inp end - -.Lloop: - mov $Xi,sp - mov $t2,sp - sub $t2,#16*4 @ [3] -.LXload: - ldrb $a,[$t1,#0] @ $t1 is r1 and holds inp - ldrb $b,[$t1,#1] - ldrb $c,[$t1,#2] - ldrb $d,[$t1,#3] - lsl $a,#24 - lsl $b,#16 - lsl $c,#8 - orr $a,$b - orr $a,$c - orr $a,$d - add $t1,#4 - push {$a} - cmp sp,$t2 - bne .LXload @ [+14*16] - - mov $inp,$t1 @ update $inp - sub $t2,#32*4 - sub $t2,#32*4 - mov $e,#31 @ [+4] -.LXupdate: - ldr $a,[sp,#15*4] - ldr $b,[sp,#13*4] - ldr $c,[sp,#7*4] - ldr $d,[sp,#2*4] - eor $a,$b - eor $a,$c - eor $a,$d - ror $a,$e - push {$a} - cmp sp,$t2 - bne .LXupdate @ [+(11+1)*64] - - ldmia $t0!,{$a,$b,$c,$d,$e} @ $t0 is r0 and holds ctx - mov $t0,$Xi - - ldr $t2,.LK_00_19 - mov $t1,$t0 - sub $t1,#20*4 - mov $Xi,$t1 - mov $K,$t2 @ [+7+4] -.L_00_19: -___ - &BODY_00_19(); -$code.=<<___; - cmp $Xi,$t0 - bne .L_00_19 @ [+(2+9+4+2+8+2)*20] - - ldr $t2,.LK_20_39 - mov $t1,$t0 - sub $t1,#20*4 - mov $Xi,$t1 - mov $K,$t2 @ [+5] -.L_20_39_or_60_79: -___ - &BODY_20_39(); -$code.=<<___; - cmp $Xi,$t0 - bne .L_20_39_or_60_79 @ [+(2+9+3+2+8+2)*20*2] - cmp sp,$t0 - beq .Ldone @ [+2] - - ldr $t2,.LK_40_59 - mov $t1,$t0 - sub $t1,#20*4 - mov $Xi,$t1 - mov $K,$t2 @ [+5] -.L_40_59: -___ - &BODY_40_59(); -$code.=<<___; - cmp $Xi,$t0 - bne .L_40_59 @ [+(2+9+6+2+8+2)*20] - - ldr $t2,.LK_60_79 - mov $Xi,sp - mov $K,$t2 - b .L_20_39_or_60_79 @ [+4] -.Ldone: - mov $t0,$ctx - ldr $t1,[$t0,#0] - ldr $t2,[$t0,#4] - add $a,$t1 - ldr $t1,[$t0,#8] - add $b,$t2 - ldr $t2,[$t0,#12] - add $c,$t1 - ldr $t1,[$t0,#16] - add $d,$t2 - add $e,$t1 - stmia $t0!,{$a,$b,$c,$d,$e} @ [+20] - - add sp,#80*4 @ deallocate stack frame - mov $t0,$ctx @ restore ctx - mov $t1,$inp @ restore inp - cmp $t1,$len - beq .Lexit - b .Lloop @ [+6] total 3212 cycles -.Lexit: - pop {r2-r7} - mov r8,r2 - mov r9,r3 - mov r10,r4 - mov r11,r5 - mov r12,r6 - mov lr,r7 - pop {r4-r7} - bx lr -.align 2 -___ -$code.=".Lcommon:\n".&common()."\tmov pc,lr\n" if (!$inline); -$code.=".Lrotate:\n".&rotate()."\tmov pc,lr\n" if (!$inline); -$code.=<<___; -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for Thumb, CRYPTOGAMS by " -___ - -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha1-x86_64.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha1-x86_64.pl deleted file mode 100755 index f15c7ec39b..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha1-x86_64.pl +++ /dev/null @@ -1,1261 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# sha1_block procedure for x86_64. -# -# It was brought to my attention that on EM64T compiler-generated code -# was far behind 32-bit assembler implementation. This is unlike on -# Opteron where compiler-generated code was only 15% behind 32-bit -# assembler, which originally made it hard to motivate the effort. -# There was suggestion to mechanically translate 32-bit code, but I -# dismissed it, reasoning that x86_64 offers enough register bank -# capacity to fully utilize SHA-1 parallelism. Therefore this fresh -# implementation:-) However! While 64-bit code does perform better -# on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, -# x86_64 does offer larger *addressable* bank, but out-of-order core -# reaches for even more registers through dynamic aliasing, and EM64T -# core must have managed to run-time optimize even 32-bit code just as -# good as 64-bit one. Performance improvement is summarized in the -# following table: -# -# gcc 3.4 32-bit asm cycles/byte -# Opteron +45% +20% 6.8 -# Xeon P4 +65% +0% 9.9 -# Core2 +60% +10% 7.0 - -# August 2009. -# -# The code was revised to minimize code size and to maximize -# "distance" between instructions producing input to 'lea' -# instruction and the 'lea' instruction itself, which is essential -# for Intel Atom core. - -# October 2010. -# -# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it -# is to offload message schedule denoted by Wt in NIST specification, -# or Xupdate in OpenSSL source, to SIMD unit. See sha1-586.pl module -# for background and implementation details. The only difference from -# 32-bit code is that 64-bit code doesn't have to spill @X[] elements -# to free temporary registers. - -# April 2011. -# -# Add AVX code path. See sha1-586.pl for further information. - -###################################################################### -# Current performance is summarized in following table. Numbers are -# CPU clock cycles spent to process single byte (less is better). -# -# x86_64 SSSE3 AVX -# P4 9.8 - -# Opteron 6.6 - -# Core2 6.7 6.1/+10% - -# Atom 11.0 9.7/+13% - -# Westmere 7.1 5.6/+27% - -# Sandy Bridge 7.9 6.3/+25% 5.2/+51% - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` - =~ /GNU assembler version ([2-9]\.[0-9]+)/ && - $1>=2.19); -$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && - $1>=2.09); -$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && - `ml64 2>&1` =~ /Version ([0-9]+)\./ && - $1>=10); - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$ctx="%rdi"; # 1st arg -$inp="%rsi"; # 2nd arg -$num="%rdx"; # 3rd arg - -# reassign arguments in order to produce more compact code -$ctx="%r8"; -$inp="%r9"; -$num="%r10"; - -$t0="%eax"; -$t1="%ebx"; -$t2="%ecx"; -@xi=("%edx","%ebp"); -$A="%esi"; -$B="%edi"; -$C="%r11d"; -$D="%r12d"; -$E="%r13d"; - -@V=($A,$B,$C,$D,$E); - -sub BODY_00_19 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___ if ($i==0); - mov `4*$i`($inp),$xi[0] - bswap $xi[0] - mov $xi[0],`4*$i`(%rsp) -___ -$code.=<<___ if ($i<15); - mov $c,$t0 - mov `4*$j`($inp),$xi[1] - mov $a,$t2 - xor $d,$t0 - bswap $xi[1] - rol \$5,$t2 - lea 0x5a827999($xi[0],$e),$e - and $b,$t0 - mov $xi[1],`4*$j`(%rsp) - add $t2,$e - xor $d,$t0 - rol \$30,$b - add $t0,$e -___ -$code.=<<___ if ($i>=15); - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 - mov $a,$t2 - xor `4*(($j+2)%16)`(%rsp),$xi[1] - xor $d,$t0 - rol \$5,$t2 - xor `4*(($j+8)%16)`(%rsp),$xi[1] - and $b,$t0 - lea 0x5a827999($xi[0],$e),$e - xor `4*(($j+13)%16)`(%rsp),$xi[1] - xor $d,$t0 - rol \$1,$xi[1] - add $t2,$e - rol \$30,$b - mov $xi[1],`4*($j%16)`(%rsp) - add $t0,$e -___ -unshift(@xi,pop(@xi)); -} - -sub BODY_20_39 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -my $K=($i<40)?0x6ed9eba1:0xca62c1d6; -$code.=<<___ if ($i<79); - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 - mov $a,$t2 - xor `4*(($j+2)%16)`(%rsp),$xi[1] - xor $b,$t0 - rol \$5,$t2 - lea $K($xi[0],$e),$e - xor `4*(($j+8)%16)`(%rsp),$xi[1] - xor $d,$t0 - add $t2,$e - xor `4*(($j+13)%16)`(%rsp),$xi[1] - rol \$30,$b - add $t0,$e - rol \$1,$xi[1] -___ -$code.=<<___ if ($i<76); - mov $xi[1],`4*($j%16)`(%rsp) -___ -$code.=<<___ if ($i==79); - mov $c,$t0 - mov $a,$t2 - xor $b,$t0 - lea $K($xi[0],$e),$e - rol \$5,$t2 - xor $d,$t0 - add $t2,$e - rol \$30,$b - add $t0,$e -___ -unshift(@xi,pop(@xi)); -} - -sub BODY_40_59 { -my ($i,$a,$b,$c,$d,$e)=@_; -my $j=$i+1; -$code.=<<___; - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 - mov $c,$t1 - xor `4*(($j+2)%16)`(%rsp),$xi[1] - and $d,$t0 - mov $a,$t2 - xor `4*(($j+8)%16)`(%rsp),$xi[1] - xor $d,$t1 - lea 0x8f1bbcdc($xi[0],$e),$e - rol \$5,$t2 - xor `4*(($j+13)%16)`(%rsp),$xi[1] - add $t0,$e - and $b,$t1 - rol \$1,$xi[1] - add $t1,$e - rol \$30,$b - mov $xi[1],`4*($j%16)`(%rsp) - add $t2,$e -___ -unshift(@xi,pop(@xi)); -} - -$code.=<<___; -.text -.extern OPENSSL_ia32cap_P - -.globl sha1_block_data_order -.type sha1_block_data_order,\@function,3 -.align 16 -sha1_block_data_order: - mov OPENSSL_ia32cap_P+0(%rip),%r9d - mov OPENSSL_ia32cap_P+4(%rip),%r8d - test \$`1<<9`,%r8d # check SSSE3 bit - jz .Lialu -___ -$code.=<<___ if ($avx); - and \$`1<<28`,%r8d # mask AVX bit - and \$`1<<30`,%r9d # mask "Intel CPU" bit - or %r9d,%r8d - cmp \$`1<<28|1<<30`,%r8d - je _avx_shortcut -___ -$code.=<<___; - jmp _ssse3_shortcut - -.align 16 -.Lialu: - push %rbx - push %rbp - push %r12 - push %r13 - mov %rsp,%r11 - mov %rdi,$ctx # reassigned argument - sub \$`8+16*4`,%rsp - mov %rsi,$inp # reassigned argument - and \$-64,%rsp - mov %rdx,$num # reassigned argument - mov %r11,`16*4`(%rsp) -.Lprologue: - - mov 0($ctx),$A - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov 16($ctx),$E - jmp .Lloop - -.align 16 -.Lloop: -___ -for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } -for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } -for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - add 0($ctx),$A - add 4($ctx),$B - add 8($ctx),$C - add 12($ctx),$D - add 16($ctx),$E - mov $A,0($ctx) - mov $B,4($ctx) - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - - sub \$1,$num - lea `16*4`($inp),$inp - jnz .Lloop - - mov `16*4`(%rsp),%rsi - mov (%rsi),%r13 - mov 8(%rsi),%r12 - mov 16(%rsi),%rbp - mov 24(%rsi),%rbx - lea 32(%rsi),%rsp -.Lepilogue: - ret -.size sha1_block_data_order,.-sha1_block_data_order -___ -{{{ -my $Xi=4; -my @X=map("%xmm$_",(4..7,0..3)); -my @Tx=map("%xmm$_",(8..10)); -my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization -my @T=("%esi","%edi"); -my $j=0; -my $K_XX_XX="%r11"; - -my $_rol=sub { &rol(@_) }; -my $_ror=sub { &ror(@_) }; - -$code.=<<___; -.type sha1_block_data_order_ssse3,\@function,3 -.align 16 -sha1_block_data_order_ssse3: -_ssse3_shortcut: - push %rbx - push %rbp - push %r12 - lea `-64-($win64?5*16:0)`(%rsp),%rsp -___ -$code.=<<___ if ($win64); - movaps %xmm6,64+0(%rsp) - movaps %xmm7,64+16(%rsp) - movaps %xmm8,64+32(%rsp) - movaps %xmm9,64+48(%rsp) - movaps %xmm10,64+64(%rsp) -.Lprologue_ssse3: -___ -$code.=<<___; - mov %rdi,$ctx # reassigned argument - mov %rsi,$inp # reassigned argument - mov %rdx,$num # reassigned argument - - shl \$6,$num - add $inp,$num - lea K_XX_XX(%rip),$K_XX_XX - - mov 0($ctx),$A # load context - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov $B,@T[0] # magic seed - mov 16($ctx),$E - - movdqa 64($K_XX_XX),@X[2] # pbswap mask - movdqa 0($K_XX_XX),@Tx[1] # K_00_19 - movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] - movdqu 16($inp),@X[-3&7] - movdqu 32($inp),@X[-2&7] - movdqu 48($inp),@X[-1&7] - pshufb @X[2],@X[-4&7] # byte swap - add \$64,$inp - pshufb @X[2],@X[-3&7] - pshufb @X[2],@X[-2&7] - pshufb @X[2],@X[-1&7] - paddd @Tx[1],@X[-4&7] # add K_00_19 - paddd @Tx[1],@X[-3&7] - paddd @Tx[1],@X[-2&7] - movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU - psubd @Tx[1],@X[-4&7] # restore X[] - movdqa @X[-3&7],16(%rsp) - psubd @Tx[1],@X[-3&7] - movdqa @X[-2&7],32(%rsp) - psubd @Tx[1],@X[-2&7] - jmp .Loop_ssse3 -___ - -sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm -{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; - my $arg = pop; - $arg = "\$$arg" if ($arg*1 eq $arg); - $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; -} - -sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - &movdqa (@X[0],@X[-3&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[0],@X[-1&7]); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &psrldq (@Tx[0],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (@Tx[2],@X[0]); - &movdqa (@Tx[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword - &paddd (@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@Tx[0],31); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[1],@Tx[2]); - eval(shift(@insns)); - eval(shift(@insns)); - - &psrld (@Tx[2],30); - &por (@X[0],@Tx[0]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &pslld (@Tx[1],2); - &pxor (@X[0],@Tx[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xupdate_ssse3_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); - eval(shift(@insns)); # body_20_39 - &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... or load next one - &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &movdqa (@Tx[0],@X[0]); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &pslld (@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &psrld (@Tx[0],30); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &por (@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &movdqa (@Tx[1],@X[0]) if ($Xi<19); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xuplast_ssse3_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &cmp ($inp,$num); - &je (".Ldone_ssse3"); - - unshift(@Tx,pop(@Tx)); - - &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask - &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 - &movdqu (@X[-4&7],"0($inp)"); # load input - &movdqu (@X[-3&7],"16($inp)"); - &movdqu (@X[-2&7],"32($inp)"); - &movdqu (@X[-1&7],"48($inp)"); - &pshufb (@X[-4&7],@X[2]); # byte swap - &add ($inp,64); - - $Xi=0; -} - -sub Xloop_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &pshufb (@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &paddd (@X[($Xi-4)&7],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - &psubd (@X[($Xi-4)&7],@Tx[1]); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_ssse3() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -sub body_00_19 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer - '&xor ($c,$d);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', - '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} - -sub body_20_39 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} - -sub body_40_59 () { - ( - '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' - ); -} -$code.=<<___; -.align 16 -.Loop_ssse3: -___ - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_16_31(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_00_19); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_40_59); - &Xupdate_ssse3_32_79(\&body_20_39); - &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - &Xloop_ssse3(\&body_20_39); - -$code.=<<___; - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - add 12($ctx),$D - mov $A,0($ctx) - add 16($ctx),$E - mov @T[0],4($ctx) - mov @T[0],$B # magic seed - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - jmp .Loop_ssse3 - -.align 16 -.Ldone_ssse3: -___ - $j=$saved_j; @V=@saved_V; - - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - &Xtail_ssse3(\&body_20_39); - -$code.=<<___; - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - mov $A,0($ctx) - add 12($ctx),$D - mov @T[0],4($ctx) - add 16($ctx),$E - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) -___ -$code.=<<___ if ($win64); - movaps 64+0(%rsp),%xmm6 - movaps 64+16(%rsp),%xmm7 - movaps 64+32(%rsp),%xmm8 - movaps 64+48(%rsp),%xmm9 - movaps 64+64(%rsp),%xmm10 -___ -$code.=<<___; - lea `64+($win64?5*16:0)`(%rsp),%rsi - mov 0(%rsi),%r12 - mov 8(%rsi),%rbp - mov 16(%rsi),%rbx - lea 24(%rsi),%rsp -.Lepilogue_ssse3: - ret -.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 -___ - -if ($avx) { -my $Xi=4; -my @X=map("%xmm$_",(4..7,0..3)); -my @Tx=map("%xmm$_",(8..10)); -my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization -my @T=("%esi","%edi"); -my $j=0; -my $K_XX_XX="%r11"; - -my $_rol=sub { &shld(@_[0],@_) }; -my $_ror=sub { &shrd(@_[0],@_) }; - -$code.=<<___; -.type sha1_block_data_order_avx,\@function,3 -.align 16 -sha1_block_data_order_avx: -_avx_shortcut: - push %rbx - push %rbp - push %r12 - lea `-64-($win64?5*16:0)`(%rsp),%rsp -___ -$code.=<<___ if ($win64); - movaps %xmm6,64+0(%rsp) - movaps %xmm7,64+16(%rsp) - movaps %xmm8,64+32(%rsp) - movaps %xmm9,64+48(%rsp) - movaps %xmm10,64+64(%rsp) -.Lprologue_avx: -___ -$code.=<<___; - mov %rdi,$ctx # reassigned argument - mov %rsi,$inp # reassigned argument - mov %rdx,$num # reassigned argument - vzeroupper - - shl \$6,$num - add $inp,$num - lea K_XX_XX(%rip),$K_XX_XX - - mov 0($ctx),$A # load context - mov 4($ctx),$B - mov 8($ctx),$C - mov 12($ctx),$D - mov $B,@T[0] # magic seed - mov 16($ctx),$E - - vmovdqa 64($K_XX_XX),@X[2] # pbswap mask - vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 - vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] - vmovdqu 16($inp),@X[-3&7] - vmovdqu 32($inp),@X[-2&7] - vmovdqu 48($inp),@X[-1&7] - vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap - add \$64,$inp - vpshufb @X[2],@X[-3&7],@X[-3&7] - vpshufb @X[2],@X[-2&7],@X[-2&7] - vpshufb @X[2],@X[-1&7],@X[-1&7] - vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 - vpaddd @Tx[1],@X[-3&7],@X[1] - vpaddd @Tx[1],@X[-2&7],@X[2] - vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU - vmovdqa @X[1],16(%rsp) - vmovdqa @X[2],32(%rsp) - jmp .Loop_avx -___ - -sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 40 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords - eval(shift(@insns)); - eval(shift(@insns)); - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@Tx[0],@X[0],31); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword - &vpaddd (@X[0],@X[0],@X[0]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpsrld (@Tx[1],@Tx[2],30); - &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpslld (@Tx[2],@Tx[2],2); - &vpxor (@X[0],@X[0],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX - eval(shift(@insns)); - eval(shift(@insns)); - - - foreach (@insns) { eval; } # remaining instructions [if any] - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xupdate_avx_32_79() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions - my ($a,$b,$c,$d,$e); - - &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" - &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" - eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... or load next one - &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]" - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - - &vpsrld (@Tx[0],@X[0],30); - &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpslld (@X[0],@X[0],2); - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # ror - eval(shift(@insns)); - - &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 - eval(shift(@insns)); - &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); # rol - eval(shift(@insns)); - - foreach (@insns) { eval; } # remaining instructions - - $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); -} - -sub Xuplast_avx_80() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU - - foreach (@insns) { eval; } # remaining instructions - - &cmp ($inp,$num); - &je (".Ldone_avx"); - - unshift(@Tx,pop(@Tx)); - - &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask - &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 - &vmovdqu(@X[-4&7],"0($inp)"); # load input - &vmovdqu(@X[-3&7],"16($inp)"); - &vmovdqu(@X[-2&7],"32($inp)"); - &vmovdqu(@X[-1&7],"48($inp)"); - &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap - &add ($inp,64); - - $Xi=0; -} - -sub Xloop_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - eval(shift(@insns)); - eval(shift(@insns)); - &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); - eval(shift(@insns)); - eval(shift(@insns)); - &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); - &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU - eval(shift(@insns)); - eval(shift(@insns)); - - foreach (@insns) { eval; } - $Xi++; -} - -sub Xtail_avx() -{ use integer; - my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 instructions - my ($a,$b,$c,$d,$e); - - foreach (@insns) { eval; } -} - -$code.=<<___; -.align 16 -.Loop_avx: -___ - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_16_31(\&body_00_19); - &Xupdate_avx_32_79(\&body_00_19); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_20_39); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_40_59); - &Xupdate_avx_32_79(\&body_20_39); - &Xuplast_avx_80(\&body_20_39); # can jump to "done" - - $saved_j=$j; @saved_V=@V; - - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - &Xloop_avx(\&body_20_39); - -$code.=<<___; - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - add 12($ctx),$D - mov $A,0($ctx) - add 16($ctx),$E - mov @T[0],4($ctx) - mov @T[0],$B # magic seed - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) - jmp .Loop_avx - -.align 16 -.Ldone_avx: -___ - $j=$saved_j; @V=@saved_V; - - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - &Xtail_avx(\&body_20_39); - -$code.=<<___; - vzeroupper - - add 0($ctx),$A # update context - add 4($ctx),@T[0] - add 8($ctx),$C - mov $A,0($ctx) - add 12($ctx),$D - mov @T[0],4($ctx) - add 16($ctx),$E - mov $C,8($ctx) - mov $D,12($ctx) - mov $E,16($ctx) -___ -$code.=<<___ if ($win64); - movaps 64+0(%rsp),%xmm6 - movaps 64+16(%rsp),%xmm7 - movaps 64+32(%rsp),%xmm8 - movaps 64+48(%rsp),%xmm9 - movaps 64+64(%rsp),%xmm10 -___ -$code.=<<___; - lea `64+($win64?5*16:0)`(%rsp),%rsi - mov 0(%rsi),%r12 - mov 8(%rsi),%rbp - mov 16(%rsi),%rbx - lea 24(%rsi),%rsp -.Lepilogue_avx: - ret -.size sha1_block_data_order_avx,.-sha1_block_data_order_avx -___ -} -$code.=<<___; -.align 64 -K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask -___ -}}} -$code.=<<___; -.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " -.align 64 -___ - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lcommon_seh_tail - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lcommon_seh_tail - - mov `16*4`(%rax),%rax # pull saved stack pointer - lea 32(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - - jmp .Lcommon_seh_tail -.size se_handler,.-se_handler - -.type ssse3_handler,\@abi-omnipotent -.align 16 -ssse3_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - mov 8($disp),%rsi # disp->ImageBase - mov 56($disp),%r11 # disp->HandlerData - - mov 0(%r11),%r10d # HandlerData[0] - lea (%rsi,%r10),%r10 # prologue label - cmp %r10,%rbx # context->RipRsp - - mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lcommon_seh_tail - - lea 64(%rax),%rsi - lea 512($context),%rdi # &context.Xmm6 - mov \$10,%ecx - .long 0xa548f3fc # cld; rep movsq - lea `24+64+5*16`(%rax),%rax # adjust stack pointer - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore cotnext->R12 - -.Lcommon_seh_tail: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size ssse3_handler,.-ssse3_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_sha1_block_data_order - .rva .LSEH_end_sha1_block_data_order - .rva .LSEH_info_sha1_block_data_order - .rva .LSEH_begin_sha1_block_data_order_ssse3 - .rva .LSEH_end_sha1_block_data_order_ssse3 - .rva .LSEH_info_sha1_block_data_order_ssse3 -___ -$code.=<<___ if ($avx); - .rva .LSEH_begin_sha1_block_data_order_avx - .rva .LSEH_end_sha1_block_data_order_avx - .rva .LSEH_info_sha1_block_data_order_avx -___ -$code.=<<___; -.section .xdata -.align 8 -.LSEH_info_sha1_block_data_order: - .byte 9,0,0,0 - .rva se_handler -.LSEH_info_sha1_block_data_order_ssse3: - .byte 9,0,0,0 - .rva ssse3_handler - .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[] -___ -$code.=<<___ if ($avx); -.LSEH_info_sha1_block_data_order_avx: - .byte 9,0,0,0 - .rva ssse3_handler - .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] -___ -} - -#################################################################### - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha256-586.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha256-586.pl deleted file mode 100644 index 52a7c7f8a3..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha256-586.pl +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA256 block transform for x86. September 2007. -# -# Performance in clock cycles per processed byte (less is better): -# -# Pentium PIII P4 AMD K8 Core2 -# gcc 46 36 41 27 26 -# icc 57 33 38 25 23 -# x86 asm 40 30 33 20 18 -# x86_64 asm(*) - - 21 16 16 -# -# (*) x86_64 assembler performance is presented for reference -# purposes. -# -# Performance improvement over compiler generated code varies from -# 10% to 40% [see above]. Not very impressive on some µ-archs, but -# it's 5 times smaller and optimizies amount of writes. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386"); - -$A="eax"; -$E="edx"; -$T="ebx"; -$Aoff=&DWP(0,"esp"); -$Boff=&DWP(4,"esp"); -$Coff=&DWP(8,"esp"); -$Doff=&DWP(12,"esp"); -$Eoff=&DWP(16,"esp"); -$Foff=&DWP(20,"esp"); -$Goff=&DWP(24,"esp"); -$Hoff=&DWP(28,"esp"); -$Xoff=&DWP(32,"esp"); -$K256="ebp"; - -sub BODY_00_15() { - my $in_16_63=shift; - - &mov ("ecx",$E); - &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2]) - &ror ("ecx",25-11); - &mov ("esi",$Foff); - &xor ("ecx",$E); - &ror ("ecx",11-6); - &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0] - &xor ("ecx",$E); - &ror ("ecx",6); # Sigma1(e) - &mov ("edi",$Goff); - &add ($T,"ecx"); # T += Sigma1(e) - - &xor ("esi","edi"); - &mov ($Eoff,$E); # modulo-scheduled - &mov ("ecx",$A); - &and ("esi",$E); - &mov ($E,$Doff); # e becomes d, which is e in next iteration - &xor ("esi","edi"); # Ch(e,f,g) - &mov ("edi",$A); - &add ($T,"esi"); # T += Ch(e,f,g) - - &ror ("ecx",22-13); - &add ($T,$Hoff); # T += h - &xor ("ecx",$A); - &ror ("ecx",13-2); - &mov ("esi",$Boff); - &xor ("ecx",$A); - &ror ("ecx",2); # Sigma0(a) - &add ($E,$T); # d += T - &mov ("edi",$Coff); - - &add ($T,"ecx"); # T += Sigma0(a) - &mov ($Aoff,$A); # modulo-scheduled - - &mov ("ecx",$A); - &sub ("esp",4); - &or ($A,"esi"); # a becomes h, which is a in next iteration - &and ("ecx","esi"); - &and ($A,"edi"); - &mov ("esi",&DWP(0,$K256)); - &or ($A,"ecx"); # h=Maj(a,b,c) - - &add ($K256,4); - &add ($A,$T); # h += T - &mov ($T,&DWP(4*(8+15+16-1),"esp")) if ($in_16_63); # preload T - &add ($E,"esi"); # d += K256[i] - &add ($A,"esi"); # h += K256[i] -} - -&function_begin("sha256_block_data_order"); - &mov ("esi",wparam(0)); # ctx - &mov ("edi",wparam(1)); # inp - &mov ("eax",wparam(2)); # num - &mov ("ebx","esp"); # saved sp - - &call (&label("pic_point")); # make it PIC! -&set_label("pic_point"); - &blindpop($K256); - &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); - - &sub ("esp",16); - &and ("esp",-64); - - &shl ("eax",6); - &add ("eax","edi"); - &mov (&DWP(0,"esp"),"esi"); # ctx - &mov (&DWP(4,"esp"),"edi"); # inp - &mov (&DWP(8,"esp"),"eax"); # inp+num*128 - &mov (&DWP(12,"esp"),"ebx"); # saved sp - -&set_label("loop",16); - # copy input block to stack reversing byte and dword order - for($i=0;$i<4;$i++) { - &mov ("eax",&DWP($i*16+0,"edi")); - &mov ("ebx",&DWP($i*16+4,"edi")); - &mov ("ecx",&DWP($i*16+8,"edi")); - &mov ("edx",&DWP($i*16+12,"edi")); - &bswap ("eax"); - &bswap ("ebx"); - &bswap ("ecx"); - &bswap ("edx"); - &push ("eax"); - &push ("ebx"); - &push ("ecx"); - &push ("edx"); - } - &add ("edi",64); - &sub ("esp",4*8); # place for A,B,C,D,E,F,G,H - &mov (&DWP(4*(8+16)+4,"esp"),"edi"); - - # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack - &mov ($A,&DWP(0,"esi")); - &mov ("ebx",&DWP(4,"esi")); - &mov ("ecx",&DWP(8,"esi")); - &mov ("edi",&DWP(12,"esi")); - # &mov ($Aoff,$A); - &mov ($Boff,"ebx"); - &mov ($Coff,"ecx"); - &mov ($Doff,"edi"); - &mov ($E,&DWP(16,"esi")); - &mov ("ebx",&DWP(20,"esi")); - &mov ("ecx",&DWP(24,"esi")); - &mov ("edi",&DWP(28,"esi")); - # &mov ($Eoff,$E); - &mov ($Foff,"ebx"); - &mov ($Goff,"ecx"); - &mov ($Hoff,"edi"); - -&set_label("00_15",16); - &mov ($T,&DWP(4*(8+15),"esp")); - - &BODY_00_15(); - - &cmp ("esi",0xc19bf174); - &jne (&label("00_15")); - - &mov ($T,&DWP(4*(8+15+16-1),"esp")); # preloaded in BODY_00_15(1) -&set_label("16_63",16); - &mov ("esi",$T); - &mov ("ecx",&DWP(4*(8+15+16-14),"esp")); - &ror ("esi",18-7); - &mov ("edi","ecx"); - &xor ("esi",$T); - &ror ("esi",7); - &shr ($T,3); - - &ror ("edi",19-17); - &xor ($T,"esi"); # T = sigma0(X[-15]) - &xor ("edi","ecx"); - &ror ("edi",17); - &shr ("ecx",10); - &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16] - &xor ("edi","ecx"); # sigma1(X[-2]) - - &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7] - # &add ($T,"edi"); # T += sigma1(X[-2]) - # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0] - - &BODY_00_15(1); - - &cmp ("esi",0xc67178f2); - &jne (&label("16_63")); - - &mov ("esi",&DWP(4*(8+16+64)+0,"esp"));#ctx - # &mov ($A,$Aoff); - &mov ("ebx",$Boff); - &mov ("ecx",$Coff); - &mov ("edi",$Doff); - &add ($A,&DWP(0,"esi")); - &add ("ebx",&DWP(4,"esi")); - &add ("ecx",&DWP(8,"esi")); - &add ("edi",&DWP(12,"esi")); - &mov (&DWP(0,"esi"),$A); - &mov (&DWP(4,"esi"),"ebx"); - &mov (&DWP(8,"esi"),"ecx"); - &mov (&DWP(12,"esi"),"edi"); - # &mov ($E,$Eoff); - &mov ("eax",$Foff); - &mov ("ebx",$Goff); - &mov ("ecx",$Hoff); - &mov ("edi",&DWP(4*(8+16+64)+4,"esp"));#inp - &add ($E,&DWP(16,"esi")); - &add ("eax",&DWP(20,"esi")); - &add ("ebx",&DWP(24,"esi")); - &add ("ecx",&DWP(28,"esi")); - &mov (&DWP(16,"esi"),$E); - &mov (&DWP(20,"esi"),"eax"); - &mov (&DWP(24,"esi"),"ebx"); - &mov (&DWP(28,"esi"),"ecx"); - - &add ("esp",4*(8+16+64)); # destroy frame - &sub ($K256,4*64); # rewind K - - &cmp ("edi",&DWP(8,"esp")); # are we done yet? - &jb (&label("loop")); - - &mov ("esp",&DWP(12,"esp")); # restore sp -&function_end_A(); - -&set_label("K256",64); # Yes! I keep it in the code segment! - &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); - &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); - &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); - &data_word(0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174); - &data_word(0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc); - &data_word(0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da); - &data_word(0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7); - &data_word(0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967); - &data_word(0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13); - &data_word(0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85); - &data_word(0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3); - &data_word(0xd192e819,0xd6990624,0xf40e3585,0x106aa070); - &data_word(0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5); - &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); - &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); - &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); -&function_end_B("sha256_block_data_order"); -&asciz("SHA256 block transform for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha256-armv4.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha256-armv4.pl deleted file mode 100644 index 9c84e8d93c..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha256-armv4.pl +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA256 block procedure for ARMv4. May 2007. - -# Performance is ~2x better than gcc 3.4 generated code and in "abso- -# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per -# byte [on single-issue Xscale PXA250 core]. - -# July 2010. -# -# Rescheduling for dual-issue pipeline resulted in 22% improvement on -# Cortex A8 core and ~20 cycles per processed byte. - -# February 2011. -# -# Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~17 cycles per processed byte. - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$ctx="r0"; $t0="r0"; -$inp="r1"; $t3="r1"; -$len="r2"; $t1="r2"; -$T1="r3"; -$A="r4"; -$B="r5"; -$C="r6"; -$D="r7"; -$E="r8"; -$F="r9"; -$G="r10"; -$H="r11"; -@V=($A,$B,$C,$D,$E,$F,$G,$H); -$t2="r12"; -$Ktbl="r14"; - -@Sigma0=( 2,13,22); -@Sigma1=( 6,11,25); -@sigma0=( 7,18, 3); -@sigma1=(17,19,10); - -sub BODY_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___ if ($i<16); -#if __ARM_ARCH__>=7 - ldr $T1,[$inp],#4 -#else - ldrb $T1,[$inp,#3] @ $i - ldrb $t2,[$inp,#2] - ldrb $t1,[$inp,#1] - ldrb $t0,[$inp],#4 - orr $T1,$T1,$t2,lsl#8 - orr $T1,$T1,$t1,lsl#16 - orr $T1,$T1,$t0,lsl#24 -#endif -___ -$code.=<<___; - mov $t0,$e,ror#$Sigma1[0] - ldr $t2,[$Ktbl],#4 @ *K256++ - eor $t0,$t0,$e,ror#$Sigma1[1] - eor $t1,$f,$g -#if $i>=16 - add $T1,$T1,$t3 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev $T1,$T1 -#endif -#if $i==15 - str $inp,[sp,#17*4] @ leave room for $t3 -#endif - eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) - and $t1,$t1,$e - str $T1,[sp,#`$i%16`*4] - add $T1,$T1,$t0 - eor $t1,$t1,$g @ Ch(e,f,g) - add $T1,$T1,$h - mov $h,$a,ror#$Sigma0[0] - add $T1,$T1,$t1 - eor $h,$h,$a,ror#$Sigma0[1] - add $T1,$T1,$t2 - eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) -#if $i>=15 - ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx -#endif - orr $t0,$a,$b - and $t1,$a,$b - and $t0,$t0,$c - add $h,$h,$T1 - orr $t0,$t0,$t1 @ Maj(a,b,c) - add $d,$d,$T1 - add $h,$h,$t0 -___ -} - -sub BODY_16_XX { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i - ldr $t2,[sp,#`($i+14)%16`*4] - mov $t0,$t3,ror#$sigma0[0] - ldr $T1,[sp,#`($i+0)%16`*4] - eor $t0,$t0,$t3,ror#$sigma0[1] - ldr $t1,[sp,#`($i+9)%16`*4] - eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1]) - mov $t3,$t2,ror#$sigma1[0] - add $T1,$T1,$t0 - eor $t3,$t3,$t2,ror#$sigma1[1] - add $T1,$T1,$t1 - eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) - @ add $T1,$T1,$t3 -___ - &BODY_00_15(@_); -} - -$code=<<___; -#include "arm_arch.h" - -.text -.code 32 - -.type K256,%object -.align 5 -K256: -.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.size K256,.-K256 - -.global sha256_block_data_order -.type sha256_block_data_order,%function -sha256_block_data_order: - sub r3,pc,#8 @ sha256_block_data_order - add $len,$inp,$len,lsl#6 @ len to point at the end of inp - stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} - ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} - sub $Ktbl,r3,#256 @ K256 - sub sp,sp,#16*4 @ alloca(X[16]) -.Loop: -___ -for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } -$code.=".Lrounds_16_xx:\n"; -for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - and $t2,$t2,#0xff - cmp $t2,#0xf2 - bne .Lrounds_16_xx - - ldr $T1,[sp,#16*4] @ pull ctx - ldr $t0,[$T1,#0] - ldr $t1,[$T1,#4] - ldr $t2,[$T1,#8] - add $A,$A,$t0 - ldr $t0,[$T1,#12] - add $B,$B,$t1 - ldr $t1,[$T1,#16] - add $C,$C,$t2 - ldr $t2,[$T1,#20] - add $D,$D,$t0 - ldr $t0,[$T1,#24] - add $E,$E,$t1 - ldr $t1,[$T1,#28] - add $F,$F,$t2 - ldr $inp,[sp,#17*4] @ pull inp - ldr $t2,[sp,#18*4] @ pull inp+len - add $G,$G,$t0 - add $H,$H,$t1 - stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H} - cmp $inp,$t2 - sub $Ktbl,$Ktbl,#256 @ rewind Ktbl - bne .Loop - - add sp,sp,#`16+3`*4 @ destroy frame -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r11,pc} -#else - ldmia sp!,{r4-r11,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -.size sha256_block_data_order,.-sha256_block_data_order -.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " -.align 2 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-586.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-586.pl deleted file mode 100644 index 9f8c51eb54..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-586.pl +++ /dev/null @@ -1,644 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA512 block transform for x86. September 2007. -# -# Performance in clock cycles per processed byte (less is better): -# -# Pentium PIII P4 AMD K8 Core2 -# gcc 100 75 116 54 66 -# icc 97 77 95 55 57 -# x86 asm 61 56 82 36 40 -# SSE2 asm - - 38 24 20 -# x86_64 asm(*) - - 30 10.0 10.5 -# -# (*) x86_64 assembler performance is presented for reference -# purposes. -# -# IALU code-path is optimized for elder Pentiums. On vanilla Pentium -# performance improvement over compiler generated code reaches ~60%, -# while on PIII - ~35%. On newer µ-archs improvement varies from 15% -# to 50%, but it's less important as they are expected to execute SSE2 -# code-path, which is commonly ~2-3x faster [than compiler generated -# code]. SSE2 code-path is as fast as original sha512-sse2.pl, even -# though it does not use 128-bit operations. The latter means that -# SSE2-aware kernel is no longer required to execute the code. Another -# difference is that new code optimizes amount of writes, but at the -# cost of increased data cache "footprint" by 1/2KB. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386"); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -$Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp"); -$Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp"); -$Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp"); -$Clo=&DWP(24,"esp"); $Chi=&DWP(24+4,"esp"); -$Dlo=&DWP(32,"esp"); $Dhi=&DWP(32+4,"esp"); -$Elo=&DWP(40,"esp"); $Ehi=&DWP(40+4,"esp"); -$Flo=&DWP(48,"esp"); $Fhi=&DWP(48+4,"esp"); -$Glo=&DWP(56,"esp"); $Ghi=&DWP(56+4,"esp"); -$Hlo=&DWP(64,"esp"); $Hhi=&DWP(64+4,"esp"); -$K512="ebp"; - -$Asse2=&QWP(0,"esp"); -$Bsse2=&QWP(8,"esp"); -$Csse2=&QWP(16,"esp"); -$Dsse2=&QWP(24,"esp"); -$Esse2=&QWP(32,"esp"); -$Fsse2=&QWP(40,"esp"); -$Gsse2=&QWP(48,"esp"); -$Hsse2=&QWP(56,"esp"); - -$A="mm0"; # B-D and -$E="mm4"; # F-H are commonly loaded to respectively mm1-mm3 and - # mm5-mm7, but it's done on on-demand basis... - -sub BODY_00_15_sse2 { - my $prefetch=shift; - - &movq ("mm5",$Fsse2); # load f - &movq ("mm6",$Gsse2); # load g - &movq ("mm7",$Hsse2); # load h - - &movq ("mm1",$E); # %mm1 is sliding right - &movq ("mm2",$E); # %mm2 is sliding left - &psrlq ("mm1",14); - &movq ($Esse2,$E); # modulo-scheduled save e - &psllq ("mm2",23); - &movq ("mm3","mm1"); # %mm3 is T1 - &psrlq ("mm1",4); - &pxor ("mm3","mm2"); - &psllq ("mm2",23); - &pxor ("mm3","mm1"); - &psrlq ("mm1",23); - &pxor ("mm3","mm2"); - &psllq ("mm2",4); - &pxor ("mm3","mm1"); - &paddq ("mm7",QWP(0,$K512)); # h+=K512[i] - &pxor ("mm3","mm2"); # T1=Sigma1_512(e) - - &pxor ("mm5","mm6"); # f^=g - &movq ("mm1",$Bsse2); # load b - &pand ("mm5",$E); # f&=e - &movq ("mm2",$Csse2); # load c - &pxor ("mm5","mm6"); # f^=g - &movq ($E,$Dsse2); # e = load d - &paddq ("mm3","mm5"); # T1+=Ch(e,f,g) - &movq (&QWP(0,"esp"),$A); # modulo-scheduled save a - &paddq ("mm3","mm7"); # T1+=h - - &movq ("mm5",$A); # %mm5 is sliding right - &movq ("mm6",$A); # %mm6 is sliding left - &paddq ("mm3",&QWP(8*9,"esp")); # T1+=X[0] - &psrlq ("mm5",28); - &paddq ($E,"mm3"); # e += T1 - &psllq ("mm6",25); - &movq ("mm7","mm5"); # %mm7 is T2 - &psrlq ("mm5",6); - &pxor ("mm7","mm6"); - &psllq ("mm6",5); - &pxor ("mm7","mm5"); - &psrlq ("mm5",5); - &pxor ("mm7","mm6"); - &psllq ("mm6",6); - &pxor ("mm7","mm5"); - &sub ("esp",8); - &pxor ("mm7","mm6"); # T2=Sigma0_512(a) - - &movq ("mm5",$A); # %mm5=a - &por ($A,"mm2"); # a=a|c - &movq ("mm6",&QWP(8*(9+16-14),"esp")) if ($prefetch); - &pand ("mm5","mm2"); # %mm5=a&c - &pand ($A,"mm1"); # a=(a|c)&b - &movq ("mm2",&QWP(8*(9+16-1),"esp")) if ($prefetch); - &por ("mm5",$A); # %mm5=(a&c)|((a|c)&b) - &paddq ("mm7","mm5"); # T2+=Maj(a,b,c) - &movq ($A,"mm3"); # a=T1 - - &mov (&LB("edx"),&BP(0,$K512)); - &paddq ($A,"mm7"); # a+=T2 - &add ($K512,8); -} - -sub BODY_00_15_x86 { - #define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) - # LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 - # HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 - &mov ("ecx",$Elo); - &mov ("edx",$Ehi); - &mov ("esi","ecx"); - - &shr ("ecx",9); # lo>>9 - &mov ("edi","edx"); - &shr ("edx",9); # hi>>9 - &mov ("ebx","ecx"); - &shl ("esi",14); # lo<<14 - &mov ("eax","edx"); - &shl ("edi",14); # hi<<14 - &xor ("ebx","esi"); - - &shr ("ecx",14-9); # lo>>14 - &xor ("eax","edi"); - &shr ("edx",14-9); # hi>>14 - &xor ("eax","ecx"); - &shl ("esi",18-14); # lo<<18 - &xor ("ebx","edx"); - &shl ("edi",18-14); # hi<<18 - &xor ("ebx","esi"); - - &shr ("ecx",18-14); # lo>>18 - &xor ("eax","edi"); - &shr ("edx",18-14); # hi>>18 - &xor ("eax","ecx"); - &shl ("esi",23-18); # lo<<23 - &xor ("ebx","edx"); - &shl ("edi",23-18); # hi<<23 - &xor ("eax","esi"); - &xor ("ebx","edi"); # T1 = Sigma1(e) - - &mov ("ecx",$Flo); - &mov ("edx",$Fhi); - &mov ("esi",$Glo); - &mov ("edi",$Ghi); - &add ("eax",$Hlo); - &adc ("ebx",$Hhi); # T1 += h - &xor ("ecx","esi"); - &xor ("edx","edi"); - &and ("ecx",$Elo); - &and ("edx",$Ehi); - &add ("eax",&DWP(8*(9+15)+0,"esp")); - &adc ("ebx",&DWP(8*(9+15)+4,"esp")); # T1 += X[0] - &xor ("ecx","esi"); - &xor ("edx","edi"); # Ch(e,f,g) = (f^g)&e)^g - - &mov ("esi",&DWP(0,$K512)); - &mov ("edi",&DWP(4,$K512)); # K[i] - &add ("eax","ecx"); - &adc ("ebx","edx"); # T1 += Ch(e,f,g) - &mov ("ecx",$Dlo); - &mov ("edx",$Dhi); - &add ("eax","esi"); - &adc ("ebx","edi"); # T1 += K[i] - &mov ($Tlo,"eax"); - &mov ($Thi,"ebx"); # put T1 away - &add ("eax","ecx"); - &adc ("ebx","edx"); # d += T1 - - #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) - # LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 - # HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 - &mov ("ecx",$Alo); - &mov ("edx",$Ahi); - &mov ($Dlo,"eax"); - &mov ($Dhi,"ebx"); - &mov ("esi","ecx"); - - &shr ("ecx",2); # lo>>2 - &mov ("edi","edx"); - &shr ("edx",2); # hi>>2 - &mov ("ebx","ecx"); - &shl ("esi",4); # lo<<4 - &mov ("eax","edx"); - &shl ("edi",4); # hi<<4 - &xor ("ebx","esi"); - - &shr ("ecx",7-2); # lo>>7 - &xor ("eax","edi"); - &shr ("edx",7-2); # hi>>7 - &xor ("ebx","ecx"); - &shl ("esi",25-4); # lo<<25 - &xor ("eax","edx"); - &shl ("edi",25-4); # hi<<25 - &xor ("eax","esi"); - - &shr ("ecx",28-7); # lo>>28 - &xor ("ebx","edi"); - &shr ("edx",28-7); # hi>>28 - &xor ("eax","ecx"); - &shl ("esi",30-25); # lo<<30 - &xor ("ebx","edx"); - &shl ("edi",30-25); # hi<<30 - &xor ("eax","esi"); - &xor ("ebx","edi"); # Sigma0(a) - - &mov ("ecx",$Alo); - &mov ("edx",$Ahi); - &mov ("esi",$Blo); - &mov ("edi",$Bhi); - &add ("eax",$Tlo); - &adc ("ebx",$Thi); # T1 = Sigma0(a)+T1 - &or ("ecx","esi"); - &or ("edx","edi"); - &and ("ecx",$Clo); - &and ("edx",$Chi); - &and ("esi",$Alo); - &and ("edi",$Ahi); - &or ("ecx","esi"); - &or ("edx","edi"); # Maj(a,b,c) = ((a|b)&c)|(a&b) - - &add ("eax","ecx"); - &adc ("ebx","edx"); # T1 += Maj(a,b,c) - &mov ($Tlo,"eax"); - &mov ($Thi,"ebx"); - - &mov (&LB("edx"),&BP(0,$K512)); # pre-fetch LSB of *K - &sub ("esp",8); - &lea ($K512,&DWP(8,$K512)); # K++ -} - - -&function_begin("sha512_block_data_order"); - &mov ("esi",wparam(0)); # ctx - &mov ("edi",wparam(1)); # inp - &mov ("eax",wparam(2)); # num - &mov ("ebx","esp"); # saved sp - - &call (&label("pic_point")); # make it PIC! -&set_label("pic_point"); - &blindpop($K512); - &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512)); - - &sub ("esp",16); - &and ("esp",-64); - - &shl ("eax",7); - &add ("eax","edi"); - &mov (&DWP(0,"esp"),"esi"); # ctx - &mov (&DWP(4,"esp"),"edi"); # inp - &mov (&DWP(8,"esp"),"eax"); # inp+num*128 - &mov (&DWP(12,"esp"),"ebx"); # saved sp - -if ($sse2) { - &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); - &bt (&DWP(0,"edx"),26); - &jnc (&label("loop_x86")); - - # load ctx->h[0-7] - &movq ($A,&QWP(0,"esi")); - &movq ("mm1",&QWP(8,"esi")); - &movq ("mm2",&QWP(16,"esi")); - &movq ("mm3",&QWP(24,"esi")); - &movq ($E,&QWP(32,"esi")); - &movq ("mm5",&QWP(40,"esi")); - &movq ("mm6",&QWP(48,"esi")); - &movq ("mm7",&QWP(56,"esi")); - &sub ("esp",8*10); - -&set_label("loop_sse2",16); - # &movq ($Asse2,$A); - &movq ($Bsse2,"mm1"); - &movq ($Csse2,"mm2"); - &movq ($Dsse2,"mm3"); - # &movq ($Esse2,$E); - &movq ($Fsse2,"mm5"); - &movq ($Gsse2,"mm6"); - &movq ($Hsse2,"mm7"); - - &mov ("ecx",&DWP(0,"edi")); - &mov ("edx",&DWP(4,"edi")); - &add ("edi",8); - &bswap ("ecx"); - &bswap ("edx"); - &mov (&DWP(8*9+4,"esp"),"ecx"); - &mov (&DWP(8*9+0,"esp"),"edx"); - -&set_label("00_14_sse2",16); - &mov ("eax",&DWP(0,"edi")); - &mov ("ebx",&DWP(4,"edi")); - &add ("edi",8); - &bswap ("eax"); - &bswap ("ebx"); - &mov (&DWP(8*8+4,"esp"),"eax"); - &mov (&DWP(8*8+0,"esp"),"ebx"); - - &BODY_00_15_sse2(); - - &cmp (&LB("edx"),0x35); - &jne (&label("00_14_sse2")); - - &BODY_00_15_sse2(1); - -&set_label("16_79_sse2",16); - #&movq ("mm2",&QWP(8*(9+16-1),"esp")); #prefetched in BODY_00_15 - #&movq ("mm6",&QWP(8*(9+16-14),"esp")); - &movq ("mm1","mm2"); - - &psrlq ("mm2",1); - &movq ("mm7","mm6"); - &psrlq ("mm6",6); - &movq ("mm3","mm2"); - - &psrlq ("mm2",7-1); - &movq ("mm5","mm6"); - &psrlq ("mm6",19-6); - &pxor ("mm3","mm2"); - - &psrlq ("mm2",8-7); - &pxor ("mm5","mm6"); - &psrlq ("mm6",61-19); - &pxor ("mm3","mm2"); - - &movq ("mm2",&QWP(8*(9+16),"esp")); - - &psllq ("mm1",56); - &pxor ("mm5","mm6"); - &psllq ("mm7",3); - &pxor ("mm3","mm1"); - - &paddq ("mm2",&QWP(8*(9+16-9),"esp")); - - &psllq ("mm1",63-56); - &pxor ("mm5","mm7"); - &psllq ("mm7",45-3); - &pxor ("mm3","mm1"); - &pxor ("mm5","mm7"); - - &paddq ("mm3","mm5"); - &paddq ("mm3","mm2"); - &movq (&QWP(8*9,"esp"),"mm3"); - - &BODY_00_15_sse2(1); - - &cmp (&LB("edx"),0x17); - &jne (&label("16_79_sse2")); - - # &movq ($A,$Asse2); - &movq ("mm1",$Bsse2); - &movq ("mm2",$Csse2); - &movq ("mm3",$Dsse2); - # &movq ($E,$Esse2); - &movq ("mm5",$Fsse2); - &movq ("mm6",$Gsse2); - &movq ("mm7",$Hsse2); - - &paddq ($A,&QWP(0,"esi")); - &paddq ("mm1",&QWP(8,"esi")); - &paddq ("mm2",&QWP(16,"esi")); - &paddq ("mm3",&QWP(24,"esi")); - &paddq ($E,&QWP(32,"esi")); - &paddq ("mm5",&QWP(40,"esi")); - &paddq ("mm6",&QWP(48,"esi")); - &paddq ("mm7",&QWP(56,"esi")); - - &movq (&QWP(0,"esi"),$A); - &movq (&QWP(8,"esi"),"mm1"); - &movq (&QWP(16,"esi"),"mm2"); - &movq (&QWP(24,"esi"),"mm3"); - &movq (&QWP(32,"esi"),$E); - &movq (&QWP(40,"esi"),"mm5"); - &movq (&QWP(48,"esi"),"mm6"); - &movq (&QWP(56,"esi"),"mm7"); - - &add ("esp",8*80); # destroy frame - &sub ($K512,8*80); # rewind K - - &cmp ("edi",&DWP(8*10+8,"esp")); # are we done yet? - &jb (&label("loop_sse2")); - - &emms (); - &mov ("esp",&DWP(8*10+12,"esp")); # restore sp -&function_end_A(); -} -&set_label("loop_x86",16); - # copy input block to stack reversing byte and qword order - for ($i=0;$i<8;$i++) { - &mov ("eax",&DWP($i*16+0,"edi")); - &mov ("ebx",&DWP($i*16+4,"edi")); - &mov ("ecx",&DWP($i*16+8,"edi")); - &mov ("edx",&DWP($i*16+12,"edi")); - &bswap ("eax"); - &bswap ("ebx"); - &bswap ("ecx"); - &bswap ("edx"); - &push ("eax"); - &push ("ebx"); - &push ("ecx"); - &push ("edx"); - } - &add ("edi",128); - &sub ("esp",9*8); # place for T,A,B,C,D,E,F,G,H - &mov (&DWP(8*(9+16)+4,"esp"),"edi"); - - # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack - &lea ("edi",&DWP(8,"esp")); - &mov ("ecx",16); - &data_word(0xA5F3F689); # rep movsd - -&set_label("00_15_x86",16); - &BODY_00_15_x86(); - - &cmp (&LB("edx"),0x94); - &jne (&label("00_15_x86")); - -&set_label("16_79_x86",16); - #define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) - # LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 - # HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 - &mov ("ecx",&DWP(8*(9+15+16-1)+0,"esp")); - &mov ("edx",&DWP(8*(9+15+16-1)+4,"esp")); - &mov ("esi","ecx"); - - &shr ("ecx",1); # lo>>1 - &mov ("edi","edx"); - &shr ("edx",1); # hi>>1 - &mov ("eax","ecx"); - &shl ("esi",24); # lo<<24 - &mov ("ebx","edx"); - &shl ("edi",24); # hi<<24 - &xor ("ebx","esi"); - - &shr ("ecx",7-1); # lo>>7 - &xor ("eax","edi"); - &shr ("edx",7-1); # hi>>7 - &xor ("eax","ecx"); - &shl ("esi",31-24); # lo<<31 - &xor ("ebx","edx"); - &shl ("edi",25-24); # hi<<25 - &xor ("ebx","esi"); - - &shr ("ecx",8-7); # lo>>8 - &xor ("eax","edi"); - &shr ("edx",8-7); # hi>>8 - &xor ("eax","ecx"); - &shl ("edi",31-25); # hi<<31 - &xor ("ebx","edx"); - &xor ("eax","edi"); # T1 = sigma0(X[-15]) - - &mov (&DWP(0,"esp"),"eax"); - &mov (&DWP(4,"esp"),"ebx"); # put T1 away - - #define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) - # LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 - # HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 - &mov ("ecx",&DWP(8*(9+15+16-14)+0,"esp")); - &mov ("edx",&DWP(8*(9+15+16-14)+4,"esp")); - &mov ("esi","ecx"); - - &shr ("ecx",6); # lo>>6 - &mov ("edi","edx"); - &shr ("edx",6); # hi>>6 - &mov ("eax","ecx"); - &shl ("esi",3); # lo<<3 - &mov ("ebx","edx"); - &shl ("edi",3); # hi<<3 - &xor ("eax","esi"); - - &shr ("ecx",19-6); # lo>>19 - &xor ("ebx","edi"); - &shr ("edx",19-6); # hi>>19 - &xor ("eax","ecx"); - &shl ("esi",13-3); # lo<<13 - &xor ("ebx","edx"); - &shl ("edi",13-3); # hi<<13 - &xor ("ebx","esi"); - - &shr ("ecx",29-19); # lo>>29 - &xor ("eax","edi"); - &shr ("edx",29-19); # hi>>29 - &xor ("ebx","ecx"); - &shl ("edi",26-13); # hi<<26 - &xor ("eax","edx"); - &xor ("eax","edi"); # sigma1(X[-2]) - - &mov ("ecx",&DWP(8*(9+15+16)+0,"esp")); - &mov ("edx",&DWP(8*(9+15+16)+4,"esp")); - &add ("eax",&DWP(0,"esp")); - &adc ("ebx",&DWP(4,"esp")); # T1 = sigma1(X[-2])+T1 - &mov ("esi",&DWP(8*(9+15+16-9)+0,"esp")); - &mov ("edi",&DWP(8*(9+15+16-9)+4,"esp")); - &add ("eax","ecx"); - &adc ("ebx","edx"); # T1 += X[-16] - &add ("eax","esi"); - &adc ("ebx","edi"); # T1 += X[-7] - &mov (&DWP(8*(9+15)+0,"esp"),"eax"); - &mov (&DWP(8*(9+15)+4,"esp"),"ebx"); # save X[0] - - &BODY_00_15_x86(); - - &cmp (&LB("edx"),0x17); - &jne (&label("16_79_x86")); - - &mov ("esi",&DWP(8*(9+16+80)+0,"esp"));# ctx - &mov ("edi",&DWP(8*(9+16+80)+4,"esp"));# inp - for($i=0;$i<4;$i++) { - &mov ("eax",&DWP($i*16+0,"esi")); - &mov ("ebx",&DWP($i*16+4,"esi")); - &mov ("ecx",&DWP($i*16+8,"esi")); - &mov ("edx",&DWP($i*16+12,"esi")); - &add ("eax",&DWP(8+($i*16)+0,"esp")); - &adc ("ebx",&DWP(8+($i*16)+4,"esp")); - &mov (&DWP($i*16+0,"esi"),"eax"); - &mov (&DWP($i*16+4,"esi"),"ebx"); - &add ("ecx",&DWP(8+($i*16)+8,"esp")); - &adc ("edx",&DWP(8+($i*16)+12,"esp")); - &mov (&DWP($i*16+8,"esi"),"ecx"); - &mov (&DWP($i*16+12,"esi"),"edx"); - } - &add ("esp",8*(9+16+80)); # destroy frame - &sub ($K512,8*80); # rewind K - - &cmp ("edi",&DWP(8,"esp")); # are we done yet? - &jb (&label("loop_x86")); - - &mov ("esp",&DWP(12,"esp")); # restore sp -&function_end_A(); - -&set_label("K512",64); # Yes! I keep it in the code segment! - &data_word(0xd728ae22,0x428a2f98); # u64 - &data_word(0x23ef65cd,0x71374491); # u64 - &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 - &data_word(0x8189dbbc,0xe9b5dba5); # u64 - &data_word(0xf348b538,0x3956c25b); # u64 - &data_word(0xb605d019,0x59f111f1); # u64 - &data_word(0xaf194f9b,0x923f82a4); # u64 - &data_word(0xda6d8118,0xab1c5ed5); # u64 - &data_word(0xa3030242,0xd807aa98); # u64 - &data_word(0x45706fbe,0x12835b01); # u64 - &data_word(0x4ee4b28c,0x243185be); # u64 - &data_word(0xd5ffb4e2,0x550c7dc3); # u64 - &data_word(0xf27b896f,0x72be5d74); # u64 - &data_word(0x3b1696b1,0x80deb1fe); # u64 - &data_word(0x25c71235,0x9bdc06a7); # u64 - &data_word(0xcf692694,0xc19bf174); # u64 - &data_word(0x9ef14ad2,0xe49b69c1); # u64 - &data_word(0x384f25e3,0xefbe4786); # u64 - &data_word(0x8b8cd5b5,0x0fc19dc6); # u64 - &data_word(0x77ac9c65,0x240ca1cc); # u64 - &data_word(0x592b0275,0x2de92c6f); # u64 - &data_word(0x6ea6e483,0x4a7484aa); # u64 - &data_word(0xbd41fbd4,0x5cb0a9dc); # u64 - &data_word(0x831153b5,0x76f988da); # u64 - &data_word(0xee66dfab,0x983e5152); # u64 - &data_word(0x2db43210,0xa831c66d); # u64 - &data_word(0x98fb213f,0xb00327c8); # u64 - &data_word(0xbeef0ee4,0xbf597fc7); # u64 - &data_word(0x3da88fc2,0xc6e00bf3); # u64 - &data_word(0x930aa725,0xd5a79147); # u64 - &data_word(0xe003826f,0x06ca6351); # u64 - &data_word(0x0a0e6e70,0x14292967); # u64 - &data_word(0x46d22ffc,0x27b70a85); # u64 - &data_word(0x5c26c926,0x2e1b2138); # u64 - &data_word(0x5ac42aed,0x4d2c6dfc); # u64 - &data_word(0x9d95b3df,0x53380d13); # u64 - &data_word(0x8baf63de,0x650a7354); # u64 - &data_word(0x3c77b2a8,0x766a0abb); # u64 - &data_word(0x47edaee6,0x81c2c92e); # u64 - &data_word(0x1482353b,0x92722c85); # u64 - &data_word(0x4cf10364,0xa2bfe8a1); # u64 - &data_word(0xbc423001,0xa81a664b); # u64 - &data_word(0xd0f89791,0xc24b8b70); # u64 - &data_word(0x0654be30,0xc76c51a3); # u64 - &data_word(0xd6ef5218,0xd192e819); # u64 - &data_word(0x5565a910,0xd6990624); # u64 - &data_word(0x5771202a,0xf40e3585); # u64 - &data_word(0x32bbd1b8,0x106aa070); # u64 - &data_word(0xb8d2d0c8,0x19a4c116); # u64 - &data_word(0x5141ab53,0x1e376c08); # u64 - &data_word(0xdf8eeb99,0x2748774c); # u64 - &data_word(0xe19b48a8,0x34b0bcb5); # u64 - &data_word(0xc5c95a63,0x391c0cb3); # u64 - &data_word(0xe3418acb,0x4ed8aa4a); # u64 - &data_word(0x7763e373,0x5b9cca4f); # u64 - &data_word(0xd6b2b8a3,0x682e6ff3); # u64 - &data_word(0x5defb2fc,0x748f82ee); # u64 - &data_word(0x43172f60,0x78a5636f); # u64 - &data_word(0xa1f0ab72,0x84c87814); # u64 - &data_word(0x1a6439ec,0x8cc70208); # u64 - &data_word(0x23631e28,0x90befffa); # u64 - &data_word(0xde82bde9,0xa4506ceb); # u64 - &data_word(0xb2c67915,0xbef9a3f7); # u64 - &data_word(0xe372532b,0xc67178f2); # u64 - &data_word(0xea26619c,0xca273ece); # u64 - &data_word(0x21c0c207,0xd186b8c7); # u64 - &data_word(0xcde0eb1e,0xeada7dd6); # u64 - &data_word(0xee6ed178,0xf57d4f7f); # u64 - &data_word(0x72176fba,0x06f067aa); # u64 - &data_word(0xa2c898a6,0x0a637dc5); # u64 - &data_word(0xbef90dae,0x113f9804); # u64 - &data_word(0x131c471b,0x1b710b35); # u64 - &data_word(0x23047d84,0x28db77f5); # u64 - &data_word(0x40c72493,0x32caab7b); # u64 - &data_word(0x15c9bebc,0x3c9ebe0a); # u64 - &data_word(0x9c100d4c,0x431d67c4); # u64 - &data_word(0xcb3e42b6,0x4cc5d4be); # u64 - &data_word(0xfc657e2a,0x597f299c); # u64 - &data_word(0x3ad6faec,0x5fcb6fab); # u64 - &data_word(0x4a475817,0x6c44198c); # u64 -&function_end_B("sha512_block_data_order"); -&asciz("SHA512 block transform for x86, CRYPTOGAMS by "); - -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-armv4.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-armv4.pl deleted file mode 100644 index 7faf37b147..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-armv4.pl +++ /dev/null @@ -1,582 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA512 block procedure for ARMv4. September 2007. - -# This code is ~4.5 (four and a half) times faster than code generated -# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue -# Xscale PXA250 core]. -# -# July 2010. -# -# Rescheduling for dual-issue pipeline resulted in 6% improvement on -# Cortex A8 core and ~40 cycles per processed byte. - -# February 2011. -# -# Profiler-assisted and platform-specific optimization resulted in 7% -# improvement on Coxtex A8 core and ~38 cycles per byte. - -# March 2011. -# -# Add NEON implementation. On Cortex A8 it was measured to process -# one byte in 25.5 cycles or 47% faster than integer-only code. - -# Byte order [in]dependence. ========================================= -# -# Originally caller was expected to maintain specific *dword* order in -# h[0-7], namely with most significant dword at *lower* address, which -# was reflected in below two parameters as 0 and 4. Now caller is -# expected to maintain native byte order for whole 64-bit values. -$hi="HI"; -$lo="LO"; -# ==================================================================== - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -$ctx="r0"; # parameter block -$inp="r1"; -$len="r2"; - -$Tlo="r3"; -$Thi="r4"; -$Alo="r5"; -$Ahi="r6"; -$Elo="r7"; -$Ehi="r8"; -$t0="r9"; -$t1="r10"; -$t2="r11"; -$t3="r12"; -############ r13 is stack pointer -$Ktbl="r14"; -############ r15 is program counter - -$Aoff=8*0; -$Boff=8*1; -$Coff=8*2; -$Doff=8*3; -$Eoff=8*4; -$Foff=8*5; -$Goff=8*6; -$Hoff=8*7; -$Xoff=8*8; - -sub BODY_00_15() { -my $magic = shift; -$code.=<<___; - @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) - @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 - @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 - mov $t0,$Elo,lsr#14 - str $Tlo,[sp,#$Xoff+0] - mov $t1,$Ehi,lsr#14 - str $Thi,[sp,#$Xoff+4] - eor $t0,$t0,$Ehi,lsl#18 - ldr $t2,[sp,#$Hoff+0] @ h.lo - eor $t1,$t1,$Elo,lsl#18 - ldr $t3,[sp,#$Hoff+4] @ h.hi - eor $t0,$t0,$Elo,lsr#18 - eor $t1,$t1,$Ehi,lsr#18 - eor $t0,$t0,$Ehi,lsl#14 - eor $t1,$t1,$Elo,lsl#14 - eor $t0,$t0,$Ehi,lsr#9 - eor $t1,$t1,$Elo,lsr#9 - eor $t0,$t0,$Elo,lsl#23 - eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) - adds $Tlo,$Tlo,$t0 - ldr $t0,[sp,#$Foff+0] @ f.lo - adc $Thi,$Thi,$t1 @ T += Sigma1(e) - ldr $t1,[sp,#$Foff+4] @ f.hi - adds $Tlo,$Tlo,$t2 - ldr $t2,[sp,#$Goff+0] @ g.lo - adc $Thi,$Thi,$t3 @ T += h - ldr $t3,[sp,#$Goff+4] @ g.hi - - eor $t0,$t0,$t2 - str $Elo,[sp,#$Eoff+0] - eor $t1,$t1,$t3 - str $Ehi,[sp,#$Eoff+4] - and $t0,$t0,$Elo - str $Alo,[sp,#$Aoff+0] - and $t1,$t1,$Ehi - str $Ahi,[sp,#$Aoff+4] - eor $t0,$t0,$t2 - ldr $t2,[$Ktbl,#$lo] @ K[i].lo - eor $t1,$t1,$t3 @ Ch(e,f,g) - ldr $t3,[$Ktbl,#$hi] @ K[i].hi - - adds $Tlo,$Tlo,$t0 - ldr $Elo,[sp,#$Doff+0] @ d.lo - adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) - ldr $Ehi,[sp,#$Doff+4] @ d.hi - adds $Tlo,$Tlo,$t2 - and $t0,$t2,#0xff - adc $Thi,$Thi,$t3 @ T += K[i] - adds $Elo,$Elo,$Tlo - ldr $t2,[sp,#$Boff+0] @ b.lo - adc $Ehi,$Ehi,$Thi @ d += T - teq $t0,#$magic - - ldr $t3,[sp,#$Coff+0] @ c.lo - orreq $Ktbl,$Ktbl,#1 - @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) - @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 - @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 - mov $t0,$Alo,lsr#28 - mov $t1,$Ahi,lsr#28 - eor $t0,$t0,$Ahi,lsl#4 - eor $t1,$t1,$Alo,lsl#4 - eor $t0,$t0,$Ahi,lsr#2 - eor $t1,$t1,$Alo,lsr#2 - eor $t0,$t0,$Alo,lsl#30 - eor $t1,$t1,$Ahi,lsl#30 - eor $t0,$t0,$Ahi,lsr#7 - eor $t1,$t1,$Alo,lsr#7 - eor $t0,$t0,$Alo,lsl#25 - eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a) - adds $Tlo,$Tlo,$t0 - and $t0,$Alo,$t2 - adc $Thi,$Thi,$t1 @ T += Sigma0(a) - - ldr $t1,[sp,#$Boff+4] @ b.hi - orr $Alo,$Alo,$t2 - ldr $t2,[sp,#$Coff+4] @ c.hi - and $Alo,$Alo,$t3 - and $t3,$Ahi,$t1 - orr $Ahi,$Ahi,$t1 - orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo - and $Ahi,$Ahi,$t2 - adds $Alo,$Alo,$Tlo - orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi - sub sp,sp,#8 - adc $Ahi,$Ahi,$Thi @ h += T - tst $Ktbl,#1 - add $Ktbl,$Ktbl,#8 -___ -} -$code=<<___; -#include "arm_arch.h" -#ifdef __ARMEL__ -# define LO 0 -# define HI 4 -# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 -#else -# define HI 0 -# define LO 4 -# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 -#endif - -.text -.code 32 -.type K512,%object -.align 5 -K512: -WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) -WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) -WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) -WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) -WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) -WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) -WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) -WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) -WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) -WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) -WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) -WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) -WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) -WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) -WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) -WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) -WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) -WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) -WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) -WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) -WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) -WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) -WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) -WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) -WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) -WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) -WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) -WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) -WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) -WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) -WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) -WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) -WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) -WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) -WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) -WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) -WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) -WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) -WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) -WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) -.size K512,.-K512 -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha512_block_data_order -.skip 32-4 - -.global sha512_block_data_order -.type sha512_block_data_order,%function -sha512_block_data_order: - sub r3,pc,#8 @ sha512_block_data_order - add $len,$inp,$len,lsl#7 @ len to point at the end of inp -#if __ARM_ARCH__>=7 - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#1 - bne .LNEON -#endif - stmdb sp!,{r4-r12,lr} - sub $Ktbl,r3,#672 @ K512 - sub sp,sp,#9*8 - - ldr $Elo,[$ctx,#$Eoff+$lo] - ldr $Ehi,[$ctx,#$Eoff+$hi] - ldr $t0, [$ctx,#$Goff+$lo] - ldr $t1, [$ctx,#$Goff+$hi] - ldr $t2, [$ctx,#$Hoff+$lo] - ldr $t3, [$ctx,#$Hoff+$hi] -.Loop: - str $t0, [sp,#$Goff+0] - str $t1, [sp,#$Goff+4] - str $t2, [sp,#$Hoff+0] - str $t3, [sp,#$Hoff+4] - ldr $Alo,[$ctx,#$Aoff+$lo] - ldr $Ahi,[$ctx,#$Aoff+$hi] - ldr $Tlo,[$ctx,#$Boff+$lo] - ldr $Thi,[$ctx,#$Boff+$hi] - ldr $t0, [$ctx,#$Coff+$lo] - ldr $t1, [$ctx,#$Coff+$hi] - ldr $t2, [$ctx,#$Doff+$lo] - ldr $t3, [$ctx,#$Doff+$hi] - str $Tlo,[sp,#$Boff+0] - str $Thi,[sp,#$Boff+4] - str $t0, [sp,#$Coff+0] - str $t1, [sp,#$Coff+4] - str $t2, [sp,#$Doff+0] - str $t3, [sp,#$Doff+4] - ldr $Tlo,[$ctx,#$Foff+$lo] - ldr $Thi,[$ctx,#$Foff+$hi] - str $Tlo,[sp,#$Foff+0] - str $Thi,[sp,#$Foff+4] - -.L00_15: -#if __ARM_ARCH__<7 - ldrb $Tlo,[$inp,#7] - ldrb $t0, [$inp,#6] - ldrb $t1, [$inp,#5] - ldrb $t2, [$inp,#4] - ldrb $Thi,[$inp,#3] - ldrb $t3, [$inp,#2] - orr $Tlo,$Tlo,$t0,lsl#8 - ldrb $t0, [$inp,#1] - orr $Tlo,$Tlo,$t1,lsl#16 - ldrb $t1, [$inp],#8 - orr $Tlo,$Tlo,$t2,lsl#24 - orr $Thi,$Thi,$t3,lsl#8 - orr $Thi,$Thi,$t0,lsl#16 - orr $Thi,$Thi,$t1,lsl#24 -#else - ldr $Tlo,[$inp,#4] - ldr $Thi,[$inp],#8 -#ifdef __ARMEL__ - rev $Tlo,$Tlo - rev $Thi,$Thi -#endif -#endif -___ - &BODY_00_15(0x94); -$code.=<<___; - tst $Ktbl,#1 - beq .L00_15 - ldr $t0,[sp,#`$Xoff+8*(16-1)`+0] - ldr $t1,[sp,#`$Xoff+8*(16-1)`+4] - bic $Ktbl,$Ktbl,#1 -.L16_79: - @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) - @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 - @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 - mov $Tlo,$t0,lsr#1 - ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] - mov $Thi,$t1,lsr#1 - ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] - eor $Tlo,$Tlo,$t1,lsl#31 - eor $Thi,$Thi,$t0,lsl#31 - eor $Tlo,$Tlo,$t0,lsr#8 - eor $Thi,$Thi,$t1,lsr#8 - eor $Tlo,$Tlo,$t1,lsl#24 - eor $Thi,$Thi,$t0,lsl#24 - eor $Tlo,$Tlo,$t0,lsr#7 - eor $Thi,$Thi,$t1,lsr#7 - eor $Tlo,$Tlo,$t1,lsl#25 - - @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) - @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 - @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 - mov $t0,$t2,lsr#19 - mov $t1,$t3,lsr#19 - eor $t0,$t0,$t3,lsl#13 - eor $t1,$t1,$t2,lsl#13 - eor $t0,$t0,$t3,lsr#29 - eor $t1,$t1,$t2,lsr#29 - eor $t0,$t0,$t2,lsl#3 - eor $t1,$t1,$t3,lsl#3 - eor $t0,$t0,$t2,lsr#6 - eor $t1,$t1,$t3,lsr#6 - ldr $t2,[sp,#`$Xoff+8*(16-9)`+0] - eor $t0,$t0,$t3,lsl#26 - - ldr $t3,[sp,#`$Xoff+8*(16-9)`+4] - adds $Tlo,$Tlo,$t0 - ldr $t0,[sp,#`$Xoff+8*16`+0] - adc $Thi,$Thi,$t1 - - ldr $t1,[sp,#`$Xoff+8*16`+4] - adds $Tlo,$Tlo,$t2 - adc $Thi,$Thi,$t3 - adds $Tlo,$Tlo,$t0 - adc $Thi,$Thi,$t1 -___ - &BODY_00_15(0x17); -$code.=<<___; - ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0] - ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4] - beq .L16_79 - bic $Ktbl,$Ktbl,#1 - - ldr $Tlo,[sp,#$Boff+0] - ldr $Thi,[sp,#$Boff+4] - ldr $t0, [$ctx,#$Aoff+$lo] - ldr $t1, [$ctx,#$Aoff+$hi] - ldr $t2, [$ctx,#$Boff+$lo] - ldr $t3, [$ctx,#$Boff+$hi] - adds $t0,$Alo,$t0 - str $t0, [$ctx,#$Aoff+$lo] - adc $t1,$Ahi,$t1 - str $t1, [$ctx,#$Aoff+$hi] - adds $t2,$Tlo,$t2 - str $t2, [$ctx,#$Boff+$lo] - adc $t3,$Thi,$t3 - str $t3, [$ctx,#$Boff+$hi] - - ldr $Alo,[sp,#$Coff+0] - ldr $Ahi,[sp,#$Coff+4] - ldr $Tlo,[sp,#$Doff+0] - ldr $Thi,[sp,#$Doff+4] - ldr $t0, [$ctx,#$Coff+$lo] - ldr $t1, [$ctx,#$Coff+$hi] - ldr $t2, [$ctx,#$Doff+$lo] - ldr $t3, [$ctx,#$Doff+$hi] - adds $t0,$Alo,$t0 - str $t0, [$ctx,#$Coff+$lo] - adc $t1,$Ahi,$t1 - str $t1, [$ctx,#$Coff+$hi] - adds $t2,$Tlo,$t2 - str $t2, [$ctx,#$Doff+$lo] - adc $t3,$Thi,$t3 - str $t3, [$ctx,#$Doff+$hi] - - ldr $Tlo,[sp,#$Foff+0] - ldr $Thi,[sp,#$Foff+4] - ldr $t0, [$ctx,#$Eoff+$lo] - ldr $t1, [$ctx,#$Eoff+$hi] - ldr $t2, [$ctx,#$Foff+$lo] - ldr $t3, [$ctx,#$Foff+$hi] - adds $Elo,$Elo,$t0 - str $Elo,[$ctx,#$Eoff+$lo] - adc $Ehi,$Ehi,$t1 - str $Ehi,[$ctx,#$Eoff+$hi] - adds $t2,$Tlo,$t2 - str $t2, [$ctx,#$Foff+$lo] - adc $t3,$Thi,$t3 - str $t3, [$ctx,#$Foff+$hi] - - ldr $Alo,[sp,#$Goff+0] - ldr $Ahi,[sp,#$Goff+4] - ldr $Tlo,[sp,#$Hoff+0] - ldr $Thi,[sp,#$Hoff+4] - ldr $t0, [$ctx,#$Goff+$lo] - ldr $t1, [$ctx,#$Goff+$hi] - ldr $t2, [$ctx,#$Hoff+$lo] - ldr $t3, [$ctx,#$Hoff+$hi] - adds $t0,$Alo,$t0 - str $t0, [$ctx,#$Goff+$lo] - adc $t1,$Ahi,$t1 - str $t1, [$ctx,#$Goff+$hi] - adds $t2,$Tlo,$t2 - str $t2, [$ctx,#$Hoff+$lo] - adc $t3,$Thi,$t3 - str $t3, [$ctx,#$Hoff+$hi] - - add sp,sp,#640 - sub $Ktbl,$Ktbl,#640 - - teq $inp,$len - bne .Loop - - add sp,sp,#8*9 @ destroy frame -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -#endif -___ - -{ -my @Sigma0=(28,34,39); -my @Sigma1=(14,18,41); -my @sigma0=(1, 8, 7); -my @sigma1=(19,61,6); - -my $Ktbl="r3"; -my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch - -my @X=map("d$_",(0..15)); -my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23)); - -sub NEON_00_15() { -my $i=shift; -my ($a,$b,$c,$d,$e,$f,$g,$h)=@_; -my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps - -$code.=<<___ if ($i<16 || $i&1); - vshr.u64 $t0,$e,#@Sigma1[0] @ $i -#if $i<16 - vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned -#endif - vshr.u64 $t1,$e,#@Sigma1[1] - vshr.u64 $t2,$e,#@Sigma1[2] -___ -$code.=<<___; - vld1.64 {$K},[$Ktbl,:64]! @ K[i++] - vsli.64 $t0,$e,#`64-@Sigma1[0]` - vsli.64 $t1,$e,#`64-@Sigma1[1]` - vsli.64 $t2,$e,#`64-@Sigma1[2]` -#if $i<16 && defined(__ARMEL__) - vrev64.8 @X[$i],@X[$i] -#endif - vadd.i64 $T1,$K,$h - veor $Ch,$f,$g - veor $t0,$t1 - vand $Ch,$e - veor $t0,$t2 @ Sigma1(e) - veor $Ch,$g @ Ch(e,f,g) - vadd.i64 $T1,$t0 - vshr.u64 $t0,$a,#@Sigma0[0] - vadd.i64 $T1,$Ch - vshr.u64 $t1,$a,#@Sigma0[1] - vshr.u64 $t2,$a,#@Sigma0[2] - vsli.64 $t0,$a,#`64-@Sigma0[0]` - vsli.64 $t1,$a,#`64-@Sigma0[1]` - vsli.64 $t2,$a,#`64-@Sigma0[2]` - vadd.i64 $T1,@X[$i%16] - vorr $Maj,$a,$c - vand $Ch,$a,$c - veor $h,$t0,$t1 - vand $Maj,$b - veor $h,$t2 @ Sigma0(a) - vorr $Maj,$Ch @ Maj(a,b,c) - vadd.i64 $h,$T1 - vadd.i64 $d,$T1 - vadd.i64 $h,$Maj -___ -} - -sub NEON_16_79() { -my $i=shift; - -if ($i&1) { &NEON_00_15($i,@_); return; } - -# 2x-vectorized, therefore runs every 2nd round -my @X=map("q$_",(0..7)); # view @X as 128-bit vector -my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps -my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15 -my $e=@_[4]; # $e from NEON_00_15 -$i /= 2; -$code.=<<___; - vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0] - vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1] - vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2] - vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]` - vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1] - vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]` - veor $s1,$t0 - vshr.u64 $t0,$s0,#@sigma0[0] - veor $s1,$t1 @ sigma1(X[i+14]) - vshr.u64 $t1,$s0,#@sigma0[1] - vadd.i64 @X[$i%8],$s1 - vshr.u64 $s1,$s0,#@sigma0[2] - vsli.64 $t0,$s0,#`64-@sigma0[0]` - vsli.64 $t1,$s0,#`64-@sigma0[1]` - vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9] - veor $s1,$t0 - vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15 - vadd.i64 @X[$i%8],$s0 - vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15 - veor $s1,$t1 @ sigma0(X[i+1]) - vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15 - vadd.i64 @X[$i%8],$s1 -___ - &NEON_00_15(2*$i,@_); -} - -$code.=<<___; -#if __ARM_ARCH__>=7 -.fpu neon - -.align 4 -.LNEON: - dmb @ errata #451034 on early Cortex A8 - vstmdb sp!,{d8-d15} @ ABI specification says so - sub $Ktbl,r3,#672 @ K512 - vldmia $ctx,{$A-$H} @ load context -.Loop_neon: -___ -for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - mov $cnt,#4 -.L16_79_neon: - subs $cnt,#1 -___ -for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - bne .L16_79_neon - - vldmia $ctx,{d24-d31} @ load context to temp - vadd.i64 q8,q12 @ vectorized accumulate - vadd.i64 q9,q13 - vadd.i64 q10,q14 - vadd.i64 q11,q15 - vstmia $ctx,{$A-$H} @ save context - teq $inp,$len - sub $Ktbl,#640 @ rewind K512 - bne .Loop_neon - - vldmia sp!,{d8-d15} @ epilogue - bx lr -#endif -___ -} -$code.=<<___; -.size sha512_block_data_order,.-sha512_block_data_order -.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " -.align 2 -.comm OPENSSL_armcap_P,4,4 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; -close STDOUT; # enforce flush diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-ia64.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-ia64.pl deleted file mode 100755 index 1c6ce56522..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-ia64.pl +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# SHA256/512_Transform for Itanium. -# -# sha512_block runs in 1003 cycles on Itanium 2, which is almost 50% -# faster than gcc and >60%(!) faster than code generated by HP-UX -# compiler (yes, HP-UX is generating slower code, because unlike gcc, -# it failed to deploy "shift right pair," 'shrp' instruction, which -# substitutes for 64-bit rotate). -# -# 924 cycles long sha256_block outperforms gcc by over factor of 2(!) -# and HP-UX compiler - by >40% (yes, gcc won sha512_block, but lost -# this one big time). Note that "formally" 924 is about 100 cycles -# too much. I mean it's 64 32-bit rounds vs. 80 virtually identical -# 64-bit ones and 1003*64/80 gives 802. Extra cycles, 2 per round, -# are spent on extra work to provide for 32-bit rotations. 32-bit -# rotations are still handled by 'shrp' instruction and for this -# reason lower 32 bits are deposited to upper half of 64-bit register -# prior 'shrp' issue. And in order to minimize the amount of such -# operations, X[16] values are *maintained* with copies of lower -# halves in upper halves, which is why you'll spot such instructions -# as custom 'mux2', "parallel 32-bit add," 'padd4' and "parallel -# 32-bit unsigned right shift," 'pshr4.u' instructions here. -# -# Rules of engagement. -# -# There is only one integer shifter meaning that if I have two rotate, -# deposit or extract instructions in adjacent bundles, they shall -# split [at run-time if they have to]. But note that variable and -# parallel shifts are performed by multi-media ALU and *are* pairable -# with rotates [and alike]. On the backside MMALU is rather slow: it -# takes 2 extra cycles before the result of integer operation is -# available *to* MMALU and 2(*) extra cycles before the result of MM -# operation is available "back" *to* integer ALU, not to mention that -# MMALU itself has 2 cycles latency. However! I explicitly scheduled -# these MM instructions to avoid MM stalls, so that all these extra -# latencies get "hidden" in instruction-level parallelism. -# -# (*) 2 cycles on Itanium 1 and 1 cycle on Itanium 2. But I schedule -# for 2 in order to provide for best *overall* performance, -# because on Itanium 1 stall on MM result is accompanied by -# pipeline flush, which takes 6 cycles:-( -# -# Resulting performance numbers for 900MHz Itanium 2 system: -# -# The 'numbers' are in 1000s of bytes per second processed. -# type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes -# sha1(*) 6210.14k 20376.30k 52447.83k 85870.05k 105478.12k -# sha256 7476.45k 20572.05k 41538.34k 56062.29k 62093.18k -# sha512 4996.56k 20026.28k 47597.20k 85278.79k 111501.31k -# -# (*) SHA1 numbers are for HP-UX compiler and are presented purely -# for reference purposes. I bet it can improved too... -# -# To generate code, pass the file name with either 256 or 512 in its -# name and compiler flags. - -$output=shift; - -if ($output =~ /512.*\.[s|asm]/) { - $SZ=8; - $BITS=8*$SZ; - $LDW="ld8"; - $STW="st8"; - $ADD="add"; - $SHRU="shr.u"; - $TABLE="K512"; - $func="sha512_block_data_order"; - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=(1, 8, 7); - @sigma1=(19,61, 6); - $rounds=80; -} elsif ($output =~ /256.*\.[s|asm]/) { - $SZ=4; - $BITS=8*$SZ; - $LDW="ld4"; - $STW="st4"; - $ADD="padd4"; - $SHRU="pshr4.u"; - $TABLE="K256"; - $func="sha256_block_data_order"; - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 7,18, 3); - @sigma1=(17,19,10); - $rounds=64; -} else { die "nonsense $output"; } - -open STDOUT,">$output" || die "can't open $output: $!"; - -if ($^O eq "hpux") { - $ADDP="addp4"; - for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); } -} else { $ADDP="add"; } -for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); - $big_endian=0 if (/\-DL_ENDIAN/); } -if (!defined($big_endian)) - { $big_endian=(unpack('L',pack('N',1))==1); } - -$code=<<___; -.ident \"$output, version 1.1\" -.ident \"IA-64 ISA artwork by Andy Polyakov \" -.explicit -.text - -pfssave=r2; -lcsave=r3; -prsave=r14; -K=r15; -A=r16; B=r17; C=r18; D=r19; -E=r20; F=r21; G=r22; H=r23; -T1=r24; T2=r25; -s0=r26; s1=r27; t0=r28; t1=r29; -Ktbl=r30; -ctx=r31; // 1st arg -input=r48; // 2nd arg -num=r49; // 3rd arg -sgm0=r50; sgm1=r51; // small constants -A_=r54; B_=r55; C_=r56; D_=r57; -E_=r58; F_=r59; G_=r60; H_=r61; - -// void $func (SHA_CTX *ctx, const void *in,size_t num[,int host]) -.global $func# -.proc $func# -.align 32 -$func: - .prologue - .save ar.pfs,pfssave -{ .mmi; alloc pfssave=ar.pfs,3,27,0,16 - $ADDP ctx=0,r32 // 1st arg - .save ar.lc,lcsave - mov lcsave=ar.lc } -{ .mmi; $ADDP input=0,r33 // 2nd arg - mov num=r34 // 3rd arg - .save pr,prsave - mov prsave=pr };; - - .body -{ .mib; add r8=0*$SZ,ctx - add r9=1*$SZ,ctx - brp.loop.imp .L_first16,.L_first16_end-16 } -{ .mib; add r10=2*$SZ,ctx - add r11=3*$SZ,ctx - brp.loop.imp .L_rest,.L_rest_end-16 };; - -// load A-H -.Lpic_point: -{ .mmi; $LDW A_=[r8],4*$SZ - $LDW B_=[r9],4*$SZ - mov Ktbl=ip } -{ .mmi; $LDW C_=[r10],4*$SZ - $LDW D_=[r11],4*$SZ - mov sgm0=$sigma0[2] };; -{ .mmi; $LDW E_=[r8] - $LDW F_=[r9] - add Ktbl=($TABLE#-.Lpic_point),Ktbl } -{ .mmi; $LDW G_=[r10] - $LDW H_=[r11] - cmp.ne p0,p16=0,r0 };; // used in sha256_block -___ -$code.=<<___ if ($BITS==64); -{ .mii; and r8=7,input - and input=~7,input;; - cmp.eq p9,p0=1,r8 } -{ .mmi; cmp.eq p10,p0=2,r8 - cmp.eq p11,p0=3,r8 - cmp.eq p12,p0=4,r8 } -{ .mmi; cmp.eq p13,p0=5,r8 - cmp.eq p14,p0=6,r8 - cmp.eq p15,p0=7,r8 };; -___ -$code.=<<___; -.L_outer: -.rotr X[16] -{ .mmi; mov A=A_ - mov B=B_ - mov ar.lc=14 } -{ .mmi; mov C=C_ - mov D=D_ - mov E=E_ } -{ .mmi; mov F=F_ - mov G=G_ - mov ar.ec=2 } -{ .mmi; ld1 X[15]=[input],$SZ // eliminated in 64-bit - mov H=H_ - mov sgm1=$sigma1[2] };; - -___ -$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32); -.align 32 -.L_first16: -{ .mmi; add r9=1-$SZ,input - add r10=2-$SZ,input - add r11=3-$SZ,input };; -{ .mmi; ld1 r9=[r9] - ld1 r10=[r10] - dep.z $t1=E,32,32 } -{ .mmi; $LDW K=[Ktbl],$SZ - ld1 r11=[r11] - zxt4 E=E };; -{ .mii; or $t1=$t1,E - dep X[15]=X[15],r9,8,8 - dep r11=r10,r11,8,8 };; -{ .mmi; and T1=F,E - and T2=A,B - dep X[15]=X[15],r11,16,16 } -{ .mmi; andcm r8=G,E - and r9=A,C - mux2 $t0=A,0x44 };; // copy lower half to upper -{ .mmi; (p16) ld1 X[15-1]=[input],$SZ // prefetch - xor T1=T1,r8 // T1=((e & f) ^ (~e & g)) - _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mib; and r10=B,C - xor T2=T2,r9 };; -___ -$t0="A", $t1="E", $code.=<<___ if ($BITS==64); -// in 64-bit mode I load whole X[16] at once and take care of alignment... -{ .mmi; add r8=1*$SZ,input - add r9=2*$SZ,input - add r10=3*$SZ,input };; -{ .mmb; $LDW X[15]=[input],4*$SZ - $LDW X[14]=[r8],4*$SZ -(p9) br.cond.dpnt.many .L1byte };; -{ .mmb; $LDW X[13]=[r9],4*$SZ - $LDW X[12]=[r10],4*$SZ -(p10) br.cond.dpnt.many .L2byte };; -{ .mmb; $LDW X[11]=[input],4*$SZ - $LDW X[10]=[r8],4*$SZ -(p11) br.cond.dpnt.many .L3byte };; -{ .mmb; $LDW X[ 9]=[r9],4*$SZ - $LDW X[ 8]=[r10],4*$SZ -(p12) br.cond.dpnt.many .L4byte };; -{ .mmb; $LDW X[ 7]=[input],4*$SZ - $LDW X[ 6]=[r8],4*$SZ -(p13) br.cond.dpnt.many .L5byte };; -{ .mmb; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ -(p14) br.cond.dpnt.many .L6byte };; -{ .mmb; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ -(p15) br.cond.dpnt.many .L7byte };; -{ .mmb; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - br.many .L_first16 };; -.L1byte: -{ .mmi; $LDW X[13]=[r9],4*$SZ - $LDW X[12]=[r10],4*$SZ - shrp X[15]=X[15],X[14],56 };; -{ .mmi; $LDW X[11]=[input],4*$SZ - $LDW X[10]=[r8],4*$SZ - shrp X[14]=X[14],X[13],56 } -{ .mmi; $LDW X[ 9]=[r9],4*$SZ - $LDW X[ 8]=[r10],4*$SZ - shrp X[13]=X[13],X[12],56 };; -{ .mmi; $LDW X[ 7]=[input],4*$SZ - $LDW X[ 6]=[r8],4*$SZ - shrp X[12]=X[12],X[11],56 } -{ .mmi; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ - shrp X[11]=X[11],X[10],56 };; -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[10]=X[10],X[ 9],56 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[ 9]=X[ 9],X[ 8],56 };; -{ .mii; $LDW T1=[input] - shrp X[ 8]=X[ 8],X[ 7],56 - shrp X[ 7]=X[ 7],X[ 6],56 } -{ .mii; shrp X[ 6]=X[ 6],X[ 5],56 - shrp X[ 5]=X[ 5],X[ 4],56 };; -{ .mii; shrp X[ 4]=X[ 4],X[ 3],56 - shrp X[ 3]=X[ 3],X[ 2],56 } -{ .mii; shrp X[ 2]=X[ 2],X[ 1],56 - shrp X[ 1]=X[ 1],X[ 0],56 } -{ .mib; shrp X[ 0]=X[ 0],T1,56 - br.many .L_first16 };; -.L2byte: -{ .mmi; $LDW X[11]=[input],4*$SZ - $LDW X[10]=[r8],4*$SZ - shrp X[15]=X[15],X[14],48 } -{ .mmi; $LDW X[ 9]=[r9],4*$SZ - $LDW X[ 8]=[r10],4*$SZ - shrp X[14]=X[14],X[13],48 };; -{ .mmi; $LDW X[ 7]=[input],4*$SZ - $LDW X[ 6]=[r8],4*$SZ - shrp X[13]=X[13],X[12],48 } -{ .mmi; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ - shrp X[12]=X[12],X[11],48 };; -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[11]=X[11],X[10],48 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[10]=X[10],X[ 9],48 };; -{ .mii; $LDW T1=[input] - shrp X[ 9]=X[ 9],X[ 8],48 - shrp X[ 8]=X[ 8],X[ 7],48 } -{ .mii; shrp X[ 7]=X[ 7],X[ 6],48 - shrp X[ 6]=X[ 6],X[ 5],48 };; -{ .mii; shrp X[ 5]=X[ 5],X[ 4],48 - shrp X[ 4]=X[ 4],X[ 3],48 } -{ .mii; shrp X[ 3]=X[ 3],X[ 2],48 - shrp X[ 2]=X[ 2],X[ 1],48 } -{ .mii; shrp X[ 1]=X[ 1],X[ 0],48 - shrp X[ 0]=X[ 0],T1,48 } -{ .mfb; br.many .L_first16 };; -.L3byte: -{ .mmi; $LDW X[ 9]=[r9],4*$SZ - $LDW X[ 8]=[r10],4*$SZ - shrp X[15]=X[15],X[14],40 };; -{ .mmi; $LDW X[ 7]=[input],4*$SZ - $LDW X[ 6]=[r8],4*$SZ - shrp X[14]=X[14],X[13],40 } -{ .mmi; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ - shrp X[13]=X[13],X[12],40 };; -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[12]=X[12],X[11],40 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[11]=X[11],X[10],40 };; -{ .mii; $LDW T1=[input] - shrp X[10]=X[10],X[ 9],40 - shrp X[ 9]=X[ 9],X[ 8],40 } -{ .mii; shrp X[ 8]=X[ 8],X[ 7],40 - shrp X[ 7]=X[ 7],X[ 6],40 };; -{ .mii; shrp X[ 6]=X[ 6],X[ 5],40 - shrp X[ 5]=X[ 5],X[ 4],40 } -{ .mii; shrp X[ 4]=X[ 4],X[ 3],40 - shrp X[ 3]=X[ 3],X[ 2],40 } -{ .mii; shrp X[ 2]=X[ 2],X[ 1],40 - shrp X[ 1]=X[ 1],X[ 0],40 } -{ .mib; shrp X[ 0]=X[ 0],T1,40 - br.many .L_first16 };; -.L4byte: -{ .mmi; $LDW X[ 7]=[input],4*$SZ - $LDW X[ 6]=[r8],4*$SZ - shrp X[15]=X[15],X[14],32 } -{ .mmi; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ - shrp X[14]=X[14],X[13],32 };; -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[13]=X[13],X[12],32 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[12]=X[12],X[11],32 };; -{ .mii; $LDW T1=[input] - shrp X[11]=X[11],X[10],32 - shrp X[10]=X[10],X[ 9],32 } -{ .mii; shrp X[ 9]=X[ 9],X[ 8],32 - shrp X[ 8]=X[ 8],X[ 7],32 };; -{ .mii; shrp X[ 7]=X[ 7],X[ 6],32 - shrp X[ 6]=X[ 6],X[ 5],32 } -{ .mii; shrp X[ 5]=X[ 5],X[ 4],32 - shrp X[ 4]=X[ 4],X[ 3],32 } -{ .mii; shrp X[ 3]=X[ 3],X[ 2],32 - shrp X[ 2]=X[ 2],X[ 1],32 } -{ .mii; shrp X[ 1]=X[ 1],X[ 0],32 - shrp X[ 0]=X[ 0],T1,32 } -{ .mfb; br.many .L_first16 };; -.L5byte: -{ .mmi; $LDW X[ 5]=[r9],4*$SZ - $LDW X[ 4]=[r10],4*$SZ - shrp X[15]=X[15],X[14],24 };; -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[14]=X[14],X[13],24 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[13]=X[13],X[12],24 };; -{ .mii; $LDW T1=[input] - shrp X[12]=X[12],X[11],24 - shrp X[11]=X[11],X[10],24 } -{ .mii; shrp X[10]=X[10],X[ 9],24 - shrp X[ 9]=X[ 9],X[ 8],24 };; -{ .mii; shrp X[ 8]=X[ 8],X[ 7],24 - shrp X[ 7]=X[ 7],X[ 6],24 } -{ .mii; shrp X[ 6]=X[ 6],X[ 5],24 - shrp X[ 5]=X[ 5],X[ 4],24 } -{ .mii; shrp X[ 4]=X[ 4],X[ 3],24 - shrp X[ 3]=X[ 3],X[ 2],24 } -{ .mii; shrp X[ 2]=X[ 2],X[ 1],24 - shrp X[ 1]=X[ 1],X[ 0],24 } -{ .mib; shrp X[ 0]=X[ 0],T1,24 - br.many .L_first16 };; -.L6byte: -{ .mmi; $LDW X[ 3]=[input],4*$SZ - $LDW X[ 2]=[r8],4*$SZ - shrp X[15]=X[15],X[14],16 } -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[14]=X[14],X[13],16 };; -{ .mii; $LDW T1=[input] - shrp X[13]=X[13],X[12],16 - shrp X[12]=X[12],X[11],16 } -{ .mii; shrp X[11]=X[11],X[10],16 - shrp X[10]=X[10],X[ 9],16 };; -{ .mii; shrp X[ 9]=X[ 9],X[ 8],16 - shrp X[ 8]=X[ 8],X[ 7],16 } -{ .mii; shrp X[ 7]=X[ 7],X[ 6],16 - shrp X[ 6]=X[ 6],X[ 5],16 } -{ .mii; shrp X[ 5]=X[ 5],X[ 4],16 - shrp X[ 4]=X[ 4],X[ 3],16 } -{ .mii; shrp X[ 3]=X[ 3],X[ 2],16 - shrp X[ 2]=X[ 2],X[ 1],16 } -{ .mii; shrp X[ 1]=X[ 1],X[ 0],16 - shrp X[ 0]=X[ 0],T1,16 } -{ .mfb; br.many .L_first16 };; -.L7byte: -{ .mmi; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ - shrp X[15]=X[15],X[14],8 };; -{ .mii; $LDW T1=[input] - shrp X[14]=X[14],X[13],8 - shrp X[13]=X[13],X[12],8 } -{ .mii; shrp X[12]=X[12],X[11],8 - shrp X[11]=X[11],X[10],8 };; -{ .mii; shrp X[10]=X[10],X[ 9],8 - shrp X[ 9]=X[ 9],X[ 8],8 } -{ .mii; shrp X[ 8]=X[ 8],X[ 7],8 - shrp X[ 7]=X[ 7],X[ 6],8 } -{ .mii; shrp X[ 6]=X[ 6],X[ 5],8 - shrp X[ 5]=X[ 5],X[ 4],8 } -{ .mii; shrp X[ 4]=X[ 4],X[ 3],8 - shrp X[ 3]=X[ 3],X[ 2],8 } -{ .mii; shrp X[ 2]=X[ 2],X[ 1],8 - shrp X[ 1]=X[ 1],X[ 0],8 } -{ .mib; shrp X[ 0]=X[ 0],T1,8 - br.many .L_first16 };; - -.align 32 -.L_first16: -{ .mmi; $LDW K=[Ktbl],$SZ - and T1=F,E - and T2=A,B } -{ .mmi; //$LDW X[15]=[input],$SZ // X[i]=*input++ - andcm r8=G,E - and r9=A,C };; -{ .mmi; xor T1=T1,r8 //T1=((e & f) ^ (~e & g)) - and r10=B,C - _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mmi; xor T2=T2,r9 - mux1 X[15]=X[15],\@rev };; // eliminated in big-endian -___ -$code.=<<___; -{ .mib; add T1=T1,H // T1=Ch(e,f,g)+h - _rotr r8=$t1,$Sigma1[1] } // ROTR(e,18) -{ .mib; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c)) - mov H=G };; -{ .mib; xor r11=r8,r11 - _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41) -{ .mib; mov G=F - mov F=E };; -{ .mib; xor r9=r9,r11 // r9=Sigma1(e) - _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28) -{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i] - mov E=D };; -{ .mib; add T1=T1,r9 // T1+=Sigma1(e) - _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34) -{ .mib; mov D=C - mov C=B };; -{ .mib; add T1=T1,X[15] // T1+=X[i] - _rotr r8=$t0,$Sigma0[2] } // ROTR(a,39) -{ .mib; xor r10=r10,r11 - mux2 X[15]=X[15],0x44 };; // eliminated in 64-bit -{ .mmi; xor r10=r8,r10 // r10=Sigma0(a) - mov B=A - add A=T1,T2 };; -{ .mib; add E=E,T1 - add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a) - br.ctop.sptk .L_first16 };; -.L_first16_end: - -{ .mii; mov ar.lc=$rounds-17 - mov ar.ec=1 };; - -.align 32 -.L_rest: -.rotr X[16] -{ .mib; $LDW K=[Ktbl],$SZ - _rotr r8=X[15-1],$sigma0[0] } // ROTR(s0,1) -{ .mib; $ADD X[15]=X[15],X[15-9] // X[i&0xF]+=X[(i+9)&0xF] - $SHRU s0=X[15-1],sgm0 };; // s0=X[(i+1)&0xF]>>7 -{ .mib; and T1=F,E - _rotr r9=X[15-1],$sigma0[1] } // ROTR(s0,8) -{ .mib; andcm r10=G,E - $SHRU s1=X[15-14],sgm1 };; // s1=X[(i+14)&0xF]>>6 -{ .mmi; xor T1=T1,r10 // T1=((e & f) ^ (~e & g)) - xor r9=r8,r9 - _rotr r10=X[15-14],$sigma1[0] };;// ROTR(s1,19) -{ .mib; and T2=A,B - _rotr r11=X[15-14],$sigma1[1] }// ROTR(s1,61) -{ .mib; and r8=A,C };; -___ -$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32); -// I adhere to mmi; in order to hold Itanium 1 back and avoid 6 cycle -// pipeline flush in last bundle. Note that even on Itanium2 the -// latter stalls for one clock cycle... -{ .mmi; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF]) - dep.z $t1=E,32,32 } -{ .mmi; xor r10=r11,r10 - zxt4 E=E };; -{ .mmi; or $t1=$t1,E - xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF]) - mux2 $t0=A,0x44 };; // copy lower half to upper -{ .mmi; xor T2=T2,r8 - _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mmi; and r10=B,C - add T1=T1,H // T1=Ch(e,f,g)+h - $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF]) -___ -$t0="A", $t1="E", $code.=<<___ if ($BITS==64); -{ .mib; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF]) - _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mib; xor r10=r11,r10 - xor T2=T2,r8 };; -{ .mib; xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF]) - add T1=T1,H } -{ .mib; and r10=B,C - $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF]) -___ -$code.=<<___; -{ .mmi; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c)) - mov H=G - _rotr r8=$t1,$Sigma1[1] };; // ROTR(e,18) -{ .mmi; xor r11=r8,r9 - $ADD X[15]=X[15],s1 // X[i&0xF]+=sigma1(X[(i+14)&0xF]) - _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41) -{ .mmi; mov G=F - mov F=E };; -{ .mib; xor r9=r9,r11 // r9=Sigma1(e) - _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28) -{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i] - mov E=D };; -{ .mib; add T1=T1,r9 // T1+=Sigma1(e) - _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34) -{ .mib; mov D=C - mov C=B };; -{ .mmi; add T1=T1,X[15] // T1+=X[i] - xor r10=r10,r11 - _rotr r8=$t0,$Sigma0[2] };; // ROTR(a,39) -{ .mmi; xor r10=r8,r10 // r10=Sigma0(a) - mov B=A - add A=T1,T2 };; -{ .mib; add E=E,T1 - add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a) - br.ctop.sptk .L_rest };; -.L_rest_end: - -{ .mmi; add A_=A_,A - add B_=B_,B - add C_=C_,C } -{ .mmi; add D_=D_,D - add E_=E_,E - cmp.ltu p16,p0=1,num };; -{ .mmi; add F_=F_,F - add G_=G_,G - add H_=H_,H } -{ .mmb; add Ktbl=-$SZ*$rounds,Ktbl -(p16) add num=-1,num -(p16) br.dptk.many .L_outer };; - -{ .mib; add r8=0*$SZ,ctx - add r9=1*$SZ,ctx } -{ .mib; add r10=2*$SZ,ctx - add r11=3*$SZ,ctx };; -{ .mmi; $STW [r8]=A_,4*$SZ - $STW [r9]=B_,4*$SZ - mov ar.lc=lcsave } -{ .mmi; $STW [r10]=C_,4*$SZ - $STW [r11]=D_,4*$SZ - mov pr=prsave,0x1ffff };; -{ .mmb; $STW [r8]=E_ - $STW [r9]=F_ } -{ .mmb; $STW [r10]=G_ - $STW [r11]=H_ - br.ret.sptk.many b0 };; -.endp $func# -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm; -if ($BITS==64) { - $code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm; - $code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian); - $code =~ s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm - if (!$big_endian); - $code =~ s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm; -} - -print $code; - -print<<___ if ($BITS==32); -.align 64 -.type K256#,\@object -K256: data4 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - data4 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - data4 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - data4 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - data4 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - data4 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - data4 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - data4 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - data4 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - data4 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - data4 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - data4 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - data4 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - data4 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - data4 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - data4 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.size K256#,$SZ*$rounds -stringz "SHA256 block transform for IA64, CRYPTOGAMS by " -___ -print<<___ if ($BITS==64); -.align 64 -.type K512#,\@object -K512: data8 0x428a2f98d728ae22,0x7137449123ef65cd - data8 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - data8 0x3956c25bf348b538,0x59f111f1b605d019 - data8 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - data8 0xd807aa98a3030242,0x12835b0145706fbe - data8 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - data8 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - data8 0x9bdc06a725c71235,0xc19bf174cf692694 - data8 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - data8 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - data8 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - data8 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - data8 0x983e5152ee66dfab,0xa831c66d2db43210 - data8 0xb00327c898fb213f,0xbf597fc7beef0ee4 - data8 0xc6e00bf33da88fc2,0xd5a79147930aa725 - data8 0x06ca6351e003826f,0x142929670a0e6e70 - data8 0x27b70a8546d22ffc,0x2e1b21385c26c926 - data8 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - data8 0x650a73548baf63de,0x766a0abb3c77b2a8 - data8 0x81c2c92e47edaee6,0x92722c851482353b - data8 0xa2bfe8a14cf10364,0xa81a664bbc423001 - data8 0xc24b8b70d0f89791,0xc76c51a30654be30 - data8 0xd192e819d6ef5218,0xd69906245565a910 - data8 0xf40e35855771202a,0x106aa07032bbd1b8 - data8 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - data8 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - data8 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - data8 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - data8 0x748f82ee5defb2fc,0x78a5636f43172f60 - data8 0x84c87814a1f0ab72,0x8cc702081a6439ec - data8 0x90befffa23631e28,0xa4506cebde82bde9 - data8 0xbef9a3f7b2c67915,0xc67178f2e372532b - data8 0xca273eceea26619c,0xd186b8c721c0c207 - data8 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - data8 0x06f067aa72176fba,0x0a637dc5a2c898a6 - data8 0x113f9804bef90dae,0x1b710b35131c471b - data8 0x28db77f523047d84,0x32caab7b40c72493 - data8 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - data8 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - data8 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.size K512#,$SZ*$rounds -stringz "SHA512 block transform for IA64, CRYPTOGAMS by " -___ diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-mips.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-mips.pl deleted file mode 100644 index 6807a2c722..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-mips.pl +++ /dev/null @@ -1,455 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA2 block procedures for MIPS. - -# October 2010. -# -# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc- -# generated code in o32 build and ~55% in n32/64 build. SHA512 [which -# for now can only be compiled for MIPS64 ISA] improvement is modest -# ~17%, but it comes for free, because it's same instruction sequence. -# Improvement coefficients are for aligned input. - -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp [o32 can be -# excluded from the rule, because it's specified volatile]; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 - -if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 - $REG_S="sd"; - $REG_L="ld"; - $PTR_SLL="dsll"; # incidentally works even on n32 - $SZREG=8; -} else { - $PTR_ADD="add"; - $PTR_SUB="sub"; - $REG_S="sw"; - $REG_L="lw"; - $PTR_SLL="sll"; - $SZREG=4; -} -$pf = ($flavour =~ /nubi/i) ? $t0 : $t2; -# -# -# -###################################################################### - -$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); - -for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } -open STDOUT,">$output"; - -if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); } - -if ($output =~ /512/) { - $label="512"; - $SZ=8; - $LD="ld"; # load from memory - $ST="sd"; # store to memory - $SLL="dsll"; # shift left logical - $SRL="dsrl"; # shift right logical - $ADDU="daddu"; - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=( 7, 1, 8); # right shift first - @sigma1=( 6,19,61); # right shift first - $lastK=0x817; - $rounds=80; -} else { - $label="256"; - $SZ=4; - $LD="lw"; # load from memory - $ST="sw"; # store to memory - $SLL="sll"; # shift left logical - $SRL="srl"; # shift right logical - $ADDU="addu"; - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 3, 7,18); # right shift first - @sigma1=(10,17,19); # right shift first - $lastK=0x8f2; - $rounds=64; -} - -$MSB = $big_endian ? 0 : ($SZ-1); -$LSB = ($SZ-1)&~$MSB; - -@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31)); -@X=map("\$$_",(8..23)); - -$ctx=$a0; -$inp=$a1; -$len=$a2; $Ktbl=$len; - -sub BODY_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]); - -$code.=<<___ if ($i<15); - ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp) - ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) -___ -$code.=<<___ if (!$big_endian && $i<16 && $SZ==4); - srl $tmp0,@X[0],24 # byte swap($i) - srl $tmp1,@X[0],8 - andi $tmp2,@X[0],0xFF00 - sll @X[0],@X[0],24 - andi $tmp1,0xFF00 - sll $tmp2,$tmp2,8 - or @X[0],$tmp0 - or $tmp1,$tmp2 - or @X[0],$tmp1 -___ -$code.=<<___ if (!$big_endian && $i<16 && $SZ==8); - ori $tmp0,$zero,0xFF - dsll $tmp2,$tmp0,32 - or $tmp0,$tmp2 # 0x000000FF000000FF - and $tmp1,@X[0],$tmp0 # byte swap($i) - dsrl $tmp2,@X[0],24 - dsll $tmp1,24 - and $tmp2,$tmp0 - dsll $tmp0,8 # 0x0000FF000000FF00 - or $tmp1,$tmp2 - and $tmp2,@X[0],$tmp0 - dsrl @X[0],8 - dsll $tmp2,8 - and @X[0],$tmp0 - or $tmp1,$tmp2 - or @X[0],$tmp1 - dsrl $tmp1,@X[0],32 - dsll @X[0],32 - or @X[0],$tmp1 -___ -$code.=<<___; - $ADDU $T1,$X[0],$h # $i - $SRL $h,$e,@Sigma1[0] - xor $tmp2,$f,$g - $SLL $tmp1,$e,`$SZ*8-@Sigma1[2]` - and $tmp2,$e - $SRL $tmp0,$e,@Sigma1[1] - xor $h,$tmp1 - $SLL $tmp1,$e,`$SZ*8-@Sigma1[1]` - xor $h,$tmp0 - $SRL $tmp0,$e,@Sigma1[2] - xor $h,$tmp1 - $SLL $tmp1,$e,`$SZ*8-@Sigma1[0]` - xor $h,$tmp0 - xor $tmp2,$g # Ch(e,f,g) - xor $tmp0,$tmp1,$h # Sigma1(e) - - $SRL $h,$a,@Sigma0[0] - $ADDU $T1,$tmp2 - $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i] - $SLL $tmp1,$a,`$SZ*8-@Sigma0[2]` - $ADDU $T1,$tmp0 - $SRL $tmp0,$a,@Sigma0[1] - xor $h,$tmp1 - $SLL $tmp1,$a,`$SZ*8-@Sigma0[1]` - xor $h,$tmp0 - $SRL $tmp0,$a,@Sigma0[2] - xor $h,$tmp1 - $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` - xor $h,$tmp0 - $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer - xor $h,$tmp1 # Sigma0(a) - - or $tmp0,$a,$b - and $tmp1,$a,$b - and $tmp0,$c - or $tmp1,$tmp0 # Maj(a,b,c) - $ADDU $T1,$tmp2 # +=K[$i] - $ADDU $h,$tmp1 - - $ADDU $d,$T1 - $ADDU $h,$T1 -___ -$code.=<<___ if ($i>=13); - $LD @X[3],`(($i+3)%16)*$SZ`($sp) # prefetch from ring buffer -___ -} - -sub BODY_16_XX { -my $i=@_[0]; -my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); - -$code.=<<___; - $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) - $ADDU @X[0],@X[9] # +=X[i+9] - $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` - $SRL $tmp0,@X[1],@sigma0[1] - xor $tmp2,$tmp1 - $SLL $tmp1,`@sigma0[2]-@sigma0[1]` - xor $tmp2,$tmp0 - $SRL $tmp0,@X[1],@sigma0[2] - xor $tmp2,$tmp1 - - $SRL $tmp3,@X[14],@sigma1[0] - xor $tmp2,$tmp0 # sigma0(X[i+1]) - $SLL $tmp1,@X[14],`$SZ*8-@sigma1[2]` - $ADDU @X[0],$tmp2 - $SRL $tmp0,@X[14],@sigma1[1] - xor $tmp3,$tmp1 - $SLL $tmp1,`@sigma1[2]-@sigma1[1]` - xor $tmp3,$tmp0 - $SRL $tmp0,@X[14],@sigma1[2] - xor $tmp3,$tmp1 - - xor $tmp3,$tmp0 # sigma1(X[i+14]) - $ADDU @X[0],$tmp3 -___ - &BODY_00_15(@_); -} - -$FRAMESIZE=16*$SZ+16*$SZREG; -$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; - -$code.=<<___; -#ifdef OPENSSL_FIPSCANISTER -# include -#endif - -.text -.set noat -#if !defined(__vxworks) || defined(__pic__) -.option pic2 -#endif - -.align 5 -.globl sha${label}_block_data_order -.ent sha${label}_block_data_order -sha${label}_block_data_order: - .frame $sp,$FRAMESIZE,$ra - .mask $SAVED_REGS_MASK,-$SZREG - .set noreorder -___ -$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification - .cpload $pf -___ -$code.=<<___; - $PTR_SUB $sp,$FRAMESIZE - $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - $REG_S $s3,$FRAMESIZE-11*$SZREG($sp) - $REG_S $s2,$FRAMESIZE-12*$SZREG($sp) - $REG_S $s1,$FRAMESIZE-13*$SZREG($sp) - $REG_S $s0,$FRAMESIZE-14*$SZREG($sp) - $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - $PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)` -___ -$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification - .cplocal $Ktbl - .cpsetup $pf,$zero,sha${label}_block_data_order -___ -$code.=<<___; - .set reorder - la $Ktbl,K${label} # PIC-ified 'load address' - - $LD $A,0*$SZ($ctx) # load context - $LD $B,1*$SZ($ctx) - $LD $C,2*$SZ($ctx) - $LD $D,3*$SZ($ctx) - $LD $E,4*$SZ($ctx) - $LD $F,5*$SZ($ctx) - $LD $G,6*$SZ($ctx) - $LD $H,7*$SZ($ctx) - - $PTR_ADD @X[15],$inp # pointer to the end of input - $REG_S @X[15],16*$SZ($sp) - b .Loop - -.align 5 -.Loop: - ${LD}l @X[0],$MSB($inp) - ${LD}r @X[0],$LSB($inp) -___ -for ($i=0;$i<16;$i++) -{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } -$code.=<<___; - b .L16_xx -.align 4 -.L16_xx: -___ -for (;$i<32;$i++) -{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } -$code.=<<___; - and @X[6],0xfff - li @X[7],$lastK - .set noreorder - bne @X[6],@X[7],.L16_xx - $PTR_ADD $Ktbl,16*$SZ # Ktbl+=16 - - $REG_L @X[15],16*$SZ($sp) # restore pointer to the end of input - $LD @X[0],0*$SZ($ctx) - $LD @X[1],1*$SZ($ctx) - $LD @X[2],2*$SZ($ctx) - $PTR_ADD $inp,16*$SZ - $LD @X[3],3*$SZ($ctx) - $ADDU $A,@X[0] - $LD @X[4],4*$SZ($ctx) - $ADDU $B,@X[1] - $LD @X[5],5*$SZ($ctx) - $ADDU $C,@X[2] - $LD @X[6],6*$SZ($ctx) - $ADDU $D,@X[3] - $LD @X[7],7*$SZ($ctx) - $ADDU $E,@X[4] - $ST $A,0*$SZ($ctx) - $ADDU $F,@X[5] - $ST $B,1*$SZ($ctx) - $ADDU $G,@X[6] - $ST $C,2*$SZ($ctx) - $ADDU $H,@X[7] - $ST $D,3*$SZ($ctx) - $ST $E,4*$SZ($ctx) - $ST $F,5*$SZ($ctx) - $ST $G,6*$SZ($ctx) - $ST $H,7*$SZ($ctx) - - bne $inp,@X[15],.Loop - $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl - - $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) - $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) - $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) - $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) - $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) - $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) - $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) - $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) - $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) - $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); - $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) - $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) - $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) - $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) - $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) -___ -$code.=<<___; - jr $ra - $PTR_ADD $sp,$FRAMESIZE -.end sha${label}_block_data_order - -.rdata -.align 5 -K${label}: -___ -if ($SZ==4) { -$code.=<<___; - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -___ -} else { -$code.=<<___; - .dword 0x428a2f98d728ae22, 0x7137449123ef65cd - .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc - .dword 0x3956c25bf348b538, 0x59f111f1b605d019 - .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 - .dword 0xd807aa98a3030242, 0x12835b0145706fbe - .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 - .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 - .dword 0x9bdc06a725c71235, 0xc19bf174cf692694 - .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 - .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 - .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 - .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 - .dword 0x983e5152ee66dfab, 0xa831c66d2db43210 - .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4 - .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725 - .dword 0x06ca6351e003826f, 0x142929670a0e6e70 - .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926 - .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df - .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8 - .dword 0x81c2c92e47edaee6, 0x92722c851482353b - .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001 - .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30 - .dword 0xd192e819d6ef5218, 0xd69906245565a910 - .dword 0xf40e35855771202a, 0x106aa07032bbd1b8 - .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 - .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 - .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb - .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 - .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60 - .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec - .dword 0x90befffa23631e28, 0xa4506cebde82bde9 - .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b - .dword 0xca273eceea26619c, 0xd186b8c721c0c207 - .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 - .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6 - .dword 0x113f9804bef90dae, 0x1b710b35131c471b - .dword 0x28db77f523047d84, 0x32caab7b40c72493 - .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c - .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a - .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 -___ -} -$code.=<<___; -.asciiz "SHA${label} for MIPS, CRYPTOGAMS by " -.align 5 - -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-parisc.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-parisc.pl deleted file mode 100755 index 6cad72e255..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-parisc.pl +++ /dev/null @@ -1,793 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA256/512 block procedure for PA-RISC. - -# June 2009. -# -# SHA256 performance is >75% better than gcc 3.2 generated code on -# PA-7100LC. Compared to code generated by vendor compiler this -# implementation is almost 70% faster in 64-bit build, but delivers -# virtually same performance in 32-bit build on PA-8600. -# -# SHA512 performance is >2.9x better than gcc 3.2 generated code on -# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the -# code is executed on PA-RISC 2.0 processor and switches to 64-bit -# code path delivering adequate performance even in "blended" 32-bit -# build. Though 64-bit code is not any faster than code generated by -# vendor compiler on PA-8600... -# -# Special thanks to polarhome.com for providing HP-UX account. - -$flavour = shift; -$output = shift; -open STDOUT,">$output"; - -if ($flavour =~ /64/) { - $LEVEL ="2.0W"; - $SIZE_T =8; - $FRAME_MARKER =80; - $SAVED_RP =16; - $PUSH ="std"; - $PUSHMA ="std,ma"; - $POP ="ldd"; - $POPMB ="ldd,mb"; -} else { - $LEVEL ="1.0"; - $SIZE_T =4; - $FRAME_MARKER =48; - $SAVED_RP =20; - $PUSH ="stw"; - $PUSHMA ="stwm"; - $POP ="ldw"; - $POPMB ="ldwm"; -} - -if ($output =~ /512/) { - $func="sha512_block_data_order"; - $SZ=8; - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=(1, 8, 7); - @sigma1=(19,61, 6); - $rounds=80; - $LAST10BITS=0x017; - $LD="ldd"; - $LDM="ldd,ma"; - $ST="std"; -} else { - $func="sha256_block_data_order"; - $SZ=4; - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 7,18, 3); - @sigma1=(17,19,10); - $rounds=64; - $LAST10BITS=0x0f2; - $LD="ldw"; - $LDM="ldwm"; - $ST="stw"; -} - -$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker - # [+ argument transfer] -$XOFF=16*$SZ+32; # local variables -$FRAME+=$XOFF; -$XOFF+=$FRAME_MARKER; # distance between %sp and local variables - -$ctx="%r26"; # zapped by $a0 -$inp="%r25"; # zapped by $a1 -$num="%r24"; # zapped by $t0 - -$a0 ="%r26"; -$a1 ="%r25"; -$t0 ="%r24"; -$t1 ="%r29"; -$Tbl="%r31"; - -@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28"); - -@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", - "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp); - -sub ROUND_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -$code.=<<___; - _ror $e,$Sigma1[0],$a0 - and $f,$e,$t0 - _ror $e,$Sigma1[1],$a1 - addl $t1,$h,$h - andcm $g,$e,$t1 - xor $a1,$a0,$a0 - _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1 - or $t0,$t1,$t1 ; Ch(e,f,g) - addl @X[$i%16],$h,$h - xor $a0,$a1,$a1 ; Sigma1(e) - addl $t1,$h,$h - _ror $a,$Sigma0[0],$a0 - addl $a1,$h,$h - - _ror $a,$Sigma0[1],$a1 - and $a,$b,$t0 - and $a,$c,$t1 - xor $a1,$a0,$a0 - _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1 - xor $t1,$t0,$t0 - and $b,$c,$t1 - xor $a0,$a1,$a1 ; Sigma0(a) - addl $h,$d,$d - xor $t1,$t0,$t0 ; Maj(a,b,c) - `"$LDM $SZ($Tbl),$t1" if ($i<15)` - addl $a1,$h,$h - addl $t0,$h,$h - -___ -} - -sub ROUND_16_xx { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -$i-=16; -$code.=<<___; - _ror @X[($i+1)%16],$sigma0[0],$a0 - _ror @X[($i+1)%16],$sigma0[1],$a1 - addl @X[($i+9)%16],@X[$i],@X[$i] - _ror @X[($i+14)%16],$sigma1[0],$t0 - _ror @X[($i+14)%16],$sigma1[1],$t1 - xor $a1,$a0,$a0 - _shr @X[($i+1)%16],$sigma0[2],$a1 - xor $t1,$t0,$t0 - _shr @X[($i+14)%16],$sigma1[2],$t1 - xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f]) - xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f]) - $LDM $SZ($Tbl),$t1 - addl $a0,@X[$i],@X[$i] - addl $t0,@X[$i],@X[$i] -___ -$code.=<<___ if ($i==15); - extru $t1,31,10,$a1 - comiclr,<> $LAST10BITS,$a1,%r0 - ldo 1($Tbl),$Tbl ; signal end of $Tbl -___ -&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); -} - -$code=<<___; - .LEVEL $LEVEL - .SPACE \$TEXT\$ - .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY - - .ALIGN 64 -L\$table -___ -$code.=<<___ if ($SZ==8); - .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd - .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc - .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 - .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 - .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe - .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 - .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 - .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 - .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 - .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 - .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 - .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 - .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 - .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 - .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 - .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 - .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 - .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df - .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 - .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b - .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 - .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 - .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 - .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 - .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 - .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 - .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb - .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 - .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 - .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec - .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 - .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b - .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 - .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 - .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 - .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b - .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 - .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c - .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a - .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 -___ -$code.=<<___ if ($SZ==4); - .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -___ -$code.=<<___; - - .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR - .ALIGN 64 -$func - .PROC - .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 - .ENTRY - $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue - $PUSHMA %r3,$FRAME(%sp) - $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) - $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) - $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) - $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) - $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) - $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) - $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) - $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) - $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) - $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) - $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) - $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) - $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) - $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) - $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) - - _shl $num,`log(16*$SZ)/log(2)`,$num - addl $inp,$num,$num ; $num to point at the end of $inp - - $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments - $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) - $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) - - blr %r0,$Tbl - ldi 3,$t1 -L\$pic - andcm $Tbl,$t1,$Tbl ; wipe privilege level - ldo L\$table-L\$pic($Tbl),$Tbl -___ -$code.=<<___ if ($SZ==8 && $SIZE_T==4); - ldi 31,$t1 - mtctl $t1,%cr11 - extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0 - b L\$parisc1 - nop -___ -$code.=<<___; - $LD `0*$SZ`($ctx),$A ; load context - $LD `1*$SZ`($ctx),$B - $LD `2*$SZ`($ctx),$C - $LD `3*$SZ`($ctx),$D - $LD `4*$SZ`($ctx),$E - $LD `5*$SZ`($ctx),$F - $LD `6*$SZ`($ctx),$G - $LD `7*$SZ`($ctx),$H - - extru $inp,31,`log($SZ)/log(2)`,$t0 - sh3addl $t0,%r0,$t0 - subi `8*$SZ`,$t0,$t0 - mtctl $t0,%cr11 ; load %sar with align factor - -L\$oop - ldi `$SZ-1`,$t0 - $LDM $SZ($Tbl),$t1 - andcm $inp,$t0,$t0 ; align $inp -___ - for ($i=0;$i<15;$i++) { # load input block - $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } -$code.=<<___; - cmpb,*= $inp,$t0,L\$aligned - $LD `$SZ*15`($t0),@X[15] - $LD `$SZ*16`($t0),@X[16] -___ - for ($i=0;$i<16;$i++) { # align data - $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } -$code.=<<___; -L\$aligned - nop ; otherwise /usr/ccs/bin/as is confused by below .WORD -___ - -for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; -L\$rounds - nop ; otherwise /usr/ccs/bin/as is confused by below .WORD -___ -for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled? - nop - - $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments - $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp - $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num - ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl - - $LD `0*$SZ`($ctx),@X[0] ; load context - $LD `1*$SZ`($ctx),@X[1] - $LD `2*$SZ`($ctx),@X[2] - $LD `3*$SZ`($ctx),@X[3] - $LD `4*$SZ`($ctx),@X[4] - $LD `5*$SZ`($ctx),@X[5] - addl @X[0],$A,$A - $LD `6*$SZ`($ctx),@X[6] - addl @X[1],$B,$B - $LD `7*$SZ`($ctx),@X[7] - ldo `16*$SZ`($inp),$inp ; advance $inp - - $ST $A,`0*$SZ`($ctx) ; save context - addl @X[2],$C,$C - $ST $B,`1*$SZ`($ctx) - addl @X[3],$D,$D - $ST $C,`2*$SZ`($ctx) - addl @X[4],$E,$E - $ST $D,`3*$SZ`($ctx) - addl @X[5],$F,$F - $ST $E,`4*$SZ`($ctx) - addl @X[6],$G,$G - $ST $F,`5*$SZ`($ctx) - addl @X[7],$H,$H - $ST $G,`6*$SZ`($ctx) - $ST $H,`7*$SZ`($ctx) - - cmpb,*<>,n $inp,$num,L\$oop - $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp -___ -if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0 -{{ -$code.=<<___; - b L\$done - nop - - .ALIGN 64 -L\$parisc1 -___ - -@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo, - $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = - ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", - "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); -$a0 ="%r17"; -$a1 ="%r18"; -$a2 ="%r19"; -$a3 ="%r20"; -$t0 ="%r21"; -$t1 ="%r22"; -$t2 ="%r28"; -$t3 ="%r29"; -$Tbl="%r31"; - -@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx - -sub ROUND_00_15_pa1 { -my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, - $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; -my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; - -$code.=<<___ if (!$flag); - ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi - ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] -___ -$code.=<<___; - shd $ehi,$elo,$Sigma1[0],$t0 - add $Xlo,$hlo,$hlo - shd $elo,$ehi,$Sigma1[0],$t1 - addc $Xhi,$hhi,$hhi ; h += X[i] - shd $ehi,$elo,$Sigma1[1],$t2 - ldwm 8($Tbl),$Xhi - shd $elo,$ehi,$Sigma1[1],$t3 - ldw -4($Tbl),$Xlo ; load K[i] - xor $t2,$t0,$t0 - xor $t3,$t1,$t1 - and $flo,$elo,$a0 - and $fhi,$ehi,$a1 - shd $ehi,$elo,$Sigma1[2],$t2 - andcm $glo,$elo,$a2 - shd $elo,$ehi,$Sigma1[2],$t3 - andcm $ghi,$ehi,$a3 - xor $t2,$t0,$t0 - xor $t3,$t1,$t1 ; Sigma1(e) - add $Xlo,$hlo,$hlo - xor $a2,$a0,$a0 - addc $Xhi,$hhi,$hhi ; h += K[i] - xor $a3,$a1,$a1 ; Ch(e,f,g) - - add $t0,$hlo,$hlo - shd $ahi,$alo,$Sigma0[0],$t0 - addc $t1,$hhi,$hhi ; h += Sigma1(e) - shd $alo,$ahi,$Sigma0[0],$t1 - add $a0,$hlo,$hlo - shd $ahi,$alo,$Sigma0[1],$t2 - addc $a1,$hhi,$hhi ; h += Ch(e,f,g) - shd $alo,$ahi,$Sigma0[1],$t3 - - xor $t2,$t0,$t0 - xor $t3,$t1,$t1 - shd $ahi,$alo,$Sigma0[2],$t2 - and $alo,$blo,$a0 - shd $alo,$ahi,$Sigma0[2],$t3 - and $ahi,$bhi,$a1 - xor $t2,$t0,$t0 - xor $t3,$t1,$t1 ; Sigma0(a) - - and $alo,$clo,$a2 - and $ahi,$chi,$a3 - xor $a2,$a0,$a0 - add $hlo,$dlo,$dlo - xor $a3,$a1,$a1 - addc $hhi,$dhi,$dhi ; d += h - and $blo,$clo,$a2 - add $t0,$hlo,$hlo - and $bhi,$chi,$a3 - addc $t1,$hhi,$hhi ; h += Sigma0(a) - xor $a2,$a0,$a0 - add $a0,$hlo,$hlo - xor $a3,$a1,$a1 ; Maj(a,b,c) - addc $a1,$hhi,$hhi ; h += Maj(a,b,c) - -___ -$code.=<<___ if ($i==15 && $flag); - extru $Xlo,31,10,$Xlo - comiclr,= $LAST10BITS,$Xlo,%r0 - b L\$rounds_pa1 - nop -___ -push(@X,shift(@X)); push(@X,shift(@X)); -} - -sub ROUND_16_xx_pa1 { -my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; -my ($i)=shift; -$i-=16; -$code.=<<___; - ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi - ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] - ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1 - ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9] - ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3 - ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14] - shd $Xnhi,$Xnlo,$sigma0[0],$t0 - shd $Xnlo,$Xnhi,$sigma0[0],$t1 - add $a0,$Xlo,$Xlo - shd $Xnhi,$Xnlo,$sigma0[1],$t2 - addc $a1,$Xhi,$Xhi - shd $Xnlo,$Xnhi,$sigma0[1],$t3 - xor $t2,$t0,$t0 - shd $Xnhi,$Xnlo,$sigma0[2],$t2 - xor $t3,$t1,$t1 - extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3 - xor $t2,$t0,$t0 - shd $a3,$a2,$sigma1[0],$a0 - xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f]) - shd $a2,$a3,$sigma1[0],$a1 - add $t0,$Xlo,$Xlo - shd $a3,$a2,$sigma1[1],$t2 - addc $t1,$Xhi,$Xhi - shd $a2,$a3,$sigma1[1],$t3 - xor $t2,$a0,$a0 - shd $a3,$a2,$sigma1[2],$t2 - xor $t3,$a1,$a1 - extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3 - xor $t2,$a0,$a0 - xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f]) - add $a0,$Xlo,$Xlo - addc $a1,$Xhi,$Xhi - - stw $Xhi,`-$XOFF+8*($i%16)`(%sp) - stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp) -___ -&ROUND_00_15_pa1($i,@_,1); -} -$code.=<<___; - ldw `0*4`($ctx),$Ahi ; load context - ldw `1*4`($ctx),$Alo - ldw `2*4`($ctx),$Bhi - ldw `3*4`($ctx),$Blo - ldw `4*4`($ctx),$Chi - ldw `5*4`($ctx),$Clo - ldw `6*4`($ctx),$Dhi - ldw `7*4`($ctx),$Dlo - ldw `8*4`($ctx),$Ehi - ldw `9*4`($ctx),$Elo - ldw `10*4`($ctx),$Fhi - ldw `11*4`($ctx),$Flo - ldw `12*4`($ctx),$Ghi - ldw `13*4`($ctx),$Glo - ldw `14*4`($ctx),$Hhi - ldw `15*4`($ctx),$Hlo - - extru $inp,31,2,$t0 - sh3addl $t0,%r0,$t0 - subi 32,$t0,$t0 - mtctl $t0,%cr11 ; load %sar with align factor - -L\$oop_pa1 - extru $inp,31,2,$a3 - comib,= 0,$a3,L\$aligned_pa1 - sub $inp,$a3,$inp - - ldw `0*4`($inp),$X[0] - ldw `1*4`($inp),$X[1] - ldw `2*4`($inp),$t2 - ldw `3*4`($inp),$t3 - ldw `4*4`($inp),$a0 - ldw `5*4`($inp),$a1 - ldw `6*4`($inp),$a2 - ldw `7*4`($inp),$a3 - vshd $X[0],$X[1],$X[0] - vshd $X[1],$t2,$X[1] - stw $X[0],`-$XOFF+0*4`(%sp) - ldw `8*4`($inp),$t0 - vshd $t2,$t3,$t2 - stw $X[1],`-$XOFF+1*4`(%sp) - ldw `9*4`($inp),$t1 - vshd $t3,$a0,$t3 -___ -{ -my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); -for ($i=2;$i<=(128/4-8);$i++) { -$code.=<<___; - stw $t[0],`-$XOFF+$i*4`(%sp) - ldw `(8+$i)*4`($inp),$t[0] - vshd $t[1],$t[2],$t[1] -___ -push(@t,shift(@t)); -} -for (;$i<(128/4-1);$i++) { -$code.=<<___; - stw $t[0],`-$XOFF+$i*4`(%sp) - vshd $t[1],$t[2],$t[1] -___ -push(@t,shift(@t)); -} -$code.=<<___; - b L\$collected_pa1 - stw $t[0],`-$XOFF+$i*4`(%sp) - -___ -} -$code.=<<___; -L\$aligned_pa1 - ldw `0*4`($inp),$X[0] - ldw `1*4`($inp),$X[1] - ldw `2*4`($inp),$t2 - ldw `3*4`($inp),$t3 - ldw `4*4`($inp),$a0 - ldw `5*4`($inp),$a1 - ldw `6*4`($inp),$a2 - ldw `7*4`($inp),$a3 - stw $X[0],`-$XOFF+0*4`(%sp) - ldw `8*4`($inp),$t0 - stw $X[1],`-$XOFF+1*4`(%sp) - ldw `9*4`($inp),$t1 -___ -{ -my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); -for ($i=2;$i<(128/4-8);$i++) { -$code.=<<___; - stw $t[0],`-$XOFF+$i*4`(%sp) - ldw `(8+$i)*4`($inp),$t[0] -___ -push(@t,shift(@t)); -} -for (;$i<128/4;$i++) { -$code.=<<___; - stw $t[0],`-$XOFF+$i*4`(%sp) -___ -push(@t,shift(@t)); -} -$code.="L\$collected_pa1\n"; -} - -for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } -$code.="L\$rounds_pa1\n"; -for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } - -$code.=<<___; - $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments - $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp - $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num - ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl - - ldw `0*4`($ctx),$t1 ; update context - ldw `1*4`($ctx),$t0 - ldw `2*4`($ctx),$t3 - ldw `3*4`($ctx),$t2 - ldw `4*4`($ctx),$a1 - ldw `5*4`($ctx),$a0 - ldw `6*4`($ctx),$a3 - add $t0,$Alo,$Alo - ldw `7*4`($ctx),$a2 - addc $t1,$Ahi,$Ahi - ldw `8*4`($ctx),$t1 - add $t2,$Blo,$Blo - ldw `9*4`($ctx),$t0 - addc $t3,$Bhi,$Bhi - ldw `10*4`($ctx),$t3 - add $a0,$Clo,$Clo - ldw `11*4`($ctx),$t2 - addc $a1,$Chi,$Chi - ldw `12*4`($ctx),$a1 - add $a2,$Dlo,$Dlo - ldw `13*4`($ctx),$a0 - addc $a3,$Dhi,$Dhi - ldw `14*4`($ctx),$a3 - add $t0,$Elo,$Elo - ldw `15*4`($ctx),$a2 - addc $t1,$Ehi,$Ehi - stw $Ahi,`0*4`($ctx) - add $t2,$Flo,$Flo - stw $Alo,`1*4`($ctx) - addc $t3,$Fhi,$Fhi - stw $Bhi,`2*4`($ctx) - add $a0,$Glo,$Glo - stw $Blo,`3*4`($ctx) - addc $a1,$Ghi,$Ghi - stw $Chi,`4*4`($ctx) - add $a2,$Hlo,$Hlo - stw $Clo,`5*4`($ctx) - addc $a3,$Hhi,$Hhi - stw $Dhi,`6*4`($ctx) - ldo `16*$SZ`($inp),$inp ; advance $inp - stw $Dlo,`7*4`($ctx) - stw $Ehi,`8*4`($ctx) - stw $Elo,`9*4`($ctx) - stw $Fhi,`10*4`($ctx) - stw $Flo,`11*4`($ctx) - stw $Ghi,`12*4`($ctx) - stw $Glo,`13*4`($ctx) - stw $Hhi,`14*4`($ctx) - comb,= $inp,$num,L\$done - stw $Hlo,`15*4`($ctx) - b L\$oop_pa1 - $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp -L\$done -___ -}} -$code.=<<___; - $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue - $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 - $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 - $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 - $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 - $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 - $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 - $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 - $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 - $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 - $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 - $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 - $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 - $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 - $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 - $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 - bv (%r2) - .EXIT - $POPMB -$FRAME(%sp),%r3 - .PROCEND - .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by " -___ - -# Explicitly encode PA-RISC 2.0 instructions used in this module, so -# that it can be compiled with .LEVEL 1.0. It should be noted that I -# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 -# directive... - -my $ldd = sub { - my ($mod,$args) = @_; - my $orig = "ldd$mod\t$args"; - - if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices - { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1); - $opcode|=(1<<3) if ($mod =~ /^,m/); - $opcode|=(1<<2) if ($mod =~ /^,mb/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $std = sub { - my ($mod,$args) = @_; - my $orig = "std$mod\t$args"; - - if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices - { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $extrd = sub { - my ($mod,$args) = @_; - my $orig = "extrd$mod\t$args"; - - # I only have ",u" completer, it's implicitly encoded... - if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 - { my $opcode=(0x36<<26)|($1<<21)|($4<<16); - my $len=32-$3; - $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos - $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 - { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); - my $len=32-$2; - $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len - $opcode |= (1<<13) if ($mod =~ /,\**=/); - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - else { "\t".$orig; } -}; - -my $shrpd = sub { - my ($mod,$args) = @_; - my $orig = "shrpd$mod\t$args"; - - if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 - { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; - my $cpos=63-$3; - $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa - sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; - } - elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11 - { sprintf "\t.WORD\t0x%08x\t; %s", - (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; - } - else { "\t".$orig; } -}; - -sub assemble { - my ($mnemonic,$mod,$args)=@_; - my $opcode = eval("\$$mnemonic"); - - ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; -} - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval $1/ge; - - s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/ - $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32 - : sprintf("shd\t%$1,%$2,%d",$3)/e or - # translate made up instructons: _ror, _shr, _align, _shl - s/_ror(\s+)(%r[0-9]+),/ - ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or - - s/_shr(\s+%r[0-9]+),([0-9]+),/ - $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2) - : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or - - s/_align(\s+%r[0-9]+,%r[0-9]+),/ - ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or - - s/_shl(\s+%r[0-9]+),([0-9]+),/ - $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2) - : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e; - - s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4); - - s/cmpb,\*/comb,/ if ($SIZE_T==4); - - s/\bbv\b/bve/ if ($SIZE_T==8); - - print $_,"\n"; -} - -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-ppc.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-ppc.pl deleted file mode 100755 index 6b44a68e59..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-ppc.pl +++ /dev/null @@ -1,460 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# I let hardware handle unaligned input, except on page boundaries -# (see below for details). Otherwise straightforward implementation -# with X vector in register bank. The module is big-endian [which is -# not big deal as there're no little-endian targets left around]. - -# sha256 | sha512 -# -m64 -m32 | -m64 -m32 -# --------------------------------------+----------------------- -# PPC970,gcc-4.0.0 +50% +38% | +40% +410%(*) -# Power6,xlc-7 +150% +90% | +100% +430%(*) -# -# (*) 64-bit code in 32-bit application context, which actually is -# on TODO list. It should be noted that for safe deployment in -# 32-bit *mutli-threaded* context asyncronous signals should be -# blocked upon entry to SHA512 block routine. This is because -# 32-bit signaling procedure invalidates upper halves of GPRs. -# Context switch procedure preserves them, but not signaling:-( - -# Second version is true multi-thread safe. Trouble with the original -# version was that it was using thread local storage pointer register. -# Well, it scrupulously preserved it, but the problem would arise the -# moment asynchronous signal was delivered and signal handler would -# dereference the TLS pointer. While it's never the case in openssl -# application or test suite, we have to respect this scenario and not -# use TLS pointer register. Alternative would be to require caller to -# block signals prior calling this routine. For the record, in 32-bit -# context R2 serves as TLS pointer, while in 64-bit context - R13. - -$flavour=shift; -$output =shift; - -if ($flavour =~ /64/) { - $SIZE_T=8; - $LRSAVE=2*$SIZE_T; - $STU="stdu"; - $UCMP="cmpld"; - $SHL="sldi"; - $POP="ld"; - $PUSH="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T=4; - $LRSAVE=$SIZE_T; - $STU="stwu"; - $UCMP="cmplw"; - $SHL="slwi"; - $POP="lwz"; - $PUSH="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; - -if ($output =~ /512/) { - $func="sha512_block_data_order"; - $SZ=8; - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=(1, 8, 7); - @sigma1=(19,61, 6); - $rounds=80; - $LD="ld"; - $ST="std"; - $ROR="rotrdi"; - $SHR="srdi"; -} else { - $func="sha256_block_data_order"; - $SZ=4; - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 7,18, 3); - @sigma1=(17,19,10); - $rounds=64; - $LD="lwz"; - $ST="stw"; - $ROR="rotrwi"; - $SHR="srwi"; -} - -$FRAME=32*$SIZE_T+16*$SZ; -$LOCALS=6*$SIZE_T; - -$sp ="r1"; -$toc="r2"; -$ctx="r3"; # zapped by $a0 -$inp="r4"; # zapped by $a1 -$num="r5"; # zapped by $t0 - -$T ="r0"; -$a0 ="r3"; -$a1 ="r4"; -$t0 ="r5"; -$t1 ="r6"; -$Tbl="r7"; - -$A ="r8"; -$B ="r9"; -$C ="r10"; -$D ="r11"; -$E ="r12"; -$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer -$G ="r14"; -$H ="r15"; - -@V=($A,$B,$C,$D,$E,$F,$G,$H); -@X=("r16","r17","r18","r19","r20","r21","r22","r23", - "r24","r25","r26","r27","r28","r29","r30","r31"); - -$inp="r31"; # reassigned $inp! aliases with @X[15] - -sub ROUND_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -$code.=<<___; - $LD $T,`$i*$SZ`($Tbl) - $ROR $a0,$e,$Sigma1[0] - $ROR $a1,$e,$Sigma1[1] - and $t0,$f,$e - andc $t1,$g,$e - add $T,$T,$h - xor $a0,$a0,$a1 - $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` - or $t0,$t0,$t1 ; Ch(e,f,g) - add $T,$T,@X[$i] - xor $a0,$a0,$a1 ; Sigma1(e) - add $T,$T,$t0 - add $T,$T,$a0 - - $ROR $a0,$a,$Sigma0[0] - $ROR $a1,$a,$Sigma0[1] - and $t0,$a,$b - and $t1,$a,$c - xor $a0,$a0,$a1 - $ROR $a1,$a1,`$Sigma0[2]-$Sigma0[1]` - xor $t0,$t0,$t1 - and $t1,$b,$c - xor $a0,$a0,$a1 ; Sigma0(a) - add $d,$d,$T - xor $t0,$t0,$t1 ; Maj(a,b,c) - add $h,$T,$a0 - add $h,$h,$t0 - -___ -} - -sub ROUND_16_xx { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -$i-=16; -$code.=<<___; - $ROR $a0,@X[($i+1)%16],$sigma0[0] - $ROR $a1,@X[($i+1)%16],$sigma0[1] - $ROR $t0,@X[($i+14)%16],$sigma1[0] - $ROR $t1,@X[($i+14)%16],$sigma1[1] - xor $a0,$a0,$a1 - $SHR $a1,@X[($i+1)%16],$sigma0[2] - xor $t0,$t0,$t1 - $SHR $t1,@X[($i+14)%16],$sigma1[2] - add @X[$i],@X[$i],@X[($i+9)%16] - xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) - xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) - add @X[$i],@X[$i],$a0 - add @X[$i],@X[$i],$t0 -___ -&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); -} - -$code=<<___; -.machine "any" -.text - -.globl $func -.align 6 -$func: - $STU $sp,-$FRAME($sp) - mflr r0 - $SHL $num,$num,`log(16*$SZ)/log(2)` - - $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) - - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - $PUSH r0,`$FRAME+$LRSAVE`($sp) - - $LD $A,`0*$SZ`($ctx) - mr $inp,r4 ; incarnate $inp - $LD $B,`1*$SZ`($ctx) - $LD $C,`2*$SZ`($ctx) - $LD $D,`3*$SZ`($ctx) - $LD $E,`4*$SZ`($ctx) - $LD $F,`5*$SZ`($ctx) - $LD $G,`6*$SZ`($ctx) - $LD $H,`7*$SZ`($ctx) - - bl LPICmeup -LPICedup: - andi. r0,$inp,3 - bne Lunaligned -Laligned: - add $num,$inp,$num - $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer - $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer - bl Lsha2_block_private - b Ldone - -; PowerPC specification allows an implementation to be ill-behaved -; upon unaligned access which crosses page boundary. "Better safe -; than sorry" principle makes me treat it specially. But I don't -; look for particular offending word, but rather for the input -; block which crosses the boundary. Once found that block is aligned -; and hashed separately... -.align 4 -Lunaligned: - subfic $t1,$inp,4096 - andi. $t1,$t1,`4096-16*$SZ` ; distance to closest page boundary - beq Lcross_page - $UCMP $num,$t1 - ble- Laligned ; didn't cross the page boundary - subfc $num,$t1,$num - add $t1,$inp,$t1 - $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real remaining num - $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; intermediate end pointer - $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer - bl Lsha2_block_private - ; $inp equals to the intermediate end pointer here - $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real remaining num -Lcross_page: - li $t1,`16*$SZ/4` - mtctr $t1 - addi r20,$sp,$LOCALS ; aligned spot below the frame -Lmemcpy: - lbz r16,0($inp) - lbz r17,1($inp) - lbz r18,2($inp) - lbz r19,3($inp) - addi $inp,$inp,4 - stb r16,0(r20) - stb r17,1(r20) - stb r18,2(r20) - stb r19,3(r20) - addi r20,r20,4 - bdnz Lmemcpy - - $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp - addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer - addi $inp,$sp,$LOCALS ; fictitious inp pointer - $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num - $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer - $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer - bl Lsha2_block_private - $POP $inp,`$FRAME-$SIZE_T*26`($sp) ; restore real inp - $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num - addic. $num,$num,`-16*$SZ` ; num-- - bne- Lunaligned - -Ldone: - $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,18,3,0 - .long 0 - -.align 4 -Lsha2_block_private: -___ -for($i=0;$i<16;$i++) { -$code.=<<___ if ($SZ==4); - lwz @X[$i],`$i*$SZ`($inp) -___ -# 64-bit loads are split to 2x32-bit ones, as CPU can't handle -# unaligned 64-bit loads, only 32-bit ones... -$code.=<<___ if ($SZ==8); - lwz $t0,`$i*$SZ`($inp) - lwz @X[$i],`$i*$SZ+4`($inp) - insrdi @X[$i],$t0,32,0 -___ - &ROUND_00_15($i,@V); - unshift(@V,pop(@V)); -} -$code.=<<___; - li $T,`$rounds/16-1` - mtctr $T -.align 4 -Lrounds: - addi $Tbl,$Tbl,`16*$SZ` -___ -for(;$i<32;$i++) { - &ROUND_16_xx($i,@V); - unshift(@V,pop(@V)); -} -$code.=<<___; - bdnz- Lrounds - - $POP $ctx,`$FRAME-$SIZE_T*22`($sp) - $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer - $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer - subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl - - $LD r16,`0*$SZ`($ctx) - $LD r17,`1*$SZ`($ctx) - $LD r18,`2*$SZ`($ctx) - $LD r19,`3*$SZ`($ctx) - $LD r20,`4*$SZ`($ctx) - $LD r21,`5*$SZ`($ctx) - $LD r22,`6*$SZ`($ctx) - addi $inp,$inp,`16*$SZ` ; advance inp - $LD r23,`7*$SZ`($ctx) - add $A,$A,r16 - add $B,$B,r17 - $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) - add $C,$C,r18 - $ST $A,`0*$SZ`($ctx) - add $D,$D,r19 - $ST $B,`1*$SZ`($ctx) - add $E,$E,r20 - $ST $C,`2*$SZ`($ctx) - add $F,$F,r21 - $ST $D,`3*$SZ`($ctx) - add $G,$G,r22 - $ST $E,`4*$SZ`($ctx) - add $H,$H,r23 - $ST $F,`5*$SZ`($ctx) - $ST $G,`6*$SZ`($ctx) - $UCMP $inp,$num - $ST $H,`7*$SZ`($ctx) - bne Lsha2_block_private - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -___ - -# Ugly hack here, because PPC assembler syntax seem to vary too -# much from platforms to platform... -$code.=<<___; -.align 6 -LPICmeup: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl ; vvvvvv "distance" between . and 1st data entry - addi $Tbl,$Tbl,`64-8` - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - .space `64-9*4` -___ -$code.=<<___ if ($SZ==8); - .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd - .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc - .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 - .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 - .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe - .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 - .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 - .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 - .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 - .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 - .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 - .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 - .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 - .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 - .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 - .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 - .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 - .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df - .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 - .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b - .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 - .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 - .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 - .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 - .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 - .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 - .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb - .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 - .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 - .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec - .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 - .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b - .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 - .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 - .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 - .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b - .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 - .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c - .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a - .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 -___ -$code.=<<___ if ($SZ==4); - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-s390x.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-s390x.pl deleted file mode 100644 index 079a3fc78a..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-s390x.pl +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA256/512 block procedures for s390x. - -# April 2007. -# -# sha256_block_data_order is reportedly >3 times faster than gcc 3.3 -# generated code (must be a bug in compiler, as improvement is -# "pathologically" high, in particular in comparison to other SHA -# modules). But the real twist is that it detects if hardware support -# for SHA256 is available and in such case utilizes it. Then the -# performance can reach >6.5x of assembler one for larger chunks. -# -# sha512_block_data_order is ~70% faster than gcc 3.3 generated code. - -# January 2009. -# -# Add support for hardware SHA512 and reschedule instructions to -# favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster -# than software. - -# November 2010. -# -# Adapt for -m31 build. If kernel supports what's called "highgprs" -# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit -# instructions and achieve "64-bit" performance even in 31-bit legacy -# application context. The feature is not specific to any particular -# processor, as long as it's "z-CPU". Latter implies that the code -# remains z/Architecture specific. On z900 SHA256 was measured to -# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3. - -$flavour = shift; - -if ($flavour =~ /3[12]/) { - $SIZE_T=4; - $g=""; -} else { - $SIZE_T=8; - $g="g"; -} - -$t0="%r0"; -$t1="%r1"; -$ctx="%r2"; $t2="%r2"; -$inp="%r3"; -$len="%r4"; # used as index in inner loop - -$A="%r5"; -$B="%r6"; -$C="%r7"; -$D="%r8"; -$E="%r9"; -$F="%r10"; -$G="%r11"; -$H="%r12"; @V=($A,$B,$C,$D,$E,$F,$G,$H); -$tbl="%r13"; -$T1="%r14"; -$sp="%r15"; - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -if ($output =~ /512/) { - $label="512"; - $SZ=8; - $LD="lg"; # load from memory - $ST="stg"; # store to memory - $ADD="alg"; # add with memory operand - $ROT="rllg"; # rotate left - $SHR="srlg"; # logical right shift [see even at the end] - @Sigma0=(25,30,36); - @Sigma1=(23,46,50); - @sigma0=(56,63, 7); - @sigma1=( 3,45, 6); - $rounds=80; - $kimdfunc=3; # 0 means unknown/unsupported/unimplemented/disabled -} else { - $label="256"; - $SZ=4; - $LD="llgf"; # load from memory - $ST="st"; # store to memory - $ADD="al"; # add with memory operand - $ROT="rll"; # rotate left - $SHR="srl"; # logical right shift - @Sigma0=(10,19,30); - @Sigma1=( 7,21,26); - @sigma0=(14,25, 3); - @sigma1=(13,15,10); - $rounds=64; - $kimdfunc=2; # magic function code for kimd instruction -} -$Func="sha${label}_block_data_order"; -$Table="K${label}"; -$stdframe=16*$SIZE_T+4*8; -$frame=$stdframe+16*$SZ; - -sub BODY_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___ if ($i<16); - $LD $T1,`$i*$SZ`($inp) ### $i -___ -$code.=<<___; - $ROT $t0,$e,$Sigma1[0] - $ROT $t1,$e,$Sigma1[1] - lgr $t2,$f - xgr $t0,$t1 - $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` - xgr $t2,$g - $ST $T1,`$stdframe+$SZ*($i%16)`($sp) - xgr $t0,$t1 # Sigma1(e) - algr $T1,$h # T1+=h - ngr $t2,$e - lgr $t1,$a - algr $T1,$t0 # T1+=Sigma1(e) - $ROT $h,$a,$Sigma0[0] - xgr $t2,$g # Ch(e,f,g) - $ADD $T1,`$i*$SZ`($len,$tbl) # T1+=K[i] - $ROT $t0,$a,$Sigma0[1] - algr $T1,$t2 # T1+=Ch(e,f,g) - ogr $t1,$b - xgr $h,$t0 - lgr $t2,$a - ngr $t1,$c - $ROT $t0,$t0,`$Sigma0[2]-$Sigma0[1]` - xgr $h,$t0 # h=Sigma0(a) - ngr $t2,$b - algr $h,$T1 # h+=T1 - ogr $t2,$t1 # Maj(a,b,c) - algr $d,$T1 # d+=T1 - algr $h,$t2 # h+=Maj(a,b,c) -___ -} - -sub BODY_16_XX { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i - $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp) - $ROT $t0,$T1,$sigma0[0] - $SHR $T1,$sigma0[2] - $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` - xgr $T1,$t0 - $ROT $t0,$t1,$sigma1[0] - xgr $T1,$t2 # sigma0(X[i+1]) - $SHR $t1,$sigma1[2] - $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i] - xgr $t1,$t0 - $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` - $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9] - xgr $t1,$t0 # sigma1(X[i+14]) - algr $T1,$t1 # +=sigma1(X[i+14]) -___ - &BODY_00_15(@_); -} - -$code.=<<___; -.text -.align 64 -.type $Table,\@object -$Table: -___ -$code.=<<___ if ($SZ==4); - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -___ -$code.=<<___ if ($SZ==8); - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -___ -$code.=<<___; -.size $Table,.-$Table -.globl $Func -.type $Func,\@function -$Func: - sllg $len,$len,`log(16*$SZ)/log(2)` -___ -$code.=<<___ if ($kimdfunc); - larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security assist - jz .Lsoftware - lghi %r0,0 - la %r1,`2*$SIZE_T`($sp) - .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,`2*$SIZE_T`($sp) - tmhh %r0,`0x8000>>$kimdfunc` - jz .Lsoftware - lghi %r0,$kimdfunc - lgr %r1,$ctx - lgr %r2,$inp - lgr %r3,$len - .long 0xb93e0002 # kimd %r0,%r2 - brc 1,.-4 # pay attention to "partial completion" - br %r14 -.align 16 -.Lsoftware: -___ -$code.=<<___; - lghi %r1,-$frame - la $len,0($len,$inp) - stm${g} $ctx,%r15,`2*$SIZE_T`($sp) - lgr %r0,$sp - la $sp,0(%r1,$sp) - st${g} %r0,0($sp) - - larl $tbl,$Table - $LD $A,`0*$SZ`($ctx) - $LD $B,`1*$SZ`($ctx) - $LD $C,`2*$SZ`($ctx) - $LD $D,`3*$SZ`($ctx) - $LD $E,`4*$SZ`($ctx) - $LD $F,`5*$SZ`($ctx) - $LD $G,`6*$SZ`($ctx) - $LD $H,`7*$SZ`($ctx) - -.Lloop: - lghi $len,0 -___ -for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } -$code.=".Lrounds_16_xx:\n"; -for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - aghi $len,`16*$SZ` - lghi $t0,`($rounds-16)*$SZ` - clgr $len,$t0 - jne .Lrounds_16_xx - - l${g} $ctx,`$frame+2*$SIZE_T`($sp) - la $inp,`16*$SZ`($inp) - $ADD $A,`0*$SZ`($ctx) - $ADD $B,`1*$SZ`($ctx) - $ADD $C,`2*$SZ`($ctx) - $ADD $D,`3*$SZ`($ctx) - $ADD $E,`4*$SZ`($ctx) - $ADD $F,`5*$SZ`($ctx) - $ADD $G,`6*$SZ`($ctx) - $ADD $H,`7*$SZ`($ctx) - $ST $A,`0*$SZ`($ctx) - $ST $B,`1*$SZ`($ctx) - $ST $C,`2*$SZ`($ctx) - $ST $D,`3*$SZ`($ctx) - $ST $E,`4*$SZ`($ctx) - $ST $F,`5*$SZ`($ctx) - $ST $G,`6*$SZ`($ctx) - $ST $H,`7*$SZ`($ctx) - cl${g} $inp,`$frame+4*$SIZE_T`($sp) - jne .Lloop - - lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) - br %r14 -.size $Func,.-$Func -.string "SHA${label} block transform for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,16,8 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -# unlike 32-bit shift 64-bit one takes three arguments -$code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm; - -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-sparcv9.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-sparcv9.pl deleted file mode 100644 index 585740789e..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-sparcv9.pl +++ /dev/null @@ -1,594 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# SHA256 performance improvement over compiler generated code varies -# from 40% for Sun C [32-bit build] to 70% for gcc [3.3, 64-bit -# build]. Just like in SHA1 module I aim to ensure scalability on -# UltraSPARC T1 by packing X[16] to 8 64-bit registers. - -# SHA512 on pre-T1 UltraSPARC. -# -# Performance is >75% better than 64-bit code generated by Sun C and -# over 2x than 32-bit code. X[16] resides on stack, but access to it -# is scheduled for L2 latency and staged through 32 least significant -# bits of %l0-%l7. The latter is done to achieve 32-/64-bit ABI -# duality. Nevetheless it's ~40% faster than SHA256, which is pretty -# good [optimal coefficient is 50%]. -# -# SHA512 on UltraSPARC T1. -# -# It's not any faster than 64-bit code generated by Sun C 5.8. This is -# because 64-bit code generator has the advantage of using 64-bit -# loads(*) to access X[16], which I consciously traded for 32-/64-bit -# ABI duality [as per above]. But it surpasses 32-bit Sun C generated -# code by 60%, not to mention that it doesn't suffer from severe decay -# when running 4 times physical cores threads and that it leaves gcc -# [3.4] behind by over 4x factor! If compared to SHA256, single thread -# performance is only 10% better, but overall throughput for maximum -# amount of threads for given CPU exceeds corresponding one of SHA256 -# by 30% [again, optimal coefficient is 50%]. -# -# (*) Unlike pre-T1 UltraSPARC loads on T1 are executed strictly -# in-order, i.e. load instruction has to complete prior next -# instruction in given thread is executed, even if the latter is -# not dependent on load result! This means that on T1 two 32-bit -# loads are always slower than one 64-bit load. Once again this -# is unlike pre-T1 UltraSPARC, where, if scheduled appropriately, -# 2x32-bit loads can be as fast as 1x64-bit ones. - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } - -$output=shift; -open STDOUT,">$output"; - -if ($output =~ /512/) { - $label="512"; - $SZ=8; - $LD="ldx"; # load from memory - $ST="stx"; # store to memory - $SLL="sllx"; # shift left logical - $SRL="srlx"; # shift right logical - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=( 7, 1, 8); # right shift first - @sigma1=( 6,19,61); # right shift first - $lastK=0x817; - $rounds=80; - $align=4; - - $locals=16*$SZ; # X[16] - - $A="%o0"; - $B="%o1"; - $C="%o2"; - $D="%o3"; - $E="%o4"; - $F="%o5"; - $G="%g1"; - $H="%o7"; - @V=($A,$B,$C,$D,$E,$F,$G,$H); -} else { - $label="256"; - $SZ=4; - $LD="ld"; # load from memory - $ST="st"; # store to memory - $SLL="sll"; # shift left logical - $SRL="srl"; # shift right logical - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 3, 7,18); # right shift first - @sigma1=(10,17,19); # right shift first - $lastK=0x8f2; - $rounds=64; - $align=8; - - $locals=0; # X[16] is register resident - @X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7"); - - $A="%l0"; - $B="%l1"; - $C="%l2"; - $D="%l3"; - $E="%l4"; - $F="%l5"; - $G="%l6"; - $H="%l7"; - @V=($A,$B,$C,$D,$E,$F,$G,$H); -} -$T1="%g2"; -$tmp0="%g3"; -$tmp1="%g4"; -$tmp2="%g5"; - -$ctx="%i0"; -$inp="%i1"; -$len="%i2"; -$Ktbl="%i3"; -$tmp31="%i4"; -$tmp32="%i5"; - -########### SHA256 -$Xload = sub { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; - - if ($i==0) { -$code.=<<___; - ldx [$inp+0],@X[0] - ldx [$inp+16],@X[2] - ldx [$inp+32],@X[4] - ldx [$inp+48],@X[6] - ldx [$inp+8],@X[1] - ldx [$inp+24],@X[3] - subcc %g0,$tmp31,$tmp32 ! should be 64-$tmp31, but -$tmp31 works too - ldx [$inp+40],@X[5] - bz,pt %icc,.Laligned - ldx [$inp+56],@X[7] - - sllx @X[0],$tmp31,@X[0] - ldx [$inp+64],$T1 -___ -for($j=0;$j<7;$j++) -{ $code.=<<___; - srlx @X[$j+1],$tmp32,$tmp1 - sllx @X[$j+1],$tmp31,@X[$j+1] - or $tmp1,@X[$j],@X[$j] -___ -} -$code.=<<___; - srlx $T1,$tmp32,$T1 - or $T1,@X[7],@X[7] -.Laligned: -___ - } - - if ($i&1) { - $code.="\tadd @X[$i/2],$h,$T1\n"; - } else { - $code.="\tsrlx @X[$i/2],32,$T1\n\tadd $h,$T1,$T1\n"; - } -} if ($SZ==4); - -########### SHA512 -$Xload = sub { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; -my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1),"%l".eval((($i+1)*2)%8)); - -$code.=<<___ if ($i==0); - ld [$inp+0],%l0 - ld [$inp+4],%l1 - ld [$inp+8],%l2 - ld [$inp+12],%l3 - ld [$inp+16],%l4 - ld [$inp+20],%l5 - ld [$inp+24],%l6 - ld [$inp+28],%l7 -___ -$code.=<<___ if ($i<15); - sllx @pair[1],$tmp31,$tmp2 ! Xload($i) - add $tmp31,32,$tmp0 - sllx @pair[0],$tmp0,$tmp1 - `"ld [$inp+".eval(32+0+$i*8)."],@pair[0]" if ($i<12)` - srlx @pair[2],$tmp32,@pair[1] - or $tmp1,$tmp2,$tmp2 - or @pair[1],$tmp2,$tmp2 - `"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)` - add $h,$tmp2,$T1 - $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`] -___ -$code.=<<___ if ($i==12); - brnz,a $tmp31,.+8 - ld [$inp+128],%l0 -___ -$code.=<<___ if ($i==15); - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2 - sllx @pair[1],$tmp31,$tmp2 ! Xload($i) - add $tmp31,32,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3 - sllx @pair[0],$tmp0,$tmp1 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4 - srlx @pair[2],$tmp32,@pair[1] - or $tmp1,$tmp2,$tmp2 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5 - or @pair[1],$tmp2,$tmp2 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6 - add $h,$tmp2,$T1 - $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`] - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1 -___ -} if ($SZ==8); - -########### common -sub BODY_00_15 { -my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; - - if ($i<16) { - &$Xload(@_); - } else { - $code.="\tadd $h,$T1,$T1\n"; - } - -$code.=<<___; - $SRL $e,@Sigma1[0],$h !! $i - xor $f,$g,$tmp2 - $SLL $e,`$SZ*8-@Sigma1[2]`,$tmp1 - and $e,$tmp2,$tmp2 - $SRL $e,@Sigma1[1],$tmp0 - xor $tmp1,$h,$h - $SLL $e,`$SZ*8-@Sigma1[1]`,$tmp1 - xor $tmp0,$h,$h - $SRL $e,@Sigma1[2],$tmp0 - xor $tmp1,$h,$h - $SLL $e,`$SZ*8-@Sigma1[0]`,$tmp1 - xor $tmp0,$h,$h - xor $g,$tmp2,$tmp2 ! Ch(e,f,g) - xor $tmp1,$h,$tmp0 ! Sigma1(e) - - $SRL $a,@Sigma0[0],$h - add $tmp2,$T1,$T1 - $LD [$Ktbl+`$i*$SZ`],$tmp2 ! K[$i] - $SLL $a,`$SZ*8-@Sigma0[2]`,$tmp1 - add $tmp0,$T1,$T1 - $SRL $a,@Sigma0[1],$tmp0 - xor $tmp1,$h,$h - $SLL $a,`$SZ*8-@Sigma0[1]`,$tmp1 - xor $tmp0,$h,$h - $SRL $a,@Sigma0[2],$tmp0 - xor $tmp1,$h,$h - $SLL $a,`$SZ*8-@Sigma0[0]`,$tmp1 - xor $tmp0,$h,$h - xor $tmp1,$h,$h ! Sigma0(a) - - or $a,$b,$tmp0 - and $a,$b,$tmp1 - and $c,$tmp0,$tmp0 - or $tmp0,$tmp1,$tmp1 ! Maj(a,b,c) - add $tmp2,$T1,$T1 ! +=K[$i] - add $tmp1,$h,$h - - add $T1,$d,$d - add $T1,$h,$h -___ -} - -########### SHA256 -$BODY_16_XX = sub { -my $i=@_[0]; -my $xi; - - if ($i&1) { - $xi=$tmp32; - $code.="\tsrlx @X[(($i+1)/2)%8],32,$xi\n"; - } else { - $xi=@X[(($i+1)/2)%8]; - } -$code.=<<___; - srl $xi,@sigma0[0],$T1 !! Xupdate($i) - sll $xi,`32-@sigma0[2]`,$tmp1 - srl $xi,@sigma0[1],$tmp0 - xor $tmp1,$T1,$T1 - sll $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1 - xor $tmp0,$T1,$T1 - srl $xi,@sigma0[2],$tmp0 - xor $tmp1,$T1,$T1 -___ - if ($i&1) { - $xi=@X[(($i+14)/2)%8]; - } else { - $xi=$tmp32; - $code.="\tsrlx @X[(($i+14)/2)%8],32,$xi\n"; - } -$code.=<<___; - srl $xi,@sigma1[0],$tmp2 - xor $tmp0,$T1,$T1 ! T1=sigma0(X[i+1]) - sll $xi,`32-@sigma1[2]`,$tmp1 - srl $xi,@sigma1[1],$tmp0 - xor $tmp1,$tmp2,$tmp2 - sll $tmp1,`@sigma1[2]-@sigma1[1]`,$tmp1 - xor $tmp0,$tmp2,$tmp2 - srl $xi,@sigma1[2],$tmp0 - xor $tmp1,$tmp2,$tmp2 -___ - if ($i&1) { - $xi=@X[($i/2)%8]; -$code.=<<___; - srlx @X[(($i+9)/2)%8],32,$tmp1 ! X[i+9] - xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) - srl @X[($i/2)%8],0,$tmp0 - add $tmp2,$tmp1,$tmp1 - add $xi,$T1,$T1 ! +=X[i] - xor $tmp0,@X[($i/2)%8],@X[($i/2)%8] - add $tmp1,$T1,$T1 - - srl $T1,0,$T1 - or $T1,@X[($i/2)%8],@X[($i/2)%8] -___ - } else { - $xi=@X[(($i+9)/2)%8]; -$code.=<<___; - srlx @X[($i/2)%8],32,$tmp1 ! X[i] - xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) - add $xi,$T1,$T1 ! +=X[i+9] - add $tmp2,$tmp1,$tmp1 - srl @X[($i/2)%8],0,@X[($i/2)%8] - add $tmp1,$T1,$T1 - - sllx $T1,32,$tmp0 - or $tmp0,@X[($i/2)%8],@X[($i/2)%8] -___ - } - &BODY_00_15(@_); -} if ($SZ==4); - -########### SHA512 -$BODY_16_XX = sub { -my $i=@_[0]; -my @pair=("%l".eval(($i*2)%8),"%l".eval(($i*2)%8+1)); - -$code.=<<___; - sllx %l2,32,$tmp0 !! Xupdate($i) - or %l3,$tmp0,$tmp0 - - srlx $tmp0,@sigma0[0],$T1 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2 - sllx $tmp0,`64-@sigma0[2]`,$tmp1 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3 - srlx $tmp0,@sigma0[1],$tmp0 - xor $tmp1,$T1,$T1 - sllx $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1 - xor $tmp0,$T1,$T1 - srlx $tmp0,`@sigma0[2]-@sigma0[1]`,$tmp0 - xor $tmp1,$T1,$T1 - sllx %l6,32,$tmp2 - xor $tmp0,$T1,$T1 ! sigma0(X[$i+1]) - or %l7,$tmp2,$tmp2 - - srlx $tmp2,@sigma1[0],$tmp1 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6 - sllx $tmp2,`64-@sigma1[2]`,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7 - srlx $tmp2,@sigma1[1],$tmp2 - xor $tmp0,$tmp1,$tmp1 - sllx $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0 - xor $tmp2,$tmp1,$tmp1 - srlx $tmp2,`@sigma1[2]-@sigma1[1]`,$tmp2 - xor $tmp0,$tmp1,$tmp1 - sllx %l4,32,$tmp0 - xor $tmp2,$tmp1,$tmp1 ! sigma1(X[$i+14]) - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4 - or %l5,$tmp0,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5 - - sllx %l0,32,$tmp2 - add $tmp1,$T1,$T1 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0 - or %l1,$tmp2,$tmp2 - add $tmp0,$T1,$T1 ! +=X[$i+9] - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1 - add $tmp2,$T1,$T1 ! +=X[$i] - $ST $T1,[%sp+`$bias+$frame+($i%16)*$SZ`] -___ - &BODY_00_15(@_); -} if ($SZ==8); - -$code.=<<___ if ($bits==64); -.register %g2,#scratch -.register %g3,#scratch -___ -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 64 -K${label}: -.type K${label},#object -___ -if ($SZ==4) { -$code.=<<___; - .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -___ -} else { -$code.=<<___; - .long 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd - .long 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc - .long 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 - .long 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 - .long 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe - .long 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 - .long 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 - .long 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 - .long 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 - .long 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 - .long 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 - .long 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 - .long 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210 - .long 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 - .long 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 - .long 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 - .long 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 - .long 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df - .long 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 - .long 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b - .long 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 - .long 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 - .long 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 - .long 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 - .long 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 - .long 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 - .long 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb - .long 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3 - .long 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 - .long 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec - .long 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 - .long 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b - .long 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 - .long 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 - .long 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 - .long 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b - .long 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 - .long 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c - .long 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a - .long 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 -___ -} -$code.=<<___; -.size K${label},.-K${label} -.globl sha${label}_block_data_order -sha${label}_block_data_order: - save %sp,`-$frame-$locals`,%sp - and $inp,`$align-1`,$tmp31 - sllx $len,`log(16*$SZ)/log(2)`,$len - andn $inp,`$align-1`,$inp - sll $tmp31,3,$tmp31 - add $inp,$len,$len -___ -$code.=<<___ if ($SZ==8); # SHA512 - mov 32,$tmp32 - sub $tmp32,$tmp31,$tmp32 -___ -$code.=<<___; -.Lpic: call .+8 - add %o7,K${label}-.Lpic,$Ktbl - - $LD [$ctx+`0*$SZ`],$A - $LD [$ctx+`1*$SZ`],$B - $LD [$ctx+`2*$SZ`],$C - $LD [$ctx+`3*$SZ`],$D - $LD [$ctx+`4*$SZ`],$E - $LD [$ctx+`5*$SZ`],$F - $LD [$ctx+`6*$SZ`],$G - $LD [$ctx+`7*$SZ`],$H - -.Lloop: -___ -for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } -$code.=".L16_xx:\n"; -for (;$i<32;$i++) { &$BODY_16_XX($i,@V); unshift(@V,pop(@V)); } -$code.=<<___; - and $tmp2,0xfff,$tmp2 - cmp $tmp2,$lastK - bne .L16_xx - add $Ktbl,`16*$SZ`,$Ktbl ! Ktbl+=16 - -___ -$code.=<<___ if ($SZ==4); # SHA256 - $LD [$ctx+`0*$SZ`],@X[0] - $LD [$ctx+`1*$SZ`],@X[1] - $LD [$ctx+`2*$SZ`],@X[2] - $LD [$ctx+`3*$SZ`],@X[3] - $LD [$ctx+`4*$SZ`],@X[4] - $LD [$ctx+`5*$SZ`],@X[5] - $LD [$ctx+`6*$SZ`],@X[6] - $LD [$ctx+`7*$SZ`],@X[7] - - add $A,@X[0],$A - $ST $A,[$ctx+`0*$SZ`] - add $B,@X[1],$B - $ST $B,[$ctx+`1*$SZ`] - add $C,@X[2],$C - $ST $C,[$ctx+`2*$SZ`] - add $D,@X[3],$D - $ST $D,[$ctx+`3*$SZ`] - add $E,@X[4],$E - $ST $E,[$ctx+`4*$SZ`] - add $F,@X[5],$F - $ST $F,[$ctx+`5*$SZ`] - add $G,@X[6],$G - $ST $G,[$ctx+`6*$SZ`] - add $H,@X[7],$H - $ST $H,[$ctx+`7*$SZ`] -___ -$code.=<<___ if ($SZ==8); # SHA512 - ld [$ctx+`0*$SZ+0`],%l0 - ld [$ctx+`0*$SZ+4`],%l1 - ld [$ctx+`1*$SZ+0`],%l2 - ld [$ctx+`1*$SZ+4`],%l3 - ld [$ctx+`2*$SZ+0`],%l4 - ld [$ctx+`2*$SZ+4`],%l5 - ld [$ctx+`3*$SZ+0`],%l6 - - sllx %l0,32,$tmp0 - ld [$ctx+`3*$SZ+4`],%l7 - sllx %l2,32,$tmp1 - or %l1,$tmp0,$tmp0 - or %l3,$tmp1,$tmp1 - add $tmp0,$A,$A - add $tmp1,$B,$B - $ST $A,[$ctx+`0*$SZ`] - sllx %l4,32,$tmp2 - $ST $B,[$ctx+`1*$SZ`] - sllx %l6,32,$T1 - or %l5,$tmp2,$tmp2 - or %l7,$T1,$T1 - add $tmp2,$C,$C - $ST $C,[$ctx+`2*$SZ`] - add $T1,$D,$D - $ST $D,[$ctx+`3*$SZ`] - - ld [$ctx+`4*$SZ+0`],%l0 - ld [$ctx+`4*$SZ+4`],%l1 - ld [$ctx+`5*$SZ+0`],%l2 - ld [$ctx+`5*$SZ+4`],%l3 - ld [$ctx+`6*$SZ+0`],%l4 - ld [$ctx+`6*$SZ+4`],%l5 - ld [$ctx+`7*$SZ+0`],%l6 - - sllx %l0,32,$tmp0 - ld [$ctx+`7*$SZ+4`],%l7 - sllx %l2,32,$tmp1 - or %l1,$tmp0,$tmp0 - or %l3,$tmp1,$tmp1 - add $tmp0,$E,$E - add $tmp1,$F,$F - $ST $E,[$ctx+`4*$SZ`] - sllx %l4,32,$tmp2 - $ST $F,[$ctx+`5*$SZ`] - sllx %l6,32,$T1 - or %l5,$tmp2,$tmp2 - or %l7,$T1,$T1 - add $tmp2,$G,$G - $ST $G,[$ctx+`6*$SZ`] - add $T1,$H,$H - $ST $H,[$ctx+`7*$SZ`] -___ -$code.=<<___; - add $inp,`16*$SZ`,$inp ! advance inp - cmp $inp,$len - bne `$bits==64?"%xcc":"%icc"`,.Lloop - sub $Ktbl,`($rounds-16)*$SZ`,$Ktbl ! rewind Ktbl - - ret - restore -.type sha${label}_block_data_order,#function -.size sha${label}_block_data_order,(.-sha${label}_block_data_order) -.asciz "SHA${label} block transform for SPARCv9, CRYPTOGAMS by " -.align 4 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/asm/sha512-x86_64.pl b/drivers/builtin_openssl2/crypto/sha/asm/sha512-x86_64.pl deleted file mode 100755 index 8d51678557..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/asm/sha512-x86_64.pl +++ /dev/null @@ -1,451 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# sha256/512_block procedure for x86_64. -# -# 40% improvement over compiler-generated code on Opteron. On EM64T -# sha256 was observed to run >80% faster and sha512 - >40%. No magical -# tricks, just straight implementation... I really wonder why gcc -# [being armed with inline assembler] fails to generate as fast code. -# The only thing which is cool about this module is that it's very -# same instruction sequence used for both SHA-256 and SHA-512. In -# former case the instructions operate on 32-bit operands, while in -# latter - on 64-bit ones. All I had to do is to get one flavor right, -# the other one passed the test right away:-) -# -# sha256_block runs in ~1005 cycles on Opteron, which gives you -# asymptotic performance of 64*1000/1005=63.7MBps times CPU clock -# frequency in GHz. sha512_block runs in ~1275 cycles, which results -# in 128*1000/1275=100MBps per GHz. Is there room for improvement? -# Well, if you compare it to IA-64 implementation, which maintains -# X[16] in register bank[!], tends to 4 instructions per CPU clock -# cycle and runs in 1003 cycles, 1275 is very good result for 3-way -# issue Opteron pipeline and X[16] maintained in memory. So that *if* -# there is a way to improve it, *then* the only way would be to try to -# offload X[16] updates to SSE unit, but that would require "deeper" -# loop unroll, which in turn would naturally cause size blow-up, not -# to mention increased complexity! And once again, only *if* it's -# actually possible to noticeably improve overall ILP, instruction -# level parallelism, on a given CPU implementation in this case. -# -# Special note on Intel EM64T. While Opteron CPU exhibits perfect -# perfromance ratio of 1.5 between 64- and 32-bit flavors [see above], -# [currently available] EM64T CPUs apparently are far from it. On the -# contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit -# sha256_block:-( This is presumably because 64-bit shifts/rotates -# apparently are not atomic instructions, but implemented in microcode. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -if ($output =~ /512/) { - $func="sha512_block_data_order"; - $TABLE="K512"; - $SZ=8; - @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx", - "%r8", "%r9", "%r10","%r11"); - ($T1,$a0,$a1,$a2)=("%r12","%r13","%r14","%r15"); - @Sigma0=(28,34,39); - @Sigma1=(14,18,41); - @sigma0=(1, 8, 7); - @sigma1=(19,61, 6); - $rounds=80; -} else { - $func="sha256_block_data_order"; - $TABLE="K256"; - $SZ=4; - @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx", - "%r8d","%r9d","%r10d","%r11d"); - ($T1,$a0,$a1,$a2)=("%r12d","%r13d","%r14d","%r15d"); - @Sigma0=( 2,13,22); - @Sigma1=( 6,11,25); - @sigma0=( 7,18, 3); - @sigma1=(17,19,10); - $rounds=64; -} - -$ctx="%rdi"; # 1st arg -$round="%rdi"; # zaps $ctx -$inp="%rsi"; # 2nd arg -$Tbl="%rbp"; - -$_ctx="16*$SZ+0*8(%rsp)"; -$_inp="16*$SZ+1*8(%rsp)"; -$_end="16*$SZ+2*8(%rsp)"; -$_rsp="16*$SZ+3*8(%rsp)"; -$framesz="16*$SZ+4*8"; - - -sub ROUND_00_15() -{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - ror \$`$Sigma1[2]-$Sigma1[1]`,$a0 - mov $f,$a2 - mov $T1,`$SZ*($i&0xf)`(%rsp) - - ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 - xor $e,$a0 - xor $g,$a2 # f^g - - ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 - add $h,$T1 # T1+=h - xor $a,$a1 - - add ($Tbl,$round,$SZ),$T1 # T1+=K[round] - and $e,$a2 # (f^g)&e - mov $b,$h - - ror \$`$Sigma0[1]-$Sigma0[0]`,$a1 - xor $e,$a0 - xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g - - xor $c,$h # b^c - xor $a,$a1 - add $a2,$T1 # T1+=Ch(e,f,g) - mov $b,$a2 - - ror \$$Sigma1[0],$a0 # Sigma1(e) - and $a,$h # h=(b^c)&a - and $c,$a2 # b&c - - ror \$$Sigma0[0],$a1 # Sigma0(a) - add $a0,$T1 # T1+=Sigma1(e) - add $a2,$h # h+=b&c (completes +=Maj(a,b,c) - - add $T1,$d # d+=T1 - add $T1,$h # h+=T1 - lea 1($round),$round # round++ - add $a1,$h # h+=Sigma0(a) - -___ -} - -sub ROUND_16_XX() -{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 - mov `$SZ*(($i+14)&0xf)`(%rsp),$a1 - mov $a0,$T1 - mov $a1,$a2 - - ror \$`$sigma0[1]-$sigma0[0]`,$T1 - xor $a0,$T1 - shr \$$sigma0[2],$a0 - - ror \$$sigma0[0],$T1 - xor $T1,$a0 # sigma0(X[(i+1)&0xf]) - mov `$SZ*(($i+9)&0xf)`(%rsp),$T1 - - ror \$`$sigma1[1]-$sigma1[0]`,$a2 - xor $a1,$a2 - shr \$$sigma1[2],$a1 - - ror \$$sigma1[0],$a2 - add $a0,$T1 - xor $a2,$a1 # sigma1(X[(i+14)&0xf]) - - add `$SZ*($i&0xf)`(%rsp),$T1 - mov $e,$a0 - add $a1,$T1 - mov $a,$a1 -___ - &ROUND_00_15(@_); -} - -$code=<<___; -.text - -.globl $func -.type $func,\@function,4 -.align 16 -$func: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - mov %rsp,%r11 # copy %rsp - shl \$4,%rdx # num*16 - sub \$$framesz,%rsp - lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ - and \$-64,%rsp # align stack frame - mov $ctx,$_ctx # save ctx, 1st arg - mov $inp,$_inp # save inp, 2nd arh - mov %rdx,$_end # save end pointer, "3rd" arg - mov %r11,$_rsp # save copy of %rsp -.Lprologue: - - lea $TABLE(%rip),$Tbl - - mov $SZ*0($ctx),$A - mov $SZ*1($ctx),$B - mov $SZ*2($ctx),$C - mov $SZ*3($ctx),$D - mov $SZ*4($ctx),$E - mov $SZ*5($ctx),$F - mov $SZ*6($ctx),$G - mov $SZ*7($ctx),$H - jmp .Lloop - -.align 16 -.Lloop: - xor $round,$round -___ - for($i=0;$i<16;$i++) { - $code.=" mov $SZ*$i($inp),$T1\n"; - $code.=" mov @ROT[4],$a0\n"; - $code.=" mov @ROT[0],$a1\n"; - $code.=" bswap $T1\n"; - &ROUND_00_15($i,@ROT); - unshift(@ROT,pop(@ROT)); - } -$code.=<<___; - jmp .Lrounds_16_xx -.align 16 -.Lrounds_16_xx: -___ - for(;$i<32;$i++) { - &ROUND_16_XX($i,@ROT); - unshift(@ROT,pop(@ROT)); - } - -$code.=<<___; - cmp \$$rounds,$round - jb .Lrounds_16_xx - - mov $_ctx,$ctx - lea 16*$SZ($inp),$inp - - add $SZ*0($ctx),$A - add $SZ*1($ctx),$B - add $SZ*2($ctx),$C - add $SZ*3($ctx),$D - add $SZ*4($ctx),$E - add $SZ*5($ctx),$F - add $SZ*6($ctx),$G - add $SZ*7($ctx),$H - - cmp $_end,$inp - - mov $A,$SZ*0($ctx) - mov $B,$SZ*1($ctx) - mov $C,$SZ*2($ctx) - mov $D,$SZ*3($ctx) - mov $E,$SZ*4($ctx) - mov $F,$SZ*5($ctx) - mov $G,$SZ*6($ctx) - mov $H,$SZ*7($ctx) - jb .Lloop - - mov $_rsp,%rsi - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lepilogue: - ret -.size $func,.-$func -___ - -if ($SZ==4) { -$code.=<<___; -.align 64 -.type $TABLE,\@object -$TABLE: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -___ -} else { -$code.=<<___; -.align 64 -.type $TABLE,\@object -$TABLE: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -___ -} - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue - - mov 16*$SZ+3*8(%rax),%rax # pull $_rsp - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_$func - .rva .LSEH_end_$func - .rva .LSEH_info_$func - -.section .xdata -.align 8 -.LSEH_info_$func: - .byte 9,0,0,0 - .rva se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/sha/sha1test.c b/drivers/builtin_openssl2/crypto/sha/sha1test.c deleted file mode 100644 index 551a348df3..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/sha1test.c +++ /dev/null @@ -1,174 +0,0 @@ -/* crypto/sha/sha1test.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#ifdef OPENSSL_NO_SHA -int main(int argc, char *argv[]) -{ - printf("No SHA support\n"); - return (0); -} -#else -# include -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -# undef SHA_0 /* FIPS 180 */ -# define SHA_1 /* FIPS 180-1 */ - -static char *test[] = { - "abc", - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", - NULL, -}; - -# ifdef SHA_0 -static char *ret[] = { - "0164b8a914cd2a5e74c4f7ff082c4d97f1edf880", - "d2516ee1acfa5baf33dfc1c471e438449ef134c8", -}; - -static char *bigret = "3232affa48628a26653b5aaa44541fd90d690603"; -# endif -# ifdef SHA_1 -static char *ret[] = { - "a9993e364706816aba3e25717850c26c9cd0d89d", - "84983e441c3bd26ebaae4aa1f95129e5e54670f1", -}; - -static char *bigret = "34aa973cd4c4daa4f61eeb2bdbad27316534016f"; -# endif - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - static unsigned char buf[1000]; - char *p, *r; - EVP_MD_CTX c; - unsigned char md[SHA_DIGEST_LENGTH]; - -# ifdef CHARSET_EBCDIC - ebcdic2ascii(test[0], test[0], strlen(test[0])); - ebcdic2ascii(test[1], test[1], strlen(test[1])); -# endif - - EVP_MD_CTX_init(&c); - P = test; - R = ret; - i = 1; - while (*P != NULL) { - EVP_Digest(*P, strlen((char *)*P), md, NULL, EVP_sha1(), NULL); - p = pt(md); - if (strcmp(p, (char *)*R) != 0) { - printf("error calculating SHA1 on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } - - memset(buf, 'a', 1000); -# ifdef CHARSET_EBCDIC - ebcdic2ascii(buf, buf, 1000); -# endif /* CHARSET_EBCDIC */ - EVP_DigestInit_ex(&c, EVP_sha1(), NULL); - for (i = 0; i < 1000; i++) - EVP_DigestUpdate(&c, buf, 1000); - EVP_DigestFinal_ex(&c, md, NULL); - p = pt(md); - - r = bigret; - if (strcmp(p, r) != 0) { - printf("error calculating SHA1 on 'a' * 1000\n"); - printf("got %s instead of %s\n", p, r); - err++; - } else - printf("test 3 ok\n"); - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EVP_MD_CTX_cleanup(&c); - EXIT(err); - return (0); -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < SHA_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/sha/sha512.c b/drivers/builtin_openssl2/crypto/sha/sha512.c index de0aad8c9d..3bf66ae198 100644 --- a/drivers/builtin_openssl2/crypto/sha/sha512.c +++ b/drivers/builtin_openssl2/crypto/sha/sha512.c @@ -55,6 +55,7 @@ const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT; # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \ defined(__s390__) || defined(__s390x__) || \ + defined(__aarch64__) || \ defined(SHA512_ASM) # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA # endif @@ -353,6 +354,18 @@ static const SHA_LONG64 K512[80] = { asm ("rotrdi %0,%1,%2" \ : "=r"(ret) \ : "r"(a),"K"(n)); ret; }) +# elif defined(__aarch64__) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("ror %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a),"I"(n)); ret; }) +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define PULL64(x) ({ SHA_LONG64 ret; \ + asm ("rev %0,%1" \ + : "=r"(ret) \ + : "r"(*((const SHA_LONG64 *)(&(x))))); ret; }) +# endif # endif # elif defined(_MSC_VER) # if defined(_WIN64) /* applies to both IA-64 and AMD64 */ diff --git a/drivers/builtin_openssl2/crypto/sha/shatest.c b/drivers/builtin_openssl2/crypto/sha/shatest.c deleted file mode 100644 index 105060a7ec..0000000000 --- a/drivers/builtin_openssl2/crypto/sha/shatest.c +++ /dev/null @@ -1,174 +0,0 @@ -/* crypto/sha/shatest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include - -#include "../e_os.h" - -#if defined(OPENSSL_NO_SHA) || defined(OPENSSL_NO_SHA0) -int main(int argc, char *argv[]) -{ - printf("No SHA0 support\n"); - return (0); -} -#else -# include -# include - -# ifdef CHARSET_EBCDIC -# include -# endif - -# define SHA_0 /* FIPS 180 */ -# undef SHA_1 /* FIPS 180-1 */ - -static char *test[] = { - "abc", - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", - NULL, -}; - -# ifdef SHA_0 -static char *ret[] = { - "0164b8a914cd2a5e74c4f7ff082c4d97f1edf880", - "d2516ee1acfa5baf33dfc1c471e438449ef134c8", -}; - -static char *bigret = "3232affa48628a26653b5aaa44541fd90d690603"; -# endif -# ifdef SHA_1 -static char *ret[] = { - "a9993e364706816aba3e25717850c26c9cd0d89d", - "84983e441c3bd26ebaae4aa1f95129e5e54670f1", -}; - -static char *bigret = "34aa973cd4c4daa4f61eeb2bdbad27316534016f"; -# endif - -static char *pt(unsigned char *md); -int main(int argc, char *argv[]) -{ - int i, err = 0; - char **P, **R; - static unsigned char buf[1000]; - char *p, *r; - EVP_MD_CTX c; - unsigned char md[SHA_DIGEST_LENGTH]; - -# ifdef CHARSET_EBCDIC - ebcdic2ascii(test[0], test[0], strlen(test[0])); - ebcdic2ascii(test[1], test[1], strlen(test[1])); -# endif - - EVP_MD_CTX_init(&c); - P = test; - R = ret; - i = 1; - while (*P != NULL) { - EVP_Digest(*P, strlen(*P), md, NULL, EVP_sha(), NULL); - p = pt(md); - if (strcmp(p, *R) != 0) { - printf("error calculating SHA on '%s'\n", *P); - printf("got %s instead of %s\n", p, *R); - err++; - } else - printf("test %d ok\n", i); - i++; - R++; - P++; - } - - memset(buf, 'a', 1000); -# ifdef CHARSET_EBCDIC - ebcdic2ascii(buf, buf, 1000); -# endif /* CHARSET_EBCDIC */ - EVP_DigestInit_ex(&c, EVP_sha(), NULL); - for (i = 0; i < 1000; i++) - EVP_DigestUpdate(&c, buf, 1000); - EVP_DigestFinal_ex(&c, md, NULL); - p = pt(md); - - r = bigret; - if (strcmp(p, r) != 0) { - printf("error calculating SHA on '%s'\n", p); - printf("got %s instead of %s\n", p, r); - err++; - } else - printf("test 3 ok\n"); - -# ifdef OPENSSL_SYS_NETWARE - if (err) - printf("ERROR: %d\n", err); -# endif - EVP_MD_CTX_cleanup(&c); - EXIT(err); - return (0); -} - -static char *pt(unsigned char *md) -{ - int i; - static char buf[80]; - - for (i = 0; i < SHA_DIGEST_LENGTH; i++) - sprintf(&(buf[i * 2]), "%02x", md[i]); - return (buf); -} -#endif diff --git a/drivers/builtin_openssl2/crypto/sparc_arch.h b/drivers/builtin_openssl2/crypto/sparc_arch.h new file mode 100644 index 0000000000..e30d322a4a --- /dev/null +++ b/drivers/builtin_openssl2/crypto/sparc_arch.h @@ -0,0 +1,101 @@ +#ifndef __SPARC_ARCH_H__ +# define __SPARC_ARCH_H__ + +# define SPARCV9_TICK_PRIVILEGED (1<<0) +# define SPARCV9_PREFER_FPU (1<<1) +# define SPARCV9_VIS1 (1<<2) +# define SPARCV9_VIS2 (1<<3)/* reserved */ +# define SPARCV9_FMADD (1<<4)/* reserved for SPARC64 V */ +# define SPARCV9_BLK (1<<5)/* VIS1 block copy */ +# define SPARCV9_VIS3 (1<<6) +# define SPARCV9_RANDOM (1<<7) +# define SPARCV9_64BIT_STACK (1<<8) + +/* + * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, + * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in + * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... + */ +# define CFR_AES 0x00000001/* Supports AES opcodes */ +# define CFR_DES 0x00000002/* Supports DES opcodes */ +# define CFR_KASUMI 0x00000004/* Supports KASUMI opcodes */ +# define CFR_CAMELLIA 0x00000008/* Supports CAMELLIA opcodes */ +# define CFR_MD5 0x00000010/* Supports MD5 opcodes */ +# define CFR_SHA1 0x00000020/* Supports SHA1 opcodes */ +# define CFR_SHA256 0x00000040/* Supports SHA256 opcodes */ +# define CFR_SHA512 0x00000080/* Supports SHA512 opcodes */ +# define CFR_MPMUL 0x00000100/* Supports MPMUL opcodes */ +# define CFR_MONTMUL 0x00000200/* Supports MONTMUL opcodes */ +# define CFR_MONTSQR 0x00000400/* Supports MONTSQR opcodes */ +# define CFR_CRC32C 0x00000800/* Supports CRC32C opcodes */ + +# if defined(OPENSSL_PIC) && !defined(__PIC__) +# define __PIC__ +# endif + +# if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) +# define __arch64__ +# endif + +# define SPARC_PIC_THUNK(reg) \ + .align 32; \ +.Lpic_thunk: \ + jmp %o7 + 8; \ + add %o7, reg, reg; + +# define SPARC_PIC_THUNK_CALL(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .Lpic_thunk; \ + or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; + +# if 1 +# define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) +# else +# define SPARC_SETUP_GOT_REG(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .+8; \ + or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ + add %o7, reg, reg +# endif + +# if defined(__arch64__) + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + setx SYM, %o7, reg; +# define LDPTR ldx +# define SIZE_T_CC %xcc +# define STACK_FRAME 192 +# define STACK_BIAS 2047 +# define STACK_7thARG (STACK_BIAS+176) + +# else + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + set SYM, reg; +# define LDPTR ld +# define SIZE_T_CC %icc +# define STACK_FRAME 112 +# define STACK_BIAS 0 +# define STACK_7thARG 92 +# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) + +# endif + +# ifdef __PIC__ +# undef SPARC_LOAD_ADDRESS +# undef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + SPARC_SETUP_GOT_REG(reg); \ + sethi %hi(SYM), %o7; \ + or %o7, %lo(SYM), %o7; \ + LDPTR [reg + %o7], reg; +# endif + +# ifndef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ + mov %o7, tmp; \ + SPARC_LOAD_ADDRESS(SYM, reg) \ + mov tmp, %o7; +# endif + +#endif /* __SPARC_ARCH_H__ */ diff --git a/drivers/builtin_openssl2/crypto/sparccpuid.S b/drivers/builtin_openssl2/crypto/sparccpuid.S deleted file mode 100644 index c63d5da46f..0000000000 --- a/drivers/builtin_openssl2/crypto/sparccpuid.S +++ /dev/null @@ -1,402 +0,0 @@ -#if defined(__SUNPRO_C) && defined(__sparcv9) -# define ABI64 /* They've said -xarch=v9 at command line */ -#elif defined(__GNUC__) && defined(__arch64__) -# define ABI64 /* They've said -m64 at command line */ -#endif - -#ifdef ABI64 - .register %g2,#scratch - .register %g3,#scratch -# define FRAME -192 -# define BIAS 2047 -#else -# define FRAME -96 -# define BIAS 0 -#endif - -.text -.align 32 -.global OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,#function -! Keep in mind that this does not excuse us from wiping the stack! -! This routine wipes registers, but not the backing store [which -! resides on the stack, toward lower addresses]. To facilitate for -! stack wiping I return pointer to the top of stack of the *caller*. -OPENSSL_wipe_cpu: - save %sp,FRAME,%sp - nop -#ifdef __sun -#include - ta ST_CLEAN_WINDOWS -#else - call .walk.reg.wins -#endif - nop - call .PIC.zero.up - mov .zero-(.-4),%o0 - ld [%o0],%f0 - ld [%o0],%f1 - - subcc %g0,1,%o0 - ! Following is V9 "rd %ccr,%o0" instruction. However! V8 - ! specification says that it ("rd %asr2,%o0" in V8 terms) does - ! not cause illegal_instruction trap. It therefore can be used - ! to determine if the CPU the code is executing on is V8- or - ! V9-compliant, as V9 returns a distinct value of 0x99, - ! "negative" and "borrow" bits set in both %icc and %xcc. - .word 0x91408000 !rd %ccr,%o0 - cmp %o0,0x99 - bne .v8 - nop - ! Even though we do not use %fp register bank, - ! we wipe it as memcpy might have used it... - .word 0xbfa00040 !fmovd %f0,%f62 - .word 0xbba00040 !... - .word 0xb7a00040 - .word 0xb3a00040 - .word 0xafa00040 - .word 0xaba00040 - .word 0xa7a00040 - .word 0xa3a00040 - .word 0x9fa00040 - .word 0x9ba00040 - .word 0x97a00040 - .word 0x93a00040 - .word 0x8fa00040 - .word 0x8ba00040 - .word 0x87a00040 - .word 0x83a00040 !fmovd %f0,%f32 -.v8: fmovs %f1,%f31 - clr %o0 - fmovs %f0,%f30 - clr %o1 - fmovs %f1,%f29 - clr %o2 - fmovs %f0,%f28 - clr %o3 - fmovs %f1,%f27 - clr %o4 - fmovs %f0,%f26 - clr %o5 - fmovs %f1,%f25 - clr %o7 - fmovs %f0,%f24 - clr %l0 - fmovs %f1,%f23 - clr %l1 - fmovs %f0,%f22 - clr %l2 - fmovs %f1,%f21 - clr %l3 - fmovs %f0,%f20 - clr %l4 - fmovs %f1,%f19 - clr %l5 - fmovs %f0,%f18 - clr %l6 - fmovs %f1,%f17 - clr %l7 - fmovs %f0,%f16 - clr %i0 - fmovs %f1,%f15 - clr %i1 - fmovs %f0,%f14 - clr %i2 - fmovs %f1,%f13 - clr %i3 - fmovs %f0,%f12 - clr %i4 - fmovs %f1,%f11 - clr %i5 - fmovs %f0,%f10 - clr %g1 - fmovs %f1,%f9 - clr %g2 - fmovs %f0,%f8 - clr %g3 - fmovs %f1,%f7 - clr %g4 - fmovs %f0,%f6 - clr %g5 - fmovs %f1,%f5 - fmovs %f0,%f4 - fmovs %f1,%f3 - fmovs %f0,%f2 - - add %fp,BIAS,%i0 ! return pointer to caller´s top of stack - - ret - restore - -.zero: .long 0x0,0x0 -.PIC.zero.up: - retl - add %o0,%o7,%o0 -#ifdef DEBUG -.global walk_reg_wins -.type walk_reg_wins,#function -walk_reg_wins: -#endif -.walk.reg.wins: - save %sp,FRAME,%sp - cmp %i7,%o7 - be 2f - clr %o0 - cmp %o7,0 ! compiler never cleans %o7... - be 1f ! could have been a leaf function... - clr %o1 - call .walk.reg.wins - nop -1: clr %o2 - clr %o3 - clr %o4 - clr %o5 - clr %o7 - clr %l0 - clr %l1 - clr %l2 - clr %l3 - clr %l4 - clr %l5 - clr %l6 - clr %l7 - add %o0,1,%i0 ! used for debugging -2: ret - restore -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu - -.global OPENSSL_atomic_add -.type OPENSSL_atomic_add,#function -.align 32 -OPENSSL_atomic_add: -#ifndef ABI64 - subcc %g0,1,%o2 - .word 0x95408000 !rd %ccr,%o2, see comment above - cmp %o2,0x99 - be .v9 - nop - save %sp,FRAME,%sp - ba .enter - nop -#ifdef __sun -! Note that you do not have to link with libthread to call thr_yield, -! as libc provides a stub, which is overloaded the moment you link -! with *either* libpthread or libthread... -#define YIELD_CPU thr_yield -#else -! applies at least to Linux and FreeBSD... Feedback expected... -#define YIELD_CPU sched_yield -#endif -.spin: call YIELD_CPU - nop -.enter: ld [%i0],%i2 - cmp %i2,-4096 - be .spin - mov -1,%i2 - swap [%i0],%i2 - cmp %i2,-1 - be .spin - add %i2,%i1,%i2 - stbar - st %i2,[%i0] - sra %i2,%g0,%i0 - ret - restore -.v9: -#endif - ld [%o0],%o2 -1: add %o1,%o2,%o3 - .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 - cmp %o2,%o3 - bne 1b - mov %o3,%o2 ! cas is always fetching to dest. register - add %o1,%o2,%o0 ! OpenSSL expects the new value - retl - sra %o0,%g0,%o0 ! we return signed int, remember? -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add - -.global _sparcv9_rdtick -.align 32 -_sparcv9_rdtick: - subcc %g0,1,%o0 - .word 0x91408000 !rd %ccr,%o0 - cmp %o0,0x99 - bne .notick - xor %o0,%o0,%o0 - .word 0x91410000 !rd %tick,%o0 - retl - .word 0x93323020 !srlx %o0,32,%o1 -.notick: - retl - xor %o1,%o1,%o1 -.type _sparcv9_rdtick,#function -.size _sparcv9_rdtick,.-_sparcv9_rdtick - -.global _sparcv9_vis1_probe -.align 8 -_sparcv9_vis1_probe: - add %sp,BIAS+2,%o1 - .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 - retl - .word 0x81b00d80 !fxor %f0,%f0,%f0 -.type _sparcv9_vis1_probe,#function -.size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe - -! Probe and instrument VIS1 instruction. Output is number of cycles it -! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit -! is slow (documented to be 6 cycles on T2) and the core is in-order -! single-issue, it should be possible to distinguish Tx reliably... -! Observed return values are: -! -! UltraSPARC IIe 7 -! UltraSPARC III 7 -! UltraSPARC T1 24 -! -! Numbers for T2 and SPARC64 V-VII are more than welcomed. -! -! It would be possible to detect specifically US-T1 by instrumenting -! fmul8ulx16, which is emulated on T1 and as such accounts for quite -! a lot of %tick-s, couple of thousand on Linux... -.global _sparcv9_vis1_instrument -.align 8 -_sparcv9_vis1_instrument: - .word 0x91410000 !rd %tick,%o0 - .word 0x81b00d80 !fxor %f0,%f0,%f0 - .word 0x85b08d82 !fxor %f2,%f2,%f2 - .word 0x93410000 !rd %tick,%o1 - .word 0x81b00d80 !fxor %f0,%f0,%f0 - .word 0x85b08d82 !fxor %f2,%f2,%f2 - .word 0x95410000 !rd %tick,%o2 - .word 0x81b00d80 !fxor %f0,%f0,%f0 - .word 0x85b08d82 !fxor %f2,%f2,%f2 - .word 0x97410000 !rd %tick,%o3 - .word 0x81b00d80 !fxor %f0,%f0,%f0 - .word 0x85b08d82 !fxor %f2,%f2,%f2 - .word 0x99410000 !rd %tick,%o4 - - ! calculate intervals - sub %o1,%o0,%o0 - sub %o2,%o1,%o1 - sub %o3,%o2,%o2 - sub %o4,%o3,%o3 - - ! find minumum value - cmp %o0,%o1 - .word 0x38680002 !bgu,a %xcc,.+8 - mov %o1,%o0 - cmp %o0,%o2 - .word 0x38680002 !bgu,a %xcc,.+8 - mov %o2,%o0 - cmp %o0,%o3 - .word 0x38680002 !bgu,a %xcc,.+8 - mov %o3,%o0 - - retl - nop -.type _sparcv9_vis1_instrument,#function -.size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument - -.global _sparcv9_vis2_probe -.align 8 -_sparcv9_vis2_probe: - retl - .word 0x81b00980 !bshuffle %f0,%f0,%f0 -.type _sparcv9_vis2_probe,#function -.size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe - -.global _sparcv9_fmadd_probe -.align 8 -_sparcv9_fmadd_probe: - .word 0x81b00d80 !fxor %f0,%f0,%f0 - .word 0x85b08d82 !fxor %f2,%f2,%f2 - retl - .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 -.type _sparcv9_fmadd_probe,#function -.size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe - -.global OPENSSL_cleanse -.align 32 -OPENSSL_cleanse: - cmp %o1,14 - nop -#ifdef ABI64 - bgu %xcc,.Lot -#else - bgu .Lot -#endif - cmp %o1,0 - bne .Little - nop - retl - nop - -.Little: - stb %g0,[%o0] - subcc %o1,1,%o1 - bnz .Little - add %o0,1,%o0 - retl - nop -.align 32 -.Lot: -#ifndef ABI64 - subcc %g0,1,%g1 - ! see above for explanation - .word 0x83408000 !rd %ccr,%g1 - cmp %g1,0x99 - bne .v8lot - nop -#endif - -.v9lot: andcc %o0,7,%g0 - bz .v9aligned - nop - stb %g0,[%o0] - sub %o1,1,%o1 - ba .v9lot - add %o0,1,%o0 -.align 16,0x01000000 -.v9aligned: - .word 0xc0720000 !stx %g0,[%o0] - sub %o1,8,%o1 - andcc %o1,-8,%g0 -#ifdef ABI64 - .word 0x126ffffd !bnz %xcc,.v9aligned -#else - .word 0x124ffffd !bnz %icc,.v9aligned -#endif - add %o0,8,%o0 - - cmp %o1,0 - bne .Little - nop - retl - nop -#ifndef ABI64 -.v8lot: andcc %o0,3,%g0 - bz .v8aligned - nop - stb %g0,[%o0] - sub %o1,1,%o1 - ba .v8lot - add %o0,1,%o0 - nop -.v8aligned: - st %g0,[%o0] - sub %o1,4,%o1 - andcc %o1,-4,%g0 - bnz .v8aligned - add %o0,4,%o0 - - cmp %o1,0 - bne .Little - nop - retl - nop -#endif -.type OPENSSL_cleanse,#function -.size OPENSSL_cleanse,.-OPENSSL_cleanse - -.section ".init",#alloc,#execinstr - call OPENSSL_cpuid_setup - nop diff --git a/drivers/builtin_openssl2/crypto/sparcv9cap.c b/drivers/builtin_openssl2/crypto/sparcv9cap.c index d9f986f6b9..a36e461792 100644 --- a/drivers/builtin_openssl2/crypto/sparcv9cap.c +++ b/drivers/builtin_openssl2/crypto/sparcv9cap.c @@ -4,30 +4,68 @@ #include #include #include +#include #include -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#include "sparc_arch.h" -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED; +#if defined(__GNUC__) && defined(__linux) +__attribute__ ((visibility("hidden"))) +#endif +unsigned int OPENSSL_sparcv9cap_P[2] = { SPARCV9_TICK_PRIVILEGED, 0 }; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { + int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); - if (num >= 8 && !(num & 1) && - (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == - (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) - return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); - else - return bn_mul_mont_int(rp, ap, bp, np, n0, num); + if (!(num & 1) && num >= 6) { + if ((num & 15) == 0 && num <= 64 && + (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) == + (CFR_MONTMUL | CFR_MONTSQR)) { + typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, + const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + static const bn_mul_mont_f funcs[4] = { + bn_mul_mont_t4_8, bn_mul_mont_t4_16, + bn_mul_mont_t4_24, bn_mul_mont_t4_32 + }; + bn_mul_mont_f worker = funcs[num / 16 - 1]; + + if ((*worker) (rp, ap, bp, np, n0)) + return 1; + /* retry once and fall back */ + if ((*worker) (rp, ap, bp, np, n0)) + return 1; + return bn_mul_mont_vis3(rp, ap, bp, np, n0, num); + } + if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3)) + return bn_mul_mont_vis3(rp, ap, bp, np, n0, num); + else if (num >= 8 && + (OPENSSL_sparcv9cap_P[0] & + (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == + (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) + return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); + } + return bn_mul_mont_int(rp, ap, bp, np, n0, num); } unsigned long _sparcv9_rdtick(void); @@ -35,10 +73,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); +unsigned long _sparcv9_rdcfr(void); +void _sparcv9_vis3_probe(void); +unsigned long _sparcv9_random(void); +size_t _sparcv9_vis1_instrument_bus(unsigned int *, size_t); +size_t _sparcv9_vis1_instrument_bus2(unsigned int *, size_t, size_t); unsigned long OPENSSL_rdtsc(void) { - if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED) + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else @@ -48,6 +91,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } +size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) +{ + if ((OPENSSL_sparcv9cap_P[0] & (SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus(out, cnt); + else + return 0; +} + +size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) +{ + if ((OPENSSL_sparcv9cap_P[0] & (SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus2(out, cnt, max); + else + return 0; +} + #if 0 && defined(__sun) && defined(__SVR4) /* * This code path is disabled, because of incompatibility of libdevinfo.so.1 @@ -74,17 +135,17 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp(name, "SUNW,UltraSPARC") || /* covers II,III,IV */ !strncmp(name, "SUNW,UltraSPARC-I", 17)) { - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0') - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name, "SUNW,UltraSPARC", 15)) { - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } @@ -103,22 +164,22 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); return; } if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) { if (strcmp(si, "sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) { if (strstr(si, "+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; if (strstr(si, "+vis2")) { - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } @@ -176,6 +237,17 @@ static void common_handler(int sig) siglongjmp(common_jmp, sig); } +#if defined(__sun) && defined(__SVR4) +# if defined(__GNUC__) && __GNUC__>=2 +extern unsigned int getisax(unsigned int vec[], unsigned int sz) __attribute__ ((weak)); +# elif defined(__SUNPRO_C) +#pragma weak getisax +extern unsigned int getisax(unsigned int vec[], unsigned int sz); +# else +static unsigned int (*getisax) (unsigned int vec[], unsigned int sz) = NULL; +# endif +#endif + void OPENSSL_cpuid_setup(void) { char *e; @@ -188,12 +260,50 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); + if ((e = strchr(e, ':'))) + OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0); + return; + } + +#if defined(__sun) && defined(__SVR4) + if (getisax != NULL) { + unsigned int vec[1]; + + if (getisax (vec,1)) { + if (vec[0]&0x0020) OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1; + if (vec[0]&0x0040) OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; + if (vec[0]&0x0080) OPENSSL_sparcv9cap_P[0] |= SPARCV9_BLK; + if (vec[0]&0x0100) OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; + if (vec[0]&0x0400) OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; + + /* reconstruct %cfr copy */ + OPENSSL_sparcv9cap_P[1] = (vec[0]>>17)&0x3ff; + OPENSSL_sparcv9cap_P[1] |= (OPENSSL_sparcv9cap_P[1]&CFR_MONTMUL)<<1; + if (vec[0]&0x20000000) OPENSSL_sparcv9cap_P[1] |= CFR_CRC32C; + + /* Some heuristics */ + /* all known VIS2-capable CPUs have unprivileged tick counter */ + if (OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS2) + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; + + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; + + /* detect UltraSPARC-Tx, see sparccpud.S for details... */ + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS1) && + _sparcv9_vis1_instrument() >= 12) + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); + } + + if (sizeof(size_t) == 8) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; + return; } +#endif /* Initial value, fits UltraSPARC-I&II... */ - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked, SIGILL); @@ -216,30 +326,68 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_rdtick(); - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_vis1_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpud.S for details... */ if (_sparcv9_vis1_instrument() >= 12) - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_fmadd_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; + } + + /* + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, + * because VIS3 defines even integer instructions. + */ + if (sigsetjmp(common_jmp, 1) == 0) { + _sparcv9_vis3_probe(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; + } +# if 0 /* was planned at some point but never + * implemented in hardware */ + if (sigsetjmp(common_jmp, 1) == 0) { + (void)_sparcv9_random(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; + } +# endif + + /* + * In wait for better solution _sparcv9_rdcfr is masked by + * VIS3 flag, because it goes to uninterruptable endless + * loop on UltraSPARC II running Solaris. Things might be + * different on Linux... + */ + if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) && + sigsetjmp(common_jmp, 1) == 0) { + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS, &bus_oact, NULL); sigaction(SIGILL, &ill_oact, NULL); sigprocmask(SIG_SETMASK, &oset, NULL); + + if (sizeof(size_t) == 8) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; +# ifdef __linux + else { + int ret = syscall(340); + + if (ret >= 0 && ret & 1) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; + } +# endif } #endif diff --git a/drivers/builtin_openssl2/crypto/srp/srptest.c b/drivers/builtin_openssl2/crypto/srp/srptest.c deleted file mode 100644 index 451c70e40f..0000000000 --- a/drivers/builtin_openssl2/crypto/srp/srptest.c +++ /dev/null @@ -1,154 +0,0 @@ -#include -#ifdef OPENSSL_NO_SRP - -# include - -int main(int argc, char *argv[]) -{ - printf("No SRP support\n"); - return (0); -} - -#else - -# include -# include -# include - -static void showbn(const char *name, const BIGNUM *bn) -{ - fputs(name, stdout); - fputs(" = ", stdout); - BN_print_fp(stdout, bn); - putc('\n', stdout); -} - -# define RANDOM_SIZE 32 /* use 256 bits on each side */ - -static int run_srp(const char *username, const char *client_pass, - const char *server_pass) -{ - int ret = -1; - BIGNUM *s = NULL; - BIGNUM *v = NULL; - BIGNUM *a = NULL; - BIGNUM *b = NULL; - BIGNUM *u = NULL; - BIGNUM *x = NULL; - BIGNUM *Apub = NULL; - BIGNUM *Bpub = NULL; - BIGNUM *Kclient = NULL; - BIGNUM *Kserver = NULL; - unsigned char rand_tmp[RANDOM_SIZE]; - /* use builtin 1024-bit params */ - SRP_gN *GN = SRP_get_default_gN("1024"); - - if (GN == NULL) { - fprintf(stderr, "Failed to get SRP parameters\n"); - return -1; - } - /* Set up server's password entry */ - if (!SRP_create_verifier_BN(username, server_pass, &s, &v, GN->N, GN->g)) { - fprintf(stderr, "Failed to create SRP verifier\n"); - return -1; - } - - showbn("N", GN->N); - showbn("g", GN->g); - showbn("Salt", s); - showbn("Verifier", v); - - /* Server random */ - RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp)); - b = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL); - /* TODO - check b != 0 */ - showbn("b", b); - - /* Server's first message */ - Bpub = SRP_Calc_B(b, GN->N, GN->g, v); - showbn("B", Bpub); - - if (!SRP_Verify_B_mod_N(Bpub, GN->N)) { - fprintf(stderr, "Invalid B\n"); - return -1; - } - - /* Client random */ - RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp)); - a = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL); - /* TODO - check a != 0 */ - showbn("a", a); - - /* Client's response */ - Apub = SRP_Calc_A(a, GN->N, GN->g); - showbn("A", Apub); - - if (!SRP_Verify_A_mod_N(Apub, GN->N)) { - fprintf(stderr, "Invalid A\n"); - return -1; - } - - /* Both sides calculate u */ - u = SRP_Calc_u(Apub, Bpub, GN->N); - - /* Client's key */ - x = SRP_Calc_x(s, username, client_pass); - Kclient = SRP_Calc_client_key(GN->N, Bpub, GN->g, x, a, u); - showbn("Client's key", Kclient); - - /* Server's key */ - Kserver = SRP_Calc_server_key(Apub, v, u, b, GN->N); - showbn("Server's key", Kserver); - - if (BN_cmp(Kclient, Kserver) == 0) { - ret = 0; - } else { - fprintf(stderr, "Keys mismatch\n"); - ret = 1; - } - - BN_clear_free(Kclient); - BN_clear_free(Kserver); - BN_clear_free(x); - BN_free(u); - BN_free(Apub); - BN_clear_free(a); - BN_free(Bpub); - BN_clear_free(b); - BN_free(s); - BN_clear_free(v); - - return ret; -} - -int main(int argc, char **argv) -{ - BIO *bio_err; - bio_err = BIO_new_fp(stderr, BIO_NOCLOSE); - - CRYPTO_malloc_debug_init(); - CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL); - CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); - - ERR_load_crypto_strings(); - - /* "Negative" test, expect a mismatch */ - if (run_srp("alice", "password1", "password2") == 0) { - fprintf(stderr, "Mismatched SRP run failed\n"); - return 1; - } - - /* "Positive" test, should pass */ - if (run_srp("alice", "password", "password") != 0) { - fprintf(stderr, "Plain SRP run failed\n"); - return 1; - } - - CRYPTO_cleanup_all_ex_data(); - ERR_remove_thread_state(NULL); - ERR_free_strings(); - CRYPTO_mem_leaks(bio_err); - - return 0; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/stack/stack.c b/drivers/builtin_openssl2/crypto/stack/stack.c index 331f907190..fa50083e22 100644 --- a/drivers/builtin_openssl2/crypto/stack/stack.c +++ b/drivers/builtin_openssl2/crypto/stack/stack.c @@ -115,6 +115,40 @@ _STACK *sk_dup(_STACK *sk) return (NULL); } +_STACK *sk_deep_copy(_STACK *sk, void *(*copy_func) (void *), + void (*free_func) (void *)) +{ + _STACK *ret; + int i; + + if ((ret = OPENSSL_malloc(sizeof(_STACK))) == NULL) + return ret; + ret->comp = sk->comp; + ret->sorted = sk->sorted; + ret->num = sk->num; + ret->num_alloc = sk->num > MIN_NODES ? sk->num : MIN_NODES; + ret->data = OPENSSL_malloc(sizeof(char *) * ret->num_alloc); + if (ret->data == NULL) { + OPENSSL_free(ret); + return NULL; + } + for (i = 0; i < ret->num_alloc; i++) + ret->data[i] = NULL; + + for (i = 0; i < ret->num; ++i) { + if (sk->data[i] == NULL) + continue; + if ((ret->data[i] = copy_func(sk->data[i])) == NULL) { + while (--i >= 0) + if (ret->data[i] != NULL) + free_func(ret->data[i]); + sk_free(ret); + return NULL; + } + } + return ret; +} + _STACK *sk_new_null(void) { return sk_new((int (*)(const void *, const void *))0); @@ -326,7 +360,7 @@ void *sk_set(_STACK *st, int i, void *value) void sk_sort(_STACK *st) { - if (st && !st->sorted) { + if (st && !st->sorted && st->comp != NULL) { int (*comp_func) (const void *, const void *); /* diff --git a/drivers/builtin_openssl2/crypto/threads/mttest.c b/drivers/builtin_openssl2/crypto/threads/mttest.c deleted file mode 100644 index dbff4a69f3..0000000000 --- a/drivers/builtin_openssl2/crypto/threads/mttest.c +++ /dev/null @@ -1,1211 +0,0 @@ -/* crypto/threads/mttest.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include -#include -#ifdef LINUX -# include -#endif -#ifdef OPENSSL_SYS_WIN32 -# include -#endif -#ifdef SOLARIS -# include -# include -#endif -#ifdef IRIX -# include -# include -#endif -#ifdef PTHREADS -# include -#endif -#ifdef OPENSSL_SYS_NETWARE -# if !defined __int64 -# define __int64 long long -# endif -# include -#endif -#include -#include -#include -#include -#include -#include -#include - -#ifdef OPENSSL_SYS_NETWARE -# define TEST_SERVER_CERT "/openssl/apps/server.pem" -# define TEST_CLIENT_CERT "/openssl/apps/client.pem" -#else -# define TEST_SERVER_CERT "../../apps/server.pem" -# define TEST_CLIENT_CERT "../../apps/client.pem" -#endif - -#define MAX_THREAD_NUMBER 100 - -int verify_callback(int ok, X509_STORE_CTX *xs); -void thread_setup(void); -void thread_cleanup(void); -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx); - -void irix_locking_callback(int mode, int type, const char *file, int line); -void solaris_locking_callback(int mode, int type, const char *file, int line); -void win32_locking_callback(int mode, int type, const char *file, int line); -void pthreads_locking_callback(int mode, int type, const char *file, int line); -void netware_locking_callback(int mode, int type, const char *file, int line); -void beos_locking_callback(int mode, int type, const char *file, int line); - -void irix_thread_id(CRYPTO_THREADID *tid); -void solaris_thread_id(CRYPTO_THREADID *tid); -void pthreads_thread_id(CRYPTO_THREADID *tid); -void netware_thread_id(CRYPTO_THREADID *tid); -void beos_thread_id(CRYPTO_THREADID *tid); - -#if defined(OPENSSL_SYS_NETWARE) -static MPKMutex *lock_cs; -static MPKSema ThreadSem; -static long *lock_count; -#endif - -BIO *bio_err = NULL; -BIO *bio_stdout = NULL; - -static char *cipher = NULL; -int verbose = 0; -#ifdef FIONBIO -static int s_nbio = 0; -#endif - -int thread_number = 10; -int number_of_loops = 10; -int reconnect = 0; -int cache_stats = 0; - -static const char rnd_seed[] = - "string to make the random number generator think it has entropy"; - -int doit(char *ctx[4]); -static void print_stats(BIO *bio, SSL_CTX *ctx) -{ - BIO_printf(bio, "%4ld items in the session cache\n", - SSL_CTX_sess_number(ctx)); - BIO_printf(bio, "%4d client connects (SSL_connect())\n", - SSL_CTX_sess_connect(ctx)); - BIO_printf(bio, "%4d client connects that finished\n", - SSL_CTX_sess_connect_good(ctx)); - BIO_printf(bio, "%4d server connects (SSL_accept())\n", - SSL_CTX_sess_accept(ctx)); - BIO_printf(bio, "%4d server connects that finished\n", - SSL_CTX_sess_accept_good(ctx)); - BIO_printf(bio, "%4d session cache hits\n", SSL_CTX_sess_hits(ctx)); - BIO_printf(bio, "%4d session cache misses\n", SSL_CTX_sess_misses(ctx)); - BIO_printf(bio, "%4d session cache timeouts\n", SSL_CTX_sess_timeouts(ctx)); -} - -static void sv_usage(void) -{ - BIO_printf(bio_err, "usage: ssltest [args ...]\n"); - BIO_printf(bio_err, "\n"); - BIO_printf(bio_err, " -server_auth - check server certificate\n"); - BIO_printf(bio_err, " -client_auth - do client authentication\n"); - BIO_printf(bio_err, " -v - more output\n"); - BIO_printf(bio_err, " -CApath arg - PEM format directory of CA's\n"); - BIO_printf(bio_err, " -CAfile arg - PEM format file of CA's\n"); - BIO_printf(bio_err, " -threads arg - number of threads\n"); - BIO_printf(bio_err, " -loops arg - number of 'connections', per thread\n"); - BIO_printf(bio_err, " -reconnect - reuse session-id's\n"); - BIO_printf(bio_err, " -stats - server session-id cache stats\n"); - BIO_printf(bio_err, " -cert arg - server certificate/key\n"); - BIO_printf(bio_err, " -ccert arg - client certificate/key\n"); - BIO_printf(bio_err, " -ssl3 - just SSLv3n\n"); -} - -int main(int argc, char *argv[]) -{ - char *CApath = NULL, *CAfile = NULL; - int badop = 0; - int ret = 1; - int client_auth = 0; - int server_auth = 0; - SSL_CTX *s_ctx = NULL; - SSL_CTX *c_ctx = NULL; - char *scert = TEST_SERVER_CERT; - char *ccert = TEST_CLIENT_CERT; - const SSL_METHOD *ssl_method = SSLv23_method(); - - RAND_seed(rnd_seed, sizeof rnd_seed); - - if (bio_err == NULL) - bio_err = BIO_new_fd(2, BIO_NOCLOSE); - if (bio_stdout == NULL) - bio_stdout = BIO_new_fd(1, BIO_NOCLOSE); - argc--; - argv++; - - while (argc >= 1) { - if (strcmp(*argv, "-server_auth") == 0) - server_auth = 1; - else if (strcmp(*argv, "-client_auth") == 0) - client_auth = 1; - else if (strcmp(*argv, "-reconnect") == 0) - reconnect = 1; - else if (strcmp(*argv, "-stats") == 0) - cache_stats = 1; - else if (strcmp(*argv, "-ssl3") == 0) - ssl_method = SSLv3_method(); - else if (strcmp(*argv, "-ssl2") == 0) - ssl_method = SSLv2_method(); - else if (strcmp(*argv, "-CApath") == 0) { - if (--argc < 1) - goto bad; - CApath = *(++argv); - } else if (strcmp(*argv, "-CAfile") == 0) { - if (--argc < 1) - goto bad; - CAfile = *(++argv); - } else if (strcmp(*argv, "-cert") == 0) { - if (--argc < 1) - goto bad; - scert = *(++argv); - } else if (strcmp(*argv, "-ccert") == 0) { - if (--argc < 1) - goto bad; - ccert = *(++argv); - } else if (strcmp(*argv, "-threads") == 0) { - if (--argc < 1) - goto bad; - thread_number = atoi(*(++argv)); - if (thread_number == 0) - thread_number = 1; - if (thread_number > MAX_THREAD_NUMBER) - thread_number = MAX_THREAD_NUMBER; - } else if (strcmp(*argv, "-loops") == 0) { - if (--argc < 1) - goto bad; - number_of_loops = atoi(*(++argv)); - if (number_of_loops == 0) - number_of_loops = 1; - } else { - BIO_printf(bio_err, "unknown option %s\n", *argv); - badop = 1; - break; - } - argc--; - argv++; - } - if (badop) { - bad: - sv_usage(); - goto end; - } - - if (cipher == NULL && OPENSSL_issetugid() == 0) - cipher = getenv("SSL_CIPHER"); - - SSL_load_error_strings(); - OpenSSL_add_ssl_algorithms(); - - c_ctx = SSL_CTX_new(ssl_method); - s_ctx = SSL_CTX_new(ssl_method); - if ((c_ctx == NULL) || (s_ctx == NULL)) { - ERR_print_errors(bio_err); - goto end; - } - - SSL_CTX_set_session_cache_mode(s_ctx, - SSL_SESS_CACHE_NO_AUTO_CLEAR | - SSL_SESS_CACHE_SERVER); - SSL_CTX_set_session_cache_mode(c_ctx, - SSL_SESS_CACHE_NO_AUTO_CLEAR | - SSL_SESS_CACHE_SERVER); - - if (!SSL_CTX_use_certificate_file(s_ctx, scert, SSL_FILETYPE_PEM)) { - BIO_printf(bio_err, "SSL_CTX_use_certificate_file (%s)\n", scert); - ERR_print_errors(bio_err); - goto end; - } else - if (!SSL_CTX_use_RSAPrivateKey_file(s_ctx, scert, SSL_FILETYPE_PEM)) { - BIO_printf(bio_err, "SSL_CTX_use_RSAPrivateKey_file (%s)\n", scert); - ERR_print_errors(bio_err); - goto end; - } - - if (client_auth) { - SSL_CTX_use_certificate_file(c_ctx, ccert, SSL_FILETYPE_PEM); - SSL_CTX_use_RSAPrivateKey_file(c_ctx, ccert, SSL_FILETYPE_PEM); - } - - if ((!SSL_CTX_load_verify_locations(s_ctx, CAfile, CApath)) || - (!SSL_CTX_set_default_verify_paths(s_ctx)) || - (!SSL_CTX_load_verify_locations(c_ctx, CAfile, CApath)) || - (!SSL_CTX_set_default_verify_paths(c_ctx))) { - BIO_printf(bio_err, "SSL_load_verify_locations\n"); - ERR_print_errors(bio_err); - goto end; - } - - if (client_auth) { - BIO_printf(bio_err, "client authentication\n"); - SSL_CTX_set_verify(s_ctx, - SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, - verify_callback); - } - if (server_auth) { - BIO_printf(bio_err, "server authentication\n"); - SSL_CTX_set_verify(c_ctx, SSL_VERIFY_PEER, verify_callback); - } - - thread_setup(); - do_threads(s_ctx, c_ctx); - thread_cleanup(); - end: - - if (c_ctx != NULL) { - BIO_printf(bio_err, "Client SSL_CTX stats then free it\n"); - print_stats(bio_err, c_ctx); - SSL_CTX_free(c_ctx); - } - if (s_ctx != NULL) { - BIO_printf(bio_err, "Server SSL_CTX stats then free it\n"); - print_stats(bio_err, s_ctx); - if (cache_stats) { - BIO_printf(bio_err, "-----\n"); - lh_SSL_SESSION_stats_bio(SSL_CTX_sessions(s_ctx), bio_err); - BIO_printf(bio_err, "-----\n"); - /*- lh_SSL_SESSION_node_stats_bio(SSL_CTX_sessions(s_ctx),bio_err); - BIO_printf(bio_err,"-----\n"); */ - lh_SSL_SESSION_node_usage_stats_bio(SSL_CTX_sessions(s_ctx), bio_err); - BIO_printf(bio_err, "-----\n"); - } - SSL_CTX_free(s_ctx); - BIO_printf(bio_err, "done free\n"); - } - exit(ret); - return (0); -} - -#define W_READ 1 -#define W_WRITE 2 -#define C_DONE 1 -#define S_DONE 2 - -int ndoit(SSL_CTX *ssl_ctx[2]) -{ - int i; - int ret; - char *ctx[4]; - CRYPTO_THREADID thread_id; - - ctx[0] = (char *)ssl_ctx[0]; - ctx[1] = (char *)ssl_ctx[1]; - - if (reconnect) { - ctx[2] = (char *)SSL_new(ssl_ctx[0]); - ctx[3] = (char *)SSL_new(ssl_ctx[1]); - } else { - ctx[2] = NULL; - ctx[3] = NULL; - } - - CRYPTO_THREADID_current(&thread_id); - BIO_printf(bio_stdout, "started thread %lu\n", - CRYPTO_THREADID_hash(&thread_id)); - for (i = 0; i < number_of_loops; i++) { -/*- BIO_printf(bio_err,"%4d %2d ctx->ref (%3d,%3d)\n", - CRYPTO_THREADID_hash(&thread_id),i, - ssl_ctx[0]->references, - ssl_ctx[1]->references); */ -/* pthread_delay_np(&tm); */ - - ret = doit(ctx); - if (ret != 0) { - BIO_printf(bio_stdout, "error[%d] %lu - %d\n", - i, CRYPTO_THREADID_hash(&thread_id), ret); - return (ret); - } - } - BIO_printf(bio_stdout, "DONE %lu\n", CRYPTO_THREADID_hash(&thread_id)); - if (reconnect) { - SSL_free((SSL *)ctx[2]); - SSL_free((SSL *)ctx[3]); - } -#ifdef OPENSSL_SYS_NETWARE - MPKSemaphoreSignal(ThreadSem); -#endif - return (0); -} - -int doit(char *ctx[4]) -{ - SSL_CTX *s_ctx, *c_ctx; - static char cbuf[200], sbuf[200]; - SSL *c_ssl = NULL; - SSL *s_ssl = NULL; - BIO *c_to_s = NULL; - BIO *s_to_c = NULL; - BIO *c_bio = NULL; - BIO *s_bio = NULL; - int c_r, c_w, s_r, s_w; - int c_want, s_want; - int i; - int done = 0; - int c_write, s_write; - int do_server = 0, do_client = 0; - - s_ctx = (SSL_CTX *)ctx[0]; - c_ctx = (SSL_CTX *)ctx[1]; - - if (ctx[2] != NULL) - s_ssl = (SSL *)ctx[2]; - else - s_ssl = SSL_new(s_ctx); - - if (ctx[3] != NULL) - c_ssl = (SSL *)ctx[3]; - else - c_ssl = SSL_new(c_ctx); - - if ((s_ssl == NULL) || (c_ssl == NULL)) - goto err; - - c_to_s = BIO_new(BIO_s_mem()); - s_to_c = BIO_new(BIO_s_mem()); - if ((s_to_c == NULL) || (c_to_s == NULL)) - goto err; - - c_bio = BIO_new(BIO_f_ssl()); - s_bio = BIO_new(BIO_f_ssl()); - if ((c_bio == NULL) || (s_bio == NULL)) - goto err; - - SSL_set_connect_state(c_ssl); - SSL_set_bio(c_ssl, s_to_c, c_to_s); - BIO_set_ssl(c_bio, c_ssl, (ctx[2] == NULL) ? BIO_CLOSE : BIO_NOCLOSE); - - SSL_set_accept_state(s_ssl); - SSL_set_bio(s_ssl, c_to_s, s_to_c); - BIO_set_ssl(s_bio, s_ssl, (ctx[3] == NULL) ? BIO_CLOSE : BIO_NOCLOSE); - - c_r = 0; - s_r = 1; - c_w = 1; - s_w = 0; - c_want = W_WRITE; - s_want = 0; - c_write = 1, s_write = 0; - - /* We can always do writes */ - for (;;) { - do_server = 0; - do_client = 0; - - i = (int)BIO_pending(s_bio); - if ((i && s_r) || s_w) - do_server = 1; - - i = (int)BIO_pending(c_bio); - if ((i && c_r) || c_w) - do_client = 1; - - if (do_server && verbose) { - if (SSL_in_init(s_ssl)) - BIO_printf(bio_stdout, "server waiting in SSL_accept - %s\n", - SSL_state_string_long(s_ssl)); - else if (s_write) - BIO_printf(bio_stdout, "server:SSL_write()\n"); - else - BIO_printf(bio_stdout, "server:SSL_read()\n"); - } - - if (do_client && verbose) { - if (SSL_in_init(c_ssl)) - BIO_printf(bio_stdout, "client waiting in SSL_connect - %s\n", - SSL_state_string_long(c_ssl)); - else if (c_write) - BIO_printf(bio_stdout, "client:SSL_write()\n"); - else - BIO_printf(bio_stdout, "client:SSL_read()\n"); - } - - if (!do_client && !do_server) { - BIO_printf(bio_stdout, "ERROR IN STARTUP\n"); - break; - } - if (do_client && !(done & C_DONE)) { - if (c_write) { - i = BIO_write(c_bio, "hello from client\n", 18); - if (i < 0) { - c_r = 0; - c_w = 0; - if (BIO_should_retry(c_bio)) { - if (BIO_should_read(c_bio)) - c_r = 1; - if (BIO_should_write(c_bio)) - c_w = 1; - } else { - BIO_printf(bio_err, "ERROR in CLIENT\n"); - ERR_print_errors_fp(stderr); - return (1); - } - } else if (i == 0) { - BIO_printf(bio_err, "SSL CLIENT STARTUP FAILED\n"); - return (1); - } else { - /* ok */ - c_write = 0; - } - } else { - i = BIO_read(c_bio, cbuf, 100); - if (i < 0) { - c_r = 0; - c_w = 0; - if (BIO_should_retry(c_bio)) { - if (BIO_should_read(c_bio)) - c_r = 1; - if (BIO_should_write(c_bio)) - c_w = 1; - } else { - BIO_printf(bio_err, "ERROR in CLIENT\n"); - ERR_print_errors_fp(stderr); - return (1); - } - } else if (i == 0) { - BIO_printf(bio_err, "SSL CLIENT STARTUP FAILED\n"); - return (1); - } else { - done |= C_DONE; -#ifdef undef - BIO_printf(bio_stdout, "CLIENT:from server:"); - BIO_write(bio_stdout, cbuf, i); - BIO_flush(bio_stdout); -#endif - } - } - } - - if (do_server && !(done & S_DONE)) { - if (!s_write) { - i = BIO_read(s_bio, sbuf, 100); - if (i < 0) { - s_r = 0; - s_w = 0; - if (BIO_should_retry(s_bio)) { - if (BIO_should_read(s_bio)) - s_r = 1; - if (BIO_should_write(s_bio)) - s_w = 1; - } else { - BIO_printf(bio_err, "ERROR in SERVER\n"); - ERR_print_errors_fp(stderr); - return (1); - } - } else if (i == 0) { - BIO_printf(bio_err, "SSL SERVER STARTUP FAILED\n"); - return (1); - } else { - s_write = 1; - s_w = 1; -#ifdef undef - BIO_printf(bio_stdout, "SERVER:from client:"); - BIO_write(bio_stdout, sbuf, i); - BIO_flush(bio_stdout); -#endif - } - } else { - i = BIO_write(s_bio, "hello from server\n", 18); - if (i < 0) { - s_r = 0; - s_w = 0; - if (BIO_should_retry(s_bio)) { - if (BIO_should_read(s_bio)) - s_r = 1; - if (BIO_should_write(s_bio)) - s_w = 1; - } else { - BIO_printf(bio_err, "ERROR in SERVER\n"); - ERR_print_errors_fp(stderr); - return (1); - } - } else if (i == 0) { - BIO_printf(bio_err, "SSL SERVER STARTUP FAILED\n"); - return (1); - } else { - s_write = 0; - s_r = 1; - done |= S_DONE; - } - } - } - - if ((done & S_DONE) && (done & C_DONE)) - break; -#if defined(OPENSSL_SYS_NETWARE) - ThreadSwitchWithDelay(); -#endif - } - - SSL_set_shutdown(c_ssl, SSL_SENT_SHUTDOWN | SSL_RECEIVED_SHUTDOWN); - SSL_set_shutdown(s_ssl, SSL_SENT_SHUTDOWN | SSL_RECEIVED_SHUTDOWN); - -#ifdef undef - BIO_printf(bio_stdout, "DONE\n"); -#endif - err: - /* - * We have to set the BIO's to NULL otherwise they will be free()ed - * twice. Once when th s_ssl is SSL_free()ed and again when c_ssl is - * SSL_free()ed. This is a hack required because s_ssl and c_ssl are - * sharing the same BIO structure and SSL_set_bio() and SSL_free() - * automatically BIO_free non NULL entries. You should not normally do - * this or be required to do this - */ - - if (s_ssl != NULL) { - s_ssl->rbio = NULL; - s_ssl->wbio = NULL; - } - if (c_ssl != NULL) { - c_ssl->rbio = NULL; - c_ssl->wbio = NULL; - } - - /* The SSL's are optionally freed in the following calls */ - if (c_to_s != NULL) - BIO_free(c_to_s); - if (s_to_c != NULL) - BIO_free(s_to_c); - - if (c_bio != NULL) - BIO_free(c_bio); - if (s_bio != NULL) - BIO_free(s_bio); - return (0); -} - -int verify_callback(int ok, X509_STORE_CTX *ctx) -{ - char *s, buf[256]; - - if (verbose) { - s = X509_NAME_oneline(X509_get_subject_name(ctx->current_cert), - buf, 256); - if (s != NULL) { - if (ok) - BIO_printf(bio_err, "depth=%d %s\n", ctx->error_depth, buf); - else - BIO_printf(bio_err, "depth=%d error=%d %s\n", - ctx->error_depth, ctx->error, buf); - } - } - return (ok); -} - -#define THREAD_STACK_SIZE (16*1024) - -#ifdef OPENSSL_SYS_WIN32 - -static HANDLE *lock_cs; - -void thread_setup(void) -{ - int i; - - lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(HANDLE)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_cs[i] = CreateMutex(NULL, FALSE, NULL); - } - - CRYPTO_set_locking_callback((void (*)(int, int, char *, int)) - win32_locking_callback); - /* id callback defined */ -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - for (i = 0; i < CRYPTO_num_locks(); i++) - CloseHandle(lock_cs[i]); - OPENSSL_free(lock_cs); -} - -void win32_locking_callback(int mode, int type, const char *file, int line) -{ - if (mode & CRYPTO_LOCK) { - WaitForSingleObject(lock_cs[type], INFINITE); - } else { - ReleaseMutex(lock_cs[type]); - } -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - double ret; - SSL_CTX *ssl_ctx[2]; - DWORD thread_id[MAX_THREAD_NUMBER]; - HANDLE thread_handle[MAX_THREAD_NUMBER]; - int i; - SYSTEMTIME start, end; - - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - GetSystemTime(&start); - for (i = 0; i < thread_number; i++) { - thread_handle[i] = CreateThread(NULL, - THREAD_STACK_SIZE, - (LPTHREAD_START_ROUTINE) ndoit, - (void *)ssl_ctx, 0L, &(thread_id[i])); - } - - BIO_printf(bio_stdout, "reaping\n"); - for (i = 0; i < thread_number; i += 50) { - int j; - - j = (thread_number < (i + 50)) ? (thread_number - i) : 50; - - if (WaitForMultipleObjects(j, - (CONST HANDLE *) & (thread_handle[i]), - TRUE, INFINITE) - == WAIT_FAILED) { - BIO_printf(bio_err, "WaitForMultipleObjects failed:%d\n", - GetLastError()); - exit(1); - } - } - GetSystemTime(&end); - - if (start.wDayOfWeek > end.wDayOfWeek) - end.wDayOfWeek += 7; - ret = (end.wDayOfWeek - start.wDayOfWeek) * 24; - - ret = (ret + end.wHour - start.wHour) * 60; - ret = (ret + end.wMinute - start.wMinute) * 60; - ret = (ret + end.wSecond - start.wSecond); - ret += (end.wMilliseconds - start.wMilliseconds) / 1000.0; - - BIO_printf(bio_stdout, "win32 threads done - %.3f seconds\n", ret); -} - -#endif /* OPENSSL_SYS_WIN32 */ - -#ifdef SOLARIS - -static mutex_t *lock_cs; -/* - * static rwlock_t *lock_cs; - */ -static long *lock_count; - -void thread_setup(void) -{ - int i; - - lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(mutex_t)); - lock_count = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(long)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_count[i] = 0; - /* rwlock_init(&(lock_cs[i]),USYNC_THREAD,NULL); */ - mutex_init(&(lock_cs[i]), USYNC_THREAD, NULL); - } - - CRYPTO_set_id_callback(solaris_thread_id); - CRYPTO_set_locking_callback(solaris_locking_callback); -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - - BIO_printf(bio_err, "cleanup\n"); - - for (i = 0; i < CRYPTO_num_locks(); i++) { - /* rwlock_destroy(&(lock_cs[i])); */ - mutex_destroy(&(lock_cs[i])); - BIO_printf(bio_err, "%8ld:%s\n", lock_count[i], CRYPTO_get_lock_name(i)); - } - OPENSSL_free(lock_cs); - OPENSSL_free(lock_count); - - BIO_printf(bio_err, "done cleanup\n"); - -} - -void solaris_locking_callback(int mode, int type, const char *file, int line) -{ -# ifdef undef - BIO_printf(bio_err, "thread=%4d mode=%s lock=%s %s:%d\n", - CRYPTO_thread_id(), - (mode & CRYPTO_LOCK) ? "l" : "u", - (type & CRYPTO_READ) ? "r" : "w", file, line); -# endif - - /*- - if (CRYPTO_LOCK_SSL_CERT == type) - BIO_printf(bio_err,"(t,m,f,l) %ld %d %s %d\n", - CRYPTO_thread_id(), - mode,file,line); - */ - if (mode & CRYPTO_LOCK) { - /*- - if (mode & CRYPTO_READ) - rw_rdlock(&(lock_cs[type])); - else - rw_wrlock(&(lock_cs[type])); */ - - mutex_lock(&(lock_cs[type])); - lock_count[type]++; - } else { -/* rw_unlock(&(lock_cs[type])); */ - mutex_unlock(&(lock_cs[type])); - } -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - SSL_CTX *ssl_ctx[2]; - thread_t thread_ctx[MAX_THREAD_NUMBER]; - int i; - - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - thr_setconcurrency(thread_number); - for (i = 0; i < thread_number; i++) { - thr_create(NULL, THREAD_STACK_SIZE, - (void *(*)())ndoit, (void *)ssl_ctx, 0L, &(thread_ctx[i])); - } - - BIO_printf(bio_stdout, "reaping\n"); - for (i = 0; i < thread_number; i++) { - thr_join(thread_ctx[i], NULL, NULL); - } - -#if 0 /* We can't currently find out the reference amount */ - BIO_printf(bio_stdout, "solaris threads done (%d,%d)\n", - s_ctx->references, c_ctx->references); -#else - BIO_printf(bio_stdout, "solaris threads done\n"); -#endif -} - -void solaris_thread_id(CRYPTO_THREADID *tid) -{ - CRYPTO_THREADID_set_numeric((unsigned long)thr_self()); -} -#endif /* SOLARIS */ - -#ifdef IRIX - -static usptr_t *arena; -static usema_t **lock_cs; - -void thread_setup(void) -{ - int i; - char filename[20]; - - strcpy(filename, "/tmp/mttest.XXXXXX"); - mktemp(filename); - - usconfig(CONF_STHREADIOOFF); - usconfig(CONF_STHREADMALLOCOFF); - usconfig(CONF_INITUSERS, 100); - usconfig(CONF_LOCKTYPE, US_DEBUGPLUS); - arena = usinit(filename); - unlink(filename); - - lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(usema_t *)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_cs[i] = usnewsema(arena, 1); - } - - CRYPTO_set_id_callback(irix_thread_id); - CRYPTO_set_locking_callback(irix_locking_callback); -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - for (i = 0; i < CRYPTO_num_locks(); i++) { - char buf[10]; - - sprintf(buf, "%2d:", i); - usdumpsema(lock_cs[i], stdout, buf); - usfreesema(lock_cs[i], arena); - } - OPENSSL_free(lock_cs); -} - -void irix_locking_callback(int mode, int type, const char *file, int line) -{ - if (mode & CRYPTO_LOCK) { - BIO_printf(bio_stdout, "lock %d\n", type); - uspsema(lock_cs[type]); - } else { - BIO_printf(bio_stdout, "unlock %d\n", type); - usvsema(lock_cs[type]); - } -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - SSL_CTX *ssl_ctx[2]; - int thread_ctx[MAX_THREAD_NUMBER]; - int i; - - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - for (i = 0; i < thread_number; i++) { - thread_ctx[i] = sproc((void (*)())ndoit, - PR_SADDR | PR_SFDS, (void *)ssl_ctx); - } - - BIO_printf(bio_stdout, "reaping\n"); - for (i = 0; i < thread_number; i++) { - wait(NULL); - } - -#if 0 /* We can't currently find out the reference amount */ - BIO_printf(bio_stdout, "irix threads done (%d,%d)\n", - s_ctx->references, c_ctx->references); -#else - BIO_printf(bio_stdout, "irix threads done\n"); -#endif -} - -unsigned long irix_thread_id(void) -{ - CRYPTO_THREADID_set_numeric((unsigned long)getpid()); -} -#endif /* IRIX */ - -#ifdef PTHREADS - -static pthread_mutex_t *lock_cs; -static long *lock_count; - -void thread_setup(void) -{ - int i; - - lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t)); - lock_count = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(long)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_count[i] = 0; - pthread_mutex_init(&(lock_cs[i]), NULL); - } - - CRYPTO_THREADID_set_callback(pthreads_thread_id); - CRYPTO_set_locking_callback(pthreads_locking_callback); -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - BIO_printf(bio_err, "cleanup\n"); - for (i = 0; i < CRYPTO_num_locks(); i++) { - pthread_mutex_destroy(&(lock_cs[i])); - BIO_printf(bio_err, "%8ld:%s\n", lock_count[i], CRYPTO_get_lock_name(i)); - } - OPENSSL_free(lock_cs); - OPENSSL_free(lock_count); - - BIO_printf(bio_err, "done cleanup\n"); -} - -void pthreads_locking_callback(int mode, int type, const char *file, int line) -{ -# ifdef undef - BIO_printf(bio_err, "thread=%4d mode=%s lock=%s %s:%d\n", - CRYPTO_thread_id(), - (mode & CRYPTO_LOCK) ? "l" : "u", - (type & CRYPTO_READ) ? "r" : "w", file, line); -# endif -/*- - if (CRYPTO_LOCK_SSL_CERT == type) - BIO_printf(bio_err,"(t,m,f,l) %ld %d %s %d\n", - CRYPTO_thread_id(), - mode,file,line); -*/ - if (mode & CRYPTO_LOCK) { - pthread_mutex_lock(&(lock_cs[type])); - lock_count[type]++; - } else { - pthread_mutex_unlock(&(lock_cs[type])); - } -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - SSL_CTX *ssl_ctx[2]; - pthread_t thread_ctx[MAX_THREAD_NUMBER]; - int i; - - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - /* - * thr_setconcurrency(thread_number); - */ - for (i = 0; i < thread_number; i++) { - pthread_create(&(thread_ctx[i]), NULL, - (void *(*)())ndoit, (void *)ssl_ctx); - } - - BIO_printf(bio_stdout, "reaping\n"); - for (i = 0; i < thread_number; i++) { - pthread_join(thread_ctx[i], NULL); - } - -#if 0 /* We can't currently find out the reference amount */ - BIO_printf(bio_stdout, "pthreads threads done (%d,%d)\n", - s_ctx->references, c_ctx->references); -#else - BIO_printf(bio_stdout, "pthreads threads done\n"); -#endif -} - -void pthreads_thread_id(CRYPTO_THREADID *tid) -{ - CRYPTO_THREADID_set_numeric(tid, (unsigned long)pthread_self()); -} - -#endif /* PTHREADS */ - -#ifdef OPENSSL_SYS_NETWARE - -void thread_setup(void) -{ - int i; - - lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(MPKMutex)); - lock_count = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(long)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_count[i] = 0; - lock_cs[i] = MPKMutexAlloc("OpenSSL mutex"); - } - - ThreadSem = MPKSemaphoreAlloc("OpenSSL mttest semaphore", 0); - - CRYPTO_set_id_callback(netware_thread_id); - CRYPTO_set_locking_callback(netware_locking_callback); -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - - BIO_printf(bio_stdout, "thread_cleanup\n"); - - for (i = 0; i < CRYPTO_num_locks(); i++) { - MPKMutexFree(lock_cs[i]); - BIO_printf(bio_stdout, "%8ld:%s\n", lock_count[i], CRYPTO_get_lock_name(i)); - } - OPENSSL_free(lock_cs); - OPENSSL_free(lock_count); - - MPKSemaphoreFree(ThreadSem); - - BIO_printf(bio_stdout, "done cleanup\n"); -} - -void netware_locking_callback(int mode, int type, const char *file, int line) -{ - if (mode & CRYPTO_LOCK) { - MPKMutexLock(lock_cs[type]); - lock_count[type]++; - } else - MPKMutexUnlock(lock_cs[type]); -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - SSL_CTX *ssl_ctx[2]; - int i; - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - for (i = 0; i < thread_number; i++) { - BeginThread((void (*)(void *))ndoit, NULL, THREAD_STACK_SIZE, - (void *)ssl_ctx); - ThreadSwitchWithDelay(); - } - - BIO_printf(bio_stdout, "reaping\n"); - - /* loop until all threads have signaled the semaphore */ - for (i = 0; i < thread_number; i++) { - MPKSemaphoreWait(ThreadSem); - } -#if 0 /* We can't currently find out the reference amount */ - BIO_printf(bio_stdout, "netware threads done (%d,%d)\n", - s_ctx->references, c_ctx->references); -#else - BIO_printf(bio_stdout, "netware threads done\n"); -#endif -} - -unsigned long netware_thread_id(void) -{ - CRYPTO_THREADID_set_numeric((unsigned long)GetThreadID()); -} -#endif /* NETWARE */ - -#ifdef BEOS_THREADS - -# include - -static BLocker **lock_cs; -static long *lock_count; - -void thread_setup(void) -{ - int i; - - lock_cs = - (BLocker **) OPENSSL_malloc(CRYPTO_num_locks() * sizeof(BLocker *)); - lock_count = (long *)OPENSSL_malloc(CRYPTO_num_locks() * sizeof(long)); - for (i = 0; i < CRYPTO_num_locks(); i++) { - lock_count[i] = 0; - lock_cs[i] = new BLocker(CRYPTO_get_lock_name(i)); - } - - CRYPTO_set_id_callback((unsigned long (*)())beos_thread_id); - CRYPTO_set_locking_callback(beos_locking_callback); -} - -void thread_cleanup(void) -{ - int i; - - CRYPTO_set_locking_callback(NULL); - BIO_printf(bio_err, "cleanup\n"); - for (i = 0; i < CRYPTO_num_locks(); i++) { - delete lock_cs[i]; - BIO_printf(bio_err, "%8ld:%s\n", lock_count[i], CRYPTO_get_lock_name(i)); - } - OPENSSL_free(lock_cs); - OPENSSL_free(lock_count); - - BIO_printf(bio_err, "done cleanup\n"); -} - -void beos_locking_callback(int mode, int type, const char *file, int line) -{ -# if 0 - BIO_printf(bio_err, "thread=%4d mode=%s lock=%s %s:%d\n", - CRYPTO_thread_id(), - (mode & CRYPTO_LOCK) ? "l" : "u", - (type & CRYPTO_READ) ? "r" : "w", file, line); -# endif - if (mode & CRYPTO_LOCK) { - lock_cs[type]->Lock(); - lock_count[type]++; - } else { - lock_cs[type]->Unlock(); - } -} - -void do_threads(SSL_CTX *s_ctx, SSL_CTX *c_ctx) -{ - SSL_CTX *ssl_ctx[2]; - thread_id thread_ctx[MAX_THREAD_NUMBER]; - int i; - - ssl_ctx[0] = s_ctx; - ssl_ctx[1] = c_ctx; - - for (i = 0; i < thread_number; i++) { - thread_ctx[i] = spawn_thread((thread_func) ndoit, - NULL, B_NORMAL_PRIORITY, - (void *)ssl_ctx); - resume_thread(thread_ctx[i]); - } - - BIO_printf(bio_stdout, "waiting...\n"); - for (i = 0; i < thread_number; i++) { - status_t result; - wait_for_thread(thread_ctx[i], &result); - } - - BIO_printf(bio_stdout, "beos threads done (%d,%d)\n", - s_ctx->references, c_ctx->references); -} - -unsigned long beos_thread_id(void) -{ - unsigned long ret; - - ret = (unsigned long)find_thread(NULL); - return (ret); -} - -#endif /* BEOS_THREADS */ diff --git a/drivers/builtin_openssl2/crypto/ts/ts_rsp_sign.c b/drivers/builtin_openssl2/crypto/ts/ts_rsp_sign.c index 031d872e2c..db6ce3241f 100644 --- a/drivers/builtin_openssl2/crypto/ts/ts_rsp_sign.c +++ b/drivers/builtin_openssl2/crypto/ts/ts_rsp_sign.c @@ -238,7 +238,6 @@ int TS_RESP_CTX_set_def_policy(TS_RESP_CTX *ctx, ASN1_OBJECT *def_policy) int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs) { - int i; if (ctx->certs) { sk_X509_pop_free(ctx->certs, X509_free); @@ -246,14 +245,10 @@ int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs) } if (!certs) return 1; - if (!(ctx->certs = sk_X509_dup(certs))) { + if (!(ctx->certs = X509_chain_up_ref(certs))) { TSerr(TS_F_TS_RESP_CTX_SET_CERTS, ERR_R_MALLOC_FAILURE); return 0; } - for (i = 0; i < sk_X509_num(ctx->certs); ++i) { - X509 *cert = sk_X509_value(ctx->certs, i); - CRYPTO_add(&cert->references, +1, CRYPTO_LOCK_X509); - } return 1; } diff --git a/drivers/builtin_openssl2/crypto/ts/ts_rsp_verify.c b/drivers/builtin_openssl2/crypto/ts/ts_rsp_verify.c index e24b2d5388..29aa5a497e 100644 --- a/drivers/builtin_openssl2/crypto/ts/ts_rsp_verify.c +++ b/drivers/builtin_openssl2/crypto/ts/ts_rsp_verify.c @@ -638,7 +638,7 @@ static int TS_compute_imprint(BIO *data, TS_TST_INFO *tst_info, X509_ALGOR_free(*md_alg); OPENSSL_free(*imprint); *imprint_len = 0; - *imprint = NULL; + *imprint = 0; return 0; } diff --git a/drivers/builtin_openssl2/crypto/ui/ui_openssl.c b/drivers/builtin_openssl2/crypto/ui/ui_openssl.c index a8b3d0cad6..9ab259b8f6 100644 --- a/drivers/builtin_openssl2/crypto/ui/ui_openssl.c +++ b/drivers/builtin_openssl2/crypto/ui/ui_openssl.c @@ -208,7 +208,7 @@ # elif !defined(OPENSSL_SYS_VMS) \ && !defined(OPENSSL_SYS_MSDOS) \ && !defined(OPENSSL_SYS_MACINTOSH_CLASSIC) \ - && !defined(MAC_OS_GUSI_SOURCE) \ + && !defined(MAC_OS_GUSI_SOURCE) \ && !defined(OPENSSL_SYS_VXWORKS) \ && !defined(OPENSSL_SYS_NETWARE) # define TERMIOS diff --git a/drivers/builtin_openssl2/crypto/vms_rms.h b/drivers/builtin_openssl2/crypto/vms_rms.h old mode 100755 new mode 100644 diff --git a/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-mmx.pl b/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-mmx.pl deleted file mode 100644 index 90c2eca53b..0000000000 --- a/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-mmx.pl +++ /dev/null @@ -1,493 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# whirlpool_block_mmx implementation. -# -*SCALE=\(2); # 2 or 8, that is the question:-) Value of 8 results -# in 16KB large table, which is tough on L1 cache, but eliminates -# unaligned references to it. Value of 2 results in 4KB table, but -# 7/8 of references to it are unaligned. AMD cores seem to be -# allergic to the latter, while Intel ones - to former [see the -# table]. I stick to value of 2 for two reasons: 1. smaller table -# minimizes cache trashing and thus mitigates the hazard of side- -# channel leakage similar to AES cache-timing one; 2. performance -# gap among different µ-archs is smaller. -# -# Performance table lists rounded amounts of CPU cycles spent by -# whirlpool_block_mmx routine on single 64 byte input block, i.e. -# smaller is better and asymptotic throughput can be estimated by -# multiplying 64 by CPU clock frequency and dividing by relevant -# value from the given table: -# -# $SCALE=2/8 icc8 gcc3 -# Intel P4 3200/4600 4600(*) 6400 -# Intel PIII 2900/3000 4900 5400 -# AMD K[78] 2500/1800 9900 8200(**) -# -# (*) I've sketched even non-MMX assembler, but for the record -# I've failed to beat the Intel compiler on P4, without using -# MMX that is... -# (**) ... on AMD on the other hand non-MMX assembler was observed -# to perform significantly better, but I figured this MMX -# implementation is even faster anyway, so why bother? As for -# pre-MMX AMD core[s], the improvement coefficient is more -# than likely to vary anyway and I don't know how. But the -# least I know is that gcc-generated code compiled with -# -DL_ENDIAN and -DOPENSSL_SMALL_FOOTPRINT [see C module for -# details] and optimized for Pentium was observed to perform -# *better* on Pentium 100 than unrolled non-MMX assembler -# loop... So we just say that I don't know if maintaining -# non-MMX implementation would actually pay off, but till -# opposite is proved "unlikely" is assumed. - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC,"${dir}","${dir}../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"wp-mmx.pl"); - -sub L() { &data_byte(@_); } -sub LL() -{ if ($SCALE==2) { &data_byte(@_); &data_byte(@_); } - elsif ($SCALE==8) { for ($i=0;$i<8;$i++) { - &data_byte(@_); - unshift(@_,pop(@_)); - } - } - else { die "unvalid SCALE value"; } -} - -sub scale() -{ if ($SCALE==2) { &lea(@_[0],&DWP(0,@_[1],@_[1])); } - elsif ($SCALE==8) { &lea(@_[0],&DWP(0,"",@_[1],8)); } - else { die "unvalid SCALE value"; } -} - -sub row() -{ if ($SCALE==2) { ((8-shift)&7); } - elsif ($SCALE==8) { (8*shift); } - else { die "unvalid SCALE value"; } -} - -$tbl="ebp"; -@mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); - -&function_begin_B("whirlpool_block_mmx"); - &push ("ebp"); - &push ("ebx"); - &push ("esi"); - &push ("edi"); - - &mov ("esi",&wparam(0)); # hash value - &mov ("edi",&wparam(1)); # input data stream - &mov ("ebp",&wparam(2)); # number of chunks in input - - &mov ("eax","esp"); # copy stack pointer - &sub ("esp",128+20); # allocate frame - &and ("esp",-64); # align for cache-line - - &lea ("ebx",&DWP(128,"esp")); - &mov (&DWP(0,"ebx"),"esi"); # save parameter block - &mov (&DWP(4,"ebx"),"edi"); - &mov (&DWP(8,"ebx"),"ebp"); - &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer - - &call (&label("pic_point")); -&set_label("pic_point"); - &blindpop($tbl); - &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl)); - - &xor ("ecx","ecx"); - &xor ("edx","edx"); - - for($i=0;$i<8;$i++) { &movq(@mm[$i],&QWP($i*8,"esi")); } # L=H -&set_label("outerloop"); - for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); } # K=L - for($i=0;$i<8;$i++) { &pxor(@mm[$i],&QWP($i*8,"edi")); } # L^=inp - for($i=0;$i<8;$i++) { &movq(&QWP(64+$i*8,"esp"),@mm[$i]); } # S=L - - &xor ("esi","esi"); - &mov (&DWP(12,"ebx"),"esi"); # zero round counter - -&set_label("round",16); - &movq (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8)); # rc[r] - &mov ("eax",&DWP(0,"esp")); - &mov ("ebx",&DWP(4,"esp")); -for($i=0;$i<8;$i++) { - my $func = ($i==0)? \&movq : \&pxor; - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &shr ("eax",16); - &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); - &$func (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); - &mov ("eax",&DWP(($i+1)*8,"esp")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &$func (@mm[2],&QWP(&row(2),$tbl,"esi",8)); - &$func (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &shr ("ebx",16); - &$func (@mm[4],&QWP(&row(4),$tbl,"esi",8)); - &$func (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); - &mov ("ebx",&DWP(($i+1)*8+4,"esp")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &$func (@mm[6],&QWP(&row(6),$tbl,"esi",8)); - &$func (@mm[7],&QWP(&row(7),$tbl,"edi",8)); - push(@mm,shift(@mm)); -} - - for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); } # K=L - -for($i=0;$i<8;$i++) { - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &shr ("eax",16); - &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); - &pxor (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); - &mov ("eax",&DWP(64+($i+1)*8,"esp")) if ($i<7); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &pxor (@mm[2],&QWP(&row(2),$tbl,"esi",8)); - &pxor (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &shr ("ebx",16); - &pxor (@mm[4],&QWP(&row(4),$tbl,"esi",8)); - &pxor (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); - &mov ("ebx",&DWP(64+($i+1)*8+4,"esp")) if ($i<7); - &scale ("esi","ecx"); - &scale ("edi","edx"); - &pxor (@mm[6],&QWP(&row(6),$tbl,"esi",8)); - &pxor (@mm[7],&QWP(&row(7),$tbl,"edi",8)); - push(@mm,shift(@mm)); -} - &lea ("ebx",&DWP(128,"esp")); - &mov ("esi",&DWP(12,"ebx")); # pull round counter - &add ("esi",1); - &cmp ("esi",10); - &je (&label("roundsdone")); - - &mov (&DWP(12,"ebx"),"esi"); # update round counter - for($i=0;$i<8;$i++) { &movq(&QWP(64+$i*8,"esp"),@mm[$i]); } # S=L - &jmp (&label("round")); - -&set_label("roundsdone",16); - &mov ("esi",&DWP(0,"ebx")); # reload argument block - &mov ("edi",&DWP(4,"ebx")); - &mov ("eax",&DWP(8,"ebx")); - - for($i=0;$i<8;$i++) { &pxor(@mm[$i],&QWP($i*8,"edi")); } # L^=inp - for($i=0;$i<8;$i++) { &pxor(@mm[$i],&QWP($i*8,"esi")); } # L^=H - for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esi"),@mm[$i]); } # H=L - - &lea ("edi",&DWP(64,"edi")); # inp+=64 - &sub ("eax",1); # num-- - &jz (&label("alldone")); - &mov (&DWP(4,"ebx"),"edi"); # update argument block - &mov (&DWP(8,"ebx"),"eax"); - &jmp (&label("outerloop")); - -&set_label("alldone"); - &emms (); - &mov ("esp",&DWP(16,"ebx")); # restore saved stack pointer - &pop ("edi"); - &pop ("esi"); - &pop ("ebx"); - &pop ("ebp"); - &ret (); - -&align(64); -&set_label("table"); - &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); - &LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26); - &LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8); - &LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb); - &LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb); - &LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11); - &LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09); - &LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d); - &LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b); - &LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff); - &LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c); - &LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e); - &LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96); - &LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30); - &LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d); - &LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8); - &LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47); - &LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35); - &LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37); - &LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a); - &LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2); - &LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c); - &LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84); - &LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80); - &LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5); - &LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3); - &LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21); - &LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c); - &LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43); - &LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29); - &LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d); - &LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5); - &LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd); - &LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8); - &LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92); - &LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e); - &LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13); - &LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23); - &LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20); - &LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44); - &LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2); - &LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf); - &LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c); - &LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a); - &LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50); - &LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9); - &LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14); - &LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9); - &LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c); - &LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f); - &LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90); - &LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07); - &LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd); - &LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3); - &LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d); - &LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78); - &LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97); - &LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02); - &LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73); - &LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7); - &LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6); - &LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2); - &LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49); - &LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56); - &LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70); - &LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd); - &LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb); - &LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71); - &LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b); - &LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf); - &LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45); - &LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a); - &LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4); - &LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58); - &LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e); - &LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f); - &LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac); - &LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0); - &LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef); - &LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6); - &LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c); - &LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12); - &LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93); - &LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde); - &LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6); - &LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1); - &LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b); - &LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f); - &LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31); - &LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8); - &LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9); - &LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc); - &LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e); - &LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b); - &LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf); - &LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59); - &LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2); - &LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77); - &LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33); - &LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4); - &LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27); - &LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb); - &LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89); - &LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32); - &LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54); - &LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d); - &LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64); - &LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d); - &LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d); - &LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f); - &LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca); - &LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7); - &LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d); - &LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce); - &LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f); - &LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f); - &LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63); - &LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a); - &LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc); - &LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82); - &LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a); - &LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48); - &LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95); - &LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf); - &LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d); - &LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0); - &LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91); - &LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8); - &LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b); - &LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00); - &LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9); - &LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e); - &LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1); - &LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6); - &LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28); - &LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3); - &LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74); - &LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe); - &LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d); - &LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea); - &LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57); - &LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38); - &LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad); - &LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4); - &LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda); - &LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7); - &LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb); - &LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9); - &LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a); - &LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03); - &LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a); - &LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e); - &LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60); - &LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc); - &LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46); - &LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f); - &LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76); - &LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa); - &LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36); - &LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae); - &LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b); - &LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85); - &LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e); - &LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7); - &LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55); - &LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a); - &LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81); - &LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52); - &LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62); - &LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3); - &LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10); - &LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab); - &LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0); - &LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5); - &LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec); - &LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16); - &LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94); - &LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f); - &LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5); - &LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98); - &LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17); - &LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4); - &LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1); - &LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e); - &LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42); - &LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34); - &LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08); - &LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee); - &LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61); - &LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1); - &LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f); - &LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24); - &LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3); - &LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25); - &LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22); - &LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65); - &LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79); - &LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69); - &LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9); - &LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19); - &LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe); - &LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a); - &LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0); - &LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99); - &LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83); - &LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04); - &LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66); - &LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0); - &LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1); - &LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd); - &LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40); - &LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c); - &LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18); - &LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b); - &LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51); - &LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05); - &LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c); - &LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39); - &LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa); - &LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b); - &LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc); - &LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e); - &LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0); - &LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88); - &LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67); - &LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a); - &LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87); - &LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1); - &LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72); - &LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53); - &LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01); - &LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b); - &LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4); - &LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3); - &LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15); - &LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c); - &LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5); - &LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5); - &LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4); - &LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba); - &LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6); - &LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7); - &LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06); - &LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41); - &LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7); - &LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f); - &LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e); - &LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6); - &LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2); - &LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68); - &LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c); - &LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed); - &LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75); - &LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86); - &LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b); - &LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2); - - &L(0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f); # rc[ROUNDS] - &L(0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52); - &L(0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35); - &L(0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57); - &L(0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda); - &L(0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85); - &L(0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67); - &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); - &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); - &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); - -&function_end_B("whirlpool_block_mmx"); -&asm_finish(); diff --git a/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-x86_64.pl b/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-x86_64.pl deleted file mode 100644 index 24b2ff60c3..0000000000 --- a/drivers/builtin_openssl2/crypto/whrlpool/asm/wp-x86_64.pl +++ /dev/null @@ -1,590 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# whirlpool_block for x86_64. -# -# 2500 cycles per 64-byte input block on AMD64, which is *identical* -# to 32-bit MMX version executed on same CPU. So why did I bother? -# Well, it's faster than gcc 3.3.2 generated code by over 50%, and -# over 80% faster than PathScale 1.4, an "ambitious" commercial -# compiler. Furthermore it surpasses gcc 3.4.3 by 170% and Sun Studio -# 10 - by 360%[!]... What is it with x86_64 compilers? It's not the -# first example when they fail to generate more optimal code, when -# I believe they had *all* chances to... -# -# Note that register and stack frame layout are virtually identical -# to 32-bit MMX version, except that %r8-15 are used instead of -# %mm0-8. You can even notice that K[i] and S[i] are loaded to -# %eax:%ebx as pair of 32-bit values and not as single 64-bit one. -# This is done in order to avoid 64-bit shift penalties on Intel -# EM64T core. Speaking of which! I bet it's possible to improve -# Opteron performance by compressing the table to 2KB and replacing -# unaligned references with complementary rotations [which would -# incidentally replace lea instructions], but it would definitely -# just "kill" EM64T, because it has only 1 shifter/rotator [against -# 3 on Opteron] and which is *unacceptably* slow with 64-bit -# operand. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -sub L() { $code.=".byte ".join(',',@_)."\n"; } -sub LL(){ $code.=".byte ".join(',',@_).",".join(',',@_)."\n"; } - -@mm=("%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15"); - -$func="whirlpool_block"; -$table=".Ltable"; - -$code=<<___; -.text - -.globl $func -.type $func,\@function,3 -.align 16 -$func: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp,%r11 - sub \$128+40,%rsp - and \$-64,%rsp - - lea 128(%rsp),%r10 - mov %rdi,0(%r10) # save parameter block - mov %rsi,8(%r10) - mov %rdx,16(%r10) - mov %r11,32(%r10) # saved stack pointer -.Lprologue: - - mov %r10,%rbx - lea $table(%rip),%rbp - - xor %rcx,%rcx - xor %rdx,%rdx -___ -for($i=0;$i<8;$i++) { $code.="mov $i*8(%rdi),@mm[$i]\n"; } # L=H -$code.=".Louterloop:\n"; -for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L -for($i=0;$i<8;$i++) { $code.="xor $i*8(%rsi),@mm[$i]\n"; } # L^=inp -for($i=0;$i<8;$i++) { $code.="mov @mm[$i],64+$i*8(%rsp)\n"; } # S=L -$code.=<<___; - xor %rsi,%rsi - mov %rsi,24(%rbx) # zero round counter -.align 16 -.Lround: - mov 4096(%rbp,%rsi,8),@mm[0] # rc[r] - mov 0(%rsp),%eax - mov 4(%rsp),%ebx -___ -for($i=0;$i<8;$i++) { - my $func = ($i==0)? "mov" : "xor"; - $code.=<<___; - mov %al,%cl - mov %ah,%dl - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - shr \$16,%eax - xor 0(%rbp,%rsi,8),@mm[0] - $func 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl - mov $i*8+8(%rsp),%eax # ($i+1)*8 - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - $func 6(%rbp,%rsi,8),@mm[2] - $func 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - shr \$16,%ebx - $func 4(%rbp,%rsi,8),@mm[4] - $func 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl - mov $i*8+8+4(%rsp),%ebx # ($i+1)*8+4 - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - $func 2(%rbp,%rsi,8),@mm[6] - $func 1(%rbp,%rdi,8),@mm[7] -___ - push(@mm,shift(@mm)); -} -for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L -for($i=0;$i<8;$i++) { - $code.=<<___; - mov %al,%cl - mov %ah,%dl - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - shr \$16,%eax - xor 0(%rbp,%rsi,8),@mm[0] - xor 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl - `"mov 64+$i*8+8(%rsp),%eax" if($i<7);` # 64+($i+1)*8 - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - xor 6(%rbp,%rsi,8),@mm[2] - xor 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - shr \$16,%ebx - xor 4(%rbp,%rsi,8),@mm[4] - xor 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl - `"mov 64+$i*8+8+4(%rsp),%ebx" if($i<7);` # 64+($i+1)*8+4 - lea (%rcx,%rcx),%rsi - lea (%rdx,%rdx),%rdi - xor 2(%rbp,%rsi,8),@mm[6] - xor 1(%rbp,%rdi,8),@mm[7] -___ - push(@mm,shift(@mm)); -} -$code.=<<___; - lea 128(%rsp),%rbx - mov 24(%rbx),%rsi # pull round counter - add \$1,%rsi - cmp \$10,%rsi - je .Lroundsdone - - mov %rsi,24(%rbx) # update round counter -___ -for($i=0;$i<8;$i++) { $code.="mov @mm[$i],64+$i*8(%rsp)\n"; } # S=L -$code.=<<___; - jmp .Lround -.align 16 -.Lroundsdone: - mov 0(%rbx),%rdi # reload argument block - mov 8(%rbx),%rsi - mov 16(%rbx),%rax -___ -for($i=0;$i<8;$i++) { $code.="xor $i*8(%rsi),@mm[$i]\n"; } # L^=inp -for($i=0;$i<8;$i++) { $code.="xor $i*8(%rdi),@mm[$i]\n"; } # L^=H -for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rdi)\n"; } # H=L -$code.=<<___; - lea 64(%rsi),%rsi # inp+=64 - sub \$1,%rax # num-- - jz .Lalldone - mov %rsi,8(%rbx) # update parameter block - mov %rax,16(%rbx) - jmp .Louterloop -.Lalldone: - mov 32(%rbx),%rsi # restore saved pointer - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lepilogue: - ret -.size $func,.-$func - -.align 64 -.type $table,\@object -$table: -___ - &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); - &LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26); - &LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8); - &LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb); - &LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb); - &LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11); - &LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09); - &LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d); - &LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b); - &LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff); - &LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c); - &LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e); - &LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96); - &LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30); - &LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d); - &LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8); - &LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47); - &LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35); - &LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37); - &LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a); - &LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2); - &LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c); - &LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84); - &LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80); - &LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5); - &LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3); - &LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21); - &LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c); - &LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43); - &LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29); - &LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d); - &LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5); - &LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd); - &LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8); - &LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92); - &LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e); - &LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13); - &LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23); - &LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20); - &LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44); - &LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2); - &LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf); - &LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c); - &LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a); - &LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50); - &LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9); - &LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14); - &LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9); - &LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c); - &LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f); - &LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90); - &LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07); - &LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd); - &LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3); - &LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d); - &LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78); - &LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97); - &LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02); - &LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73); - &LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7); - &LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6); - &LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2); - &LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49); - &LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56); - &LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70); - &LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd); - &LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb); - &LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71); - &LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b); - &LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf); - &LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45); - &LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a); - &LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4); - &LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58); - &LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e); - &LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f); - &LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac); - &LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0); - &LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef); - &LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6); - &LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c); - &LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12); - &LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93); - &LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde); - &LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6); - &LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1); - &LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b); - &LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f); - &LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31); - &LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8); - &LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9); - &LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc); - &LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e); - &LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b); - &LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf); - &LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59); - &LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2); - &LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77); - &LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33); - &LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4); - &LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27); - &LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb); - &LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89); - &LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32); - &LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54); - &LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d); - &LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64); - &LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d); - &LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d); - &LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f); - &LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca); - &LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7); - &LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d); - &LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce); - &LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f); - &LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f); - &LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63); - &LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a); - &LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc); - &LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82); - &LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a); - &LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48); - &LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95); - &LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf); - &LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d); - &LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0); - &LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91); - &LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8); - &LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b); - &LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00); - &LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9); - &LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e); - &LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1); - &LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6); - &LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28); - &LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3); - &LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74); - &LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe); - &LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d); - &LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea); - &LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57); - &LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38); - &LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad); - &LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4); - &LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda); - &LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7); - &LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb); - &LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9); - &LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a); - &LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03); - &LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a); - &LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e); - &LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60); - &LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc); - &LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46); - &LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f); - &LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76); - &LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa); - &LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36); - &LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae); - &LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b); - &LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85); - &LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e); - &LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7); - &LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55); - &LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a); - &LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81); - &LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52); - &LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62); - &LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3); - &LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10); - &LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab); - &LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0); - &LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5); - &LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec); - &LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16); - &LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94); - &LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f); - &LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5); - &LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98); - &LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17); - &LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4); - &LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1); - &LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e); - &LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42); - &LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34); - &LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08); - &LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee); - &LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61); - &LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1); - &LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f); - &LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24); - &LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3); - &LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25); - &LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22); - &LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65); - &LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79); - &LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69); - &LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9); - &LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19); - &LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe); - &LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a); - &LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0); - &LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99); - &LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83); - &LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04); - &LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66); - &LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0); - &LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1); - &LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd); - &LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40); - &LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c); - &LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18); - &LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b); - &LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51); - &LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05); - &LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c); - &LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39); - &LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa); - &LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b); - &LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc); - &LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e); - &LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0); - &LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88); - &LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67); - &LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a); - &LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87); - &LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1); - &LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72); - &LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53); - &LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01); - &LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b); - &LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4); - &LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3); - &LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15); - &LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c); - &LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5); - &LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5); - &LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4); - &LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba); - &LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6); - &LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7); - &LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06); - &LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41); - &LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7); - &LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f); - &LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e); - &LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6); - &LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2); - &LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68); - &LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c); - &LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed); - &LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75); - &LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86); - &LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b); - &LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2); - - &L(0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f); # rc[ROUNDS] - &L(0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52); - &L(0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35); - &L(0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57); - &L(0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda); - &L(0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85); - &L(0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67); - &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); - &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); - &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue - - mov 128+32(%rax),%rax # pull saved stack pointer - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_$func - .rva .LSEH_end_$func - .rva .LSEH_info_$func - -.section .xdata -.align 8 -.LSEH_info_$func: - .byte 9,0,0,0 - .rva se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/drivers/builtin_openssl2/crypto/whrlpool/wp_test.c b/drivers/builtin_openssl2/crypto/whrlpool/wp_test.c deleted file mode 100644 index 2ea6251a58..0000000000 --- a/drivers/builtin_openssl2/crypto/whrlpool/wp_test.c +++ /dev/null @@ -1,241 +0,0 @@ -/* ==================================================================== - * Copyright (c) 2005 The OpenSSL Project. All rights reserved. - * ==================================================================== - */ -#include -#include -#include - -#include -#include - -#if defined(OPENSSL_NO_WHIRLPOOL) -int main(int argc, char *argv[]) -{ - printf("No Whirlpool support\n"); - return (0); -} -#else - -/* ISO/IEC 10118-3 test vector set */ -unsigned char iso_test_1[WHIRLPOOL_DIGEST_LENGTH] = { - 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, - 0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26, - 0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8, - 0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7, - 0x3E, 0x83, 0xBE, 0x69, 0x8B, 0x28, 0x8F, 0xEB, - 0xCF, 0x88, 0xE3, 0xE0, 0x3C, 0x4F, 0x07, 0x57, - 0xEA, 0x89, 0x64, 0xE5, 0x9B, 0x63, 0xD9, 0x37, - 0x08, 0xB1, 0x38, 0xCC, 0x42, 0xA6, 0x6E, 0xB3 -}; - -unsigned char iso_test_2[WHIRLPOOL_DIGEST_LENGTH] = { - 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F, - 0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7, - 0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69, - 0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42, - 0xD1, 0x1B, 0xC6, 0x45, 0x41, 0x3A, 0xEF, 0xF6, - 0x3A, 0x42, 0x39, 0x1A, 0x39, 0x14, 0x5A, 0x59, - 0x1A, 0x92, 0x20, 0x0D, 0x56, 0x01, 0x95, 0xE5, - 0x3B, 0x47, 0x85, 0x84, 0xFD, 0xAE, 0x23, 0x1A -}; - -unsigned char iso_test_3[WHIRLPOOL_DIGEST_LENGTH] = { - 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB, - 0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B, - 0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72, - 0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C, - 0x71, 0x81, 0xEE, 0xBD, 0xB6, 0xC5, 0x7E, 0x27, - 0x7D, 0x0E, 0x34, 0x95, 0x71, 0x14, 0xCB, 0xD6, - 0xC7, 0x97, 0xFC, 0x9D, 0x95, 0xD8, 0xB5, 0x82, - 0xD2, 0x25, 0x29, 0x20, 0x76, 0xD4, 0xEE, 0xF5 -}; - -unsigned char iso_test_4[WHIRLPOOL_DIGEST_LENGTH] = { - 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, - 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, - 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, - 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, - 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, - 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6, - 0x92, 0xED, 0x92, 0x00, 0x52, 0x83, 0x8F, 0x33, - 0x62, 0xE8, 0x6D, 0xBD, 0x37, 0xA8, 0x90, 0x3E -}; - -unsigned char iso_test_5[WHIRLPOOL_DIGEST_LENGTH] = { - 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9, - 0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A, - 0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5, - 0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B, - 0x08, 0xBF, 0x2A, 0x92, 0x51, 0xC3, 0x0B, 0x6A, - 0x0B, 0x8A, 0xAE, 0x86, 0x17, 0x7A, 0xB4, 0xA6, - 0xF6, 0x8F, 0x67, 0x3E, 0x72, 0x07, 0x86, 0x5D, - 0x5D, 0x98, 0x19, 0xA3, 0xDB, 0xA4, 0xEB, 0x3B -}; - -unsigned char iso_test_6[WHIRLPOOL_DIGEST_LENGTH] = { - 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B, - 0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90, - 0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC, - 0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E, - 0x08, 0xEB, 0xA2, 0x66, 0x29, 0x12, 0x9D, 0x8F, - 0xB7, 0xCB, 0x57, 0x21, 0x1B, 0x92, 0x81, 0xA6, - 0x55, 0x17, 0xCC, 0x87, 0x9D, 0x7B, 0x96, 0x21, - 0x42, 0xC6, 0x5F, 0x5A, 0x7A, 0xF0, 0x14, 0x67 -}; - -unsigned char iso_test_7[WHIRLPOOL_DIGEST_LENGTH] = { - 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D, - 0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0, - 0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6, - 0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29, - 0x4D, 0x5B, 0xD8, 0xDF, 0x2A, 0x6C, 0x44, 0xE5, - 0x38, 0xCD, 0x04, 0x7B, 0x26, 0x81, 0xA5, 0x1A, - 0x2C, 0x60, 0x48, 0x1E, 0x88, 0xC5, 0xA2, 0x0B, - 0x2C, 0x2A, 0x80, 0xCF, 0x3A, 0x9A, 0x08, 0x3B -}; - -unsigned char iso_test_8[WHIRLPOOL_DIGEST_LENGTH] = { - 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, - 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, - 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, - 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69, - 0x16, 0xBD, 0xC8, 0x03, 0x1B, 0xC5, 0xBE, 0x1B, - 0x7B, 0x94, 0x76, 0x39, 0xFE, 0x05, 0x0B, 0x56, - 0x93, 0x9B, 0xAA, 0xA0, 0xAD, 0xFF, 0x9A, 0xE6, - 0x74, 0x5B, 0x7B, 0x18, 0x1C, 0x3B, 0xE3, 0xFD -}; - -unsigned char iso_test_9[WHIRLPOOL_DIGEST_LENGTH] = { - 0x0C, 0x99, 0x00, 0x5B, 0xEB, 0x57, 0xEF, 0xF5, - 0x0A, 0x7C, 0xF0, 0x05, 0x56, 0x0D, 0xDF, 0x5D, - 0x29, 0x05, 0x7F, 0xD8, 0x6B, 0x20, 0xBF, 0xD6, - 0x2D, 0xEC, 0xA0, 0xF1, 0xCC, 0xEA, 0x4A, 0xF5, - 0x1F, 0xC1, 0x54, 0x90, 0xED, 0xDC, 0x47, 0xAF, - 0x32, 0xBB, 0x2B, 0x66, 0xC3, 0x4F, 0xF9, 0xAD, - 0x8C, 0x60, 0x08, 0xAD, 0x67, 0x7F, 0x77, 0x12, - 0x69, 0x53, 0xB2, 0x26, 0xE4, 0xED, 0x8B, 0x01 -}; - -int main(int argc, char *argv[]) -{ - unsigned char md[WHIRLPOOL_DIGEST_LENGTH]; - int i; - WHIRLPOOL_CTX ctx; - -# ifdef OPENSSL_IA32_SSE2 - /* - * Alternative to this is to call OpenSSL_add_all_algorithms... The below - * code is retained exclusively for debugging purposes. - */ - { - char *env; - - if ((env = getenv("OPENSSL_ia32cap"))) - OPENSSL_ia32cap = strtoul(env, NULL, 0); - } -# endif - - fprintf(stdout, "Testing Whirlpool "); - - WHIRLPOOL("", 0, md); - if (memcmp(md, iso_test_1, sizeof(iso_test_1))) { - fflush(stdout); - fprintf(stderr, "\nTEST 1 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("a", 1, md); - if (memcmp(md, iso_test_2, sizeof(iso_test_2))) { - fflush(stdout); - fprintf(stderr, "\nTEST 2 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("abc", 3, md); - if (memcmp(md, iso_test_3, sizeof(iso_test_3))) { - fflush(stdout); - fprintf(stderr, "\nTEST 3 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("message digest", 14, md); - if (memcmp(md, iso_test_4, sizeof(iso_test_4))) { - fflush(stdout); - fprintf(stderr, "\nTEST 4 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("abcdefghijklmnopqrstuvwxyz", 26, md); - if (memcmp(md, iso_test_5, sizeof(iso_test_5))) { - fflush(stdout); - fprintf(stderr, "\nTEST 5 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" "0123456789", 62, md); - if (memcmp(md, iso_test_6, sizeof(iso_test_6))) { - fflush(stdout); - fprintf(stderr, "\nTEST 6 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("1234567890" "1234567890" "1234567890" "1234567890" - "1234567890" "1234567890" "1234567890" "1234567890", 80, md); - if (memcmp(md, iso_test_7, sizeof(iso_test_7))) { - fflush(stdout); - fprintf(stderr, "\nTEST 7 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL("abcdbcdecdefdefgefghfghighijhijk", 32, md); - if (memcmp(md, iso_test_8, sizeof(iso_test_8))) { - fflush(stdout); - fprintf(stderr, "\nTEST 8 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - WHIRLPOOL_Init(&ctx); - for (i = 0; i < 1000000; i += 288) - WHIRLPOOL_Update(&ctx, "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" - "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa", - (1000000 - i) < 288 ? 1000000 - i : 288); - WHIRLPOOL_Final(md, &ctx); - if (memcmp(md, iso_test_9, sizeof(iso_test_9))) { - fflush(stdout); - fprintf(stderr, "\nTEST 9 of 9 failed.\n"); - return 1; - } else - fprintf(stdout, "."); - fflush(stdout); - - fprintf(stdout, " passed.\n"); - fflush(stdout); - - return 0; -} -#endif diff --git a/drivers/builtin_openssl2/crypto/x509/vpm_int.h b/drivers/builtin_openssl2/crypto/x509/vpm_int.h new file mode 100644 index 0000000000..9c55defc51 --- /dev/null +++ b/drivers/builtin_openssl2/crypto/x509/vpm_int.h @@ -0,0 +1,70 @@ +/* vpm_int.h */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project + * 2013. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +/* internal only structure to hold additional X509_VERIFY_PARAM data */ + +struct X509_VERIFY_PARAM_ID_st { + STACK_OF(OPENSSL_STRING) *hosts; /* Set of acceptable names */ + unsigned int hostflags; /* Flags to control matching features */ + char *peername; /* Matching hostname in peer certificate */ + char *email; /* If not NULL email address to match */ + size_t emaillen; + unsigned char *ip; /* If not NULL IP address to match */ + size_t iplen; /* Length of IP address */ +}; diff --git a/drivers/builtin_openssl2/crypto/x509/x509_cmp.c b/drivers/builtin_openssl2/crypto/x509/x509_cmp.c index 5792e7f51d..49c71b9128 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_cmp.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_cmp.c @@ -180,7 +180,6 @@ unsigned long X509_subject_name_hash_old(X509 *x) int X509_cmp(const X509 *a, const X509 *b) { int rv; - /* ensure hash is valid */ X509_check_purpose((X509 *)a, -1, 0); X509_check_purpose((X509 *)b, -1, 0); @@ -352,3 +351,148 @@ int X509_check_private_key(X509 *x, EVP_PKEY *k) return 1; return 0; } + +/* + * Check a suite B algorithm is permitted: pass in a public key and the NID + * of its signature (or 0 if no signature). The pflags is a pointer to a + * flags field which must contain the suite B verification flags. + */ + +#ifndef OPENSSL_NO_EC + +static int check_suite_b(EVP_PKEY *pkey, int sign_nid, unsigned long *pflags) +{ + const EC_GROUP *grp = NULL; + int curve_nid; + if (pkey && pkey->type == EVP_PKEY_EC) + grp = EC_KEY_get0_group(pkey->pkey.ec); + if (!grp) + return X509_V_ERR_SUITE_B_INVALID_ALGORITHM; + curve_nid = EC_GROUP_get_curve_name(grp); + /* Check curve is consistent with LOS */ + if (curve_nid == NID_secp384r1) { /* P-384 */ + /* + * Check signature algorithm is consistent with curve. + */ + if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA384) + return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM; + if (!(*pflags & X509_V_FLAG_SUITEB_192_LOS)) + return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED; + /* If we encounter P-384 we cannot use P-256 later */ + *pflags &= ~X509_V_FLAG_SUITEB_128_LOS_ONLY; + } else if (curve_nid == NID_X9_62_prime256v1) { /* P-256 */ + if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA256) + return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM; + if (!(*pflags & X509_V_FLAG_SUITEB_128_LOS_ONLY)) + return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED; + } else + return X509_V_ERR_SUITE_B_INVALID_CURVE; + + return X509_V_OK; +} + +int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain, + unsigned long flags) +{ + int rv, i, sign_nid; + EVP_PKEY *pk = NULL; + unsigned long tflags; + if (!(flags & X509_V_FLAG_SUITEB_128_LOS)) + return X509_V_OK; + tflags = flags; + /* If no EE certificate passed in must be first in chain */ + if (x == NULL) { + x = sk_X509_value(chain, 0); + i = 1; + } else + i = 0; + + if (X509_get_version(x) != 2) { + rv = X509_V_ERR_SUITE_B_INVALID_VERSION; + /* Correct error depth */ + i = 0; + goto end; + } + + pk = X509_get_pubkey(x); + /* Check EE key only */ + rv = check_suite_b(pk, -1, &tflags); + if (rv != X509_V_OK) { + /* Correct error depth */ + i = 0; + goto end; + } + for (; i < sk_X509_num(chain); i++) { + sign_nid = X509_get_signature_nid(x); + x = sk_X509_value(chain, i); + if (X509_get_version(x) != 2) { + rv = X509_V_ERR_SUITE_B_INVALID_VERSION; + goto end; + } + EVP_PKEY_free(pk); + pk = X509_get_pubkey(x); + rv = check_suite_b(pk, sign_nid, &tflags); + if (rv != X509_V_OK) + goto end; + } + + /* Final check: root CA signature */ + rv = check_suite_b(pk, X509_get_signature_nid(x), &tflags); + end: + if (pk) + EVP_PKEY_free(pk); + if (rv != X509_V_OK) { + /* Invalid signature or LOS errors are for previous cert */ + if ((rv == X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM + || rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED) && i) + i--; + /* + * If we have LOS error and flags changed then we are signing P-384 + * with P-256. Use more meaninggul error. + */ + if (rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED && flags != tflags) + rv = X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256; + if (perror_depth) + *perror_depth = i; + } + return rv; +} + +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags) +{ + int sign_nid; + if (!(flags & X509_V_FLAG_SUITEB_128_LOS)) + return X509_V_OK; + sign_nid = OBJ_obj2nid(crl->crl->sig_alg->algorithm); + return check_suite_b(pk, sign_nid, &flags); +} + +#else +int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain, + unsigned long flags) +{ + return 0; +} + +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags) +{ + return 0; +} + +#endif +/* + * Not strictly speaking an "up_ref" as a STACK doesn't have a reference + * count but it has the same effect by duping the STACK and upping the ref of + * each X509 structure. + */ +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain) +{ + STACK_OF(X509) *ret; + int i; + ret = sk_X509_dup(chain); + for (i = 0; i < sk_X509_num(ret); i++) { + X509 *x = sk_X509_value(ret, i); + CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); + } + return ret; +} diff --git a/drivers/builtin_openssl2/crypto/x509/x509_err.c b/drivers/builtin_openssl2/crypto/x509/x509_err.c index 61a19f7410..43cde18e49 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_err.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_err.c @@ -1,6 +1,6 @@ /* crypto/x509/x509_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2012 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -88,6 +88,7 @@ static ERR_STRING_DATA X509_str_functs[] = { {ERR_FUNC(X509_F_X509_ATTRIBUTE_GET0_DATA), "X509_ATTRIBUTE_get0_data"}, {ERR_FUNC(X509_F_X509_ATTRIBUTE_SET1_DATA), "X509_ATTRIBUTE_set1_data"}, {ERR_FUNC(X509_F_X509_CHECK_PRIVATE_KEY), "X509_check_private_key"}, + {ERR_FUNC(X509_F_X509_CRL_DIFF), "X509_CRL_diff"}, {ERR_FUNC(X509_F_X509_CRL_PRINT_FP), "X509_CRL_print_fp"}, {ERR_FUNC(X509_F_X509_EXTENSION_CREATE_BY_NID), "X509_EXTENSION_create_by_NID"}, @@ -131,22 +132,29 @@ static ERR_STRING_DATA X509_str_functs[] = { }; static ERR_STRING_DATA X509_str_reasons[] = { + {ERR_REASON(X509_R_AKID_MISMATCH), "akid mismatch"}, {ERR_REASON(X509_R_BAD_X509_FILETYPE), "bad x509 filetype"}, {ERR_REASON(X509_R_BASE64_DECODE_ERROR), "base64 decode error"}, {ERR_REASON(X509_R_CANT_CHECK_DH_KEY), "cant check dh key"}, {ERR_REASON(X509_R_CERT_ALREADY_IN_HASH_TABLE), "cert already in hash table"}, + {ERR_REASON(X509_R_CRL_ALREADY_DELTA), "crl already delta"}, + {ERR_REASON(X509_R_CRL_VERIFY_FAILURE), "crl verify failure"}, {ERR_REASON(X509_R_ERR_ASN1_LIB), "err asn1 lib"}, + {ERR_REASON(X509_R_IDP_MISMATCH), "idp mismatch"}, {ERR_REASON(X509_R_INVALID_DIRECTORY), "invalid directory"}, {ERR_REASON(X509_R_INVALID_FIELD_NAME), "invalid field name"}, {ERR_REASON(X509_R_INVALID_TRUST), "invalid trust"}, + {ERR_REASON(X509_R_ISSUER_MISMATCH), "issuer mismatch"}, {ERR_REASON(X509_R_KEY_TYPE_MISMATCH), "key type mismatch"}, {ERR_REASON(X509_R_KEY_VALUES_MISMATCH), "key values mismatch"}, {ERR_REASON(X509_R_LOADING_CERT_DIR), "loading cert dir"}, {ERR_REASON(X509_R_LOADING_DEFAULTS), "loading defaults"}, {ERR_REASON(X509_R_METHOD_NOT_SUPPORTED), "method not supported"}, + {ERR_REASON(X509_R_NEWER_CRL_NOT_NEWER), "newer crl not newer"}, {ERR_REASON(X509_R_NO_CERT_SET_FOR_US_TO_VERIFY), "no cert set for us to verify"}, + {ERR_REASON(X509_R_NO_CRL_NUMBER), "no crl number"}, {ERR_REASON(X509_R_PUBLIC_KEY_DECODE_ERROR), "public key decode error"}, {ERR_REASON(X509_R_PUBLIC_KEY_ENCODE_ERROR), "public key encode error"}, {ERR_REASON(X509_R_SHOULD_RETRY), "should retry"}, diff --git a/drivers/builtin_openssl2/crypto/x509/x509_lu.c b/drivers/builtin_openssl2/crypto/x509/x509_lu.c index 8084c4a474..50120a4d70 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_lu.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_lu.c @@ -238,6 +238,19 @@ void X509_STORE_free(X509_STORE *vfy) if (vfy == NULL) return; + i = CRYPTO_add(&vfy->references, -1, CRYPTO_LOCK_X509_STORE); +#ifdef REF_PRINT + REF_PRINT("X509_STORE", vfy); +#endif + if (i > 0) + return; +#ifdef REF_CHECK + if (i < 0) { + fprintf(stderr, "X509_STORE_free, bad reference count\n"); + abort(); /* ok */ + } +#endif + sk = vfy->get_cert_methods; for (i = 0; i < sk_X509_LOOKUP_num(sk); i++) { lu = sk_X509_LOOKUP_value(sk, i); @@ -679,6 +692,19 @@ void X509_STORE_set_verify_cb(X509_STORE *ctx, ctx->verify_cb = verify_cb; } +void X509_STORE_set_lookup_crls_cb(X509_STORE *ctx, + STACK_OF(X509_CRL) *(*cb) (X509_STORE_CTX + *ctx, + X509_NAME *nm)) +{ + ctx->lookup_crls = cb; +} + +X509_STORE *X509_STORE_CTX_get0_store(X509_STORE_CTX *ctx) +{ + return ctx->ctx; +} + IMPLEMENT_STACK_OF(X509_LOOKUP) IMPLEMENT_STACK_OF(X509_OBJECT) diff --git a/drivers/builtin_openssl2/crypto/x509/x509_set.c b/drivers/builtin_openssl2/crypto/x509/x509_set.c index 4645777634..5b802bd6c7 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_set.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_set.c @@ -67,6 +67,11 @@ int X509_set_version(X509 *x, long version) { if (x == NULL) return (0); + if (version == 0) { + M_ASN1_INTEGER_free(x->cert_info->version); + x->cert_info->version = NULL; + return (1); + } if (x->cert_info->version == NULL) { if ((x->cert_info->version = M_ASN1_INTEGER_new()) == NULL) return (0); diff --git a/drivers/builtin_openssl2/crypto/x509/x509_trs.c b/drivers/builtin_openssl2/crypto/x509/x509_trs.c index 7e444795a5..11e0763403 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_trs.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_trs.c @@ -119,6 +119,14 @@ int X509_check_trust(X509 *x, int id, int flags) int idx; if (id == -1) return 1; + /* We get this as a default value */ + if (id == 0) { + int rv; + rv = obj_trust(NID_anyExtendedKeyUsage, x, 0); + if (rv != X509_TRUST_UNTRUSTED) + return rv; + return trust_compat(NULL, x, 0); + } idx = X509_TRUST_get_by_id(id); if (idx == -1) return default_trust(id, x, flags); diff --git a/drivers/builtin_openssl2/crypto/x509/x509_txt.c b/drivers/builtin_openssl2/crypto/x509/x509_txt.c index d834180ec4..3d46d3ff83 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_txt.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_txt.c @@ -184,6 +184,26 @@ const char *X509_verify_cert_error_string(long n) case X509_V_ERR_CRL_PATH_VALIDATION_ERROR: return ("CRL path validation error"); + case X509_V_ERR_SUITE_B_INVALID_VERSION: + return ("Suite B: certificate version invalid"); + case X509_V_ERR_SUITE_B_INVALID_ALGORITHM: + return ("Suite B: invalid public key algorithm"); + case X509_V_ERR_SUITE_B_INVALID_CURVE: + return ("Suite B: invalid ECC curve"); + case X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM: + return ("Suite B: invalid signature algorithm"); + case X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED: + return ("Suite B: curve not allowed for this LOS"); + case X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256: + return ("Suite B: cannot sign P-384 with P-256"); + + case X509_V_ERR_HOSTNAME_MISMATCH: + return ("Hostname mismatch"); + case X509_V_ERR_EMAIL_MISMATCH: + return ("Email address mismatch"); + case X509_V_ERR_IP_ADDRESS_MISMATCH: + return ("IP address mismatch"); + default: BIO_snprintf(buf, sizeof buf, "error number %ld", n); return (buf); diff --git a/drivers/builtin_openssl2/crypto/x509/x509_vfy.c b/drivers/builtin_openssl2/crypto/x509/x509_vfy.c index 3bad523f71..4d34dbac93 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_vfy.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_vfy.c @@ -69,6 +69,7 @@ #include #include #include +#include "vpm_int.h" /* CRL score values */ @@ -113,6 +114,7 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer); static X509 *find_issuer(X509_STORE_CTX *ctx, STACK_OF(X509) *sk, X509 *x); static int check_chain_extensions(X509_STORE_CTX *ctx); static int check_name_constraints(X509_STORE_CTX *ctx); +static int check_id(X509_STORE_CTX *ctx); static int check_trust(X509_STORE_CTX *ctx); static int check_revocation(X509_STORE_CTX *ctx); static int check_cert(X509_STORE_CTX *ctx); @@ -148,6 +150,40 @@ static int x509_subject_cmp(X509 **a, X509 **b) return X509_subject_name_cmp(*a, *b); } #endif +/* Return 1 is a certificate is self signed */ +static int cert_self_signed(X509 *x) +{ + X509_check_purpose(x, -1, 0); + if (x->ex_flags & EXFLAG_SS) + return 1; + else + return 0; +} + +/* Given a certificate try and find an exact match in the store */ + +static X509 *lookup_cert_match(X509_STORE_CTX *ctx, X509 *x) +{ + STACK_OF(X509) *certs; + X509 *xtmp = NULL; + int i; + /* Lookup all certs with matching subject name */ + certs = ctx->lookup_certs(ctx, X509_get_subject_name(x)); + if (certs == NULL) + return NULL; + /* Look for exact match */ + for (i = 0; i < sk_X509_num(certs); i++) { + xtmp = sk_X509_value(certs, i); + if (!X509_cmp(xtmp, x)) + break; + } + if (i < sk_X509_num(certs)) + CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509); + else + xtmp = NULL; + sk_X509_pop_free(certs, X509_free); + return xtmp; +} int X509_verify_cert(X509_STORE_CTX *ctx) { @@ -158,6 +194,9 @@ int X509_verify_cert(X509_STORE_CTX *ctx) int num, j, retry; int (*cb) (int xok, X509_STORE_CTX *xctx); STACK_OF(X509) *sktmp = NULL; + int trust = X509_TRUST_UNTRUSTED; + int err; + if (ctx->cert == NULL) { X509err(X509_F_X509_VERIFY_CERT, X509_R_NO_CERT_SET_FOR_US_TO_VERIFY); return -1; @@ -180,7 +219,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (((ctx->chain = sk_X509_new_null()) == NULL) || (!sk_X509_push(ctx->chain, ctx->cert))) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } CRYPTO_add(&ctx->cert->references, 1, CRYPTO_LOCK_X509); ctx->last_untrusted = 1; @@ -189,7 +229,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (ctx->untrusted != NULL && (sktmp = sk_X509_dup(ctx->untrusted)) == NULL) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } num = sk_X509_num(ctx->chain); @@ -205,8 +246,24 @@ int X509_verify_cert(X509_STORE_CTX *ctx) * later. */ /* If we are self signed, we break */ - if (ctx->check_issued(ctx, x, x)) + if (cert_self_signed(x)) break; + /* + * If asked see if we can find issuer in trusted store first + */ + if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) { + ok = ctx->get_issuer(&xtmp, ctx, x); + if (ok < 0) + goto err; + /* + * If successful for now free up cert so it will be picked up + * again later. + */ + if (ok > 0) { + X509_free(xtmp); + break; + } + } /* If we were passed a cert chain, use it first */ if (ctx->untrusted != NULL) { @@ -214,7 +271,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (xtmp != NULL) { if (!sk_X509_push(ctx->chain, xtmp)) { X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - goto end; + ok = -1; + goto err; } CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509); (void)sk_X509_delete_ptr(sktmp, xtmp); @@ -244,7 +302,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) */ i = sk_X509_num(ctx->chain); x = sk_X509_value(ctx->chain, i - 1); - if (ctx->check_issued(ctx, x, x)) { + if (cert_self_signed(x)) { /* we have a self signed certificate */ if (sk_X509_num(ctx->chain) == 1) { /* @@ -262,7 +320,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) bad_chain = 1; ok = cb(0, ctx); if (!ok) - goto end; + goto err; } else { /* * We have a match: replace certificate with store @@ -290,35 +348,46 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (depth < num) break; /* If we are self signed, we break */ - if (ctx->check_issued(ctx, x, x)) + if (cert_self_signed(x)) break; ok = ctx->get_issuer(&xtmp, ctx, x); + if (ok < 0) - return ok; + goto err; if (ok == 0) break; x = xtmp; if (!sk_X509_push(ctx->chain, x)) { X509_free(xtmp); X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - return 0; + ok = -1; + goto err; } num++; } + /* we now have our chain, lets check it... */ + if ((trust = check_trust(ctx)) == X509_TRUST_REJECTED) { + /* Callback already issued */ + ok = 0; + goto err; + } + /* - * If we haven't got a least one certificate from our store then check - * if there is an alternative chain that could be used. We only do this - * if the user hasn't switched off alternate chain checking + * If it's not explicitly trusted then check if there is an alternative + * chain that could be used. We only do this if we haven't already + * checked via TRUSTED_FIRST and the user hasn't switched off alternate + * chain checking */ retry = 0; - if (num == ctx->last_untrusted && - !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) { + if (trust != X509_TRUST_TRUSTED + && !(ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) + && !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) { while (j-- > 1) { xtmp2 = sk_X509_value(ctx->chain, j - 1); ok = ctx->get_issuer(&xtmp, ctx, xtmp2); if (ok < 0) - goto end; + goto err; /* Check if we found an alternate chain */ if (ok > 0) { /* @@ -343,8 +412,12 @@ int X509_verify_cert(X509_STORE_CTX *ctx) } } while (retry); - /* Is last certificate looked up self signed? */ - if (!ctx->check_issued(ctx, x, x)) { + /* + * If not explicitly trusted then indicate error unless it's a single + * self signed certificate in which case we've indicated an error already + * and set bad_chain == 1 + */ + if (trust != X509_TRUST_TRUSTED && !bad_chain) { if ((chain_ss == NULL) || !ctx->check_issued(ctx, x, chain_ss)) { if (ctx->last_untrusted >= num) ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY; @@ -365,29 +438,26 @@ int X509_verify_cert(X509_STORE_CTX *ctx) bad_chain = 1; ok = cb(0, ctx); if (!ok) - goto end; + goto err; } /* We have the chain complete: now we need to check its purpose */ ok = check_chain_extensions(ctx); if (!ok) - goto end; + goto err; /* Check name constraints */ ok = check_name_constraints(ctx); if (!ok) - goto end; - - /* The chain extensions are OK: check trust */ + goto err; - if (param->trust > 0) - ok = check_trust(ctx); + ok = check_id(ctx); if (!ok) - goto end; + goto err; /* We may as well copy down any DSA parameters that are required */ X509_get_pubkey_parameters(NULL, ctx->chain); @@ -399,7 +469,17 @@ int X509_verify_cert(X509_STORE_CTX *ctx) ok = ctx->check_revocation(ctx); if (!ok) - goto end; + goto err; + + err = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain, + ctx->param->flags); + if (err != X509_V_OK) { + ctx->error = err; + ctx->current_cert = sk_X509_value(ctx->chain, ctx->error_depth); + ok = cb(0, ctx); + if (!ok) + goto err; + } /* At this point, we have a chain and need to verify it */ if (ctx->verify != NULL) @@ -407,25 +487,28 @@ int X509_verify_cert(X509_STORE_CTX *ctx) else ok = internal_verify(ctx); if (!ok) - goto end; + goto err; #ifndef OPENSSL_NO_RFC3779 /* RFC 3779 path validation, now that CRL check has been done */ ok = v3_asid_validate_path(ctx); if (!ok) - goto end; + goto err; ok = v3_addr_validate_path(ctx); if (!ok) - goto end; + goto err; #endif /* If we get this far evaluate policies */ if (!bad_chain && (ctx->param->flags & X509_V_FLAG_POLICY_CHECK)) ok = ctx->check_policy(ctx); if (!ok) - goto end; + goto err; if (0) { - end: + err: + /* Ensure we return an error */ + if (ok > 0) + ok = 0; X509_get_pubkey_parameters(NULL, ctx->chain); } if (sktmp != NULL) @@ -467,7 +550,6 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer) ctx->current_cert = x; ctx->current_issuer = issuer; return ctx->verify_cb(0, ctx); - return 0; } /* Alternative lookup method: look from a STACK stored in other_ctx */ @@ -667,30 +749,101 @@ static int check_name_constraints(X509_STORE_CTX *ctx) return 1; } -static int check_trust(X509_STORE_CTX *ctx) +static int check_id_error(X509_STORE_CTX *ctx, int errcode) { -#ifdef OPENSSL_NO_CHAIN_VERIFY + ctx->error = errcode; + ctx->current_cert = ctx->cert; + ctx->error_depth = 0; + return ctx->verify_cb(0, ctx); +} + +static int check_hosts(X509 *x, X509_VERIFY_PARAM_ID *id) +{ + int i; + int n = sk_OPENSSL_STRING_num(id->hosts); + char *name; + + if (id->peername != NULL) { + OPENSSL_free(id->peername); + id->peername = NULL; + } + for (i = 0; i < n; ++i) { + name = sk_OPENSSL_STRING_value(id->hosts, i); + if (X509_check_host(x, name, 0, id->hostflags, &id->peername) > 0) + return 1; + } + return n == 0; +} + +static int check_id(X509_STORE_CTX *ctx) +{ + X509_VERIFY_PARAM *vpm = ctx->param; + X509_VERIFY_PARAM_ID *id = vpm->id; + X509 *x = ctx->cert; + if (id->hosts && check_hosts(x, id) <= 0) { + if (!check_id_error(ctx, X509_V_ERR_HOSTNAME_MISMATCH)) + return 0; + } + if (id->email && X509_check_email(x, id->email, id->emaillen, 0) <= 0) { + if (!check_id_error(ctx, X509_V_ERR_EMAIL_MISMATCH)) + return 0; + } + if (id->ip && X509_check_ip(x, id->ip, id->iplen, 0) <= 0) { + if (!check_id_error(ctx, X509_V_ERR_IP_ADDRESS_MISMATCH)) + return 0; + } return 1; -#else +} + +static int check_trust(X509_STORE_CTX *ctx) +{ int i, ok; - X509 *x; + X509 *x = NULL; int (*cb) (int xok, X509_STORE_CTX *xctx); cb = ctx->verify_cb; -/* For now just check the last certificate in the chain */ - i = sk_X509_num(ctx->chain) - 1; - x = sk_X509_value(ctx->chain, i); - ok = X509_check_trust(x, ctx->param->trust, 0); - if (ok == X509_TRUST_TRUSTED) - return 1; - ctx->error_depth = i; - ctx->current_cert = x; - if (ok == X509_TRUST_REJECTED) - ctx->error = X509_V_ERR_CERT_REJECTED; - else - ctx->error = X509_V_ERR_CERT_UNTRUSTED; - ok = cb(0, ctx); - return ok; -#endif + /* Check all trusted certificates in chain */ + for (i = ctx->last_untrusted; i < sk_X509_num(ctx->chain); i++) { + x = sk_X509_value(ctx->chain, i); + ok = X509_check_trust(x, ctx->param->trust, 0); + /* If explicitly trusted return trusted */ + if (ok == X509_TRUST_TRUSTED) + return X509_TRUST_TRUSTED; + /* + * If explicitly rejected notify callback and reject if not + * overridden. + */ + if (ok == X509_TRUST_REJECTED) { + ctx->error_depth = i; + ctx->current_cert = x; + ctx->error = X509_V_ERR_CERT_REJECTED; + ok = cb(0, ctx); + if (!ok) + return X509_TRUST_REJECTED; + } + } + /* + * If we accept partial chains and have at least one trusted certificate + * return success. + */ + if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) { + X509 *mx; + if (ctx->last_untrusted < sk_X509_num(ctx->chain)) + return X509_TRUST_TRUSTED; + x = sk_X509_value(ctx->chain, 0); + mx = lookup_cert_match(ctx, x); + if (mx) { + (void)sk_X509_set(ctx->chain, 0, mx); + X509_free(x); + ctx->last_untrusted = 0; + return X509_TRUST_TRUSTED; + } + } + + /* + * If no trusted certs in chain at all return untrusted and allow + * standard (no issuer cert) etc errors to be indicated. + */ + return X509_TRUST_UNTRUSTED; } static int check_revocation(X509_STORE_CTX *ctx) @@ -1409,6 +1562,14 @@ static int check_crl(X509_STORE_CTX *ctx, X509_CRL *crl) if (!ok) goto err; } else { + int rv; + rv = X509_CRL_check_suiteb(crl, ikey, ctx->param->flags); + if (rv != X509_V_OK) { + ctx->error = rv; + ok = ctx->verify_cb(0, ctx); + if (!ok) + goto err; + } /* Verify CRL signature */ if (X509_CRL_verify(crl, ikey) <= 0) { ctx->error = X509_V_ERR_CRL_SIGNATURE_FAILURE; @@ -1565,6 +1726,10 @@ static int internal_verify(X509_STORE_CTX *ctx) if (ctx->check_issued(ctx, xi, xi)) xs = xi; else { + if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) { + xs = xi; + goto check_cert; + } if (n <= 0) { ctx->error = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE; ctx->current_cert = xi; @@ -1610,6 +1775,7 @@ static int internal_verify(X509_STORE_CTX *ctx) xs->valid = 1; + check_cert: ok = check_cert_time(ctx, xs); if (!ok) goto end; @@ -1824,6 +1990,114 @@ int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain) return 1; } +/* Make a delta CRL as the diff between two full CRLs */ + +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags) +{ + X509_CRL *crl = NULL; + int i; + STACK_OF(X509_REVOKED) *revs = NULL; + /* CRLs can't be delta already */ + if (base->base_crl_number || newer->base_crl_number) { + X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_ALREADY_DELTA); + return NULL; + } + /* Base and new CRL must have a CRL number */ + if (!base->crl_number || !newer->crl_number) { + X509err(X509_F_X509_CRL_DIFF, X509_R_NO_CRL_NUMBER); + return NULL; + } + /* Issuer names must match */ + if (X509_NAME_cmp(X509_CRL_get_issuer(base), X509_CRL_get_issuer(newer))) { + X509err(X509_F_X509_CRL_DIFF, X509_R_ISSUER_MISMATCH); + return NULL; + } + /* AKID and IDP must match */ + if (!crl_extension_match(base, newer, NID_authority_key_identifier)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_AKID_MISMATCH); + return NULL; + } + if (!crl_extension_match(base, newer, NID_issuing_distribution_point)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_IDP_MISMATCH); + return NULL; + } + /* Newer CRL number must exceed full CRL number */ + if (ASN1_INTEGER_cmp(newer->crl_number, base->crl_number) <= 0) { + X509err(X509_F_X509_CRL_DIFF, X509_R_NEWER_CRL_NOT_NEWER); + return NULL; + } + /* CRLs must verify */ + if (skey && (X509_CRL_verify(base, skey) <= 0 || + X509_CRL_verify(newer, skey) <= 0)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_VERIFY_FAILURE); + return NULL; + } + /* Create new CRL */ + crl = X509_CRL_new(); + if (!crl || !X509_CRL_set_version(crl, 1)) + goto memerr; + /* Set issuer name */ + if (!X509_CRL_set_issuer_name(crl, X509_CRL_get_issuer(newer))) + goto memerr; + + if (!X509_CRL_set_lastUpdate(crl, X509_CRL_get_lastUpdate(newer))) + goto memerr; + if (!X509_CRL_set_nextUpdate(crl, X509_CRL_get_nextUpdate(newer))) + goto memerr; + + /* Set base CRL number: must be critical */ + + if (!X509_CRL_add1_ext_i2d(crl, NID_delta_crl, base->crl_number, 1, 0)) + goto memerr; + + /* + * Copy extensions across from newest CRL to delta: this will set CRL + * number to correct value too. + */ + + for (i = 0; i < X509_CRL_get_ext_count(newer); i++) { + X509_EXTENSION *ext; + ext = X509_CRL_get_ext(newer, i); + if (!X509_CRL_add_ext(crl, ext, -1)) + goto memerr; + } + + /* Go through revoked entries, copying as needed */ + + revs = X509_CRL_get_REVOKED(newer); + + for (i = 0; i < sk_X509_REVOKED_num(revs); i++) { + X509_REVOKED *rvn, *rvtmp; + rvn = sk_X509_REVOKED_value(revs, i); + /* + * Add only if not also in base. TODO: need something cleverer here + * for some more complex CRLs covering multiple CAs. + */ + if (!X509_CRL_get0_by_serial(base, &rvtmp, rvn->serialNumber)) { + rvtmp = X509_REVOKED_dup(rvn); + if (!rvtmp) + goto memerr; + if (!X509_CRL_add0_revoked(crl, rvtmp)) { + X509_REVOKED_free(rvtmp); + goto memerr; + } + } + } + /* TODO: optionally prune deleted entries */ + + if (skey && md && !X509_CRL_sign(crl, skey, md)) + goto memerr; + + return crl; + + memerr: + X509err(X509_F_X509_CRL_DIFF, ERR_R_MALLOC_FAILURE); + if (crl) + X509_CRL_free(crl); + return NULL; +} + int X509_STORE_CTX_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, @@ -1874,16 +2148,9 @@ STACK_OF(X509) *X509_STORE_CTX_get_chain(X509_STORE_CTX *ctx) STACK_OF(X509) *X509_STORE_CTX_get1_chain(X509_STORE_CTX *ctx) { - int i; - X509 *x; - STACK_OF(X509) *chain; - if (!ctx->chain || !(chain = sk_X509_dup(ctx->chain))) + if (!ctx->chain) return NULL; - for (i = 0; i < sk_X509_num(chain); i++) { - x = sk_X509_value(chain, i); - CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); - } - return chain; + return X509_chain_up_ref(ctx->chain); } X509 *X509_STORE_CTX_get0_current_issuer(X509_STORE_CTX *ctx) diff --git a/drivers/builtin_openssl2/crypto/x509/x509_vpm.c b/drivers/builtin_openssl2/crypto/x509/x509_vpm.c index 6b0bf8a6e6..1ac15a881a 100644 --- a/drivers/builtin_openssl2/crypto/x509/x509_vpm.c +++ b/drivers/builtin_openssl2/crypto/x509/x509_vpm.c @@ -66,10 +66,73 @@ #include #include +#include "vpm_int.h" + /* X509_VERIFY_PARAM functions */ +#define SET_HOST 0 +#define ADD_HOST 1 + +static char *str_copy(const char *s) +{ + return OPENSSL_strdup(s); +} + +static void str_free(char *s) +{ + OPENSSL_free(s); +} + +#define string_stack_free(sk) sk_OPENSSL_STRING_pop_free(sk, str_free) + +static int int_x509_param_set_hosts(X509_VERIFY_PARAM_ID *id, int mode, + const char *name, size_t namelen) +{ + char *copy; + + /* + * Refuse names with embedded NUL bytes, except perhaps as final byte. + * XXX: Do we need to push an error onto the error stack? + */ + if (namelen == 0 || name == NULL) + namelen = name ? strlen(name) : 0; + else if (name && memchr(name, '\0', namelen > 1 ? namelen - 1 : namelen)) + return 0; + if (namelen > 0 && name[namelen - 1] == '\0') + --namelen; + + if (mode == SET_HOST && id->hosts) { + string_stack_free(id->hosts); + id->hosts = NULL; + } + if (name == NULL || namelen == 0) + return 1; + + copy = BUF_strndup(name, namelen); + if (copy == NULL) + return 0; + + if (id->hosts == NULL && + (id->hosts = sk_OPENSSL_STRING_new_null()) == NULL) { + OPENSSL_free(copy); + return 0; + } + + if (!sk_OPENSSL_STRING_push(id->hosts, copy)) { + OPENSSL_free(copy); + if (sk_OPENSSL_STRING_num(id->hosts) == 0) { + sk_OPENSSL_STRING_free(id->hosts); + id->hosts = NULL; + } + return 0; + } + + return 1; +} + static void x509_verify_param_zero(X509_VERIFY_PARAM *param) { + X509_VERIFY_PARAM_ID *paramid; if (!param) return; param->name = NULL; @@ -85,15 +148,49 @@ static void x509_verify_param_zero(X509_VERIFY_PARAM *param) sk_ASN1_OBJECT_pop_free(param->policies, ASN1_OBJECT_free); param->policies = NULL; } + paramid = param->id; + if (paramid->hosts) { + string_stack_free(paramid->hosts); + paramid->hosts = NULL; + } + if (paramid->peername) + OPENSSL_free(paramid->peername); + paramid->peername = NULL; + if (paramid->email) { + OPENSSL_free(paramid->email); + paramid->email = NULL; + paramid->emaillen = 0; + } + if (paramid->ip) { + OPENSSL_free(paramid->ip); + paramid->ip = NULL; + paramid->iplen = 0; + } } X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) { X509_VERIFY_PARAM *param; - param = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); + X509_VERIFY_PARAM_ID *paramid; + + param = OPENSSL_malloc(sizeof *param); if (!param) return NULL; - memset(param, 0, sizeof(X509_VERIFY_PARAM)); + memset(param, 0, sizeof(*param)); + + paramid = OPENSSL_malloc(sizeof(*paramid)); + if (!paramid) { + OPENSSL_free(param); + return NULL; + } + memset(paramid, 0, sizeof(*paramid)); + /* Exotic platforms may have non-zero bit representation of NULL */ + paramid->hosts = NULL; + paramid->peername = NULL; + paramid->email = NULL; + paramid->ip = NULL; + + param->id = paramid; x509_verify_param_zero(param); return param; } @@ -103,6 +200,7 @@ void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param) if (param == NULL) return; x509_verify_param_zero(param); + OPENSSL_free(param->id); OPENSSL_free(param); } @@ -144,6 +242,11 @@ void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param) (to_overwrite || \ ((src->field != def) && (to_default || (dest->field == def)))) +/* As above but for ID fields */ + +#define test_x509_verify_param_copy_id(idf, def) \ + test_x509_verify_param_copy(id->idf, def) + /* Macro to test and copy a field if necessary */ #define x509_verify_param_copy(field, def) \ @@ -155,8 +258,10 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest, { unsigned long inh_flags; int to_default, to_overwrite; + X509_VERIFY_PARAM_ID *id; if (!src) return 1; + id = src->id; inh_flags = dest->inh_flags | src->inh_flags; if (inh_flags & X509_VP_FLAG_ONCE) @@ -197,6 +302,31 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest, return 0; } + /* Copy the host flags if and only if we're copying the host list */ + if (test_x509_verify_param_copy_id(hosts, NULL)) { + if (dest->id->hosts) { + string_stack_free(dest->id->hosts); + dest->id->hosts = NULL; + } + if (id->hosts) { + dest->id->hosts = + sk_OPENSSL_STRING_deep_copy(id->hosts, str_copy, str_free); + if (dest->id->hosts == NULL) + return 0; + dest->id->hostflags = id->hostflags; + } + } + + if (test_x509_verify_param_copy_id(email, NULL)) { + if (!X509_VERIFY_PARAM_set1_email(dest, id->email, id->emaillen)) + return 0; + } + + if (test_x509_verify_param_copy_id(ip, NULL)) { + if (!X509_VERIFY_PARAM_set1_ip(dest, id->ip, id->iplen)) + return 0; + } + return 1; } @@ -211,6 +341,30 @@ int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, return ret; } +static int int_x509_param_set1(char **pdest, size_t *pdestlen, + const char *src, size_t srclen) +{ + void *tmp; + if (src) { + if (srclen == 0) { + tmp = BUF_strdup(src); + srclen = strlen(src); + } else + tmp = BUF_memdup(src, srclen); + if (!tmp) + return 0; + } else { + tmp = NULL; + srclen = 0; + } + if (*pdest) + OPENSSL_free(*pdest); + *pdest = tmp; + if (pdestlen) + *pdestlen = srclen; + return 1; +} + int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name) { if (param->name) @@ -308,11 +462,70 @@ int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, return 1; } +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen) +{ + return int_x509_param_set_hosts(param->id, SET_HOST, name, namelen); +} + +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen) +{ + return int_x509_param_set_hosts(param->id, ADD_HOST, name, namelen); +} + +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags) +{ + param->id->hostflags = flags; +} + +char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *param) +{ + return param->id->peername; +} + +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen) +{ + return int_x509_param_set1(¶m->id->email, ¶m->id->emaillen, + email, emaillen); +} + +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen) +{ + if (iplen != 0 && iplen != 4 && iplen != 16) + return 0; + return int_x509_param_set1((char **)¶m->id->ip, ¶m->id->iplen, + (char *)ip, iplen); +} + +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, const char *ipasc) +{ + unsigned char ipout[16]; + size_t iplen; + + iplen = (size_t)a2i_ipadd(ipout, ipasc); + if (iplen == 0) + return 0; + return X509_VERIFY_PARAM_set1_ip(param, ipout, iplen); +} + int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param) { return param->depth; } +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param) +{ + return param->name; +} + +static X509_VERIFY_PARAM_ID _empty_id = { NULL, 0U, NULL, NULL, 0, NULL, 0 }; + +#define vpm_empty_id (X509_VERIFY_PARAM_ID *)&_empty_id + /* * Default verify parameters: these are used for various applications and can * be overridden by the user specified table. NB: the 'name' field *must* be @@ -328,8 +541,8 @@ static const X509_VERIFY_PARAM default_table[] = { 0, /* purpose */ 0, /* trust */ 100, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "pkcs7", /* S/MIME sign parameters */ 0, /* Check time */ @@ -338,8 +551,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SMIME_SIGN, /* purpose */ X509_TRUST_EMAIL, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "smime_sign", /* S/MIME sign parameters */ 0, /* Check time */ @@ -348,8 +561,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SMIME_SIGN, /* purpose */ X509_TRUST_EMAIL, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "ssl_client", /* SSL/TLS client parameters */ 0, /* Check time */ @@ -358,8 +571,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SSL_CLIENT, /* purpose */ X509_TRUST_SSL_CLIENT, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "ssl_server", /* SSL/TLS server parameters */ 0, /* Check time */ @@ -368,8 +581,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SSL_SERVER, /* purpose */ X509_TRUST_SSL_SERVER, /* trust */ -1, /* depth */ - NULL /* policies */ - } + NULL, /* policies */ + vpm_empty_id} }; static STACK_OF(X509_VERIFY_PARAM) *param_table = NULL; @@ -409,6 +622,22 @@ int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param) return 1; } +int X509_VERIFY_PARAM_get_count(void) +{ + int num = sizeof(default_table) / sizeof(X509_VERIFY_PARAM); + if (param_table) + num += sk_X509_VERIFY_PARAM_num(param_table); + return num; +} + +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id) +{ + int num = sizeof(default_table) / sizeof(X509_VERIFY_PARAM); + if (id < num) + return default_table + id; + return sk_X509_VERIFY_PARAM_value(param_table, id - num); +} + const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name) { int idx; diff --git a/drivers/builtin_openssl2/crypto/x509/x_all.c b/drivers/builtin_openssl2/crypto/x509/x_all.c index 43152e933f..0f26c546d8 100644 --- a/drivers/builtin_openssl2/crypto/x509/x_all.c +++ b/drivers/builtin_openssl2/crypto/x509/x_all.c @@ -63,6 +63,7 @@ #include #include #include +#include #ifndef OPENSSL_NO_RSA # include #endif @@ -105,6 +106,12 @@ int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx) x->sig_alg, x->signature, x->cert_info, ctx); } +int X509_http_nbio(OCSP_REQ_CTX *rctx, X509 **pcert) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)pcert, ASN1_ITEM_rptr(X509)); +} + int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md) { return (ASN1_item_sign(ASN1_ITEM_rptr(X509_REQ_INFO), x->sig_alg, NULL, @@ -133,6 +140,13 @@ int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx) x->crl, ctx); } +int X509_CRL_http_nbio(OCSP_REQ_CTX *rctx, X509_CRL **pcrl) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)pcrl, + ASN1_ITEM_rptr(X509_CRL)); +} + int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md) { return (ASN1_item_sign(ASN1_ITEM_rptr(NETSCAPE_SPKAC), x->sig_algor, NULL, diff --git a/drivers/builtin_openssl2/crypto/x509v3/ext_dat.h b/drivers/builtin_openssl2/crypto/x509v3/ext_dat.h index 136b1f8ecf..c3a6fce752 100644 --- a/drivers/builtin_openssl2/crypto/x509v3/ext_dat.h +++ b/drivers/builtin_openssl2/crypto/x509v3/ext_dat.h @@ -69,6 +69,7 @@ extern X509V3_EXT_METHOD v3_crl_hold, v3_pci; extern X509V3_EXT_METHOD v3_policy_mappings, v3_policy_constraints; extern X509V3_EXT_METHOD v3_name_constraints, v3_inhibit_anyp, v3_idp; extern X509V3_EXT_METHOD v3_addr, v3_asid; +extern X509V3_EXT_METHOD v3_ct_scts[]; /* * This table will be searched using OBJ_bsearch so it *must* kept in order @@ -126,6 +127,8 @@ static const X509V3_EXT_METHOD *standard_exts[] = { &v3_idp, &v3_alt[2], &v3_freshest_crl, + &v3_ct_scts[0], + &v3_ct_scts[1], }; /* Number of standard extensions */ diff --git a/drivers/builtin_openssl2/crypto/x509v3/tabtest.c b/drivers/builtin_openssl2/crypto/x509v3/tabtest.c deleted file mode 100644 index 145dc9de56..0000000000 --- a/drivers/builtin_openssl2/crypto/x509v3/tabtest.c +++ /dev/null @@ -1,92 +0,0 @@ -/* tabtest.c */ -/* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project - * 1999. - */ -/* ==================================================================== - * Copyright (c) 1999 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -/* - * Simple program to check the ext_dat.h is correct and print out problems if - * it is not. - */ - -#include - -#include - -#include "ext_dat.h" - -main() -{ - int i, prev = -1, bad = 0; - X509V3_EXT_METHOD **tmp; - i = sizeof(standard_exts) / sizeof(X509V3_EXT_METHOD *); - if (i != STANDARD_EXTENSION_COUNT) - fprintf(stderr, "Extension number invalid expecting %d\n", i); - tmp = standard_exts; - for (i = 0; i < STANDARD_EXTENSION_COUNT; i++, tmp++) { - if ((*tmp)->ext_nid < prev) - bad = 1; - prev = (*tmp)->ext_nid; - - } - if (bad) { - tmp = standard_exts; - fprintf(stderr, "Extensions out of order!\n"); - for (i = 0; i < STANDARD_EXTENSION_COUNT; i++, tmp++) - printf("%d : %s\n", (*tmp)->ext_nid, OBJ_nid2sn((*tmp)->ext_nid)); - } else - fprintf(stderr, "Order OK\n"); -} diff --git a/drivers/builtin_openssl2/crypto/x509v3/v3_lib.c b/drivers/builtin_openssl2/crypto/x509v3/v3_lib.c index b5598c9a3e..8350429aaf 100644 --- a/drivers/builtin_openssl2/crypto/x509v3/v3_lib.c +++ b/drivers/builtin_openssl2/crypto/x509v3/v3_lib.c @@ -122,6 +122,28 @@ const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext) return X509V3_EXT_get_nid(nid); } +int X509V3_EXT_free(int nid, void *ext_data) +{ + const X509V3_EXT_METHOD *ext_method = X509V3_EXT_get_nid(nid); + if (ext_method == NULL) { + X509V3err(X509V3_F_X509V3_EXT_FREE, + X509V3_R_CANNOT_FIND_FREE_FUNCTION); + return 0; + } + + if (ext_method->it != NULL) + ASN1_item_free(ext_data, ASN1_ITEM_ptr(ext_method->it)); + else if (ext_method->ext_free != NULL) + ext_method->ext_free(ext_data); + else { + X509V3err(X509V3_F_X509V3_EXT_FREE, + X509V3_R_CANNOT_FIND_FREE_FUNCTION); + return 0; + } + + return 1; +} + int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist) { for (; extlist->ext_nid != -1; extlist++) diff --git a/drivers/builtin_openssl2/crypto/x509v3/v3_purp.c b/drivers/builtin_openssl2/crypto/x509v3/v3_purp.c index 0d9bc58ce7..845be673b7 100644 --- a/drivers/builtin_openssl2/crypto/x509v3/v3_purp.c +++ b/drivers/builtin_openssl2/crypto/x509v3/v3_purp.c @@ -380,6 +380,14 @@ static void setup_crldp(X509 *x) setup_dp(x, sk_DIST_POINT_value(x->crldp, i)); } +#define V1_ROOT (EXFLAG_V1|EXFLAG_SS) +#define ku_reject(x, usage) \ + (((x)->ex_flags & EXFLAG_KUSAGE) && !((x)->ex_kusage & (usage))) +#define xku_reject(x, usage) \ + (((x)->ex_flags & EXFLAG_XKUSAGE) && !((x)->ex_xkusage & (usage))) +#define ns_reject(x, usage) \ + (((x)->ex_flags & EXFLAG_NSCERT) && !((x)->ex_nscert & (usage))) + static void x509v3_cache_extensions(X509 *x) { BASIC_CONSTRAINTS *bs; @@ -395,9 +403,6 @@ static void x509v3_cache_extensions(X509 *x) #ifndef OPENSSL_NO_SHA X509_digest(x, EVP_sha1(), x->sha1_hash, NULL); #endif - /* Does subject name match issuer ? */ - if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) - x->ex_flags |= EXFLAG_SI; /* V1 should mean no extensions ... */ if (!X509_get_version(x)) x->ex_flags |= EXFLAG_V1; @@ -479,6 +484,10 @@ static void x509v3_cache_extensions(X509 *x) case NID_dvcs: x->ex_xkusage |= XKU_DVCS; break; + + case NID_anyExtendedKeyUsage: + x->ex_xkusage |= XKU_ANYEKU; + break; } } sk_ASN1_OBJECT_pop_free(extusage, ASN1_OBJECT_free); @@ -494,6 +503,14 @@ static void x509v3_cache_extensions(X509 *x) } x->skid = X509_get_ext_d2i(x, NID_subject_key_identifier, NULL, NULL); x->akid = X509_get_ext_d2i(x, NID_authority_key_identifier, NULL, NULL); + /* Does subject name match issuer ? */ + if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) { + x->ex_flags |= EXFLAG_SI; + /* If SKID matches AKID also indicate self signed */ + if (X509_check_akid(x, x->akid) == X509_V_OK && + !ku_reject(x, KU_KEY_CERT_SIGN)) + x->ex_flags |= EXFLAG_SS; + } x->altname = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL); x->nc = X509_get_ext_d2i(x, NID_name_constraints, &i, NULL); if (!x->nc && (i != -1)) @@ -530,14 +547,6 @@ static void x509v3_cache_extensions(X509 *x) * 4 basicConstraints absent but keyUsage present and keyCertSign asserted. */ -#define V1_ROOT (EXFLAG_V1|EXFLAG_SS) -#define ku_reject(x, usage) \ - (((x)->ex_flags & EXFLAG_KUSAGE) && !((x)->ex_kusage & (usage))) -#define xku_reject(x, usage) \ - (((x)->ex_flags & EXFLAG_XKUSAGE) && !((x)->ex_xkusage & (usage))) -#define ns_reject(x, usage) \ - (((x)->ex_flags & EXFLAG_NSCERT) && !((x)->ex_nscert & (usage))) - static int check_ca(const X509 *x) { /* keyUsage if present should allow cert signing */ @@ -598,8 +607,8 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x, return 0; if (ca) return check_ssl_ca(x); - /* We need to do digital signatures with it */ - if (ku_reject(x, KU_DIGITAL_SIGNATURE)) + /* We need to do digital signatures or key agreement */ + if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT)) return 0; /* nsCertType if present should allow SSL client use */ if (ns_reject(x, NS_SSL_CLIENT)) @@ -607,6 +616,14 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x, return 1; } +/* + * Key usage needed for TLS/SSL server: digital signature, encipherment or + * key agreement. The ssl code can check this more thoroughly for individual + * key types. + */ +#define KU_TLS \ + KU_DIGITAL_SIGNATURE|KU_KEY_ENCIPHERMENT|KU_KEY_AGREEMENT + static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x, int ca) { @@ -617,8 +634,7 @@ static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x, if (ns_reject(x, NS_SSL_SERVER)) return 0; - /* Now as for keyUsage: we'll at least need to sign OR encipher */ - if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_ENCIPHERMENT)) + if (ku_reject(x, KU_TLS)) return 0; return 1; diff --git a/drivers/builtin_openssl2/crypto/x509v3/v3_scts.c b/drivers/builtin_openssl2/crypto/x509v3/v3_scts.c new file mode 100644 index 0000000000..0b7c68180e --- /dev/null +++ b/drivers/builtin_openssl2/crypto/x509v3/v3_scts.c @@ -0,0 +1,334 @@ +/* v3_scts.c */ +/* + * Written by Rob Stradling (rob@comodo.com) for the OpenSSL project 2014. + */ +/* ==================================================================== + * Copyright (c) 2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include +#include "cryptlib.h" +#include +#include + +/* Signature and hash algorithms from RFC 5246 */ +#define TLSEXT_hash_sha256 4 + +#define TLSEXT_signature_rsa 1 +#define TLSEXT_signature_ecdsa 3 + + +#define n2s(c,s) ((s=(((unsigned int)(c[0]))<< 8)| \ + (((unsigned int)(c[1])) )),c+=2) + +#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +# define SCT_TIMESTAMP unsigned __int64 +#elif defined(__arch64__) +# define SCT_TIMESTAMP unsigned long +#else +# define SCT_TIMESTAMP unsigned long long +#endif + +#define n2l8(c,l) (l =((SCT_TIMESTAMP)(*((c)++)))<<56, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<48, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<40, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<32, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<24, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<16, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<< 8, \ + l|=((SCT_TIMESTAMP)(*((c)++)))) + +typedef struct SCT_st { + /* The encoded SCT */ + unsigned char *sct; + unsigned short sctlen; + /* + * Components of the SCT. "logid", "ext" and "sig" point to addresses + * inside "sct". + */ + unsigned char version; + unsigned char *logid; + unsigned short logidlen; + SCT_TIMESTAMP timestamp; + unsigned char *ext; + unsigned short extlen; + unsigned char hash_alg; + unsigned char sig_alg; + unsigned char *sig; + unsigned short siglen; +} SCT; + +DECLARE_STACK_OF(SCT) + +static void SCT_LIST_free(STACK_OF(SCT) *a); +static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, + const unsigned char **pp, long length); +static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list, + BIO *out, int indent); + +const X509V3_EXT_METHOD v3_ct_scts[] = { + {NID_ct_precert_scts, 0, NULL, + 0, (X509V3_EXT_FREE)SCT_LIST_free, + (X509V3_EXT_D2I)d2i_SCT_LIST, 0, + 0, 0, 0, 0, + (X509V3_EXT_I2R)i2r_SCT_LIST, 0, + NULL}, + + {NID_ct_cert_scts, 0, NULL, + 0, (X509V3_EXT_FREE)SCT_LIST_free, + (X509V3_EXT_D2I)d2i_SCT_LIST, 0, + 0, 0, 0, 0, + (X509V3_EXT_I2R)i2r_SCT_LIST, 0, + NULL}, +}; + +static void tls12_signature_print(BIO *out, const unsigned char hash_alg, + const unsigned char sig_alg) +{ + int nid = NID_undef; + /* RFC6962 only permits two signature algorithms */ + if (hash_alg == TLSEXT_hash_sha256) { + if (sig_alg == TLSEXT_signature_rsa) + nid = NID_sha256WithRSAEncryption; + else if (sig_alg == TLSEXT_signature_ecdsa) + nid = NID_ecdsa_with_SHA256; + } + if (nid == NID_undef) + BIO_printf(out, "%02X%02X", hash_alg, sig_alg); + else + BIO_printf(out, "%s", OBJ_nid2ln(nid)); +} + +static void timestamp_print(BIO *out, SCT_TIMESTAMP timestamp) +{ + ASN1_GENERALIZEDTIME *gen; + char genstr[20]; + gen = ASN1_GENERALIZEDTIME_new(); + ASN1_GENERALIZEDTIME_adj(gen, (time_t)0, + (int)(timestamp / 86400000), + (timestamp % 86400000) / 1000); + /* + * Note GeneralizedTime from ASN1_GENERALIZETIME_adj is always 15 + * characters long with a final Z. Update it with fractional seconds. + */ + BIO_snprintf(genstr, sizeof(genstr), "%.14s.%03dZ", + ASN1_STRING_data(gen), (unsigned int)(timestamp % 1000)); + ASN1_GENERALIZEDTIME_set_string(gen, genstr); + ASN1_GENERALIZEDTIME_print(out, gen); + ASN1_GENERALIZEDTIME_free(gen); +} + +static void SCT_free(SCT *sct) +{ + if (sct) { + if (sct->sct) + OPENSSL_free(sct->sct); + OPENSSL_free(sct); + } +} + +static void SCT_LIST_free(STACK_OF(SCT) *a) +{ + sk_SCT_pop_free(a, SCT_free); +} + +static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, + const unsigned char **pp, long length) +{ + ASN1_OCTET_STRING *oct = NULL; + STACK_OF(SCT) *sk = NULL; + SCT *sct; + unsigned char *p, *p2; + unsigned short listlen, sctlen = 0, fieldlen; + const unsigned char *q = *pp; + + if (d2i_ASN1_OCTET_STRING(&oct, &q, length) == NULL) + return NULL; + if (oct->length < 2) + goto done; + p = oct->data; + n2s(p, listlen); + if (listlen != oct->length - 2) + goto done; + + if ((sk = sk_SCT_new_null()) == NULL) + goto done; + + while (listlen > 0) { + if (listlen < 2) + goto err; + n2s(p, sctlen); + listlen -= 2; + + if ((sctlen < 1) || (sctlen > listlen)) + goto err; + listlen -= sctlen; + + sct = OPENSSL_malloc(sizeof(SCT)); + if (!sct) + goto err; + if (!sk_SCT_push(sk, sct)) { + OPENSSL_free(sct); + goto err; + } + + sct->sct = OPENSSL_malloc(sctlen); + if (!sct->sct) + goto err; + memcpy(sct->sct, p, sctlen); + sct->sctlen = sctlen; + p += sctlen; + p2 = sct->sct; + + sct->version = *p2++; + if (sct->version == 0) { /* SCT v1 */ + /*- + * Fixed-length header: + * struct { + * (1 byte) Version sct_version; + * (32 bytes) LogID id; + * (8 bytes) uint64 timestamp; + * (2 bytes + ?) CtExtensions extensions; + */ + if (sctlen < 43) + goto err; + sctlen -= 43; + + sct->logid = p2; + sct->logidlen = 32; + p2 += 32; + + n2l8(p2, sct->timestamp); + + n2s(p2, fieldlen); + if (sctlen < fieldlen) + goto err; + sct->ext = p2; + sct->extlen = fieldlen; + p2 += fieldlen; + sctlen -= fieldlen; + + /*- + * digitally-signed struct header: + * (1 byte) Hash algorithm + * (1 byte) Signature algorithm + * (2 bytes + ?) Signature + */ + if (sctlen < 4) + goto err; + sctlen -= 4; + + sct->hash_alg = *p2++; + sct->sig_alg = *p2++; + n2s(p2, fieldlen); + if (sctlen != fieldlen) + goto err; + sct->sig = p2; + sct->siglen = fieldlen; + } + } + + done: + ASN1_OCTET_STRING_free(oct); + *pp = q; + return sk; + + err: + SCT_LIST_free(sk); + sk = NULL; + goto done; +} + +static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list, + BIO *out, int indent) +{ + SCT *sct; + int i; + + for (i = 0; i < sk_SCT_num(sct_list);) { + sct = sk_SCT_value(sct_list, i); + + BIO_printf(out, "%*sSigned Certificate Timestamp:", indent, ""); + BIO_printf(out, "\n%*sVersion : ", indent + 4, ""); + + if (sct->version == 0) { /* SCT v1 */ + BIO_printf(out, "v1(0)"); + + BIO_printf(out, "\n%*sLog ID : ", indent + 4, ""); + BIO_hex_string(out, indent + 16, 16, sct->logid, sct->logidlen); + + BIO_printf(out, "\n%*sTimestamp : ", indent + 4, ""); + timestamp_print(out, sct->timestamp); + + BIO_printf(out, "\n%*sExtensions: ", indent + 4, ""); + if (sct->extlen == 0) + BIO_printf(out, "none"); + else + BIO_hex_string(out, indent + 16, 16, sct->ext, sct->extlen); + + BIO_printf(out, "\n%*sSignature : ", indent + 4, ""); + tls12_signature_print(out, sct->hash_alg, sct->sig_alg); + BIO_printf(out, "\n%*s ", indent + 4, ""); + BIO_hex_string(out, indent + 16, 16, sct->sig, sct->siglen); + } else { /* Unknown version */ + + BIO_printf(out, "unknown\n%*s", indent + 16, ""); + BIO_hex_string(out, indent + 16, 16, sct->sct, sct->sctlen); + } + + if (++i < sk_SCT_num(sct_list)) + BIO_printf(out, "\n"); + } + + return 1; +} diff --git a/drivers/builtin_openssl2/crypto/x509v3/v3_utl.c b/drivers/builtin_openssl2/crypto/x509v3/v3_utl.c index 94aaebba3e..43b9cb9c58 100644 --- a/drivers/builtin_openssl2/crypto/x509v3/v3_utl.c +++ b/drivers/builtin_openssl2/crypto/x509v3/v3_utl.c @@ -632,6 +632,438 @@ void X509_email_free(STACK_OF(OPENSSL_STRING) *sk) sk_OPENSSL_STRING_pop_free(sk, str_free); } +typedef int (*equal_fn) (const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags); + +/* Skip pattern prefix to match "wildcard" subject */ +static void skip_prefix(const unsigned char **p, size_t *plen, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *pattern = *p; + size_t pattern_len = *plen; + + /* + * If subject starts with a leading '.' followed by more octets, and + * pattern is longer, compare just an equal-length suffix with the + * full subject (starting at the '.'), provided the prefix contains + * no NULs. + */ + if ((flags & _X509_CHECK_FLAG_DOT_SUBDOMAINS) == 0) + return; + + while (pattern_len > subject_len && *pattern) { + if ((flags & X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS) && + *pattern == '.') + break; + ++pattern; + --pattern_len; + } + + /* Skip if entire prefix acceptable */ + if (pattern_len == subject_len) { + *p = pattern; + *plen = pattern_len; + } +} + +/* Compare while ASCII ignoring case. */ +static int equal_nocase(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + skip_prefix(&pattern, &pattern_len, subject, subject_len, flags); + if (pattern_len != subject_len) + return 0; + while (pattern_len) { + unsigned char l = *pattern; + unsigned char r = *subject; + /* The pattern must not contain NUL characters. */ + if (l == 0) + return 0; + if (l != r) { + if ('A' <= l && l <= 'Z') + l = (l - 'A') + 'a'; + if ('A' <= r && r <= 'Z') + r = (r - 'A') + 'a'; + if (l != r) + return 0; + } + ++pattern; + ++subject; + --pattern_len; + } + return 1; +} + +/* Compare using memcmp. */ +static int equal_case(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + skip_prefix(&pattern, &pattern_len, subject, subject_len, flags); + if (pattern_len != subject_len) + return 0; + return !memcmp(pattern, subject, pattern_len); +} + +/* + * RFC 5280, section 7.5, requires that only the domain is compared in a + * case-insensitive manner. + */ +static int equal_email(const unsigned char *a, size_t a_len, + const unsigned char *b, size_t b_len, + unsigned int unused_flags) +{ + size_t i = a_len; + if (a_len != b_len) + return 0; + /* + * We search backwards for the '@' character, so that we do not have to + * deal with quoted local-parts. The domain part is compared in a + * case-insensitive manner. + */ + while (i > 0) { + --i; + if (a[i] == '@' || b[i] == '@') { + if (!equal_nocase(a + i, a_len - i, b + i, a_len - i, 0)) + return 0; + break; + } + } + if (i == 0) + i = a_len; + return equal_case(a, i, b, i, 0); +} + +/* + * Compare the prefix and suffix with the subject, and check that the + * characters in-between are valid. + */ +static int wildcard_match(const unsigned char *prefix, size_t prefix_len, + const unsigned char *suffix, size_t suffix_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *wildcard_start; + const unsigned char *wildcard_end; + const unsigned char *p; + int allow_multi = 0; + int allow_idna = 0; + + if (subject_len < prefix_len + suffix_len) + return 0; + if (!equal_nocase(prefix, prefix_len, subject, prefix_len, flags)) + return 0; + wildcard_start = subject + prefix_len; + wildcard_end = subject + (subject_len - suffix_len); + if (!equal_nocase(wildcard_end, suffix_len, suffix, suffix_len, flags)) + return 0; + /* + * If the wildcard makes up the entire first label, it must match at + * least one character. + */ + if (prefix_len == 0 && *suffix == '.') { + if (wildcard_start == wildcard_end) + return 0; + allow_idna = 1; + if (flags & X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS) + allow_multi = 1; + } + /* IDNA labels cannot match partial wildcards */ + if (!allow_idna && + subject_len >= 4 && strncasecmp((char *)subject, "xn--", 4) == 0) + return 0; + /* The wildcard may match a literal '*' */ + if (wildcard_end == wildcard_start + 1 && *wildcard_start == '*') + return 1; + /* + * Check that the part matched by the wildcard contains only + * permitted characters and only matches a single label unless + * allow_multi is set. + */ + for (p = wildcard_start; p != wildcard_end; ++p) + if (!(('0' <= *p && *p <= '9') || + ('A' <= *p && *p <= 'Z') || + ('a' <= *p && *p <= 'z') || + *p == '-' || (allow_multi && *p == '.'))) + return 0; + return 1; +} + +#define LABEL_START (1 << 0) +#define LABEL_END (1 << 1) +#define LABEL_HYPHEN (1 << 2) +#define LABEL_IDNA (1 << 3) + +static const unsigned char *valid_star(const unsigned char *p, size_t len, + unsigned int flags) +{ + const unsigned char *star = 0; + size_t i; + int state = LABEL_START; + int dots = 0; + for (i = 0; i < len; ++i) { + /* + * Locate first and only legal wildcard, either at the start + * or end of a non-IDNA first and not final label. + */ + if (p[i] == '*') { + int atstart = (state & LABEL_START); + int atend = (i == len - 1 || p[i + 1] == '.'); + /*- + * At most one wildcard per pattern. + * No wildcards in IDNA labels. + * No wildcards after the first label. + */ + if (star != NULL || (state & LABEL_IDNA) != 0 || dots) + return NULL; + /* Only full-label '*.example.com' wildcards? */ + if ((flags & X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS) + && (!atstart || !atend)) + return NULL; + /* No 'foo*bar' wildcards */ + if (!atstart && !atend) + return NULL; + star = &p[i]; + state &= ~LABEL_START; + } else if (('a' <= p[i] && p[i] <= 'z') + || ('A' <= p[i] && p[i] <= 'Z') + || ('0' <= p[i] && p[i] <= '9')) { + if ((state & LABEL_START) != 0 + && len - i >= 4 && strncasecmp((char *)&p[i], "xn--", 4) == 0) + state |= LABEL_IDNA; + state &= ~(LABEL_HYPHEN | LABEL_START); + } else if (p[i] == '.') { + if ((state & (LABEL_HYPHEN | LABEL_START)) != 0) + return NULL; + state = LABEL_START; + ++dots; + } else if (p[i] == '-') { + /* no domain/subdomain starts with '-' */ + if ((state & LABEL_START) != 0) + return NULL; + state |= LABEL_HYPHEN; + } else + return NULL; + } + + /* + * The final label must not end in a hyphen or ".", and + * there must be at least two dots after the star. + */ + if ((state & (LABEL_START | LABEL_HYPHEN)) != 0 || dots < 2) + return NULL; + return star; +} + +/* Compare using wildcards. */ +static int equal_wildcard(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *star = NULL; + + /* + * Subject names starting with '.' can only match a wildcard pattern + * via a subject sub-domain pattern suffix match. + */ + if (!(subject_len > 1 && subject[0] == '.')) + star = valid_star(pattern, pattern_len, flags); + if (star == NULL) + return equal_nocase(pattern, pattern_len, + subject, subject_len, flags); + return wildcard_match(pattern, star - pattern, + star + 1, (pattern + pattern_len) - star - 1, + subject, subject_len, flags); +} + +/* + * Compare an ASN1_STRING to a supplied string. If they match return 1. If + * cmp_type > 0 only compare if string matches the type, otherwise convert it + * to UTF8. + */ + +static int do_check_string(ASN1_STRING *a, int cmp_type, equal_fn equal, + unsigned int flags, const char *b, size_t blen, + char **peername) +{ + int rv = 0; + + if (!a->data || !a->length) + return 0; + if (cmp_type > 0) { + if (cmp_type != a->type) + return 0; + if (cmp_type == V_ASN1_IA5STRING) + rv = equal(a->data, a->length, (unsigned char *)b, blen, flags); + else if (a->length == (int)blen && !memcmp(a->data, b, blen)) + rv = 1; + if (rv > 0 && peername) + *peername = BUF_strndup((char *)a->data, a->length); + } else { + int astrlen; + unsigned char *astr; + astrlen = ASN1_STRING_to_UTF8(&astr, a); + if (astrlen < 0) { + /* + * -1 could be an internal malloc failure or a decoding error from + * malformed input; we can't distinguish. + */ + return -1; + } + rv = equal(astr, astrlen, (unsigned char *)b, blen, flags); + if (rv > 0 && peername) + *peername = BUF_strndup((char *)astr, astrlen); + OPENSSL_free(astr); + } + return rv; +} + +static int do_x509_check(X509 *x, const char *chk, size_t chklen, + unsigned int flags, int check_type, char **peername) +{ + GENERAL_NAMES *gens = NULL; + X509_NAME *name = NULL; + int i; + int cnid = NID_undef; + int alt_type; + int san_present = 0; + int rv = 0; + equal_fn equal; + + /* See below, this flag is internal-only */ + flags &= ~_X509_CHECK_FLAG_DOT_SUBDOMAINS; + if (check_type == GEN_EMAIL) { + cnid = NID_pkcs9_emailAddress; + alt_type = V_ASN1_IA5STRING; + equal = equal_email; + } else if (check_type == GEN_DNS) { + cnid = NID_commonName; + /* Implicit client-side DNS sub-domain pattern */ + if (chklen > 1 && chk[0] == '.') + flags |= _X509_CHECK_FLAG_DOT_SUBDOMAINS; + alt_type = V_ASN1_IA5STRING; + if (flags & X509_CHECK_FLAG_NO_WILDCARDS) + equal = equal_nocase; + else + equal = equal_wildcard; + } else { + alt_type = V_ASN1_OCTET_STRING; + equal = equal_case; + } + + if (chklen == 0) + chklen = strlen(chk); + + gens = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL); + if (gens) { + for (i = 0; i < sk_GENERAL_NAME_num(gens); i++) { + GENERAL_NAME *gen; + ASN1_STRING *cstr; + gen = sk_GENERAL_NAME_value(gens, i); + if (gen->type != check_type) + continue; + san_present = 1; + if (check_type == GEN_EMAIL) + cstr = gen->d.rfc822Name; + else if (check_type == GEN_DNS) + cstr = gen->d.dNSName; + else + cstr = gen->d.iPAddress; + /* Positive on success, negative on error! */ + if ((rv = do_check_string(cstr, alt_type, equal, flags, + chk, chklen, peername)) != 0) + break; + } + GENERAL_NAMES_free(gens); + if (rv != 0) + return rv; + if (cnid == NID_undef + || (san_present + && !(flags & X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT))) + return 0; + } + + /* We're done if CN-ID is not pertinent */ + if (cnid == NID_undef) + return 0; + + i = -1; + name = X509_get_subject_name(x); + while ((i = X509_NAME_get_index_by_NID(name, cnid, i)) >= 0) { + X509_NAME_ENTRY *ne; + ASN1_STRING *str; + ne = X509_NAME_get_entry(name, i); + str = X509_NAME_ENTRY_get_data(ne); + /* Positive on success, negative on error! */ + if ((rv = do_check_string(str, -1, equal, flags, + chk, chklen, peername)) != 0) + return rv; + } + return 0; +} + +int X509_check_host(X509 *x, const char *chk, size_t chklen, + unsigned int flags, char **peername) +{ + if (chk == NULL) + return -2; + /* + * Embedded NULs are disallowed, except as the last character of a + * string of length 2 or more (tolerate caller including terminating + * NUL in string length). + */ + if (chklen == 0) + chklen = strlen(chk); + else if (memchr(chk, '\0', chklen > 1 ? chklen - 1 : chklen)) + return -2; + if (chklen > 1 && chk[chklen - 1] == '\0') + --chklen; + return do_x509_check(x, chk, chklen, flags, GEN_DNS, peername); +} + +int X509_check_email(X509 *x, const char *chk, size_t chklen, + unsigned int flags) +{ + if (chk == NULL) + return -2; + /* + * Embedded NULs are disallowed, except as the last character of a + * string of length 2 or more (tolerate caller including terminating + * NUL in string length). + */ + if (chklen == 0) + chklen = strlen((char *)chk); + else if (memchr(chk, '\0', chklen > 1 ? chklen - 1 : chklen)) + return -2; + if (chklen > 1 && chk[chklen - 1] == '\0') + --chklen; + return do_x509_check(x, chk, chklen, flags, GEN_EMAIL, NULL); +} + +int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen, + unsigned int flags) +{ + if (chk == NULL) + return -2; + return do_x509_check(x, (char *)chk, chklen, flags, GEN_IPADD, NULL); +} + +int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags) +{ + unsigned char ipout[16]; + size_t iplen; + + if (ipasc == NULL) + return -2; + iplen = (size_t)a2i_ipadd(ipout, ipasc); + if (iplen == 0) + return -2; + return do_x509_check(x, (char *)ipout, iplen, flags, GEN_IPADD, NULL); +} + /* * Convert IP addresses both IPv4 and IPv6 into an OCTET STRING compatible * with RFC3280. diff --git a/drivers/builtin_openssl2/crypto/x509v3/v3err.c b/drivers/builtin_openssl2/crypto/x509v3/v3err.c index 0138f7a846..bcc1be722e 100644 --- a/drivers/builtin_openssl2/crypto/x509v3/v3err.c +++ b/drivers/builtin_openssl2/crypto/x509v3/v3err.c @@ -1,6 +1,6 @@ /* crypto/x509v3/v3err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,7 +70,7 @@ # define ERR_REASON(reason) ERR_PACK(ERR_LIB_X509V3,0,reason) static ERR_STRING_DATA X509V3_str_functs[] = { - {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "A2I_GENERAL_NAME"}, + {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "a2i_GENERAL_NAME"}, {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE), "ASIDENTIFIERCHOICE_CANONIZE"}, {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_IS_CANONICAL), @@ -132,6 +132,7 @@ static ERR_STRING_DATA X509V3_str_functs[] = { {ERR_FUNC(X509V3_F_X509V3_EXT_ADD), "X509V3_EXT_add"}, {ERR_FUNC(X509V3_F_X509V3_EXT_ADD_ALIAS), "X509V3_EXT_add_alias"}, {ERR_FUNC(X509V3_F_X509V3_EXT_CONF), "X509V3_EXT_conf"}, + {ERR_FUNC(X509V3_F_X509V3_EXT_FREE), "X509V3_EXT_free"}, {ERR_FUNC(X509V3_F_X509V3_EXT_I2D), "X509V3_EXT_i2d"}, {ERR_FUNC(X509V3_F_X509V3_EXT_NCONF), "X509V3_EXT_nconf"}, {ERR_FUNC(X509V3_F_X509V3_GET_SECTION), "X509V3_get_section"}, @@ -149,6 +150,8 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { {ERR_REASON(X509V3_R_BN_DEC2BN_ERROR), "bn dec2bn error"}, {ERR_REASON(X509V3_R_BN_TO_ASN1_INTEGER_ERROR), "bn to asn1 integer error"}, + {ERR_REASON(X509V3_R_CANNOT_FIND_FREE_FUNCTION), + "cannot find free function"}, {ERR_REASON(X509V3_R_DIRNAME_ERROR), "dirname error"}, {ERR_REASON(X509V3_R_DISTPOINT_ALREADY_SET), "distpoint already set"}, {ERR_REASON(X509V3_R_DUPLICATE_ZONE_ID), "duplicate zone id"}, @@ -167,7 +170,6 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { {ERR_REASON(X509V3_R_ILLEGAL_HEX_DIGIT), "illegal hex digit"}, {ERR_REASON(X509V3_R_INCORRECT_POLICY_SYNTAX_TAG), "incorrect policy syntax tag"}, - {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"}, {ERR_REASON(X509V3_R_INVALID_ASNUMBER), "invalid asnumber"}, {ERR_REASON(X509V3_R_INVALID_ASRANGE), "invalid asrange"}, {ERR_REASON(X509V3_R_INVALID_BOOLEAN_STRING), "invalid boolean string"}, @@ -175,6 +177,7 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { "invalid extension string"}, {ERR_REASON(X509V3_R_INVALID_INHERITANCE), "invalid inheritance"}, {ERR_REASON(X509V3_R_INVALID_IPADDRESS), "invalid ipaddress"}, + {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"}, {ERR_REASON(X509V3_R_INVALID_NAME), "invalid name"}, {ERR_REASON(X509V3_R_INVALID_NULL_ARGUMENT), "invalid null argument"}, {ERR_REASON(X509V3_R_INVALID_NULL_NAME), "invalid null name"}, diff --git a/drivers/builtin_openssl2/crypto/x86_64cpuid.pl b/drivers/builtin_openssl2/crypto/x86_64cpuid.pl deleted file mode 100644 index 6ebfd017ea..0000000000 --- a/drivers/builtin_openssl2/crypto/x86_64cpuid.pl +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env perl - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order - -print<<___; -.extern OPENSSL_cpuid_setup -.hidden OPENSSL_cpuid_setup -.section .init - call OPENSSL_cpuid_setup - -.hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8,4 - -.text - -.globl OPENSSL_atomic_add -.type OPENSSL_atomic_add,\@abi-omnipotent -.align 16 -OPENSSL_atomic_add: - movl ($arg1),%eax -.Lspin: leaq ($arg2,%rax),%r8 - .byte 0xf0 # lock - cmpxchgl %r8d,($arg1) - jne .Lspin - movl %r8d,%eax - .byte 0x48,0x98 # cltq/cdqe - ret -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add - -.globl OPENSSL_rdtsc -.type OPENSSL_rdtsc,\@abi-omnipotent -.align 16 -OPENSSL_rdtsc: - rdtsc - shl \$32,%rdx - or %rdx,%rax - ret -.size OPENSSL_rdtsc,.-OPENSSL_rdtsc - -.globl OPENSSL_ia32_cpuid -.type OPENSSL_ia32_cpuid,\@abi-omnipotent -.align 16 -OPENSSL_ia32_cpuid: - mov %rbx,%r8 # save %rbx - - xor %eax,%eax - cpuid - mov %eax,%r11d # max value for standard query level - - xor %eax,%eax - cmp \$0x756e6547,%ebx # "Genu" - setne %al - mov %eax,%r9d - cmp \$0x49656e69,%edx # "ineI" - setne %al - or %eax,%r9d - cmp \$0x6c65746e,%ecx # "ntel" - setne %al - or %eax,%r9d # 0 indicates Intel CPU - jz .Lintel - - cmp \$0x68747541,%ebx # "Auth" - setne %al - mov %eax,%r10d - cmp \$0x69746E65,%edx # "enti" - setne %al - or %eax,%r10d - cmp \$0x444D4163,%ecx # "cAMD" - setne %al - or %eax,%r10d # 0 indicates AMD CPU - jnz .Lintel - - # AMD specific - mov \$0x80000000,%eax - cpuid - cmp \$0x80000001,%eax - jb .Lintel - mov %eax,%r10d - mov \$0x80000001,%eax - cpuid - or %ecx,%r9d - and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 - - cmp \$0x80000008,%r10d - jb .Lintel - - mov \$0x80000008,%eax - cpuid - movzb %cl,%r10 # number of cores - 1 - inc %r10 # number of cores - - mov \$1,%eax - cpuid - bt \$28,%edx # test hyper-threading bit - jnc .Lgeneric - shr \$16,%ebx # number of logical processors - cmp %r10b,%bl - ja .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) - jmp .Lgeneric - -.Lintel: - cmp \$4,%r11d - mov \$-1,%r10d - jb .Lnocacheinfo - - mov \$4,%eax - mov \$0,%ecx # query L1D - cpuid - mov %eax,%r10d - shr \$14,%r10d - and \$0xfff,%r10d # number of cores -1 per L1D - -.Lnocacheinfo: - mov \$1,%eax - cpuid - and \$0xbfefffff,%edx # force reserved bits to 0 - cmp \$0,%r9d - jne .Lnotintel - or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs - and \$15,%ah - cmp \$15,%ah # examine Family ID - jne .Lnotintel - or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR -.Lnotintel: - bt \$28,%edx # test hyper-threading bit - jnc .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) - cmp \$0,%r10d - je .Lgeneric - - or \$0x10000000,%edx # 1<<28 - shr \$16,%ebx - cmp \$1,%bl # see if cache is shared - ja .Lgeneric - and \$0xefffffff,%edx # ~(1<<28) -.Lgeneric: - and \$0x00000800,%r9d # isolate AMD XOP flag - and \$0xfffff7ff,%ecx - or %ecx,%r9d # merge AMD XOP flag - - mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx - bt \$27,%r9d # check OSXSAVE bit - jnc .Lclear_avx - xor %ecx,%ecx # XCR0 - .byte 0x0f,0x01,0xd0 # xgetbv - and \$6,%eax # isolate XMM and YMM state support - cmp \$6,%eax - je .Ldone -.Lclear_avx: - mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) - and %eax,%r9d # clear AVX, FMA and AMD XOP bits -.Ldone: - shl \$32,%r9 - mov %r10d,%eax - mov %r8,%rbx # restore %rbx - or %r9,%rax - ret -.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid - -.globl OPENSSL_cleanse -.type OPENSSL_cleanse,\@abi-omnipotent -.align 16 -OPENSSL_cleanse: - xor %rax,%rax - cmp \$15,$arg2 - jae .Lot - cmp \$0,$arg2 - je .Lret -.Little: - mov %al,($arg1) - sub \$1,$arg2 - lea 1($arg1),$arg1 - jnz .Little -.Lret: - ret -.align 16 -.Lot: - test \$7,$arg1 - jz .Laligned - mov %al,($arg1) - lea -1($arg2),$arg2 - lea 1($arg1),$arg1 - jmp .Lot -.Laligned: - mov %rax,($arg1) - lea -8($arg2),$arg2 - test \$-8,$arg2 - lea 8($arg1),$arg1 - jnz .Laligned - cmp \$0,$arg2 - jne .Little - ret -.size OPENSSL_cleanse,.-OPENSSL_cleanse -___ - -print<<___ if (!$win64); -.globl OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,\@abi-omnipotent -.align 16 -OPENSSL_wipe_cpu: - pxor %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - pxor %xmm6,%xmm6 - pxor %xmm7,%xmm7 - pxor %xmm8,%xmm8 - pxor %xmm9,%xmm9 - pxor %xmm10,%xmm10 - pxor %xmm11,%xmm11 - pxor %xmm12,%xmm12 - pxor %xmm13,%xmm13 - pxor %xmm14,%xmm14 - pxor %xmm15,%xmm15 - xorq %rcx,%rcx - xorq %rdx,%rdx - xorq %rsi,%rsi - xorq %rdi,%rdi - xorq %r8,%r8 - xorq %r9,%r9 - xorq %r10,%r10 - xorq %r11,%r11 - leaq 8(%rsp),%rax - ret -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu -___ -print<<___ if ($win64); -.globl OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,\@abi-omnipotent -.align 16 -OPENSSL_wipe_cpu: - pxor %xmm0,%xmm0 - pxor %xmm1,%xmm1 - pxor %xmm2,%xmm2 - pxor %xmm3,%xmm3 - pxor %xmm4,%xmm4 - pxor %xmm5,%xmm5 - xorq %rcx,%rcx - xorq %rdx,%rdx - xorq %r8,%r8 - xorq %r9,%r9 - xorq %r10,%r10 - xorq %r11,%r11 - leaq 8(%rsp),%rax - ret -.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu -___ - -print<<___; -.globl OPENSSL_ia32_rdrand -.type OPENSSL_ia32_rdrand,\@abi-omnipotent -.align 16 -OPENSSL_ia32_rdrand: - mov \$8,%ecx -.Loop_rdrand: - rdrand %rax - jc .Lbreak_rdrand - loop .Loop_rdrand -.Lbreak_rdrand: - cmp \$0,%rax - cmove %rcx,%rax - ret -.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand -___ - -close STDOUT; # flush diff --git a/drivers/builtin_openssl2/crypto/x86cpuid.pl b/drivers/builtin_openssl2/crypto/x86cpuid.pl deleted file mode 100644 index b270b44337..0000000000 --- a/drivers/builtin_openssl2/crypto/x86cpuid.pl +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC, "${dir}perlasm", "perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"x86cpuid"); - -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&function_begin("OPENSSL_ia32_cpuid"); - &xor ("edx","edx"); - &pushf (); - &pop ("eax"); - &mov ("ecx","eax"); - &xor ("eax",1<<21); - &push ("eax"); - &popf (); - &pushf (); - &pop ("eax"); - &xor ("ecx","eax"); - &xor ("eax","eax"); - &bt ("ecx",21); - &jnc (&label("nocpuid")); - &cpuid (); - &mov ("edi","eax"); # max value for standard query level - - &xor ("eax","eax"); - &cmp ("ebx",0x756e6547); # "Genu" - &setne (&LB("eax")); - &mov ("ebp","eax"); - &cmp ("edx",0x49656e69); # "ineI" - &setne (&LB("eax")); - &or ("ebp","eax"); - &cmp ("ecx",0x6c65746e); # "ntel" - &setne (&LB("eax")); - &or ("ebp","eax"); # 0 indicates Intel CPU - &jz (&label("intel")); - - &cmp ("ebx",0x68747541); # "Auth" - &setne (&LB("eax")); - &mov ("esi","eax"); - &cmp ("edx",0x69746E65); # "enti" - &setne (&LB("eax")); - &or ("esi","eax"); - &cmp ("ecx",0x444D4163); # "cAMD" - &setne (&LB("eax")); - &or ("esi","eax"); # 0 indicates AMD CPU - &jnz (&label("intel")); - - # AMD specific - &mov ("eax",0x80000000); - &cpuid (); - &cmp ("eax",0x80000001); - &jb (&label("intel")); - &mov ("esi","eax"); - &mov ("eax",0x80000001); - &cpuid (); - &or ("ebp","ecx"); - &and ("ebp",1<<11|1); # isolate XOP bit - &cmp ("esi",0x80000008); - &jb (&label("intel")); - - &mov ("eax",0x80000008); - &cpuid (); - &movz ("esi",&LB("ecx")); # number of cores - 1 - &inc ("esi"); # number of cores - - &mov ("eax",1); - &xor ("ecx","ecx"); - &cpuid (); - &bt ("edx",28); - &jnc (&label("generic")); - &shr ("ebx",16); - &and ("ebx",0xff); - &cmp ("ebx","esi"); - &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit - &jmp (&label("generic")); - -&set_label("intel"); - &cmp ("edi",4); - &mov ("edi",-1); - &jb (&label("nocacheinfo")); - - &mov ("eax",4); - &mov ("ecx",0); # query L1D - &cpuid (); - &mov ("edi","eax"); - &shr ("edi",14); - &and ("edi",0xfff); # number of cores -1 per L1D - -&set_label("nocacheinfo"); - &mov ("eax",1); - &xor ("ecx","ecx"); - &cpuid (); - &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 - &cmp ("ebp",0); - &jne (&label("notintel")); - &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs - &and (&HB("eax"),15); # familiy ID - &cmp (&HB("eax"),15); # P4? - &jne (&label("notintel")); - &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR -&set_label("notintel"); - &bt ("edx",28); # test hyper-threading bit - &jnc (&label("generic")); - &and ("edx",0xefffffff); - &cmp ("edi",0); - &je (&label("generic")); - - &or ("edx",0x10000000); - &shr ("ebx",16); - &cmp (&LB("ebx"),1); - &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit if not - -&set_label("generic"); - &and ("ebp",1<<11); # isolate AMD XOP flag - &and ("ecx",0xfffff7ff); # force 11th bit to 0 - &mov ("esi","edx"); - &or ("ebp","ecx"); # merge AMD XOP flag - - &bt ("ecx",27); # check OSXSAVE bit - &jnc (&label("clear_avx")); - &xor ("ecx","ecx"); - &data_byte(0x0f,0x01,0xd0); # xgetbv - &and ("eax",6); - &cmp ("eax",6); - &je (&label("done")); - &cmp ("eax",2); - &je (&label("clear_avx")); -&set_label("clear_xmm"); - &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits - &and ("esi",0xfeffffff); # clear FXSR -&set_label("clear_avx"); - &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits -&set_label("done"); - &mov ("eax","esi"); - &mov ("edx","ebp"); -&set_label("nocpuid"); -&function_end("OPENSSL_ia32_cpuid"); - -&external_label("OPENSSL_ia32cap_P"); - -&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"ecx"),4); - &jnc (&label("notsc")); - &rdtsc (); -&set_label("notsc"); - &ret (); -&function_end_B("OPENSSL_rdtsc"); - -# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host], -# but it's safe to call it on any [supported] 32-bit platform... -# Just check for [non-]zero return value... -&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"ecx"),4); - &jnc (&label("nohalt")); # no TSC - - &data_word(0x9058900e); # push %cs; pop %eax - &and ("eax",3); - &jnz (&label("nohalt")); # not enough privileges - - &pushf (); - &pop ("eax"); - &bt ("eax",9); - &jnc (&label("nohalt")); # interrupts are disabled - - &rdtsc (); - &push ("edx"); - &push ("eax"); - &halt (); - &rdtsc (); - - &sub ("eax",&DWP(0,"esp")); - &sbb ("edx",&DWP(4,"esp")); - &add ("esp",8); - &ret (); - -&set_label("nohalt"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &ret (); -&function_end_B("OPENSSL_instrument_halt"); - -# Essentially there is only one use for this function. Under DJGPP: -# -# #include -# ... -# i=OPENSSL_far_spin(_dos_ds,0x46c); -# ... -# to obtain the number of spins till closest timer interrupt. - -&function_begin_B("OPENSSL_far_spin"); - &pushf (); - &pop ("eax") - &bt ("eax",9); - &jnc (&label("nospin")); # interrupts are disabled - - &mov ("eax",&DWP(4,"esp")); - &mov ("ecx",&DWP(8,"esp")); - &data_word (0x90d88e1e); # push %ds, mov %eax,%ds - &xor ("eax","eax"); - &mov ("edx",&DWP(0,"ecx")); - &jmp (&label("spin")); - - &align (16); -&set_label("spin"); - &inc ("eax"); - &cmp ("edx",&DWP(0,"ecx")); - &je (&label("spin")); - - &data_word (0x1f909090); # pop %ds - &ret (); - -&set_label("nospin"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &ret (); -&function_end_B("OPENSSL_far_spin"); - -&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &mov ("ecx",&DWP(0,"ecx")); - &bt (&DWP(0,"ecx"),1); - &jnc (&label("no_x87")); - if ($sse2) { - &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits - &cmp ("ecx",1<<26|1<<24); - &jne (&label("no_sse2")); - &pxor ("xmm0","xmm0"); - &pxor ("xmm1","xmm1"); - &pxor ("xmm2","xmm2"); - &pxor ("xmm3","xmm3"); - &pxor ("xmm4","xmm4"); - &pxor ("xmm5","xmm5"); - &pxor ("xmm6","xmm6"); - &pxor ("xmm7","xmm7"); - &set_label("no_sse2"); - } - # just a bunch of fldz to zap the fp/mm bank followed by finit... - &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b); -&set_label("no_x87"); - &lea ("eax",&DWP(4,"esp")); - &ret (); -&function_end_B("OPENSSL_wipe_cpu"); - -&function_begin_B("OPENSSL_atomic_add"); - &mov ("edx",&DWP(4,"esp")); # fetch the pointer, 1st arg - &mov ("ecx",&DWP(8,"esp")); # fetch the increment, 2nd arg - &push ("ebx"); - &nop (); - &mov ("eax",&DWP(0,"edx")); -&set_label("spin"); - &lea ("ebx",&DWP(0,"eax","ecx")); - &nop (); - &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx) # %eax is envolved and is always reloaded - &jne (&label("spin")); - &mov ("eax","ebx"); # OpenSSL expects the new value - &pop ("ebx"); - &ret (); -&function_end_B("OPENSSL_atomic_add"); - -# This function can become handy under Win32 in situations when -# we don't know which calling convention, __stdcall or __cdecl(*), -# indirect callee is using. In C it can be deployed as -# -#ifdef OPENSSL_CPUID_OBJ -# type OPENSSL_indirect_call(void *f,...); -# ... -# OPENSSL_indirect_call(func,[up to $max arguments]); -#endif -# -# (*) it's designed to work even for __fastcall if number of -# arguments is 1 or 2! -&function_begin_B("OPENSSL_indirect_call"); - { - my ($max,$i)=(7,); # $max has to be chosen as 4*n-1 - # in order to preserve eventual - # stack alignment - &push ("ebp"); - &mov ("ebp","esp"); - &sub ("esp",$max*4); - &mov ("ecx",&DWP(12,"ebp")); - &mov (&DWP(0,"esp"),"ecx"); - &mov ("edx",&DWP(16,"ebp")); - &mov (&DWP(4,"esp"),"edx"); - for($i=2;$i<$max;$i++) - { - # Some copies will be redundant/bogus... - &mov ("eax",&DWP(12+$i*4,"ebp")); - &mov (&DWP(0+$i*4,"esp"),"eax"); - } - &call_ptr (&DWP(8,"ebp"));# make the call... - &mov ("esp","ebp"); # ... and just restore the stack pointer - # without paying attention to what we called, - # (__cdecl *func) or (__stdcall *one). - &pop ("ebp"); - &ret (); - } -&function_end_B("OPENSSL_indirect_call"); - -&function_begin_B("OPENSSL_cleanse"); - &mov ("edx",&wparam(0)); - &mov ("ecx",&wparam(1)); - &xor ("eax","eax"); - &cmp ("ecx",7); - &jae (&label("lot")); - &cmp ("ecx",0); - &je (&label("ret")); -&set_label("little"); - &mov (&BP(0,"edx"),"al"); - &sub ("ecx",1); - &lea ("edx",&DWP(1,"edx")); - &jnz (&label("little")); -&set_label("ret"); - &ret (); - -&set_label("lot",16); - &test ("edx",3); - &jz (&label("aligned")); - &mov (&BP(0,"edx"),"al"); - &lea ("ecx",&DWP(-1,"ecx")); - &lea ("edx",&DWP(1,"edx")); - &jmp (&label("lot")); -&set_label("aligned"); - &mov (&DWP(0,"edx"),"eax"); - &lea ("ecx",&DWP(-4,"ecx")); - &test ("ecx",-4); - &lea ("edx",&DWP(4,"edx")); - &jnz (&label("aligned")); - &cmp ("ecx",0); - &jne (&label("little")); - &ret (); -&function_end_B("OPENSSL_cleanse"); - -&function_begin_B("OPENSSL_ia32_rdrand"); - &mov ("ecx",8); -&set_label("loop"); - &rdrand ("eax"); - &jc (&label("break")); - &loop (&label("loop")); -&set_label("break"); - &cmp ("eax",0); - &cmove ("eax","ecx"); - &ret (); -&function_end_B("OPENSSL_ia32_rdrand"); - -&initseg("OPENSSL_cpuid_setup"); - -&asm_finish(); -- cgit v1.2.3