Remove inconsistency in ARM support.

This facilitates "universal" builds, ones that target multiple
architectures, e.g. ARMv5 through ARMv7. See commentary in
Configure for details.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
This commit is contained in:
Andy Polyakov 2014-11-07 22:48:22 +01:00
parent 9e557ab262
commit c1669e1c20
14 changed files with 224 additions and 160 deletions

View File

@ -350,8 +350,34 @@ my %table=(
# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If you compiler does not, do complement config command line with one!
#######################################################################
# Note that -march is not among compiler options in below linux-armv4
# target line. Not specifying one is intentional to give you choice to:
#
# a) rely on your compiler default by not specifying one;
# b) specify your target platform explicitly for optimal performance,
# e.g. -march=armv6 or -march=armv7-a;
# c) build "universal" binary that targets *range* of platforms by
# specifying minimum and maximum supported architecture;
#
# As for c) option. It actually makes no sense to specify maximum to be
# less than ARMv7, because it's the least requirement for run-time
# switch between platform-specific code paths. And without run-time
# switch performance would be equivalent to one for minimum. Secondly,
# there are some natural limitations that you'd have to accept and
# respect. Most notably you can *not* build "universal" binary for
# big-endian platform. This is because ARMv7 processor always picks
# instructions in little-endian order. Another similar limitation is
# that -mthumb can't "cross" -march=armv6t2 boundary, because that's
# where it became Thumb-2. Well, this limitation is a bit artificial,
# because it's not really impossible, but it's deemed too tricky to
# support. And of course you have to be sure that your binutils are
# actually up to the task of handling maximum target platform. With all
# this in mind here is an example of how to configure "universal" build:
#
# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
#
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support,

View File

@ -35,11 +35,13 @@ $prefix="aes_v8";
$code=<<___;
#include "arm_arch.h"
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.text
___
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
#^^^^^^ this is done to simplify adoption by not depending
# on latest binutils.
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to

View File

@ -702,13 +702,17 @@ $code.=<<___;
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif
#ifdef __thumb__
# define adrl adr
#endif
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
@ -717,8 +721,6 @@ $code.=<<___;
.code 32
#endif
.fpu neon
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:

View File

@ -48,6 +48,18 @@
# endif
#endif
#if !defined(__ARM_MAX_ARCH__)
# define __ARM_MAX_ARCH__ __ARM_ARCH__
#endif
#if __ARM_MAX_ARCH__<__ARM_ARCH__
# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
# error "can't build universal big-endian binary"
# endif
#endif
#if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
#endif

View File

@ -7,8 +7,12 @@
#include "arm_arch.h"
unsigned int OPENSSL_armcap_P;
unsigned int OPENSSL_armcap_P=0;
#if __ARM_MAX_ARCH__<7
void OPENSSL_cpuid_setup(void) {}
unsigned long OPENSSL_rdtsc(void) { return 0; }
#else
static sigset_t all_masked;
static sigjmp_buf ill_jmp;
@ -155,3 +159,4 @@ void OPENSSL_cpuid_setup(void)
sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL);
}
#endif

View File

@ -3,69 +3,6 @@
.text
.code 32
@ Special note about using .byte directives to encode instructions.
@ Initial reason for hand-coding instructions was to allow module to
@ be compilable by legacy tool-chains. At later point it was pointed
@ out that since ARMv7, instructions are always encoded in little-endian
@ order, therefore one has to opt for endian-neutral presentation.
@ Contemporary tool-chains offer .inst directive for this purpose,
@ but not legacy ones. Therefore .byte. But there is an exception,
@ namely ARMv7-R profile still allows for big-endian encoding even for
@ instructions. This raises the question what if probe instructions
@ appear executable to such processor operating in big-endian order?
@ They have to be chosen in a way that avoids this problem. As failed
@ NEON probe disables a number of other probes we have to ensure that
@ only NEON probe instruction doesn't appear executable in big-endian
@ order, therefore 'vorr q8,q8,q8', and not some other register. The
@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
@ that if fetched in alternative byte oder instruction should crash to
@ denote lack of probed capability...
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
.byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
.byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
.byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
nop
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
.byte 0x1e,0xff,0x2f,0xe1 @ bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
.align 5
.global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
@ -139,30 +76,81 @@ OPENSSL_cleanse:
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.global _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
vorr q0,q0,q0
bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.global _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
mrrc p15,1,r0,r1,c14 @ CNTVCT
bx lr
.size _armv7_tick,.-_armv7_tick
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
.global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
#if __ARM_MAX_ARCH__>=7
tst r0,#1
beq .Lwipe_done
.byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
.byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
.byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
.byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
.byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
.byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
.byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
.byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
.byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
.byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
.byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
.byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14
veor q0, q0, q0
veor q1, q1, q1
veor q2, q2, q2
veor q3, q3, q3
veor q8, q8, q8
veor q9, q9, q9
veor q10, q10, q10
veor q11, q11, q11
veor q12, q12, q12
veor q13, q13, q13
veor q14, q14, q14
veor q15, q15, q15
.Lwipe_done:
#endif
mov r0,sp
#if __ARM_ARCH__>=5
bx lr
@ -200,8 +188,10 @@ OPENSSL_instrument_bus2:
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap
#endif
#if __ARM_ARCH__>=6
.align 5
#else

View File

@ -40,10 +40,6 @@ $code=<<___;
.text
.code 32
#if __ARM_ARCH__>=7
.fpu neon
#endif
___
################
# private interface to mul_1x1_ialu
@ -142,72 +138,18 @@ ___
# BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
.global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12]
tst r12,#1
beq .Lialu
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
.align 4
.Lialu:
bne .LNEON
#endif
___
}
$ret="r10"; # reassigned 1st argument
$code.=<<___;
stmdb sp!,{r4-r10,lr}
@ -257,8 +199,72 @@ $code.=<<___;
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
___
}
{
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
#endif
___
}
$code.=<<___;
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8)
@ -266,7 +272,9 @@ $code.=<<___;
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
foreach (split("\n",$code)) {

View File

@ -72,7 +72,7 @@ $code=<<___;
.text
.code 32
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont
@ -85,7 +85,7 @@ $code=<<___;
bn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,bn_mul_mont
@ -256,7 +256,8 @@ my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
@ -663,7 +664,7 @@ ___
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___

View File

@ -1036,7 +1036,7 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
#include "arm_arch.h"
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
# if defined(BSAES_ASM)
# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif

View File

@ -365,7 +365,8 @@ ___
}
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.global gcm_init_neon

View File

@ -675,7 +675,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
# endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h"
# if __ARM_ARCH__>=7
# if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)

View File

@ -174,7 +174,7 @@ $code=<<___;
.align 5
sha1_block_data_order:
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
@ -264,8 +264,10 @@ $code.=<<___;
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
___
@ -476,7 +478,8 @@ sub Xloop()
}
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type sha1_block_data_order_neon,%function
@ -563,7 +566,7 @@ my @Kxx=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
@ -637,7 +640,9 @@ $code.=<<___;
___
}}}
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
{ my %opcode = (

View File

@ -177,8 +177,10 @@ K256:
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order
#endif
.align 5
.global sha256_block_data_order
@ -186,7 +188,7 @@ K256:
sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256
@ -423,7 +425,8 @@ sub body_00_15 () {
}
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type sha256_block_data_order_neon,%function
@ -545,7 +548,7 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
@ -616,7 +619,9 @@ ___
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MARCH_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
{ my %opcode = (

View File

@ -237,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4
#else
.skip 32
#endif
.global sha512_block_data_order
.type sha512_block_data_order,%function
sha512_block_data_order:
sub r3,pc,#8 @ sha512_block_data_order
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1
@ -551,7 +555,8 @@ ___
}
$code.=<<___;
#if __ARM_ARCH__>=7
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 4
@ -592,7 +597,9 @@ $code.=<<___;
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;