Extension of OPENSSL_ia32cap to accommodate additional CPUID bits
bits 128 - 191 CPUID.(EAX=07H,ECX=0H).EDX and CPUID.(EAX=07H,ECX=1H).EAX bits 192 - 255 CPUID.(EAX=07H,ECX=1H).EDX and CPUID.(EAX=07H,ECX=1H).EBX bits 256 - 319 CPUID.(EAX=07H,ECX=1H).ECX and CPUID.(EAX=24H,ECX=0H).EBX Reviewed-by: Matt Caswell <matt@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/25709)
This commit is contained in:
parent
1b3b5a019a
commit
acc2655236
@ -78,6 +78,12 @@ OpenSSL 3.5
|
|||||||
|
|
||||||
*Paul Dale*
|
*Paul Dale*
|
||||||
|
|
||||||
|
* Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID`
|
||||||
|
feature/capability bits in leaf `0x7` (Extended Feature Flags) as well
|
||||||
|
as leaf `0x24` (Converged Vector ISA).
|
||||||
|
|
||||||
|
*Dan Zimmerman, Alina Elizarova*
|
||||||
|
|
||||||
OpenSSL 3.4
|
OpenSSL 3.4
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
defined(__x86_64) || defined(__x86_64__) || \
|
defined(__x86_64) || defined(__x86_64__) || \
|
||||||
defined(_M_AMD64) || defined(_M_X64)
|
defined(_M_AMD64) || defined(_M_X64)
|
||||||
|
|
||||||
extern unsigned int OPENSSL_ia32cap_P[4];
|
extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
|
||||||
|
|
||||||
# if defined(OPENSSL_CPUID_OBJ)
|
# if defined(OPENSSL_CPUID_OBJ)
|
||||||
|
|
||||||
@ -29,7 +29,7 @@ extern unsigned int OPENSSL_ia32cap_P[4];
|
|||||||
*/
|
*/
|
||||||
# ifdef _WIN32
|
# ifdef _WIN32
|
||||||
typedef WCHAR variant_char;
|
typedef WCHAR variant_char;
|
||||||
|
# define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256
|
||||||
static variant_char *ossl_getenv(const char *name)
|
static variant_char *ossl_getenv(const char *name)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -37,10 +37,10 @@ static variant_char *ossl_getenv(const char *name)
|
|||||||
* just ignore |name| and use equivalent wide-char L-literal.
|
* just ignore |name| and use equivalent wide-char L-literal.
|
||||||
* As well as to ignore excessively long values...
|
* As well as to ignore excessively long values...
|
||||||
*/
|
*/
|
||||||
static WCHAR value[48];
|
static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE];
|
||||||
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48);
|
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE);
|
||||||
|
|
||||||
return (len > 0 && len < 48) ? value : NULL;
|
return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL;
|
||||||
}
|
}
|
||||||
# else
|
# else
|
||||||
typedef char variant_char;
|
typedef char variant_char;
|
||||||
@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
|
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
|
||||||
IA32CAP vec;
|
IA32CAP vec;
|
||||||
const variant_char *env;
|
const variant_char *env;
|
||||||
|
int index = 2;
|
||||||
|
|
||||||
if (trigger)
|
if (trigger)
|
||||||
return;
|
return;
|
||||||
@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((env = ossl_strchr(env, ':')) != NULL) {
|
/* Processed indexes 0, 1 */
|
||||||
IA32CAP vecx;
|
if ((env = ossl_strchr(env, ':')) != NULL)
|
||||||
|
|
||||||
env++;
|
env++;
|
||||||
off = (env[0] == '~') ? 1 : 0;
|
for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) {
|
||||||
vecx = ossl_strtouint64(env + off);
|
if ((env != NULL) && (env[0] != '\0')) {
|
||||||
if (off) {
|
/* if env[0] == ':' current index is skipped */
|
||||||
OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx;
|
if (env[0] != ':') {
|
||||||
OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32);
|
IA32CAP vecx;
|
||||||
} else {
|
|
||||||
OPENSSL_ia32cap_P[2] = (unsigned int)vecx;
|
off = (env[0] == '~') ? 1 : 0;
|
||||||
OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32);
|
vecx = ossl_strtouint64(env + off);
|
||||||
|
if (off) {
|
||||||
|
OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx;
|
||||||
|
OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32);
|
||||||
|
} else {
|
||||||
|
OPENSSL_ia32cap_P[index] = (unsigned int)vecx;
|
||||||
|
OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* skip delimiter */
|
||||||
|
if ((env = ossl_strchr(env, ':')) != NULL)
|
||||||
|
env++;
|
||||||
|
} else { /* zeroize the next two indexes */
|
||||||
|
OPENSSL_ia32cap_P[index] = 0;
|
||||||
|
OPENSSL_ia32cap_P[index + 1] = 0;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
OPENSSL_ia32cap_P[2] = 0;
|
|
||||||
OPENSSL_ia32cap_P[3] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If AVX10 is disabled, zero out its detailed cap bits */
|
||||||
|
if (!(OPENSSL_ia32cap_P[6] & (1 << 19)))
|
||||||
|
OPENSSL_ia32cap_P[9] = 0;
|
||||||
} else {
|
} else {
|
||||||
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
||||||
}
|
}
|
||||||
@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void)
|
|||||||
OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
|
OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
|
||||||
}
|
}
|
||||||
# else
|
# else
|
||||||
unsigned int OPENSSL_ia32cap_P[4];
|
unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@
|
|||||||
# include "crypto/riscv_arch.h"
|
# include "crypto/riscv_arch.h"
|
||||||
# define CPU_INFO_STR_LEN 2048
|
# define CPU_INFO_STR_LEN 2048
|
||||||
#else
|
#else
|
||||||
# define CPU_INFO_STR_LEN 128
|
# define CPU_INFO_STR_LEN 256
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* extern declaration to avoid warning */
|
/* extern declaration to avoid warning */
|
||||||
@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings)
|
|||||||
const char *env;
|
const char *env;
|
||||||
|
|
||||||
BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
|
BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
|
||||||
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx",
|
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx",
|
||||||
(unsigned long long)OPENSSL_ia32cap_P[0] |
|
(unsigned long long)OPENSSL_ia32cap_P[0] |
|
||||||
(unsigned long long)OPENSSL_ia32cap_P[1] << 32,
|
(unsigned long long)OPENSSL_ia32cap_P[1] << 32,
|
||||||
(unsigned long long)OPENSSL_ia32cap_P[2] |
|
(unsigned long long)OPENSSL_ia32cap_P[2] |
|
||||||
(unsigned long long)OPENSSL_ia32cap_P[3] << 32);
|
(unsigned long long)OPENSSL_ia32cap_P[3] << 32,
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[4] |
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[5] << 32,
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[6] |
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[7] << 32,
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[8] |
|
||||||
|
(unsigned long long)OPENSSL_ia32cap_P[9] << 32);
|
||||||
|
|
||||||
if ((env = getenv("OPENSSL_ia32cap")) != NULL)
|
if ((env = getenv("OPENSSL_ia32cap")) != NULL)
|
||||||
BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
|
BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
|
||||||
sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),
|
sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),
|
||||||
|
@ -167,7 +167,8 @@ sub ::file_end
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
|
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
|
||||||
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
|
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||||
|
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40";
|
||||||
if ($::macosx) { push (@out,"$tmp,2\n"); }
|
if ($::macosx) { push (@out,"$tmp,2\n"); }
|
||||||
elsif ($::elf) { push (@out,"$tmp,4\n"); }
|
elsif ($::elf) { push (@out,"$tmp,4\n"); }
|
||||||
else { push (@out,"$tmp\n"); }
|
else { push (@out,"$tmp\n"); }
|
||||||
|
@ -139,9 +139,10 @@ ___
|
|||||||
push(@out,"$segment ENDS\n");
|
push(@out,"$segment ENDS\n");
|
||||||
|
|
||||||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||||
|
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||||
{ my $comm=<<___;
|
{ my $comm=<<___;
|
||||||
.bss SEGMENT 'BSS'
|
.bss SEGMENT 'BSS'
|
||||||
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
|
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:10
|
||||||
.bss ENDS
|
.bss ENDS
|
||||||
___
|
___
|
||||||
# comment out OPENSSL_ia32cap_P declarations
|
# comment out OPENSSL_ia32cap_P declarations
|
||||||
|
@ -124,9 +124,10 @@ sub ::function_end_B
|
|||||||
|
|
||||||
sub ::file_end
|
sub ::file_end
|
||||||
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||||
|
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||||
{ my $comm=<<___;
|
{ my $comm=<<___;
|
||||||
${drdecor}segment .bss
|
${drdecor}segment .bss
|
||||||
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
|
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 40
|
||||||
___
|
___
|
||||||
# comment out OPENSSL_ia32cap_P declarations
|
# comment out OPENSSL_ia32cap_P declarations
|
||||||
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
|
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
|
||||||
|
@ -27,14 +27,14 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
|
|||||||
("%rdi","%rsi","%rdx","%rcx"); # Unix order
|
("%rdi","%rsi","%rdx","%rcx"); # Unix order
|
||||||
|
|
||||||
print<<___;
|
print<<___;
|
||||||
|
#include crypto/cryptlib.h
|
||||||
.extern OPENSSL_cpuid_setup
|
.extern OPENSSL_cpuid_setup
|
||||||
.hidden OPENSSL_cpuid_setup
|
.hidden OPENSSL_cpuid_setup
|
||||||
.section .init
|
.section .init
|
||||||
call OPENSSL_cpuid_setup
|
call OPENSSL_cpuid_setup
|
||||||
|
|
||||||
.hidden OPENSSL_ia32cap_P
|
.hidden OPENSSL_ia32cap_P
|
||||||
.comm OPENSSL_ia32cap_P,16,4
|
.comm OPENSSL_ia32cap_P,40,4 # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
.globl OPENSSL_atomic_add
|
.globl OPENSSL_atomic_add
|
||||||
@ -192,6 +192,7 @@ OPENSSL_ia32_cpuid:
|
|||||||
mov \$7,%eax
|
mov \$7,%eax
|
||||||
xor %ecx,%ecx
|
xor %ecx,%ecx
|
||||||
cpuid
|
cpuid
|
||||||
|
movd %eax,%xmm1 # put aside leaf 07H Max Sub-leaves
|
||||||
bt \$26,%r9d # check XSAVE bit, cleared on Knights
|
bt \$26,%r9d # check XSAVE bit, cleared on Knights
|
||||||
jc .Lnotknights
|
jc .Lnotknights
|
||||||
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
|
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
|
||||||
@ -202,9 +203,31 @@ OPENSSL_ia32_cpuid:
|
|||||||
jne .Lnotskylakex
|
jne .Lnotskylakex
|
||||||
and \$0xfffeffff,%ebx # ~(1<<16)
|
and \$0xfffeffff,%ebx # ~(1<<16)
|
||||||
# suppress AVX512F flag on Skylake-X
|
# suppress AVX512F flag on Skylake-X
|
||||||
.Lnotskylakex:
|
|
||||||
mov %ebx,8(%rdi) # save extended feature flags
|
.Lnotskylakex: # save extended feature flags
|
||||||
mov %ecx,12(%rdi)
|
mov %ebx,8(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
|
||||||
|
mov %ecx,12(%rdi) # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
|
||||||
|
mov %edx,16(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
|
||||||
|
|
||||||
|
movd %xmm1,%eax # Restore leaf 07H Max Sub-leaves
|
||||||
|
cmp \$0x1,%eax # Do we have cpuid(EAX=0x7, ECX=0x1)?
|
||||||
|
jb .Lno_extended_info
|
||||||
|
mov \$0x7,%eax
|
||||||
|
mov \$0x1,%ecx
|
||||||
|
cpuid # cpuid(EAX=0x7, ECX=0x1)
|
||||||
|
mov %eax,20(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
|
||||||
|
mov %edx,24(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
|
||||||
|
mov %ebx,28(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
|
||||||
|
mov %ecx,32(%rdi) # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
|
||||||
|
|
||||||
|
and \$0x80000,%edx # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
|
||||||
|
cmp \$0x0,%edx
|
||||||
|
je .Lno_extended_info
|
||||||
|
mov \$0x24,%eax # Have AVX10 Support, query for details
|
||||||
|
mov \$0x0,%ecx
|
||||||
|
cpuid # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
|
||||||
|
mov %ebx,36(%rdi) # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
|
||||||
|
|
||||||
.Lno_extended_info:
|
.Lno_extended_info:
|
||||||
|
|
||||||
bt \$27,%r9d # check OSXSAVE bit
|
bt \$27,%r9d # check OSXSAVE bit
|
||||||
@ -223,6 +246,9 @@ OPENSSL_ia32_cpuid:
|
|||||||
cmp \$6,%eax
|
cmp \$6,%eax
|
||||||
je .Ldone
|
je .Ldone
|
||||||
.Lclear_avx:
|
.Lclear_avx:
|
||||||
|
andl \$0xff7fffff,20(%rdi) # ~(1<<23)
|
||||||
|
# clear AVXIFMA, which is VEX-encoded
|
||||||
|
# and requires YMM state support
|
||||||
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
|
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
|
||||||
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
|
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
|
||||||
mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
|
mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
|
||||||
|
@ -137,7 +137,28 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&mov ("eax",7);
|
&mov ("eax",7);
|
||||||
&xor ("ecx","ecx");
|
&xor ("ecx","ecx");
|
||||||
&cpuid ();
|
&cpuid ();
|
||||||
&mov (&DWP(8,"edi"),"ebx"); # save extended feature flag
|
&mov (&DWP(8,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
|
||||||
|
&mov (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
|
||||||
|
&mov (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
|
||||||
|
&cmp ("eax",1); # Do we have cpuid(EAX=0x7, ECX=0x1)?
|
||||||
|
&jb (&label("no_extended_info"));
|
||||||
|
&mov ("eax",7);
|
||||||
|
&mov ("ecx",1);
|
||||||
|
&cpuid (); # cpuid(EAX=0x7, ECX=0x1)
|
||||||
|
&mov (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
|
||||||
|
&mov (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
|
||||||
|
&mov (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
|
||||||
|
&mov (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
|
||||||
|
|
||||||
|
&and ("edx",0x80000); # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
|
||||||
|
&cmp ("edx",0x0);
|
||||||
|
&je (&label("no_extended_info"));
|
||||||
|
|
||||||
|
&mov ("eax",0x24); # Have AVX10 Support, query for details
|
||||||
|
&mov ("ecx",0x0);
|
||||||
|
&cpuid (); # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
|
||||||
|
&mov (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
|
||||||
|
|
||||||
&set_label("no_extended_info");
|
&set_label("no_extended_info");
|
||||||
|
|
||||||
&bt ("ebp",27); # check OSXSAVE bit
|
&bt ("ebp",27); # check OSXSAVE bit
|
||||||
@ -154,6 +175,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&and ("esi",0xfeffffff); # clear FXSR
|
&and ("esi",0xfeffffff); # clear FXSR
|
||||||
&set_label("clear_avx");
|
&set_label("clear_avx");
|
||||||
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
|
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
|
||||||
|
&and (&DWP(20,"edi"),0xff7fffff); # ~(1<<23) clear AVXIFMA,
|
||||||
|
# which is VEX-encoded
|
||||||
|
# and requires YMM state support
|
||||||
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
|
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
|
||||||
&set_label("done");
|
&set_label("done");
|
||||||
&mov ("eax","esi");
|
&mov ("eax","esi");
|
||||||
|
@ -10,81 +10,77 @@ OPENSSL_ia32cap - the x86[_64] processor capabilities vector
|
|||||||
|
|
||||||
=head1 DESCRIPTION
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
OpenSSL supports a range of x86[_64] instruction set extensions. These
|
OpenSSL supports a range of x86[_64] instruction set extensions and
|
||||||
extensions are denoted by individual bits in capability vector returned
|
features. These extensions are denoted by individual bits or groups of bits
|
||||||
by processor in EDX:ECX register pair after executing CPUID instruction
|
stored internally as ten 32-bit capability vectors and for simplicity
|
||||||
with EAX=1 input value (see Intel Application Note #241618). This vector
|
represented logically below as five 64-bit vectors. This logical
|
||||||
is copied to memory upon toolkit initialization and used to choose
|
vector (LV) representation is used to streamline the definition of the
|
||||||
between different code paths to provide optimal performance across wide
|
OPENSSL_ia32cap environment variable.
|
||||||
range of processors. For the moment of this writing following bits are
|
|
||||||
significant:
|
Upon toolkit initialization, the capability vectors are populated through
|
||||||
|
successive executions of the CPUID instruction, after which any OPENSSL_ia32cap
|
||||||
|
environment variable capability bit modifications are applied. After toolkit
|
||||||
|
initialization is complete, populated vectors are then used to choose
|
||||||
|
between different code paths to provide optimal performance across a wide
|
||||||
|
range of x86[_64] based processors.
|
||||||
|
|
||||||
|
Further CPUID information can be found in the Intel(R) Architecture
|
||||||
|
Instruction Set Extensions Programming Reference, and the AMD64 Architecture
|
||||||
|
Programmer's Manual (Volume 3).
|
||||||
|
|
||||||
|
=head2 Notable Capability Bits for LV0
|
||||||
|
|
||||||
|
The following are notable capability bits from logical vector 0 (LV0)
|
||||||
|
resulting from the following execution of CPUID.(EAX=01H).EDX and
|
||||||
|
CPUID.(EAX=01H).ECX:
|
||||||
|
|
||||||
=over 4
|
=over 4
|
||||||
|
|
||||||
=item bit #4 denoting presence of Time-Stamp Counter.
|
=item bit #0+4 denoting presence of Time-Stamp Counter;
|
||||||
|
|
||||||
=item bit #19 denoting availability of CLFLUSH instruction;
|
=item bit #0+19 denoting availability of CLFLUSH instruction;
|
||||||
|
|
||||||
=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
|
=item bit #0+20, reserved by Intel, is used to choose among RC4 code paths;
|
||||||
|
|
||||||
=item bit #23 denoting MMX support;
|
=item bit #0+23 denoting MMX support;
|
||||||
|
|
||||||
=item bit #24, FXSR bit, denoting availability of XMM registers;
|
=item bit #0+24, FXSR bit, denoting availability of XMM registers;
|
||||||
|
|
||||||
=item bit #25 denoting SSE support;
|
=item bit #0+25 denoting SSE support;
|
||||||
|
|
||||||
=item bit #26 denoting SSE2 support;
|
=item bit #0+26 denoting SSE2 support;
|
||||||
|
|
||||||
=item bit #28 denoting Hyperthreading, which is used to distinguish
|
=item bit #0+28 denoting Hyperthreading, which is used to distinguish
|
||||||
cores with shared cache;
|
cores with shared cache;
|
||||||
|
|
||||||
=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
|
=item bit #0+30, reserved by Intel, denotes specifically Intel CPUs;
|
||||||
|
|
||||||
=item bit #33 denoting availability of PCLMULQDQ instruction;
|
=item bit #0+33 denoting availability of PCLMULQDQ instruction;
|
||||||
|
|
||||||
=item bit #41 denoting SSSE3, Supplemental SSE3, support;
|
=item bit #0+41 denoting SSSE3, Supplemental SSE3, support;
|
||||||
|
|
||||||
=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
|
=item bit #0+43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
|
||||||
|
|
||||||
=item bit #54 denoting availability of MOVBE instruction;
|
=item bit #0+54 denoting availability of MOVBE instruction;
|
||||||
|
|
||||||
=item bit #57 denoting AES-NI instruction set extension;
|
=item bit #0+57 denoting AES-NI instruction set extension;
|
||||||
|
|
||||||
=item bit #58, XSAVE bit, lack of which in combination with MOVBE is used
|
=item bit #0+58, XSAVE bit, lack of which in combination with MOVBE is used
|
||||||
to identify Atom Silvermont core;
|
to identify Atom Silvermont core;
|
||||||
|
|
||||||
=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
|
=item bit #0+59, OSXSAVE bit, denoting availability of YMM registers;
|
||||||
|
|
||||||
=item bit #60 denoting AVX extension;
|
=item bit #0+60 denoting AVX extension;
|
||||||
|
|
||||||
=item bit #62 denoting availability of RDRAND instruction;
|
=item bit #0+62 denoting availability of RDRAND instruction;
|
||||||
|
|
||||||
=back
|
=back
|
||||||
|
|
||||||
For example, in 32-bit application context clearing bit #26 at run-time
|
=head2 Notable Capability Bits for LV1
|
||||||
disables high-performance SSE2 code present in the crypto library, while
|
|
||||||
clearing bit #24 disables SSE2 code operating on 128-bit XMM register
|
|
||||||
bank. You might have to do the latter if target OpenSSL application is
|
|
||||||
executed on SSE2 capable CPU, but under control of OS that does not
|
|
||||||
enable XMM registers. Historically address of the capability vector copy
|
|
||||||
was exposed to application through OPENSSL_ia32cap_loc(), but not
|
|
||||||
anymore. Now the only way to affect the capability detection is to set
|
|
||||||
B<OPENSSL_ia32cap> environment variable prior target application start. To
|
|
||||||
give a specific example, on Intel P4 processor
|
|
||||||
C<env OPENSSL_ia32cap=0x16980010 apps/openssl>, or better yet
|
|
||||||
C<env OPENSSL_ia32cap=~0x1000000 apps/openssl> would achieve the desired
|
|
||||||
effect. Alternatively you can reconfigure the toolkit with no-sse2
|
|
||||||
option and recompile.
|
|
||||||
|
|
||||||
Less intuitive is clearing bit #28, or ~0x10000000 in the "environment
|
The following are notable capability bits from logical vector 1 (LV1)
|
||||||
variable" terms. The truth is that it's not copied from CPUID output
|
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EBX and
|
||||||
verbatim, but is adjusted to reflect whether or not the data cache is
|
CPUID.(EAX=07H,ECX=0H).ECX:
|
||||||
actually shared between logical cores. This in turn affects the decision
|
|
||||||
on whether or not expensive countermeasures against cache-timing attacks
|
|
||||||
are applied, most notably in AES assembler module.
|
|
||||||
|
|
||||||
The capability vector is further extended with EBX value returned by
|
|
||||||
CPUID with EAX=7 and ECX=0 as input. Following bits are significant:
|
|
||||||
|
|
||||||
=over 4
|
=over 4
|
||||||
|
|
||||||
@ -103,8 +99,7 @@ and RORX;
|
|||||||
|
|
||||||
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
|
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
|
||||||
|
|
||||||
=item bit #64+21 denoting availability of VPMADD52[LH]UQ instructions,
|
=item bit #64+21 denoting availability of AVX512IFMA extension;
|
||||||
aka AVX512IFMA extension;
|
|
||||||
|
|
||||||
=item bit #64+29 denoting availability of SHA extension;
|
=item bit #64+29 denoting availability of SHA extension;
|
||||||
|
|
||||||
@ -118,10 +113,109 @@ aka AVX512IFMA extension;
|
|||||||
|
|
||||||
=back
|
=back
|
||||||
|
|
||||||
To control this extended capability word use C<:> as delimiter when
|
=head2 Notable Capability Bits for LV2
|
||||||
setting up B<OPENSSL_ia32cap> environment variable. For example assigning
|
|
||||||
C<:~0x20> would disable AVX2 code paths, and C<:0> - all post-AVX
|
The following are notable capability bits from logical vector 2 (LV2)
|
||||||
extensions.
|
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EDX and
|
||||||
|
CPUID.(EAX=07H,ECX=1H).EAX:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item bit #128+15 denoting availability of Hybrid CPU;
|
||||||
|
|
||||||
|
=item bit #128+29 denoting support for IA32_ARCH_CAPABILITIES MSR;
|
||||||
|
|
||||||
|
=item bit #128+32 denoting availability of SHA512 extension;
|
||||||
|
|
||||||
|
=item bit #128+33 denoting availability of SM3 extension;
|
||||||
|
|
||||||
|
=item bit #128+34 denoting availability of SM4 extension;
|
||||||
|
|
||||||
|
=item bit #128+55 denoting availability of AVX-IFMA extension;
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 Notable Capability Bits for LV3
|
||||||
|
|
||||||
|
The following are notable capability bits from logical vector 3 (LV3)
|
||||||
|
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).EDX and
|
||||||
|
CPUID.(EAX=07H,ECX=1H).EBX:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item bit #192+19 denoting availability of AVX10 Converged Vector ISA extension;
|
||||||
|
|
||||||
|
=item bit #192+21 denoting availability of APX_F extension;
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 Notable Capability Bits for LV4
|
||||||
|
|
||||||
|
The following are notable capability bits from logical vector 4 (LV4)
|
||||||
|
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).ECX and
|
||||||
|
CPUID.(EAX=24H,ECX=0H).EBX:
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item bits #256+32+[0:7] denoting AVX10 Converged Vector ISA Version (8 bits);
|
||||||
|
|
||||||
|
=item bit #256+48 denoting AVX10 XMM support;
|
||||||
|
|
||||||
|
=item bit #256+49 denoting AVX10 YMM support;
|
||||||
|
|
||||||
|
=item bit #256+50 denoting AVX10 ZMM support;
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 OPENSSL_ia32cap environment variable
|
||||||
|
|
||||||
|
The B<OPENSSL_ia32cap> environment variable provides a mechanism to override
|
||||||
|
the default capability vector values at library initialization time.
|
||||||
|
The variable consists of a series of 64-bit numbers representing each
|
||||||
|
of the logical vectors (LV) described above. Each value is delimited by a 'B<:>'.
|
||||||
|
Decimal, octal, and hexadecimal representations of the values are supported.
|
||||||
|
|
||||||
|
C<env OPENSSL_ia32cap=LV0:LV1:LV2:LV3:LV4>
|
||||||
|
|
||||||
|
Used in this form, each non-null logical vector will I<overwrite> the entire corresponding
|
||||||
|
capability vector pair with the provided value. To keep compatibility with the
|
||||||
|
behaviour of the original OPENSSL_ia32cap environment variable
|
||||||
|
C<env OPENSSL_ia32cap=LV0:LV1>, any trailing capability vector pairs that are omitted will be set to zero.
|
||||||
|
|
||||||
|
To illustrate, the following will zero all capability bits in logical vectors 1 and further
|
||||||
|
(disable all post-AVX extensions):
|
||||||
|
|
||||||
|
C<env OPENSSL_ia32cap=:0>
|
||||||
|
|
||||||
|
The following will zero all capability bits in logical vectors 2 and further:
|
||||||
|
|
||||||
|
C<env OPENSSL_ia32cap=::0>
|
||||||
|
|
||||||
|
The following will zero all capability bits only in logical vector 1:
|
||||||
|
C<env OPENSSL_ia32cap=:0::::>
|
||||||
|
|
||||||
|
A more likely usage scenario would be to disable specific instruction set extensions.
|
||||||
|
The 'B<~>' character is used to specify a bit mask of the extensions to be disabled for
|
||||||
|
a particular logical vector.
|
||||||
|
|
||||||
|
To illustrate, the following will disable AVX2 code paths and further extensions:
|
||||||
|
|
||||||
|
C<env OPENSSL_ia32cap=:~0x20>
|
||||||
|
|
||||||
|
The following will disable AESNI (LV0 bit 57) and VAES (LV1 bit 41)
|
||||||
|
extensions and therefore any code paths using those extensions but leave
|
||||||
|
the rest of the logical vectors unchanged:
|
||||||
|
|
||||||
|
C<env OPENSSL_ia32cap=~0x200000000000000:~0x20000000000:~0x0:~0x0:~0x0>
|
||||||
|
|
||||||
|
=head1 NOTES
|
||||||
|
|
||||||
|
Not all capability bits are copied from CPUID output verbatim. An example
|
||||||
|
of this is the somewhat less intuitive clearing of LV0 bit #28, or ~0x10000000
|
||||||
|
in the "environment variable" terms. It has been adjusted to reflect whether or
|
||||||
|
not the data cache is actually shared between logical cores. This in turn affects
|
||||||
|
the decision on whether or not expensive countermeasures against cache-timing attacks
|
||||||
|
are applied, most notably in AES assembler module.
|
||||||
|
|
||||||
=head1 RETURN VALUES
|
=head1 RETURN VALUES
|
||||||
|
|
||||||
|
@ -36,8 +36,10 @@ void OPENSSL_cpuid_setup(void);
|
|||||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||||
defined(__x86_64) || defined(__x86_64__) || \
|
defined(__x86_64) || defined(__x86_64__) || \
|
||||||
defined(_M_AMD64) || defined(_M_X64)
|
defined(_M_AMD64) || defined(_M_X64)
|
||||||
|
# define OPENSSL_IA32CAP_P_MAX_INDEXES 10
|
||||||
extern unsigned int OPENSSL_ia32cap_P[];
|
extern unsigned int OPENSSL_ia32cap_P[];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void OPENSSL_showfatal(const char *fmta, ...);
|
void OPENSSL_showfatal(const char *fmta, ...);
|
||||||
int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx);
|
int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx);
|
||||||
void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);
|
void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user