From be807c98a6463f18bed2c5ea111a02b670b20f57 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 1 Jan 2025 21:42:10 +0100 Subject: [PATCH 1/5] Identify all cores, group by performance and report the fastest TARGET --- cpuid_arm64.c | 184 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 123 insertions(+), 61 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index fbb78e7943..3e0022b845 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -25,6 +25,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ +#include #include #ifdef __APPLE__ #include @@ -33,6 +34,20 @@ size_t length=sizeof(value); int64_t value64; size_t length64=sizeof(value64); #endif +#if (defined OS_LINUX || defined OS_ANDROID) +#include +#include +#ifndef HWCAP_CPUID +#define HWCAP_CPUID (1 << 11) +#endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#define get_cpu_ftr(id, var) ({ \ + __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \ + }) +#endif #define CPU_UNKNOWN 0 #define CPU_ARMV8 1 @@ -42,11 +57,9 @@ size_t length64=sizeof(value64); #define CPU_CORTEXA57 3 #define CPU_CORTEXA72 4 #define CPU_CORTEXA73 5 -#define CPU_CORTEXA76 23 #define CPU_NEOVERSEN1 11 #define CPU_NEOVERSEV1 16 #define CPU_NEOVERSEN2 17 -#define CPU_NEOVERSEV2 24 #define CPU_CORTEXX1 18 #define CPU_CORTEXX2 19 #define CPU_CORTEXA510 20 @@ -91,9 +104,7 @@ static char *cpuname[] = { "CORTEXX2", "CORTEXA510", "CORTEXA710", - "FT2000", - "CORTEXA76", - "NEOVERSEV2" + "FT2000" }; static char *cpuname_lower[] = { @@ -119,15 +130,17 @@ static char *cpuname_lower[] = { "cortexx2", "cortexa510", "cortexa710", - "ft2000", - "cortexa76", - "neoversev2" + "ft2000" }; +static int cpulowperf=0; +static int cpumidperf=0; +static int cpuhiperf=0; + int get_feature(char *search) { -#if defined( __linux ) || defined( __NetBSD__ ) +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -158,33 +171,108 @@ int get_feature(char *search) #endif return(0); } - +static int cpusort(const void *model1, const void *model2) +{ + return (*(int*)model2-*(int*)model1); +} int detect(void) { -#if defined( __linux ) || defined( __NetBSD__ ) - +#ifdef __linux + int n,i,ii; + int midr_el1; + int implementer; + int cpucap[1024]; + int cpucores[1024]; FILE *infile; - char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; + char cpupart[6],cpuimpl[6]; + char *cpu_impl=NULL,*cpu_pt=NULL; + char buffer[2048], *p, *cpu_part = NULL, *cpu_implementer = NULL; p = (char *) NULL ; - - infile = fopen("/proc/cpuinfo", "r"); - while (fgets(buffer, sizeof(buffer), infile)) { - if ((cpu_part != NULL) && (cpu_implementer != NULL)) { - break; + cpulowperf=cpumidperf=cpuhiperf=0; + for (i=0;i<1024;i++)cpucores[i]=0; + n=0; + infile = fopen("/sys/devices/system/cpu/possible", "r"); + if (!infile) { + infile = fopen("/proc/cpuinfo", "r"); + while (fgets(buffer, sizeof(buffer), infile)) { + if (!strncmp("processor", buffer, 9)) + n++; } - - if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) { - cpu_part = strchr(buffer, ':') + 2; - cpu_part = strdup(cpu_part); - } else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) { - cpu_implementer = strchr(buffer, ':') + 2; - cpu_implementer = strdup(cpu_implementer); + } else { + fgets(buffer, sizeof(buffer), infile); + sscanf(buffer,"0-%d",&n); + n++; + } + fclose(infile); + + cpu_implementer=NULL; + for (i=0;i= 0xd4b) cpuhiperf++; + else + if (cpucores[ii] >= 0xd07) cpumidperf++; + else cpulowperf++; + } + else cpulowperf++; + } + fclose(infile); + break; + } else { + (void)fgets(buffer, sizeof(buffer), infile); + midr_el1=strtoul(buffer,NULL,16); + fclose(infile); + implementer = (midr_el1 >> 24) & 0xFF; + cpucores[i] = (midr_el1 >> 4) & 0xFFF; + sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capacity",i); + infile= fopen(buffer,"r"); + if (!infile) { + if (implementer== 65) { + if (cpucores[i] >= 0xd4b) cpuhiperf++; + else + if (cpucores[i] >= 0xd07) cpumidperf++; + else cpulowperf++; + } + else cpulowperf++; + } else { + (void)fgets(buffer, sizeof(buffer), infile); + sscanf(buffer,"%d",&cpucap[i]); + if (cpucap[i] >= 1000) cpuhiperf++; + else + if (cpucap[i] >= 500) cpumidperf++; + else cpulowperf++; + fclose(infile); + } } + sprintf(cpuimpl,"0x%2x",implementer); + cpu_implementer=strdup(cpuimpl); } - - fclose(infile); + qsort(cpucores,1024,sizeof(int),cpusort); + sprintf(cpupart,"0x%3x",cpucores[0]); + cpu_part=strdup(cpupart); if(cpu_part != NULL && cpu_implementer != NULL) { // Arm if (strstr(cpu_implementer, "0x41")) { @@ -216,10 +304,6 @@ int detect(void) return CPU_CORTEXX2; else if (strstr(cpu_part, "0xd4e")) //X3 return CPU_CORTEXX2; - else if (strstr(cpu_part, "0xd4f")) //NVIDIA Grace et al. - return CPU_NEOVERSEV2; - else if (strstr(cpu_part, "0xd0b")) - return CPU_CORTEXA76; } // Qualcomm else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) @@ -280,8 +364,6 @@ int detect(void) sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 - if (value64 == 2271604202) return CPU_VORTEX; //A16/M3 - if (value64 == 1867590060) return CPU_VORTEX; //M4 #endif return CPU_ARMV8; #endif @@ -314,7 +396,7 @@ void get_cpucount(void) { int n=0; -#if defined( __linux ) || defined( __NetBSD__ ) +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -331,6 +413,12 @@ int n=0; fclose(infile); printf("#define NUM_CORES %d\n",n); + if (cpulowperf >0) + printf("#define NUM_CORES_LP %d\n",cpulowperf); + if (cpumidperf >0) + printf("#define NUM_CORES_MP %d\n",cpumidperf); + if (cpuhiperf >0) + printf("#define NUM_CORES_HP %d\n",cpuhiperf); #endif #ifdef __APPLE__ sysctlbyname("hw.physicalcpu_max",&value,&length,NULL,0); @@ -347,7 +435,6 @@ void get_cpuconfig(void) printf("#define ARMV8\n"); printf("#define HAVE_NEON\n"); // This shouldn't be necessary printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary - int d = detect(); switch (d) { @@ -402,8 +489,6 @@ void get_cpuconfig(void) break; case CPU_NEOVERSEV1: - printf("#define HAVE_SVE 1\n"); - case CPU_CORTEXA76: printf("#define %s\n", cpuname[d]); printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_LINESIZE 64\n"); @@ -431,32 +516,12 @@ void get_cpuconfig(void) printf("#define L2_ASSOCIATIVE 8\n"); printf("#define DTB_DEFAULT_ENTRIES 48\n"); printf("#define DTB_SIZE 4096\n"); - printf("#define HAVE_SVE 1\n"); - break; - case CPU_NEOVERSEV2: - printf("#define ARMV9\n"); - printf("#define HAVE_SVE 1\n"); - printf("#define %s\n", cpuname[d]); - printf("#define L1_CODE_SIZE 65536\n"); - printf("#define L1_CODE_LINESIZE 64\n"); - printf("#define L1_CODE_ASSOCIATIVE 4\n"); - printf("#define L1_DATA_SIZE 65536\n"); - printf("#define L1_DATA_LINESIZE 64\n"); - printf("#define L1_DATA_ASSOCIATIVE 4\n"); - printf("#define L2_SIZE 1048576\n"); - printf("#define L2_LINESIZE 64\n"); - printf("#define L2_ASSOCIATIVE 8\n"); - // L1 Data TLB = 48 entries - // L2 Data TLB = 2048 entries - printf("#define DTB_DEFAULT_ENTRIES 48\n"); - printf("#define DTB_SIZE 4096\n"); // Set to 4096 for symmetry with other configs. break; case CPU_CORTEXA510: case CPU_CORTEXA710: case CPU_CORTEXX1: case CPU_CORTEXX2: printf("#define ARMV9\n"); - printf("#define HAVE_SVE 1\n"); printf("#define %s\n", cpuname[d]); printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_LINESIZE 64\n"); @@ -559,8 +624,6 @@ void get_cpuconfig(void) case CPU_VORTEX: printf("#define VORTEX \n"); #ifdef __APPLE__ - sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); - if (value64 == 1867590060) printf("#define HAVE_SME 1\n");; //M4 sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); printf("#define L1_CODE_SIZE %lld \n",value64); sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); @@ -575,7 +638,6 @@ void get_cpuconfig(void) break; case CPU_A64FX: printf("#define A64FX\n"); - printf("#define HAVE_SVE 1\n"); printf("#define L1_CODE_SIZE 65535\n"); printf("#define L1_DATA_SIZE 65535\n"); printf("#define L1_DATA_LINESIZE 256\n"); @@ -608,7 +670,7 @@ void get_libname(void) void get_features(void) { -#if defined( __linux ) || defined( __NetBSD__ ) +#ifdef __linux FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; From 3c3d1c48495091de690bc9a480702528465e1bfa Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 1 Jan 2025 22:21:29 +0100 Subject: [PATCH 2/5] Identify all cores and select the most performant one as TARGET --- driver/others/dynamic_arm64.c | 49 ++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index dc88d816fb..53ec99e476 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -271,15 +271,52 @@ static gotoblas_t *get_coretype(void) { if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { #ifdef __linux + int i; + int ncores=0; + int p,cpucap,cpulowperf=0,cpumidperf=0,cpuhiperf=0; FILE *infile; char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; p = (char *) NULL ; - infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r"); - if (!infile) return NULL; - (void)fgets(buffer, sizeof(buffer), infile); - midr_el1=strtoul(buffer,NULL,16); - fclose(infile); -#else + infile = fopen("/sys/devices/system/cpu/possible","r"); + if (infile) { + (void)fgets(buffer, sizeof(buffer), infile); + sscanf(buffer,"0-%d",&ncores); + fclose (infile); + ncores++; + } else { + infile = fopen("/proc/cpuinfo","r"); + while (fgets(buffer, sizeof(buffer), infile)) { + if (!strncmp("processor", buffer, 9)) + ncores++; + } + } + for (i=0;i> 24) & 0xFF; + p = (midr_el1 >> 4) & 0xFFF; + fclose(infile); + sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capability",i); + infile = fopen(buffer,"r"); + if (infile) { + (void)fgets(buffer, sizeof(buffer), infile); + cpucap=strtoul(buffer,NULL,16); + fclose(infile); + if (cpucap >= 1000) cpuhiperf++; + else if (cpucap >=500) cpumidperf++; + else cpulowperf++; + if (cpucap >=1000) part = p; + } else if (implementer == 0x41 ){ + if (p >= 0xd4b) cpuhiperf++: + else if (p>= 0xd07) cpumidperf++; + else cpulowperf++; + } else cpulowperf++; + } + if (!part) part = p; +#else snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); openblas_warning(1, coremsg); return NULL; From ed957916182627d7cd20efc5a8dfb400b1c26457 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 1 Jan 2025 23:27:38 +0100 Subject: [PATCH 3/5] fix conflicting variables --- driver/others/dynamic_arm64.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 53ec99e476..b1aad68d9b 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -273,10 +273,10 @@ static gotoblas_t *get_coretype(void) { #ifdef __linux int i; int ncores=0; - int p,cpucap,cpulowperf=0,cpumidperf=0,cpuhiperf=0; + int prt,cpucap,cpulowperf=0,cpumidperf=0,cpuhiperf=0; FILE *infile; - char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; - p = (char *) NULL ; + char buffer[512], *cpu_part = NULL, *cpu_implementer = NULL; + infile = fopen("/sys/devices/system/cpu/possible","r"); if (infile) { (void)fgets(buffer, sizeof(buffer), infile); @@ -297,7 +297,7 @@ static gotoblas_t *get_coretype(void) { (void)fgets(buffer, sizeof(buffer), infile); midr_el1=strtoul(buffer,NULL,16); implementer = (midr_el1 >> 24) & 0xFF; - p = (midr_el1 >> 4) & 0xFFF; + prt = (midr_el1 >> 4) & 0xFFF; fclose(infile); sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capability",i); infile = fopen(buffer,"r"); @@ -308,14 +308,14 @@ static gotoblas_t *get_coretype(void) { if (cpucap >= 1000) cpuhiperf++; else if (cpucap >=500) cpumidperf++; else cpulowperf++; - if (cpucap >=1000) part = p; + if (cpucap >=1000) part = prt; } else if (implementer == 0x41 ){ - if (p >= 0xd4b) cpuhiperf++: - else if (p>= 0xd07) cpumidperf++; + if (prt >= 0xd4b) cpuhiperf++: + else if (prt>= 0xd07) cpumidperf++; else cpulowperf++; } else cpulowperf++; } - if (!part) part = p; + if (!part) part = prt; #else snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); openblas_warning(1, coremsg); @@ -323,7 +323,7 @@ static gotoblas_t *get_coretype(void) { #endif } else { get_cpu_ftr(MIDR_EL1, midr_el1); - } + /* * MIDR_EL1 * @@ -334,7 +334,7 @@ static gotoblas_t *get_coretype(void) { */ implementer = (midr_el1 >> 24) & 0xFF; part = (midr_el1 >> 4) & 0xFFF; - + } switch(implementer) { case 0x41: // ARM From a182251284835e5fb56c2074b8bb08c04ebbc9b0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 2 Jan 2025 00:04:33 +0100 Subject: [PATCH 4/5] fix typo --- driver/others/dynamic_arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index b1aad68d9b..37991184ac 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -310,7 +310,7 @@ static gotoblas_t *get_coretype(void) { else cpulowperf++; if (cpucap >=1000) part = prt; } else if (implementer == 0x41 ){ - if (prt >= 0xd4b) cpuhiperf++: + if (prt >= 0xd4b) cpuhiperf++; else if (prt>= 0xd07) cpumidperf++; else cpulowperf++; } else cpulowperf++; From 7fd73a40dc3d949e00e62c7a13f7d376c6c1464b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 2 Jan 2025 06:13:07 -0800 Subject: [PATCH 5/5] Fix accidentally dropped cpu ids and add MacOS performance groups --- cpuid_arm64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 3e0022b845..47e8ffcd61 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -57,9 +57,11 @@ size_t length64=sizeof(value64); #define CPU_CORTEXA57 3 #define CPU_CORTEXA72 4 #define CPU_CORTEXA73 5 +#define CPU_CORTEXA76 23 #define CPU_NEOVERSEN1 11 #define CPU_NEOVERSEV1 16 #define CPU_NEOVERSEN2 17 +#define CPU_NEOVERSEV2 24 #define CPU_CORTEXX1 18 #define CPU_CORTEXX2 19 #define CPU_CORTEXA510 20 @@ -104,7 +106,9 @@ static char *cpuname[] = { "CORTEXX2", "CORTEXA510", "CORTEXA710", - "FT2000" + "FT2000", + "CORTEXA76", + "NEOVERSEV2" }; static char *cpuname_lower[] = { @@ -130,7 +134,9 @@ static char *cpuname_lower[] = { "cortexx2", "cortexa510", "cortexa710", - "ft2000" + "ft2000", + "cortexa76", + "neoversev2" }; static int cpulowperf=0; @@ -140,7 +146,7 @@ static int cpuhiperf=0; int get_feature(char *search) { -#ifdef __linux +#if defined( __linux ) || defined( __NetBSD__ ) FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -179,7 +185,7 @@ static int cpusort(const void *model1, const void *model2) int detect(void) { -#ifdef __linux +#if defined( __linux ) || defined( __NetBSD__ ) int n,i,ii; int midr_el1; int implementer; @@ -243,8 +249,8 @@ int detect(void) break; } else { (void)fgets(buffer, sizeof(buffer), infile); - midr_el1=strtoul(buffer,NULL,16); - fclose(infile); + midr_el1=strtoul(buffer,NULL,16); + fclose(infile); implementer = (midr_el1 >> 24) & 0xFF; cpucores[i] = (midr_el1 >> 4) & 0xFFF; sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capacity",i); @@ -304,6 +310,10 @@ int detect(void) return CPU_CORTEXX2; else if (strstr(cpu_part, "0xd4e")) //X3 return CPU_CORTEXX2; + else if (strstr(cpu_part, "0xd4f")) //NVIDIA Grace et al. + return CPU_NEOVERSEV2; + else if (strstr(cpu_part, "0xd0b")) + return CPU_CORTEXA76; } // Qualcomm else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) @@ -361,9 +371,20 @@ int detect(void) } #else #ifdef __APPLE__ + sysctlbyname("hw.ncpu",&value64,&length64,NULL,0); + cpulowperf=value64; + sysctlbyname("hw.nperflevels",&value64,&length64,NULL,0); + if (value64 > 1) { + sysctlbyname("hw.perflevel0.cpusperl",&value64,&length64,NULL,0); + cpuhiperf=value64; + sysctlbyname("hw.perflevel1.cpusperl",&value64,&length64,NULL,0); + cpulowperf=value64; + } sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 + if (value64 == 2271604202) return CPU_VORTEX; //A16/M3 + if (value64 == 1867590060) return CPU_VORTEX; //M4 #endif return CPU_ARMV8; #endif @@ -396,7 +417,7 @@ void get_cpucount(void) { int n=0; -#ifdef __linux +#if defined( __linux ) || defined( __NetBSD__ ) FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ; @@ -423,6 +444,12 @@ int n=0; #ifdef __APPLE__ sysctlbyname("hw.physicalcpu_max",&value,&length,NULL,0); printf("#define NUM_CORES %d\n",value); + if (cpulowperf >0) + printf("#define NUM_CORES_LP %d\n",cpulowperf); + if (cpumidperf >0) + printf("#define NUM_CORES_MP %d\n",cpumidperf); + if (cpuhiperf >0) + printf("#define NUM_CORES_HP %d\n",cpuhiperf); #endif } @@ -489,6 +516,8 @@ void get_cpuconfig(void) break; case CPU_NEOVERSEV1: + printf("#define HAVE_SVE 1\n"); + case CPU_CORTEXA76: printf("#define %s\n", cpuname[d]); printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_LINESIZE 64\n"); @@ -516,12 +545,32 @@ void get_cpuconfig(void) printf("#define L2_ASSOCIATIVE 8\n"); printf("#define DTB_DEFAULT_ENTRIES 48\n"); printf("#define DTB_SIZE 4096\n"); + printf("#define HAVE_SVE 1\n"); break; + case CPU_NEOVERSEV2: + printf("#define ARMV9\n"); + printf("#define HAVE_SVE 1\n"); + printf("#define %s\n", cpuname[d]); + printf("#define L1_CODE_SIZE 65536\n"); + printf("#define L1_CODE_LINESIZE 64\n"); + printf("#define L1_CODE_ASSOCIATIVE 4\n"); + printf("#define L1_DATA_SIZE 65536\n"); + printf("#define L1_DATA_LINESIZE 64\n"); + printf("#define L1_DATA_ASSOCIATIVE 4\n"); + printf("#define L2_SIZE 1048576\n"); + printf("#define L2_LINESIZE 64\n"); + printf("#define L2_ASSOCIATIVE 8\n"); + // L1 Data TLB = 48 entries + // L2 Data TLB = 2048 entries + printf("#define DTB_DEFAULT_ENTRIES 48\n"); + printf("#define DTB_SIZE 4096\n"); // Set to 4096 for symmetry with other configs. + break; case CPU_CORTEXA510: case CPU_CORTEXA710: case CPU_CORTEXX1: case CPU_CORTEXX2: printf("#define ARMV9\n"); + printf("#define HAVE_SVE 1\n"); printf("#define %s\n", cpuname[d]); printf("#define L1_CODE_SIZE 65536\n"); printf("#define L1_CODE_LINESIZE 64\n"); @@ -638,6 +687,7 @@ void get_cpuconfig(void) break; case CPU_A64FX: printf("#define A64FX\n"); + printf("#define HAVE_SVE 1\n"); printf("#define L1_CODE_SIZE 65535\n"); printf("#define L1_DATA_SIZE 65535\n"); printf("#define L1_DATA_LINESIZE 256\n"); @@ -670,7 +720,7 @@ void get_libname(void) void get_features(void) { -#ifdef __linux +#if defined( __linux ) || defined( __NetBSD__ ) FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL ;