From 9600883c248c7e3d698cabecc3179f28798bdd57 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 30 Jun 2020 11:59:03 -0700 Subject: [PATCH] [X86] Move frontend CPU feature initialization to a look up table based implementation. NFCI This replaces the switch statement implementation in the clang's X86.cpp with a lookup table in X86TargetParser.cpp. I've used constexpr and copy of the FeatureBitset from SubtargetFeature.h to store the features in a lookup table. After the lookup the bitset is translated into strings for use by the rest of the frontend code. I had to modify the implementation of the FeatureBitset to avoid bugs in gcc 5.5 constexpr handling. It seems to not like the same array entry to be used on the left side and right hand side of an assignment or &= or |=. I've also used uint32_t instead of uint64_t and sized based on the X86::CPU_FEATURE_MAX. I've initialized the features for different CPUs outside of the table so that we can express inheritance in an adhoc way. This was one of the big limitations of the switch and we had resorted to labels and gotos. Differential Revision: https://reviews.llvm.org/D82731 --- include/llvm/Support/X86TargetParser.def | 131 +++++--- include/llvm/Support/X86TargetParser.h | 7 +- lib/Support/X86TargetParser.cpp | 388 ++++++++++++++++++----- 3 files changed, 396 insertions(+), 130 deletions(-) diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def index 58c6e4320a0..c3a144e2dda 100644 --- a/include/llvm/Support/X86TargetParser.def +++ b/include/llvm/Support/X86TargetParser.def @@ -125,56 +125,97 @@ X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE) // This macro is used for cpu types present in compiler-rt/libgcc. 
#ifndef X86_FEATURE_COMPAT -#define X86_FEATURE_COMPAT(ENUM, STR) X86_FEATURE(ENUM) +#define X86_FEATURE_COMPAT(ENUM, STR) X86_FEATURE(ENUM, STR) #endif #ifndef X86_FEATURE -#define X86_FEATURE(ENUM) +#define X86_FEATURE(ENUM, STR) #endif -X86_FEATURE_COMPAT(FEATURE_CMOV, "cmov") -X86_FEATURE_COMPAT(FEATURE_MMX, "mmx") -X86_FEATURE_COMPAT(FEATURE_POPCNT, "popcnt") -X86_FEATURE_COMPAT(FEATURE_SSE, "sse") -X86_FEATURE_COMPAT(FEATURE_SSE2, "sse2") -X86_FEATURE_COMPAT(FEATURE_SSE3, "sse3") -X86_FEATURE_COMPAT(FEATURE_SSSE3, "ssse3") -X86_FEATURE_COMPAT(FEATURE_SSE4_1, "sse4.1") -X86_FEATURE_COMPAT(FEATURE_SSE4_2, "sse4.2") -X86_FEATURE_COMPAT(FEATURE_AVX, "avx") -X86_FEATURE_COMPAT(FEATURE_AVX2, "avx2") -X86_FEATURE_COMPAT(FEATURE_SSE4_A, "sse4a") -X86_FEATURE_COMPAT(FEATURE_FMA4, "fma4") -X86_FEATURE_COMPAT(FEATURE_XOP, "xop") -X86_FEATURE_COMPAT(FEATURE_FMA, "fma") -X86_FEATURE_COMPAT(FEATURE_AVX512F, "avx512f") -X86_FEATURE_COMPAT(FEATURE_BMI, "bmi") -X86_FEATURE_COMPAT(FEATURE_BMI2, "bmi2") -X86_FEATURE_COMPAT(FEATURE_AES, "aes") -X86_FEATURE_COMPAT(FEATURE_PCLMUL, "pclmul") -X86_FEATURE_COMPAT(FEATURE_AVX512VL, "avx512vl") -X86_FEATURE_COMPAT(FEATURE_AVX512BW, "avx512bw") -X86_FEATURE_COMPAT(FEATURE_AVX512DQ, "avx512dq") -X86_FEATURE_COMPAT(FEATURE_AVX512CD, "avx512cd") -X86_FEATURE_COMPAT(FEATURE_AVX512ER, "avx512er") -X86_FEATURE_COMPAT(FEATURE_AVX512PF, "avx512pf") -X86_FEATURE_COMPAT(FEATURE_AVX512VBMI, "avx512vbmi") -X86_FEATURE_COMPAT(FEATURE_AVX512IFMA, "avx512ifma") -X86_FEATURE_COMPAT(FEATURE_AVX5124VNNIW, "avx5124vnniw") -X86_FEATURE_COMPAT(FEATURE_AVX5124FMAPS, "avx5124fmaps") -X86_FEATURE_COMPAT(FEATURE_AVX512VPOPCNTDQ, "avx512vpopcntdq") -X86_FEATURE_COMPAT(FEATURE_AVX512VBMI2, "avx512vbmi2") -X86_FEATURE_COMPAT(FEATURE_GFNI, "gfni") -X86_FEATURE_COMPAT(FEATURE_VPCLMULQDQ, "vpclmulqdq") -X86_FEATURE_COMPAT(FEATURE_AVX512VNNI, "avx512vnni") -X86_FEATURE_COMPAT(FEATURE_AVX512BITALG, "avx512bitalg") -X86_FEATURE_COMPAT(FEATURE_AVX512BF16, "avx512bf16") 
-X86_FEATURE_COMPAT(FEATURE_AVX512VP2INTERSECT, "avx512vp2intersect") +X86_FEATURE_COMPAT(CMOV, "cmov") +X86_FEATURE_COMPAT(MMX, "mmx") +X86_FEATURE_COMPAT(POPCNT, "popcnt") +X86_FEATURE_COMPAT(SSE, "sse") +X86_FEATURE_COMPAT(SSE2, "sse2") +X86_FEATURE_COMPAT(SSE3, "sse3") +X86_FEATURE_COMPAT(SSSE3, "ssse3") +X86_FEATURE_COMPAT(SSE4_1, "sse4.1") +X86_FEATURE_COMPAT(SSE4_2, "sse4.2") +X86_FEATURE_COMPAT(AVX, "avx") +X86_FEATURE_COMPAT(AVX2, "avx2") +X86_FEATURE_COMPAT(SSE4_A, "sse4a") +X86_FEATURE_COMPAT(FMA4, "fma4") +X86_FEATURE_COMPAT(XOP, "xop") +X86_FEATURE_COMPAT(FMA, "fma") +X86_FEATURE_COMPAT(AVX512F, "avx512f") +X86_FEATURE_COMPAT(BMI, "bmi") +X86_FEATURE_COMPAT(BMI2, "bmi2") +X86_FEATURE_COMPAT(AES, "aes") +X86_FEATURE_COMPAT(PCLMUL, "pclmul") +X86_FEATURE_COMPAT(AVX512VL, "avx512vl") +X86_FEATURE_COMPAT(AVX512BW, "avx512bw") +X86_FEATURE_COMPAT(AVX512DQ, "avx512dq") +X86_FEATURE_COMPAT(AVX512CD, "avx512cd") +X86_FEATURE_COMPAT(AVX512ER, "avx512er") +X86_FEATURE_COMPAT(AVX512PF, "avx512pf") +X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi") +X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma") +X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw") +X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps") +X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq") +X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2") +X86_FEATURE_COMPAT(GFNI, "gfni") +X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq") +X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni") +X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg") +X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16") +X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect") // Features below here are not in libgcc/compiler-rt. 
-X86_FEATURE (FEATURE_MOVBE) -X86_FEATURE (FEATURE_ADX) -X86_FEATURE (FEATURE_EM64T) -X86_FEATURE (FEATURE_CLFLUSHOPT) -X86_FEATURE (FEATURE_SHA) +X86_FEATURE (3DNOW, "3dnow") +X86_FEATURE (3DNOWA, "3dnowa") +X86_FEATURE (ADX, "adx") +X86_FEATURE (CLDEMOTE, "cldemote") +X86_FEATURE (CLFLUSHOPT, "clflushopt") +X86_FEATURE (CLWB, "clwb") +X86_FEATURE (CLZERO, "clzero") +X86_FEATURE (CMPXCHG16B, "cx16") +X86_FEATURE (CMPXCHG8B, "cx8") +X86_FEATURE (EM64T, nullptr) +X86_FEATURE (ENQCMD, "enqcmd") +X86_FEATURE (F16C, "f16c") +X86_FEATURE (FSGSBASE, "fsgsbase") +X86_FEATURE (FXSR, "fxsr") +X86_FEATURE (INVPCID, "invpcid") +X86_FEATURE (LWP, "lwp") +X86_FEATURE (LZCNT, "lzcnt") +X86_FEATURE (MOVBE, "movbe") +X86_FEATURE (MOVDIR64B, "movdir64b") +X86_FEATURE (MOVDIRI, "movdiri") +X86_FEATURE (MWAITX, "mwaitx") +X86_FEATURE (PCONFIG, "pconfig") +X86_FEATURE (PKU, "pku") +X86_FEATURE (PREFETCHWT1, "prefetchwt1") +X86_FEATURE (PRFCHW, "prfchw") +X86_FEATURE (PTWRITE, "ptwrite") +X86_FEATURE (RDPID, "rdpid") +X86_FEATURE (RDRND, "rdrnd") +X86_FEATURE (RDSEED, "rdseed") +X86_FEATURE (RTM, "rtm") +X86_FEATURE (SAHF, "sahf") +X86_FEATURE (SERIALIZE, "serialize") +X86_FEATURE (SGX, "sgx") +X86_FEATURE (SHA, "sha") +X86_FEATURE (SHSTK, "shstk") +X86_FEATURE (SSE4A, "sse4a") +X86_FEATURE (TBM, "tbm") +X86_FEATURE (TSXLDTRK, "tsxldtrk") +X86_FEATURE (VAES, "vaes") +X86_FEATURE (WAITPKG, "waitpkg") +X86_FEATURE (WBNOINVD, "wbnoinvd") +X86_FEATURE (X87, "x87") +X86_FEATURE (XSAVE, "xsave") +X86_FEATURE (XSAVEC, "xsavec") +X86_FEATURE (XSAVEOPT, "xsaveopt") +X86_FEATURE (XSAVES, "xsaves") #undef X86_FEATURE_COMPAT #undef X86_FEATURE diff --git a/include/llvm/Support/X86TargetParser.h b/include/llvm/Support/X86TargetParser.h index 99ae400cc65..5897e79eb28 100644 --- a/include/llvm/Support/X86TargetParser.h +++ b/include/llvm/Support/X86TargetParser.h @@ -53,8 +53,7 @@ enum ProcessorSubtypes : unsigned { // This should be kept in sync with libcc/compiler-rt as it should be used // by 
clang as a proxy for what's in libgcc/compiler-rt. enum ProcessorFeatures { -#define X86_FEATURE(ENUM) \ - ENUM, +#define X86_FEATURE(ENUM, STRING) FEATURE_##ENUM, #include "llvm/Support/X86TargetParser.def" CPU_FEATURE_MAX }; @@ -132,8 +131,12 @@ CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool ArchIs32Bit); +/// Get the key feature prioritizing target multiversioning. ProcessorFeatures getKeyFeature(CPUKind Kind); +/// Fill in the features that \p CPU supports into \p Features. +void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features); + } // namespace X86 } // namespace llvm diff --git a/lib/Support/X86TargetParser.cpp b/lib/Support/X86TargetParser.cpp index cc3c1778daa..2d70ec49c94 100644 --- a/lib/Support/X86TargetParser.cpp +++ b/lib/Support/X86TargetParser.cpp @@ -19,134 +19,339 @@ using namespace llvm::X86; namespace { +/// Container class for CPU features. +/// This is a constexpr reimplementation of a subset of std::bitset. It would be +/// nice to use std::bitset directly, but it doesn't support constant +/// initialization. +class FeatureBitset { + static constexpr unsigned NUM_FEATURE_WORDS = + (X86::CPU_FEATURE_MAX + 31) / 32; + + // This cannot be a std::array, operator[] is not constexpr until C++17.
+ uint32_t Bits[NUM_FEATURE_WORDS] = {}; + +public: + constexpr FeatureBitset() = default; + constexpr FeatureBitset(std::initializer_list<unsigned> Init) { + for (auto I : Init) + set(I); + } + + constexpr FeatureBitset &set(unsigned I) { + uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32)); + Bits[I / 32] = NewBits; + return *this; + } + + constexpr bool operator[](unsigned I) const { + uint32_t Mask = uint32_t(1) << (I % 32); + return (Bits[I / 32] & Mask) != 0; + } + + constexpr FeatureBitset operator&(const FeatureBitset &RHS) const { + FeatureBitset Result; + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) + Result.Bits[I] = Bits[I] & RHS.Bits[I]; + return Result; + } + + constexpr FeatureBitset operator|(const FeatureBitset &RHS) const { + FeatureBitset Result; + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) + Result.Bits[I] = Bits[I] | RHS.Bits[I]; + return Result; + } + + constexpr FeatureBitset operator~() const { + FeatureBitset Result; + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) + Result.Bits[I] = ~Bits[I]; + return Result; + } +}; + struct ProcInfo { StringLiteral Name; X86::CPUKind Kind; unsigned KeyFeature; - bool Is64Bit; + FeatureBitset Features; }; } // end anonymous namespace -#define PROC_64_BIT true -#define PROC_32_BIT false +#define X86_FEATURE(ENUM, STRING) \ + static constexpr FeatureBitset Feature##ENUM = {X86::FEATURE_##ENUM}; +#include "llvm/Support/X86TargetParser.def" + +// Pentium with MMX. +static constexpr FeatureBitset FeaturesPentiumMMX = + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX; + +// Pentium 2 and 3.
+static constexpr FeatureBitset FeaturesPentium2 = + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeatureFXSR; +static constexpr FeatureBitset FeaturesPentium3 = FeaturesPentium2 | FeatureSSE; + +// Pentium 4 CPUs +static constexpr FeatureBitset FeaturesPentium4 = + FeaturesPentium3 | FeatureSSE2; +static constexpr FeatureBitset FeaturesPrescott = + FeaturesPentium4 | FeatureSSE3; +static constexpr FeatureBitset FeaturesNocona = + FeaturesPrescott | FeatureEM64T | FeatureCMPXCHG16B; + +// Basic 64-bit capable CPU. +static constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | FeatureEM64T; + +// Intel Core CPUs +static constexpr FeatureBitset FeaturesCore2 = + FeaturesNocona | FeatureSAHF | FeatureSSSE3; +static constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1; +static constexpr FeatureBitset FeaturesNehalem = + FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2; +static constexpr FeatureBitset FeaturesWestmere = + FeaturesNehalem | FeaturePCLMUL; +static constexpr FeatureBitset FeaturesSandyBridge = + FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT; +static constexpr FeatureBitset FeaturesIvyBridge = + FeaturesSandyBridge | FeatureF16C | FeatureFSGSBASE | FeatureRDRND; +static constexpr FeatureBitset FeaturesHaswell = + FeaturesIvyBridge | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureFMA | + FeatureINVPCID | FeatureLZCNT | FeatureMOVBE; +static constexpr FeatureBitset FeaturesBroadwell = + FeaturesHaswell | FeatureADX | FeaturePRFCHW | FeatureRDSEED; + +// Intel Knights Landing and Knights Mill +// Knights Landing has feature parity with Broadwell. +static constexpr FeatureBitset FeaturesKNL = + FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD | + FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1; +static constexpr FeatureBitset FeaturesKNM = + FeaturesKNL | FeatureAVX512VPOPCNTDQ; + +// Intel Skylake processors. 
+static constexpr FeatureBitset FeaturesSkylakeClient = + FeaturesBroadwell | FeatureAES | FeatureCLFLUSHOPT | FeatureXSAVEC | + FeatureXSAVES | FeatureSGX; +// SkylakeServer inherits all SkylakeClient features except SGX. +// FIXME: That doesn't match gcc. +static constexpr FeatureBitset FeaturesSkylakeServer = + (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD | + FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB | + FeaturePKU; +static constexpr FeatureBitset FeaturesCascadeLake = + FeaturesSkylakeServer | FeatureAVX512VNNI; +static constexpr FeatureBitset FeaturesCooperLake = + FeaturesCascadeLake | FeatureAVX512BF16; + +// Intel 10nm processors. +static constexpr FeatureBitset FeaturesCannonlake = + FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | + FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | + FeaturePKU | FeatureSHA; +static constexpr FeatureBitset FeaturesICLClient = + FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 | + FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureCLWB | FeatureGFNI | + FeatureRDPID | FeatureVAES | FeatureVPCLMULQDQ; +static constexpr FeatureBitset FeaturesICLServer = + FeaturesICLClient | FeaturePCONFIG | FeatureWBNOINVD; +static constexpr FeatureBitset FeaturesTigerlake = + FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B | + FeatureMOVDIRI | FeatureSHSTK; + +// Intel Atom processors. +// Bonnell has feature parity with Core2 and adds MOVBE. +static constexpr FeatureBitset FeaturesBonnell = FeaturesCore2 | FeatureMOVBE; +// Silvermont has parity with Westmere and Bonnell plus PRFCHW and RDRND. 
+static constexpr FeatureBitset FeaturesSilvermont = + FeaturesBonnell | FeaturesWestmere | FeaturePRFCHW | FeatureRDRND; +static constexpr FeatureBitset FeaturesGoldmont = + FeaturesSilvermont | FeatureAES | FeatureCLFLUSHOPT | FeatureFSGSBASE | + FeatureRDSEED | FeatureSHA | FeatureXSAVE | FeatureXSAVEC | + FeatureXSAVEOPT | FeatureXSAVES; +static constexpr FeatureBitset FeaturesGoldmontPlus = + FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX; +static constexpr FeatureBitset FeaturesTremont = + FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI; + +// Geode Processor. +static constexpr FeatureBitset FeaturesGeode = + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA; + +// K6 processor. +static constexpr FeatureBitset FeaturesK6 = + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX; + +// K7 and K8 architecture processors. +static constexpr FeatureBitset FeaturesAthlon = + FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA; +static constexpr FeatureBitset FeaturesAthlonXP = + FeaturesAthlon | FeatureFXSR | FeatureSSE; +static constexpr FeatureBitset FeaturesK8 = + FeaturesAthlonXP | FeatureSSE2 | FeatureEM64T; +static constexpr FeatureBitset FeaturesK8SSE3 = FeaturesK8 | FeatureSSE3; +static constexpr FeatureBitset FeaturesAMDFAM10 = + FeaturesK8SSE3 | FeatureCMPXCHG16B | FeatureLZCNT | FeaturePOPCNT | + FeaturePRFCHW | FeatureSAHF | FeatureSSE4A; + +// Bobcat architecture processors. +static constexpr FeatureBitset FeaturesBTVER1 = + FeatureX87 | FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureEM64T | + FeatureFXSR | FeatureLZCNT | FeatureMMX | FeaturePOPCNT | FeaturePRFCHW | + FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4A | + FeatureSAHF; +static constexpr FeatureBitset FeaturesBTVER2 = + FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C | + FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT; + +// AMD Bulldozer architecture processors. 
+static constexpr FeatureBitset FeaturesBDVER1 = + FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B | + FeatureCMPXCHG16B | FeatureEM64T | FeatureFMA4 | FeatureFXSR | + FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL | + FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 | + FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4A | + FeatureXOP | FeatureXSAVE; +static constexpr FeatureBitset FeaturesBDVER2 = + FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM; +static constexpr FeatureBitset FeaturesBDVER3 = + FeaturesBDVER2 | FeatureFSGSBASE | FeatureXSAVEOPT; +static constexpr FeatureBitset FeaturesBDVER4 = + FeaturesBDVER3 | FeatureAVX2 | FeatureBMI2 | FeatureMOVBE | FeatureMWAITX | + FeatureRDRND; + +// AMD Zen architecture processors. Unlike Bulldozer, Zen has no LWP. +static constexpr FeatureBitset FeaturesZNVER1 = + FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 | + FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO | + FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureEM64T | FeatureF16C | + FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | + FeatureMOVBE | FeatureMMX | FeatureMWAITX | FeaturePCLMUL | + FeaturePOPCNT | FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | + FeatureSHA | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | + FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4A | FeatureXSAVE | + FeatureXSAVEC | FeatureXSAVEOPT | FeatureXSAVES; +static constexpr FeatureBitset FeaturesZNVER2 = + FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD; static constexpr ProcInfo Processors[] = { + // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility. + { {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B }, // i386-generation processors. - { {"i386"}, CK_i386, ~0U, PROC_32_BIT }, + { {"i386"}, CK_i386, ~0U, FeatureX87 }, // i486-generation processors.
- { {"i486"}, CK_i486, ~0U, PROC_32_BIT }, - { {"winchip-c6"}, CK_WinChipC6, ~0U, PROC_32_BIT }, - { {"winchip2"}, CK_WinChip2, ~0U, PROC_32_BIT }, - { {"c3"}, CK_C3, ~0U, PROC_32_BIT }, + { {"i486"}, CK_i486, ~0U, FeatureX87 }, + { {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX }, + { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW }, + { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW }, // i586-generation processors, P5 microarchitecture based. - { {"i586"}, CK_i586, ~0U, PROC_32_BIT }, - { {"pentium"}, CK_Pentium, ~0U, PROC_32_BIT }, - { {"pentium-mmx"}, CK_PentiumMMX, ~0U, PROC_32_BIT }, - { {"pentiumpro"}, CK_PentiumPro, ~0U, PROC_32_BIT }, + { {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B }, + { {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B }, + { {"pentium-mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX }, // i686-generation processors, P6 / Pentium M microarchitecture based. - { {"i686"}, CK_i686, ~0U, PROC_32_BIT }, - { {"pentium2"}, CK_Pentium2, ~0U, PROC_32_BIT }, - { {"pentium3"}, CK_Pentium3, ~0U, PROC_32_BIT }, - { {"pentium3m"}, CK_Pentium3, ~0U, PROC_32_BIT }, - { {"pentium-m"}, CK_PentiumM, ~0U, PROC_32_BIT }, - { {"c3-2"}, CK_C3_2, ~0U, PROC_32_BIT }, - { {"yonah"}, CK_Yonah, ~0U, PROC_32_BIT }, + { {"pentiumpro"}, CK_PentiumPro, ~0U, FeatureX87 | FeatureCMPXCHG8B }, + { {"i686"}, CK_i686, ~0U, FeatureX87 | FeatureCMPXCHG8B }, + { {"pentium2"}, CK_Pentium2, ~0U, FeaturesPentium2 }, + { {"pentium3"}, CK_Pentium3, ~0U, FeaturesPentium3 }, + { {"pentium3m"}, CK_Pentium3, ~0U, FeaturesPentium3 }, + { {"pentium-m"}, CK_PentiumM, ~0U, FeaturesPentium4 }, + { {"c3-2"}, CK_C3_2, ~0U, FeaturesPentium3 }, + { {"yonah"}, CK_Yonah, ~0U, FeaturesPrescott }, // Netburst microarchitecture based processors. 
- { {"pentium4"}, CK_Pentium4, ~0U, PROC_32_BIT }, - { {"pentium4m"}, CK_Pentium4, ~0U, PROC_32_BIT }, - { {"prescott"}, CK_Prescott, ~0U, PROC_32_BIT }, - { {"nocona"}, CK_Nocona, ~0U, PROC_64_BIT }, + { {"pentium4"}, CK_Pentium4, ~0U, FeaturesPentium4 }, + { {"pentium4m"}, CK_Pentium4, ~0U, FeaturesPentium4 }, + { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott }, + { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona }, // Core microarchitecture based processors. - { {"core2"}, CK_Core2, ~0U, PROC_64_BIT }, - { {"penryn"}, CK_Penryn, ~0U, PROC_64_BIT }, + { {"core2"}, CK_Core2, ~0U, FeaturesCore2 }, + { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn }, // Atom processors - { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, PROC_64_BIT }, - { {"atom"}, CK_Bonnell, FEATURE_SSSE3, PROC_64_BIT }, - { {"silvermont"}, CK_Silvermont, FEATURE_SSE4_2, PROC_64_BIT }, - { {"slm"}, CK_Silvermont, FEATURE_SSE4_2, PROC_64_BIT }, - { {"goldmont"}, CK_Goldmont, FEATURE_SSE4_2, PROC_64_BIT }, - { {"goldmont-plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, PROC_64_BIT }, - { {"tremont"}, CK_Tremont, FEATURE_SSE4_2, PROC_64_BIT }, + { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell }, + { {"atom"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell }, + { {"silvermont"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont }, + { {"slm"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont }, + { {"goldmont"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont }, + { {"goldmont-plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus }, + { {"tremont"}, CK_Tremont, FEATURE_SSE4_2, FeaturesTremont }, // Nehalem microarchitecture based processors. - { {"nehalem"}, CK_Nehalem, FEATURE_SSE4_2, PROC_64_BIT }, - { {"corei7"}, CK_Nehalem, FEATURE_SSE4_2, PROC_64_BIT }, + { {"nehalem"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem }, + { {"corei7"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem }, // Westmere microarchitecture based processors. 
- { {"westmere"}, CK_Westmere, FEATURE_PCLMUL, PROC_64_BIT }, + { {"westmere"}, CK_Westmere, FEATURE_PCLMUL, FeaturesWestmere }, // Sandy Bridge microarchitecture based processors. - { {"sandybridge"}, CK_SandyBridge, FEATURE_AVX, PROC_64_BIT }, - { {"corei7-avx"}, CK_SandyBridge, FEATURE_AVX, PROC_64_BIT }, + { {"sandybridge"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge }, + { {"corei7-avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge }, // Ivy Bridge microarchitecture based processors. - { {"ivybridge"}, CK_IvyBridge, FEATURE_AVX, PROC_64_BIT }, - { {"core-avx-i"}, CK_IvyBridge, FEATURE_AVX, PROC_64_BIT }, + { {"ivybridge"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge }, + { {"core-avx-i"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge }, // Haswell microarchitecture based processors. - { {"haswell"}, CK_Haswell, FEATURE_AVX2, PROC_64_BIT }, - { {"core-avx2"}, CK_Haswell, FEATURE_AVX2, PROC_64_BIT }, + { {"haswell"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell }, + { {"core-avx2"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell }, // Broadwell microarchitecture based processors. - { {"broadwell"}, CK_Broadwell, FEATURE_AVX2, PROC_64_BIT }, + { {"broadwell"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell }, // Skylake client microarchitecture based processors. - { {"skylake"}, CK_SkylakeClient, FEATURE_AVX2, PROC_64_BIT }, + { {"skylake"}, CK_SkylakeClient, FEATURE_AVX2, FeaturesSkylakeClient }, // Skylake server microarchitecture based processors. - { {"skylake-avx512"}, CK_SkylakeServer, FEATURE_AVX512F, PROC_64_BIT }, - { {"skx"}, CK_SkylakeServer, FEATURE_AVX512F, PROC_64_BIT }, + { {"skylake-avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer }, + { {"skx"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer }, // Cascadelake Server microarchitecture based processors. 
- { {"cascadelake"}, CK_Cascadelake, FEATURE_AVX512VNNI, PROC_64_BIT }, + { {"cascadelake"}, CK_Cascadelake, FEATURE_AVX512VNNI, FeaturesCascadeLake }, // Cooperlake Server microarchitecture based processors. - { {"cooperlake"}, CK_Cooperlake, FEATURE_AVX512BF16, PROC_64_BIT }, + { {"cooperlake"}, CK_Cooperlake, FEATURE_AVX512BF16, FeaturesCooperLake }, // Cannonlake client microarchitecture based processors. - { {"cannonlake"}, CK_Cannonlake, FEATURE_AVX512VBMI, PROC_64_BIT }, + { {"cannonlake"}, CK_Cannonlake, FEATURE_AVX512VBMI, FeaturesCannonlake }, // Icelake client microarchitecture based processors. - { {"icelake-client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, PROC_64_BIT }, + { {"icelake-client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient }, // Icelake server microarchitecture based processors. - { {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, PROC_64_BIT }, + { {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer }, // Tigerlake microarchitecture based processors. - { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, PROC_64_BIT }, + { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake }, // Knights Landing processor. - { {"knl"}, CK_KNL, FEATURE_AVX512F, PROC_64_BIT }, + { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL }, // Knights Mill processor. - { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, PROC_64_BIT }, + { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM }, // Lakemont microarchitecture based processors. - { {"lakemont"}, CK_Lakemont, ~0U, PROC_32_BIT }, + { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B }, // K6 architecture processors. - { {"k6"}, CK_K6, ~0U, PROC_32_BIT }, - { {"k6-2"}, CK_K6_2, ~0U, PROC_32_BIT }, - { {"k6-3"}, CK_K6_3, ~0U, PROC_32_BIT }, + { {"k6"}, CK_K6, ~0U, FeaturesK6 }, + { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW }, + { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW }, // K7 architecture processors. 
- { {"athlon"}, CK_Athlon, ~0U, PROC_32_BIT }, - { {"athlon-tbird"}, CK_Athlon, ~0U, PROC_32_BIT }, - { {"athlon-xp"}, CK_AthlonXP, ~0U, PROC_32_BIT }, - { {"athlon-mp"}, CK_AthlonXP, ~0U, PROC_32_BIT }, - { {"athlon-4"}, CK_AthlonXP, ~0U, PROC_32_BIT }, + { {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon }, + { {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon }, + { {"athlon-xp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP }, + { {"athlon-mp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP }, + { {"athlon-4"}, CK_AthlonXP, ~0U, FeaturesAthlonXP }, // K8 architecture processors. - { {"k8"}, CK_K8, ~0U, PROC_64_BIT }, - { {"athlon64"}, CK_K8, ~0U, PROC_64_BIT }, - { {"athlon-fx"}, CK_K8, ~0U, PROC_64_BIT }, - { {"opteron"}, CK_K8, ~0U, PROC_64_BIT }, - { {"k8-sse3"}, CK_K8SSE3, ~0U, PROC_64_BIT }, - { {"athlon64-sse3"}, CK_K8SSE3, ~0U, PROC_64_BIT }, - { {"opteron-sse3"}, CK_K8SSE3, ~0U, PROC_64_BIT }, - { {"amdfam10"}, CK_AMDFAM10, FEATURE_SSE4_A, PROC_64_BIT }, - { {"barcelona"}, CK_AMDFAM10, FEATURE_SSE4_A, PROC_64_BIT }, + { {"k8"}, CK_K8, ~0U, FeaturesK8 }, + { {"athlon64"}, CK_K8, ~0U, FeaturesK8 }, + { {"athlon-fx"}, CK_K8, ~0U, FeaturesK8 }, + { {"opteron"}, CK_K8, ~0U, FeaturesK8 }, + { {"k8-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 }, + { {"athlon64-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 }, + { {"opteron-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 }, + { {"amdfam10"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10 }, + { {"barcelona"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10 }, // Bobcat architecture processors. - { {"btver1"}, CK_BTVER1, FEATURE_SSE4_A, PROC_64_BIT }, - { {"btver2"}, CK_BTVER2, FEATURE_BMI, PROC_64_BIT }, + { {"btver1"}, CK_BTVER1, FEATURE_SSE4_A, FeaturesBTVER1 }, + { {"btver2"}, CK_BTVER2, FEATURE_BMI, FeaturesBTVER2 }, // Bulldozer architecture processors. 
- { {"bdver1"}, CK_BDVER1, FEATURE_XOP, PROC_64_BIT }, - { {"bdver2"}, CK_BDVER2, FEATURE_FMA, PROC_64_BIT }, - { {"bdver3"}, CK_BDVER3, FEATURE_FMA, PROC_64_BIT }, - { {"bdver4"}, CK_BDVER4, FEATURE_AVX2, PROC_64_BIT }, + { {"bdver1"}, CK_BDVER1, FEATURE_XOP, FeaturesBDVER1 }, + { {"bdver2"}, CK_BDVER2, FEATURE_FMA, FeaturesBDVER2 }, + { {"bdver3"}, CK_BDVER3, FEATURE_FMA, FeaturesBDVER3 }, + { {"bdver4"}, CK_BDVER4, FEATURE_AVX2, FeaturesBDVER4 }, // Zen architecture processors. - { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, PROC_64_BIT }, - { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, PROC_64_BIT }, + { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 }, + { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 }, // Generic 64-bit processor. - { {"x86-64"}, CK_x86_64, ~0U, PROC_64_BIT }, + { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 }, // Geode processors. - { {"geode"}, CK_Geode, ~0U, PROC_32_BIT }, + { {"geode"}, CK_Geode, ~0U, FeaturesGeode }, }; X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { for (const auto &P : Processors) - if (P.Name == CPU && (P.Is64Bit || !Only64Bit)) + if (P.Name == CPU && (P.Features[FEATURE_EM64T] || !Only64Bit)) return P.Kind; return CK_None; @@ -155,7 +360,7 @@ X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool Only64Bit) { for (const auto &P : Processors) - if (P.Is64Bit || !Only64Bit) + if (!P.Name.empty() && (P.Features[FEATURE_EM64T] || !Only64Bit)) Values.emplace_back(P.Name); } @@ -171,3 +376,20 @@ ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) { llvm_unreachable("Unable to find CPU kind!"); } + +static const char *FeatureStrings[X86::CPU_FEATURE_MAX] = { +#define X86_FEATURE(ENUM, STR) STR, +#include "llvm/Support/X86TargetParser.def" +}; + +void llvm::X86::getFeaturesForCPU(StringRef CPU, + SmallVectorImpl<StringRef> &Features) { + auto I = llvm::find_if(Processors, + [&](const ProcInfo &P) { return P.Name == CPU; }); +
assert(I != std::end(Processors) && "Processor not found!"); + + // Add the string version of all set bits. + for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) + if (FeatureStrings[i] && I->Features[i]) + Features.push_back(FeatureStrings[i]); +}